Auto merge of #108268 - matthiaskrgr:rollup-4tdvnx6, r=matthiaskrgr

Rollup of 5 pull requests

Successful merges:

 - #108124 (Document that CStr::as_ptr returns a type alias)
 - #108171 (Improve building compiler artifacts output)
 - #108200 (Use restricted Damerau-Levenshtein distance for diagnostics)
 - #108259 (remove FIXME that doesn't require fixing)
 - #108265 ("`const` generic" -> "const parameter")

Failed merges:

r? `@ghost`
`@rustbot` modify labels: rollup
This commit is contained in:
bors 2023-02-20 16:19:06 +00:00
commit 8973049549
27 changed files with 506 additions and 325 deletions

View File

@ -13,7 +13,7 @@ use rustc_hir::def_id::{LocalDefId, CRATE_DEF_ID};
use rustc_hir::PredicateOrigin;
use rustc_index::vec::{Idx, IndexVec};
use rustc_middle::ty::{DefIdTree, ResolverAstLowering, TyCtxt};
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::source_map::DesugaringKind;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{Span, Symbol};

View File

@ -7,7 +7,7 @@ use rustc_hir::def_id::DefId;
use rustc_infer::traits::FulfillmentError;
use rustc_middle::ty::{self, Ty};
use rustc_session::parse::feature_err;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::symbol::{sym, Ident};
use rustc_span::{Span, Symbol, DUMMY_SP};

View File

@ -6,7 +6,7 @@ use crate::astconv::{
use crate::errors::AssocTypeBindingNotAllowed;
use crate::structured_errors::{GenericArgsInfo, StructuredDiagnostic, WrongNumberOfGenericArgs};
use rustc_ast::ast::ParamKindOrd;
use rustc_errors::{struct_span_err, Applicability, Diagnostic, MultiSpan};
use rustc_errors::{struct_span_err, Applicability, Diagnostic, ErrorGuaranteed, MultiSpan};
use rustc_hir as hir;
use rustc_hir::def::{DefKind, Res};
use rustc_hir::def_id::DefId;
@ -26,7 +26,7 @@ fn generic_arg_mismatch_err(
param: &GenericParamDef,
possible_ordering_error: bool,
help: Option<&str>,
) {
) -> ErrorGuaranteed {
let sess = tcx.sess;
let mut err = struct_span_err!(
sess,
@ -70,9 +70,9 @@ fn generic_arg_mismatch_err(
) => match path.res {
Res::Err => {
add_braces_suggestion(arg, &mut err);
err.set_primary_message("unresolved item provided when a constant was expected")
return err
.set_primary_message("unresolved item provided when a constant was expected")
.emit();
return;
}
Res::Def(DefKind::TyParam, src_def_id) => {
if let Some(param_local_id) = param.def_id.as_local() {
@ -81,7 +81,7 @@ fn generic_arg_mismatch_err(
if param_type.is_suggestable(tcx, false) {
err.span_suggestion(
tcx.def_span(src_def_id),
"consider changing this type parameter to be a `const` generic",
"consider changing this type parameter to a const parameter",
format!("const {}: {}", param_name, param_type),
Applicability::MaybeIncorrect,
);
@ -137,7 +137,7 @@ fn generic_arg_mismatch_err(
}
}
err.emit();
err.emit()
}
/// Creates the relevant generic argument substitutions

View File

@ -37,8 +37,8 @@ use rustc_middle::ty::DynKind;
use rustc_middle::ty::GenericParamDefKind;
use rustc_middle::ty::{self, Const, DefIdTree, IsSuggestable, Ty, TyCtxt, TypeVisitable};
use rustc_session::lint::builtin::{AMBIGUOUS_ASSOCIATED_ITEMS, BARE_TRAIT_OBJECTS};
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::edition::Edition;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, Ident, Symbol};
use rustc_span::{sym, Span, DUMMY_SP};
use rustc_target::spec::abi;

View File

@ -45,8 +45,8 @@ use rustc_middle::ty::subst::SubstsRef;
use rustc_middle::ty::{self, AdtKind, Ty, TypeVisitable};
use rustc_session::errors::ExprParenthesesNeeded;
use rustc_session::parse::feature_err;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::hygiene::DesugaringKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::{Span, Spanned};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_target::spec::abi::Abi::RustIntrinsic;

View File

@ -24,8 +24,8 @@ use rustc_middle::ty::{InternalSubsts, SubstsRef};
use rustc_session::lint;
use rustc_span::def_id::DefId;
use rustc_span::def_id::LocalDefId;
use rustc_span::lev_distance::{
find_best_match_for_name_with_substrings, lev_distance_with_substrings,
use rustc_span::edit_distance::{
edit_distance_with_substrings, find_best_match_for_name_with_substrings,
};
use rustc_span::symbol::sym;
use rustc_span::{symbol::Ident, Span, Symbol, DUMMY_SP};
@ -69,7 +69,7 @@ struct ProbeContext<'a, 'tcx> {
impl_dups: FxHashSet<DefId>,
/// When probing for names, include names that are close to the
/// requested name (by Levenshtein distance)
/// requested name (by edit distance)
allow_similar_names: bool,
/// Some(candidate) if there is a private candidate
@ -1793,7 +1793,7 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> {
/// Similarly to `probe_for_return_type`, this method attempts to find the best matching
/// candidate method where the method name may have been misspelled. Similarly to other
/// Levenshtein based suggestions, we provide at most one such suggestion.
/// edit distance based suggestions, we provide at most one such suggestion.
fn probe_for_similar_candidate(&mut self) -> Result<Option<ty::AssocItem>, MethodError<'tcx>> {
debug!("probing for method names similar to {:?}", self.method_name);
@ -2024,8 +2024,11 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> {
if self.matches_by_doc_alias(x.def_id) {
return true;
}
match lev_distance_with_substrings(name.as_str(), x.name.as_str(), max_dist)
{
match edit_distance_with_substrings(
name.as_str(),
x.name.as_str(),
max_dist,
) {
Some(d) => d > 0,
None => false,
}

View File

@ -31,7 +31,7 @@ use rustc_middle::ty::{self, DefIdTree, GenericArgKind, Ty, TyCtxt, TypeVisitabl
use rustc_middle::ty::{IsSuggestable, ToPolyTraitRef};
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::Symbol;
use rustc_span::{lev_distance, source_map, ExpnKind, FileName, MacroKind, Span};
use rustc_span::{edit_distance, source_map, ExpnKind, FileName, MacroKind, Span};
use rustc_trait_selection::traits::error_reporting::on_unimplemented::OnUnimplementedNote;
use rustc_trait_selection::traits::error_reporting::on_unimplemented::TypeErrCtxtExt as _;
use rustc_trait_selection::traits::query::evaluate_obligation::InferCtxtExt as _;
@ -1014,7 +1014,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
// that had unsatisfied trait bounds
if unsatisfied_predicates.is_empty() && rcvr_ty.is_enum() {
let adt_def = rcvr_ty.ty_adt_def().expect("enum is not an ADT");
if let Some(suggestion) = lev_distance::find_best_match_for_name(
if let Some(suggestion) = edit_distance::find_best_match_for_name(
&adt_def.variants().iter().map(|s| s.name).collect::<Vec<_>>(),
item_name.name,
None,

View File

@ -14,8 +14,8 @@ use rustc_infer::infer::type_variable::{TypeVariableOrigin, TypeVariableOriginKi
use rustc_middle::middle::stability::EvalResult;
use rustc_middle::ty::{self, Adt, BindingMode, Ty, TypeVisitable};
use rustc_session::lint::builtin::NON_EXHAUSTIVE_OMITTED_PATTERNS;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::hygiene::DesugaringKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::{Span, Spanned};
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{BytePos, DUMMY_SP};

View File

@ -14,8 +14,8 @@ use rustc_session::filesearch::sysroot_candidates;
use rustc_session::lint::{self, BuiltinLintDiagnostics, LintBuffer};
use rustc_session::parse::CrateConfig;
use rustc_session::{early_error, filesearch, output, Session};
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::edition::Edition;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::FileLoader;
use rustc_span::symbol::{sym, Symbol};
use session::CompilerIO;

View File

@ -39,7 +39,7 @@ use rustc_middle::ty::{self, print::Printer, subst::GenericArg, RegisteredTools,
use rustc_session::lint::{BuiltinLintDiagnostics, LintExpectationId};
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
use rustc_session::Session;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::symbol::{sym, Ident, Symbol};
use rustc_span::{BytePos, Span};
use rustc_target::abi;

View File

@ -19,8 +19,8 @@ use rustc_errors::{
struct_span_err, Applicability, DiagnosticBuilder, ErrorGuaranteed, IntoDiagnostic, PResult,
StashKey,
};
use rustc_span::edit_distance::edit_distance;
use rustc_span::edition::Edition;
use rustc_span::lev_distance::lev_distance;
use rustc_span::source_map::{self, Span};
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::DUMMY_SP;
@ -459,7 +459,8 @@ impl<'a> Parser<'a> {
// Maybe the user misspelled `macro_rules` (issue #91227)
if self.token.is_ident()
&& path.segments.len() == 1
&& lev_distance("macro_rules", &path.segments[0].ident.to_string(), 3).is_some()
&& edit_distance("macro_rules", &path.segments[0].ident.to_string(), 2)
.is_some()
{
err.span_suggestion(
path.span,

View File

@ -21,9 +21,9 @@ use rustc_session::lint::builtin::ABSOLUTE_PATHS_NOT_STARTING_WITH_CRATE;
use rustc_session::lint::builtin::MACRO_EXPANDED_MACRO_EXPORTS_ACCESSED_BY_ABSOLUTE_PATHS;
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::Session;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::edition::Edition;
use rustc_span::hygiene::MacroKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::SourceMap;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, Span, SyntaxContext};

View File

@ -21,8 +21,8 @@ use rustc_middle::span_bug;
use rustc_middle::ty;
use rustc_session::lint::builtin::{PUB_USE_OF_PRIVATE_EXTERN_CRATE, UNUSED_IMPORTS};
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::hygiene::LocalExpnId;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, Ident, Symbol};
use rustc_span::Span;

View File

@ -4135,9 +4135,9 @@ impl<'a: 'ast, 'b, 'ast, 'tcx> LateResolutionVisitor<'a, 'b, 'ast, 'tcx> {
fn record_candidate_traits_for_expr_if_necessary(&mut self, expr: &'ast Expr) {
match expr.kind {
ExprKind::Field(_, ident) => {
// FIXME(#6890): Even though you can't treat a method like a
// field, we need to add any trait methods we find that match
// the field name so that we can do some nice error reporting
// #6890: Even though you can't treat a method like a field,
// we need to add any trait methods we find that match the
// field name so that we can do some nice error reporting
// later on in typeck.
let traits = self.traits_in_scope(ident, ValueNS);
self.r.trait_map.insert(expr.id, traits);

View File

@ -25,9 +25,9 @@ use rustc_middle::ty::DefIdTree;
use rustc_session::lint;
use rustc_session::parse::feature_err;
use rustc_session::Session;
use rustc_span::edit_distance::find_best_match_for_name;
use rustc_span::edition::Edition;
use rustc_span::hygiene::MacroKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, Span};
@ -542,7 +542,7 @@ impl<'a: 'ast, 'ast, 'tcx> LateResolutionVisitor<'a, '_, 'ast, 'tcx> {
}
}
// Try Levenshtein algorithm.
// Try finding a suitable replacement.
let typo_sugg =
self.lookup_typo_candidate(path, source.namespace(), is_expected).to_opt_suggestion();
if path.len() == 1 && self.self_type_is_available() {
@ -770,7 +770,7 @@ impl<'a: 'ast, 'ast, 'tcx> LateResolutionVisitor<'a, '_, 'ast, 'tcx> {
_ => {}
}
// If the trait has a single item (which wasn't matched by Levenshtein), suggest it
// If the trait has a single item (which wasn't matched by the algorithm), suggest it
let suggestion = self.get_single_associated_item(&path, &source, is_expected);
if !self.r.add_typo_suggestion(err, suggestion, ident_span) {
fallback = !self.let_binding_suggestion(err, ident_span);

View File

@ -0,0 +1,229 @@
//! Edit distances.
//!
//! The [edit distance] is a metric for measuring the difference between two strings.
//!
//! [edit distance]: https://en.wikipedia.org/wiki/Edit_distance
// The current implementation is the restricted Damerau-Levenshtein algorithm. It is restricted
// because it does not permit modifying characters that have already been transposed. The specific
// algorithm should not matter to the caller of the methods, which is why it is not noted in the
// documentation.
use crate::symbol::Symbol;
use std::{cmp, mem};
#[cfg(test)]
mod tests;
/// Computes the [edit distance] between `a` and `b`, counted in `char`s.
///
/// Returns `None` if the distance is greater than `limit`.
///
/// [edit distance]: https://en.wikipedia.org/wiki/Edit_distance
pub fn edit_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
    let a_chars: Vec<char> = a.chars().collect();
    let b_chars: Vec<char> = b.chars().collect();

    // Keep the shorter input in `short`: the scratch rows below are sized by it.
    let (mut long, mut short) = (&a_chars[..], &b_chars[..]);
    if long.len() < short.len() {
        mem::swap(&mut long, &mut short);
    }

    // The distance can never be smaller than the difference in length.
    let len_gap = long.len() - short.len();
    if len_gap > limit {
        return None;
    }

    // Drop the leading characters both inputs share.
    let shared_prefix = long.iter().zip(short.iter()).take_while(|(l, s)| l == s).count();
    long = &long[shared_prefix..];
    short = &short[shared_prefix..];

    // Drop the trailing characters both inputs share.
    let shared_suffix =
        long.iter().rev().zip(short.iter().rev()).take_while(|(l, s)| l == s).count();
    long = &long[..long.len() - shared_suffix];
    short = &short[..short.len() - shared_suffix];

    // With nothing left of the shorter input, only the surplus of the longer one remains.
    if short.is_empty() {
        return Some(len_gap);
    }

    let width = short.len();
    // Rows `r - 2`, `r - 1` and `r` of the distance table.
    let mut two_back = vec![usize::MAX; width + 1];
    let mut one_back: Vec<usize> = (0..=width).collect();
    let mut row = vec![0; width + 1];

    for (r, &long_ch) in long.iter().enumerate() {
        row[0] = r + 1;

        for (c, &short_ch) in short.iter().enumerate() {
            let deletion = one_back[c + 1] + 1;
            let insertion = row[c] + 1;
            // Replacing a character with itself is free.
            let substitution = one_back[c] + usize::from(long_ch != short_ch);
            let mut best = cmp::min(deletion, cmp::min(insertion, substitution));

            // Two adjacent characters that appear swapped count as a single edit.
            if r > 0 && c > 0 && long_ch == short[c - 1] && long[r - 1] == short_ch {
                best = cmp::min(best, two_back[c - 1] + 1);
            }

            row[c + 1] = best;
        }

        // Shift every row back by one; the oldest buffer becomes the next scratch row.
        mem::swap(&mut two_back, &mut one_back);
        mem::swap(&mut one_back, &mut row);
    }

    // After the final shift the last completed row lives in `one_back`.
    let distance = one_back[width];
    (distance <= limit).then_some(distance)
}
/// Provides a word similarity score between two words that accounts for substrings being more
/// meaningful than a typical edit distance. The lower the score, the closer the match. 0 is an
/// identical match.
///
/// Uses the edit distance between the two strings and removes the cost of the length difference.
/// If this is 0 then it is either a substring match or a full word match, in the substring match
/// case we detect this and return `1`. To prevent finding meaningless substrings, eg. "in" in
/// "shrink", we only perform this subtraction of length difference if one of the words is not
/// greater than twice the length of the other. For cases where the words are close in size but not
/// an exact substring then the cost of the length difference is discounted by half.
///
/// Returns `None` if the distance exceeds the limit. Any `limit` is accepted, including
/// `usize::MAX` to mean "no limit".
pub fn edit_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
    let n = a.chars().count();
    let m = b.chars().count();

    // Check one isn't less than half the length of the other. If this is true then there is a
    // big difference in length.
    let big_len_diff = (n * 2) < m || (m * 2) < n;
    let len_diff = n.abs_diff(m);
    // The raw distance includes the length difference, so the limit is widened by it. Saturate
    // instead of overflowing when the caller passes a limit close to `usize::MAX`.
    let distance = edit_distance(a, b, limit.saturating_add(len_diff))?;

    // This is the crux, subtracting length difference means exact substring matches will now be 0
    let score = distance - len_diff;

    // If the score is 0 but the words have different lengths then it's a substring match not a full
    // word match
    let score = if score == 0 && len_diff > 0 && !big_len_diff {
        1 // Exact substring match, but not a total word match so return non-zero
    } else if !big_len_diff {
        // Not a big difference in length, discount cost of length difference
        score + (len_diff + 1) / 2
    } else {
        // A big difference in length, add back the difference in length to the score
        score + len_diff
    };

    (score <= limit).then_some(score)
}
/// Finds the best match for the given word among `candidates`, where substrings are meaningful.
///
/// A version of [`find_best_match_for_name`] that uses [`edit_distance_with_substrings`] as the
/// score for word similarity. `dist` is an optional limit on that score; when it is `None` the
/// limit defaults to one-third of the byte length of `lookup`, but never less than 1.
///
/// A candidate that compares equal to `lookup` after uppercasing both is preferred over any
/// scored match. If no candidate is within the limit, a candidate consisting of the same
/// `_`-separated words in a different order is returned instead.
pub fn find_best_match_for_name_with_substrings(
candidates: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol> {
find_best_match_for_name_impl(true, candidates, lookup, dist)
}
/// Finds the best match for a given word among `candidates`.
///
/// As a loose rule to avoid the obviously incorrect suggestions, it takes
/// an optional limit for the maximum allowable edit distance. When `dist` is
/// `None`, the limit defaults to one-third of the byte length of `lookup`,
/// but never less than 1.
///
/// A candidate that compares equal to `lookup` after uppercasing both is
/// preferred over any edit distance match. If no candidate is within the
/// limit, a candidate consisting of the same `_`-separated words in a
/// different order is returned instead.
pub fn find_best_match_for_name(
candidates: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol> {
find_best_match_for_name_impl(false, candidates, lookup, dist)
}
#[cold]
fn find_best_match_for_name_impl(
use_substring_score: bool,
candidates: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol> {
let lookup = lookup.as_str();
let lookup_uppercase = lookup.to_uppercase();
// Priority of matches:
// 1. Exact case insensitive match
// 2. Edit distance match
// 3. Sorted word match
if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) {
return Some(*c);
}
let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
let mut best = None;
for c in candidates {
match if use_substring_score {
edit_distance_with_substrings(lookup, c.as_str(), dist)
} else {
edit_distance(lookup, c.as_str(), dist)
} {
Some(0) => return Some(*c),
Some(d) => {
dist = d - 1;
best = Some(*c);
}
None => {}
}
}
if best.is_some() {
return best;
}
find_match_by_sorted_words(candidates, lookup)
}
/// Returns the last entry of `iter_names` that consists of the same `_`-separated words as
/// `lookup`, in any order, or `None` if there is no such entry.
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
    // The normalized form of `lookup` is the same for every candidate, so build it only once.
    let lookup_sorted_by_words = sort_by_words(lookup);
    // Search from the back: the last matching candidate wins and the scan stops as soon as it is
    // found.
    iter_names
        .iter()
        .rfind(|candidate| sort_by_words(candidate.as_str()) == lookup_sorted_by_words)
        .copied()
}
/// Splits `name` at every `_`, sorts the resulting pieces and glues them back together with `_`.
fn sort_by_words(name: &str) -> String {
    let mut pieces: Vec<&str> = Vec::new();
    pieces.extend(name.split('_'));
    // Equal string slices are interchangeable, so an unstable sort gives the same result.
    pieces.sort_unstable();

    let mut joined = String::with_capacity(name.len());
    for (position, piece) in pieces.iter().enumerate() {
        if position > 0 {
            joined.push('_');
        }
        joined.push_str(piece);
    }
    joined
}

View File

@ -0,0 +1,80 @@
use super::*;
#[test]
fn test_edit_distance() {
    // Test bytelength agnosticity: every `char`, whatever its encoded length, is at distance 0
    // from itself. The range is inclusive so that `char::MAX` itself is exercised too.
    for c in ('\0'..=char::MAX).map(|c| c.to_string()) {
        assert_eq!(edit_distance(&c[..], &c[..], usize::MAX), Some(0));
    }

    // `a` and `b` differ in one character; `c` is `b` without its leading newline.
    let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
    let c = "Mary häd ä little lämb\n\nLittle lämb\n";
    // Each pair is checked in both argument orders.
    assert_eq!(edit_distance(a, b, usize::MAX), Some(1));
    assert_eq!(edit_distance(b, a, usize::MAX), Some(1));
    assert_eq!(edit_distance(a, c, usize::MAX), Some(2));
    assert_eq!(edit_distance(c, a, usize::MAX), Some(2));
    assert_eq!(edit_distance(b, c, usize::MAX), Some(1));
    assert_eq!(edit_distance(c, b, usize::MAX), Some(1));
}
#[test]
fn test_edit_distance_limit() {
    // (a, b, limit, expected): each pair is checked at its exact distance and one below it.
    let cases: [(&str, &str, usize, Option<usize>); 4] = [
        ("abc", "abcd", 1, Some(1)),
        ("abc", "abcd", 0, None),
        ("abc", "xyz", 3, Some(3)),
        ("abc", "xyz", 2, None),
    ];
    for &(a, b, limit, expected) in &cases {
        assert_eq!(edit_distance(a, b, limit), expected, "a = {a:?}, b = {b:?}, limit = {limit}");
    }
}
#[test]
fn test_method_name_similarity_score() {
    // (a, b, limit, expected score)
    let cases: [(&str, &str, usize, Option<usize>); 7] = [
        ("empty", "is_empty", 1, Some(1)),
        ("shrunk", "rchunks", 2, None),
        ("abc", "abcd", 1, Some(1)),
        ("a", "abcd", 1, None),
        ("edf", "eq", 1, None),
        ("abc", "xyz", 3, Some(3)),
        ("abcdef", "abcdef", 2, Some(0)),
    ];
    for &(a, b, limit, expected) in &cases {
        assert_eq!(
            edit_distance_with_substrings(a, b, limit),
            expected,
            "a = {a:?}, b = {b:?}, limit = {limit}"
        );
    }
}
#[test]
fn test_find_best_match_for_name() {
    use crate::create_default_session_globals_then;
    create_default_session_globals_then(|| {
        let sym = |text: &str| Symbol::intern(text);
        let best = |candidates: &[Symbol], lookup: &str, dist: Option<usize>| {
            find_best_match_for_name(candidates, sym(lookup), dist)
        };

        // The closer of two near misses is picked; a lookup unlike any candidate finds nothing.
        let near_misses = [sym("aaab"), sym("aaabc")];
        assert_eq!(best(&near_misses[..], "aaaa", None), Some(sym("aaab")));
        assert_eq!(best(&near_misses[..], "1111111111", None), None);

        // A candidate differing only in case matches, with or without an explicit limit.
        let upper = [sym("AAAA")];
        assert_eq!(best(&upper[..], "aaaa", None), Some(sym("AAAA")));
        assert_eq!(best(&upper[..], "aaaa", Some(4)), Some(sym("AAAA")));

        // The same `_`-separated words in a different order still match.
        let reordered = [sym("a_longer_variable_name")];
        assert_eq!(
            best(&reordered[..], "a_variable_longer_name", None),
            Some(sym("a_longer_variable_name"))
        );
    })
}
#[test]
fn test_precise_algorithm() {
    let unlimited = usize::MAX;

    // Not Levenshtein distance.
    let swapped = edit_distance("ab", "ba", unlimited);
    assert_ne!(swapped, Some(2));

    let scrambled = edit_distance("abde", "bcaed", unlimited);
    // Not unrestricted Damerau-Levenshtein distance.
    assert_ne!(scrambled, Some(3));
    // The current implementation is a restricted Damerau-Levenshtein distance.
    assert_eq!(scrambled, Some(4));
}

View File

@ -1,177 +0,0 @@
//! Levenshtein distances.
//!
//! The [Levenshtein distance] is a metric for measuring the difference between two strings.
//!
//! [Levenshtein distance]: https://en.wikipedia.org/wiki/Levenshtein_distance
use crate::symbol::Symbol;
use std::cmp;
#[cfg(test)]
mod tests;
/// Computes the Levenshtein distance between `a` and `b`, counted in `char`s.
///
/// Returns `None` if the distance is greater than `limit`.
pub fn lev_distance(a: &str, b: &str, limit: usize) -> Option<usize> {
    let a_len = a.chars().count();
    let b_len = b.chars().count();

    // The distance can never be smaller than the difference in length.
    let len_gap = cmp::max(a_len, b_len) - cmp::min(a_len, b_len);
    if len_gap > limit {
        return None;
    }
    // Against an empty string the distance is exactly the length of the other one, which is
    // already known to be within the limit.
    if a_len == 0 || b_len == 0 {
        return Some(len_gap);
    }

    // A single row of the distance table, updated in place for each character of `a`.
    let mut row: Vec<usize> = (0..=b_len).collect();

    for (i, a_ch) in a.chars().enumerate() {
        // Value of the cell diagonally up-left of the one being computed.
        let mut diagonal = i;
        row[0] = i + 1;

        for (j, b_ch) in b.chars().enumerate() {
            // Value of the cell directly above, before it gets overwritten.
            let above = row[j + 1];
            row[j + 1] = if a_ch == b_ch {
                diagonal
            } else {
                cmp::min(cmp::min(diagonal, above), row[j]) + 1
            };
            diagonal = above;
        }
    }

    let distance = row[b_len];
    (distance <= limit).then_some(distance)
}
/// Provides a word similarity score between two words that accounts for substrings being more
/// meaningful than a typical Levenshtein distance. The lower the score, the closer the match.
/// 0 is an identical match.
///
/// Uses the Levenshtein distance between the two strings and removes the cost of the length
/// difference. If this is 0 then it is either a substring match or a full word match, in the
/// substring match case we detect this and return `1`. To prevent finding meaningless substrings,
/// eg. "in" in "shrink", we only perform this subtraction of length difference if one of the words
/// is not greater than twice the length of the other. For cases where the words are close in size
/// but not an exact substring then the cost of the length difference is discounted by half.
///
/// Returns `None` if the distance exceeds the limit. Any `limit` is accepted, including
/// `usize::MAX` to mean "no limit".
pub fn lev_distance_with_substrings(a: &str, b: &str, limit: usize) -> Option<usize> {
    let n = a.chars().count();
    let m = b.chars().count();

    // Check one isn't less than half the length of the other. If this is true then there is a
    // big difference in length.
    let big_len_diff = (n * 2) < m || (m * 2) < n;
    let len_diff = n.abs_diff(m);
    // The raw distance includes the length difference, so the limit is widened by it. Saturate
    // instead of overflowing when the caller passes a limit close to `usize::MAX`.
    let lev = lev_distance(a, b, limit.saturating_add(len_diff))?;

    // This is the crux, subtracting length difference means exact substring matches will now be 0
    let score = lev - len_diff;

    // If the score is 0 but the words have different lengths then it's a substring match not a full
    // word match
    let score = if score == 0 && len_diff > 0 && !big_len_diff {
        1 // Exact substring match, but not a total word match so return non-zero
    } else if !big_len_diff {
        // Not a big difference in length, discount cost of length difference
        score + (len_diff + 1) / 2
    } else {
        // A big difference in length, add back the difference in length to the score
        score + len_diff
    };

    (score <= limit).then_some(score)
}
/// Finds the best match for the given word among `candidates`, where substrings are meaningful.
///
/// A version of [`find_best_match_for_name`] that uses [`lev_distance_with_substrings`] as the score
/// for word similarity. `dist` is an optional limit on that score; when it is `None` the limit
/// defaults to one-third of the byte length of `lookup`, but never less than 1.
///
/// Besides the modified Levenshtein, a candidate that compares equal to `lookup` after uppercasing
/// both is preferred over any scored match. If no candidate is within the limit, a candidate
/// consisting of the same `_`-separated words in a different order is returned instead.
pub fn find_best_match_for_name_with_substrings(
candidates: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol> {
find_best_match_for_name_impl(true, candidates, lookup, dist)
}
/// Finds the best match for a given word among `candidates`.
///
/// As a loose rule to avoid the obviously incorrect suggestions, it takes
/// an optional limit for the maximum allowable edit distance. When `dist` is
/// `None`, the limit defaults to one-third of the byte length of `lookup`,
/// but never less than 1.
///
/// Besides Levenshtein, a candidate that compares equal to `lookup` after
/// uppercasing both is preferred over any distance match. If no candidate is
/// within the limit, a candidate consisting of the same `_`-separated words
/// in a different order is returned instead.
pub fn find_best_match_for_name(
candidates: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol> {
find_best_match_for_name_impl(false, candidates, lookup, dist)
}
#[cold]
fn find_best_match_for_name_impl(
use_substring_score: bool,
candidates: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol> {
let lookup = lookup.as_str();
let lookup_uppercase = lookup.to_uppercase();
// Priority of matches:
// 1. Exact case insensitive match
// 2. Levenshtein distance match
// 3. Sorted word match
if let Some(c) = candidates.iter().find(|c| c.as_str().to_uppercase() == lookup_uppercase) {
return Some(*c);
}
let mut dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
let mut best = None;
for c in candidates {
match if use_substring_score {
lev_distance_with_substrings(lookup, c.as_str(), dist)
} else {
lev_distance(lookup, c.as_str(), dist)
} {
Some(0) => return Some(*c),
Some(d) => {
dist = d - 1;
best = Some(*c);
}
None => {}
}
}
if best.is_some() {
return best;
}
find_match_by_sorted_words(candidates, lookup)
}
/// Returns the last entry of `iter_names` that consists of the same `_`-separated words as
/// `lookup`, in any order, or `None` if there is no such entry.
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
    // The normalized form of `lookup` is the same for every candidate, so build it only once.
    let lookup_sorted_by_words = sort_by_words(lookup);
    // Search from the back: the last matching candidate wins and the scan stops as soon as it is
    // found.
    iter_names
        .iter()
        .rfind(|candidate| sort_by_words(candidate.as_str()) == lookup_sorted_by_words)
        .copied()
}
/// Splits `name` at every `_`, sorts the resulting pieces and glues them back together with `_`.
fn sort_by_words(name: &str) -> String {
    let mut pieces: Vec<&str> = Vec::new();
    pieces.extend(name.split('_'));
    // Equal string slices are interchangeable, so an unstable sort gives the same result.
    pieces.sort_unstable();

    let mut joined = String::with_capacity(name.len());
    for (position, piece) in pieces.iter().enumerate() {
        if position > 0 {
            joined.push('_');
        }
        joined.push_str(piece);
    }
    joined
}

View File

@ -1,70 +0,0 @@
use super::*;
#[test]
fn test_lev_distance() {
    // Test bytelength agnosticity: every `char`, whatever its encoded length, is at distance 0
    // from itself. The range is inclusive so that `char::MAX` itself is exercised too.
    for c in ('\0'..=char::MAX).map(|c| c.to_string()) {
        assert_eq!(lev_distance(&c[..], &c[..], usize::MAX), Some(0));
    }

    // `a` and `b` differ in one character; `c` is `b` without its leading newline.
    let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
    let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
    let c = "Mary häd ä little lämb\n\nLittle lämb\n";
    // Each pair is checked in both argument orders.
    assert_eq!(lev_distance(a, b, usize::MAX), Some(1));
    assert_eq!(lev_distance(b, a, usize::MAX), Some(1));
    assert_eq!(lev_distance(a, c, usize::MAX), Some(2));
    assert_eq!(lev_distance(c, a, usize::MAX), Some(2));
    assert_eq!(lev_distance(b, c, usize::MAX), Some(1));
    assert_eq!(lev_distance(c, b, usize::MAX), Some(1));
}
#[test]
fn test_lev_distance_limit() {
    // (a, b, limit, expected): each pair is checked at its exact distance and one below it.
    let cases: [(&str, &str, usize, Option<usize>); 4] = [
        ("abc", "abcd", 1, Some(1)),
        ("abc", "abcd", 0, None),
        ("abc", "xyz", 3, Some(3)),
        ("abc", "xyz", 2, None),
    ];
    for &(a, b, limit, expected) in &cases {
        assert_eq!(lev_distance(a, b, limit), expected, "a = {a:?}, b = {b:?}, limit = {limit}");
    }
}
#[test]
fn test_method_name_similarity_score() {
    // (a, b, limit, expected score)
    let cases: [(&str, &str, usize, Option<usize>); 7] = [
        ("empty", "is_empty", 1, Some(1)),
        ("shrunk", "rchunks", 2, None),
        ("abc", "abcd", 1, Some(1)),
        ("a", "abcd", 1, None),
        ("edf", "eq", 1, None),
        ("abc", "xyz", 3, Some(3)),
        ("abcdef", "abcdef", 2, Some(0)),
    ];
    for &(a, b, limit, expected) in &cases {
        assert_eq!(
            lev_distance_with_substrings(a, b, limit),
            expected,
            "a = {a:?}, b = {b:?}, limit = {limit}"
        );
    }
}
#[test]
fn test_find_best_match_for_name() {
    use crate::create_default_session_globals_then;
    create_default_session_globals_then(|| {
        let sym = |text: &str| Symbol::intern(text);
        let best = |candidates: &[Symbol], lookup: &str, dist: Option<usize>| {
            find_best_match_for_name(candidates, sym(lookup), dist)
        };

        // The closer of two near misses is picked; a lookup unlike any candidate finds nothing.
        let near_misses = [sym("aaab"), sym("aaabc")];
        assert_eq!(best(&near_misses[..], "aaaa", None), Some(sym("aaab")));
        assert_eq!(best(&near_misses[..], "1111111111", None), None);

        // A candidate differing only in case matches, with or without an explicit limit.
        let upper = [sym("AAAA")];
        assert_eq!(best(&upper[..], "aaaa", None), Some(sym("AAAA")));
        assert_eq!(best(&upper[..], "aaaa", Some(4)), Some(sym("AAAA")));

        // The same `_`-separated words in a different order still match.
        let reordered = [sym("a_longer_variable_name")];
        assert_eq!(
            best(&reordered[..], "a_variable_longer_name", None),
            Some(sym("a_longer_variable_name"))
        );
    })
}

View File

@ -19,6 +19,7 @@
#![feature(negative_impls)]
#![feature(min_specialization)]
#![feature(rustc_attrs)]
#![feature(let_chains)]
#![deny(rustc::untranslatable_diagnostic)]
#![deny(rustc::diagnostic_outside_of_impl)]
@ -46,7 +47,7 @@ pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
use rustc_data_structures::stable_hasher::HashingControls;
pub mod def_id;
use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
pub mod lev_distance;
pub mod edit_distance;
mod span_encoding;
pub use span_encoding::{Span, DUMMY_SP};

View File

@ -457,6 +457,10 @@ impl CStr {
/// to a contiguous region of memory terminated with a 0 byte to represent
/// the end of the string.
///
/// The type of the returned pointer is
/// [`*const c_char`][crate::ffi::c_char], and whether it's
/// an alias for `*const i8` or `*const u8` is platform-specific.
///
/// **WARNING**
///
/// The returned pointer is read-only; writing to it (including passing it
@ -470,6 +474,7 @@ impl CStr {
/// # #![allow(unused_must_use)] #![allow(temporary_cstring_as_ptr)]
/// use std::ffi::CString;
///
/// // Do not do this:
/// let ptr = CString::new("Hello").expect("CString::new failed").as_ptr();
/// unsafe {
/// // `ptr` is dangling

View File

@ -111,10 +111,18 @@ impl Step for Std {
let compiler_to_use = builder.compiler_for(compiler.stage, compiler.host, target);
if compiler_to_use != compiler {
builder.ensure(Std::new(compiler_to_use, target));
builder.info(&format!(
"Uplifting stage1 library ({} -> {})",
compiler_to_use.host, target
));
let msg = if compiler_to_use.host == target {
format!(
"Uplifting library (stage{} -> stage{})",
compiler_to_use.stage, compiler.stage
)
} else {
format!(
"Uplifting library (stage{}:{} -> stage{}:{})",
compiler_to_use.stage, compiler_to_use.host, compiler.stage, target
)
};
builder.info(&msg);
// Even if we're not building std this stage, the new sysroot must
// still contain the third party objects needed by various targets.
@ -134,13 +142,23 @@ impl Step for Std {
cargo.arg("-p").arg(krate);
}
builder.info(&format!(
"Building{} stage{} library artifacts ({} -> {})",
crate_description(&self.crates),
compiler.stage,
&compiler.host,
target,
));
let msg = if compiler.host == target {
format!(
"Building{} stage{} library artifacts ({}) ",
crate_description(&self.crates),
compiler.stage,
compiler.host
)
} else {
format!(
"Building{} stage{} library artifacts ({} -> {})",
crate_description(&self.crates),
compiler.stage,
compiler.host,
target,
)
};
builder.info(&msg);
run_cargo(
builder,
cargo,
@ -438,10 +456,6 @@ impl Step for StdLink {
let compiler = self.compiler;
let target_compiler = self.target_compiler;
let target = self.target;
builder.info(&format!(
"Copying stage{} library from stage{} ({} -> {} / {})",
target_compiler.stage, compiler.stage, &compiler.host, target_compiler.host, target
));
let libdir = builder.sysroot_libdir(target_compiler, target);
let hostdir = builder.sysroot_libdir(target_compiler, compiler.host);
add_to_sysroot(builder, &libdir, &hostdir, &libstd_stamp(builder, compiler, target));
@ -715,8 +729,22 @@ impl Step for Rustc {
let compiler_to_use = builder.compiler_for(compiler.stage, compiler.host, target);
if compiler_to_use != compiler {
builder.ensure(Rustc::new(compiler_to_use, target));
builder
.info(&format!("Uplifting stage1 rustc ({} -> {})", builder.config.build, target));
let msg = if compiler_to_use.host == target {
format!(
"Uplifting rustc (stage{} -> stage{})",
compiler_to_use.stage,
compiler.stage + 1
)
} else {
format!(
"Uplifting rustc (stage{}:{} -> stage{}:{})",
compiler_to_use.stage,
compiler_to_use.host,
compiler.stage + 1,
target
)
};
builder.info(&msg);
builder.ensure(RustcLink::from_rustc(self, compiler_to_use));
return;
}
@ -810,13 +838,24 @@ impl Step for Rustc {
cargo.arg("-p").arg(krate);
}
builder.info(&format!(
"Building{} stage{} compiler artifacts ({} -> {})",
crate_description(&self.crates),
compiler.stage,
&compiler.host,
target,
));
let msg = if compiler.host == target {
format!(
"Building{} compiler artifacts (stage{} -> stage{})",
crate_description(&self.crates),
compiler.stage,
compiler.stage + 1
)
} else {
format!(
"Building{} compiler artifacts (stage{}:{} -> stage{}:{})",
crate_description(&self.crates),
compiler.stage,
compiler.host,
compiler.stage + 1,
target,
)
};
builder.info(&msg);
run_cargo(
builder,
cargo,
@ -1000,10 +1039,6 @@ impl Step for RustcLink {
let compiler = self.compiler;
let target_compiler = self.target_compiler;
let target = self.target;
builder.info(&format!(
"Copying stage{} rustc from stage{} ({} -> {} / {})",
target_compiler.stage, compiler.stage, &compiler.host, target_compiler.host, target
));
add_to_sysroot(
builder,
&builder.sysroot_libdir(target_compiler, target),
@ -1077,10 +1112,15 @@ impl Step for CodegenBackend {
let tmp_stamp = out_dir.join(".tmp.stamp");
builder.info(&format!(
"Building stage{} codegen backend {} ({} -> {})",
compiler.stage, backend, &compiler.host, target
));
let msg = if compiler.host == target {
format!("Building stage{} codegen backend {}", compiler.stage, backend)
} else {
format!(
"Building stage{} codegen backend {} ({} -> {})",
compiler.stage, backend, compiler.host, target
)
};
builder.info(&msg);
let files = run_cargo(builder, cargo, vec![], &tmp_stamp, vec![], false, false);
if builder.config.dry_run() {
return;
@ -1386,7 +1426,12 @@ impl Step for Assemble {
let stage = target_compiler.stage;
let host = target_compiler.host;
builder.info(&format!("Assembling stage{} compiler ({})", stage, host));
let msg = if build_compiler.host == host {
format!("Assembling stage{} compiler", stage)
} else {
format!("Assembling stage{} compiler ({})", stage, host)
};
builder.info(&msg);
// Link in all dylibs to the libdir
let stamp = librustc_stamp(builder, build_compiler, target_compiler.host);

View File

@ -33,6 +33,44 @@ struct ToolBuild {
allow_features: &'static str,
}
/// Formats the progress message announcing that `tool` is about to be built.
///
/// The wording depends on `mode` and on whether `host` and `target` differ:
///
/// * `Mode::ToolRustc`: the message shows the tool going from `build_stage`
///   to `build_stage + 1`; when `host != target` each stage is additionally
///   suffixed with its platform.
/// * `Mode::ToolStd`: when `host != target` the message shows both platforms
///   at the same `build_stage`; otherwise only the stage is printed.
/// * any other mode: only the stage is printed.
fn tooling_output(
    mode: Mode,
    tool: &str,
    build_stage: u32,
    host: &TargetSelection,
    target: &TargetSelection,
) -> String {
    let is_cross = host != target;
    match (mode, is_cross) {
        // Native build of a tool that links against the compiler: it ends up one stage later.
        (Mode::ToolRustc, false) => {
            format!("Building tool {} (stage{} -> stage{})", tool, build_stage, build_stage + 1)
        }
        // Same as above, but spell out which platform each stage belongs to.
        (Mode::ToolRustc, true) => format!(
            "Building tool {} (stage{}:{} -> stage{}:{})",
            tool,
            build_stage,
            host,
            build_stage + 1,
            target
        ),
        // Cross build of a std-only tool: the stage number is the same on both sides.
        (Mode::ToolStd, true) => format!(
            "Building tool {} (stage{}:{} -> stage{}:{})",
            tool, build_stage, host, build_stage, target
        ),
        // Native std-only tools and every remaining mode only report the stage.
        _ => format!("Building tool {} (stage{})", tool, build_stage),
    }
}
impl Step for ToolBuild {
type Output = Option<PathBuf>;
@ -74,8 +112,14 @@ impl Step for ToolBuild {
if !self.allow_features.is_empty() {
cargo.allow_features(self.allow_features);
}
builder.info(&format!("Building stage{} tool {} ({})", compiler.stage, tool, target));
let msg = tooling_output(
self.mode,
self.tool,
self.compiler.stage,
&self.compiler.host,
&self.target,
);
builder.info(&msg);
let mut duplicates = Vec::new();
let is_expected = compile::stream_cargo(builder, cargo, vec![], &mut |msg| {
// Only care about big things like the RLS/Cargo for now
@ -562,10 +606,14 @@ impl Step for Rustdoc {
features.as_slice(),
);
builder.info(&format!(
"Building rustdoc for stage{} ({})",
target_compiler.stage, target_compiler.host
));
let msg = tooling_output(
Mode::ToolRustc,
"rustdoc",
build_compiler.stage,
&self.compiler.host,
&target,
);
builder.info(&msg);
builder.run(&mut cargo.into());
// Cargo adds a number of paths to the dylib search path on windows, which results in

View File

@ -2,7 +2,9 @@ warning: unexpected `cfg` condition value
--> $DIR/invalid-cfg-value.rs:7:7
|
LL | #[cfg(feature = "sedre")]
| ^^^^^^^^^^^^^^^^^
| ^^^^^^^^^^-------
| |
| help: did you mean: `"serde"`
|
= note: expected values for `feature` are: full, serde
= note: `#[warn(unexpected_cfgs)]` on by default

View File

@ -49,12 +49,9 @@ error[E0747]: type provided when a constant was expected
--> $DIR/invalid-const-arguments.rs:10:19
|
LL | impl<N> Foo for B<N> {}
| ^
|
help: consider changing this type parameter to be a `const` generic
|
LL | impl<const N: u8> Foo for B<N> {}
| ~~~~~~~~~~~
| - ^
| |
| help: consider changing this type parameter to a const parameter: `const N: u8`
error[E0747]: unresolved item provided when a constant was expected
--> $DIR/invalid-const-arguments.rs:14:32

View File

@ -0,0 +1,6 @@
// https://internals.rust-lang.org/t/18227
fn main() {
prinltn!(); //~ ERROR cannot find macro `prinltn` in this scope
//^ a macro with a similar name exists: `println`
// The misspelling above is deliberate: the two transposed letters exercise the
// "similar name" suggestion, so the macro name must not be corrected.
}

View File

@ -0,0 +1,11 @@
error: cannot find macro `prinltn` in this scope
--> $DIR/println-typo.rs:4:5
|
LL | prinltn!();
| ^^^^^^^ help: a macro with a similar name exists: `println`
--> $SRC_DIR/std/src/macros.rs:LL:COL
|
= note: similarly named macro `println` defined here
error: aborting due to previous error