Rollup merge of #79000 - sivadeilra:user/ardavis/lev_distance, r=wesleywiser

Move lev_distance to rustc_span, make non-generic

rustc_ast currently has a few dependencies on rustc_lexer. Ideally, an AST
would not have any dependency on its lexer, for minimizing
design-time dependencies. Breaking this dependency would also have practical
benefits, since modifying rustc_lexer would not trigger a rebuild of rustc_ast.

This commit does not remove the rustc_ast --> rustc_lexer dependency,
but it does remove one of the sources of this dependency, which is the
code that handles fuzzy matching between symbol names for making suggestions
in diagnostics. Since that code depends only on Symbol, it is easy to move
it to rustc_span. It might even be best to move it to a separate crate,
since other tools such as Cargo use the same algorithm, and simply
contain a duplicate of the code.

This changes the signature of find_best_match_for_name so that it is no
longer generic over its input. I checked the optimized binaries, and this
function was duplicated for nearly every call site, because most call sites
used short-lived iterator chains, generic over Map and such. But there's
no good reason for a function like this to be generic, since all it does
is immediately convert the generic input (the Iterator impl) to a concrete
Vec<Symbol>. This has all of the costs of generics (duplicated method bodies)
with no benefit.

Changing find_best_match_for_name to be non-generic removed about 10KB of
code from the optimized binary. I know it's a drop in the bucket, but we have
to start reducing binary size, and beginning to tame over-use of generics
is part of that.
This commit is contained in:
Jonas Schievink 2020-11-26 13:39:05 +01:00 committed by GitHub
commit 6fcd589025
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 96 additions and 84 deletions

View File

@ -34,7 +34,6 @@ macro_rules! unwrap_or {
pub mod util {
pub mod classify;
pub mod comments;
pub mod lev_distance;
pub mod literal;
pub mod parser;
}

View File

@ -1,6 +1,5 @@
use rustc_ast::mut_visit::{visit_clobber, MutVisitor, *};
use rustc_ast::ptr::P;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::{self as ast, AttrVec, BlockCheckMode};
use rustc_codegen_ssa::traits::CodegenBackend;
use rustc_data_structures::fingerprint::Fingerprint;
@ -20,6 +19,7 @@ use rustc_session::parse::CrateConfig;
use rustc_session::CrateDisambiguator;
use rustc_session::{early_error, filesearch, output, DiagnosticOutput, Session};
use rustc_span::edition::Edition;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::FileLoader;
use rustc_span::symbol::{sym, Symbol};
use smallvec::SmallVec;
@ -512,8 +512,11 @@ pub(crate) fn check_attr_crate_type(
if let ast::MetaItemKind::NameValue(spanned) = a.meta().unwrap().kind {
let span = spanned.span;
let lev_candidate =
find_best_match_for_name(CRATE_TYPES.iter().map(|(k, _)| k), n, None);
let lev_candidate = find_best_match_for_name(
&CRATE_TYPES.iter().map(|(k, _)| *k).collect::<Vec<_>>(),
n,
None,
);
if let Some(candidate) = lev_candidate {
lint_buffer.buffer_lint_with_diagnostic(
lint::builtin::UNKNOWN_CRATE_TYPES,

View File

@ -19,7 +19,6 @@ use self::TargetLint::*;
use crate::levels::LintLevelsBuilder;
use crate::passes::{EarlyLintPassObject, LateLintPassObject};
use rustc_ast as ast;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync;
use rustc_errors::{add_elided_lifetime_in_path_suggestion, struct_span_err, Applicability};
@ -37,6 +36,7 @@ use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::lint::{FutureIncompatibleInfo, Level, Lint, LintBuffer, LintId};
use rustc_session::Session;
use rustc_session::SessionLintStore;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::{symbol::Symbol, MultiSpan, Span, DUMMY_SP};
use rustc_target::abi::LayoutOf;
@ -411,7 +411,7 @@ impl LintStore {
self.by_name.keys().map(|name| Symbol::intern(&name)).collect::<Vec<_>>();
let suggestion = find_best_match_for_name(
symbols.iter(),
&symbols,
Symbol::intern(&lint_name.to_lowercase()),
None,
);

View File

@ -1,7 +1,6 @@
use std::cmp::Reverse;
use std::ptr;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::{self as ast, Path};
use rustc_ast_pretty::pprust;
use rustc_data_structures::fx::FxHashSet;
@ -14,6 +13,7 @@ use rustc_middle::bug;
use rustc_middle::ty::{self, DefIdTree};
use rustc_session::Session;
use rustc_span::hygiene::MacroKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::SourceMap;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, MultiSpan, Span};
@ -716,7 +716,7 @@ impl<'a> Resolver<'a> {
suggestions.sort_by_cached_key(|suggestion| suggestion.candidate.as_str());
match find_best_match_for_name(
suggestions.iter().map(|suggestion| &suggestion.candidate),
&suggestions.iter().map(|suggestion| suggestion.candidate).collect::<Vec<Symbol>>(),
ident.name,
None,
) {

View File

@ -10,7 +10,6 @@ use crate::{CrateLint, Module, ModuleOrUniformRoot, ParentScope, PerNS, ScopeSet
use crate::{NameBinding, NameBindingKind, PathResult, PrivacyError, ToNameBinding};
use rustc_ast::unwrap_or;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::NodeId;
use rustc_ast_lowering::ResolverAstLowering;
use rustc_data_structures::fx::FxHashSet;
@ -25,6 +24,7 @@ use rustc_session::lint::builtin::{PUB_USE_OF_PRIVATE_EXTERN_CRATE, UNUSED_IMPOR
use rustc_session::lint::BuiltinLintDiagnostics;
use rustc_session::DiagnosticMessageId;
use rustc_span::hygiene::ExpnId;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, Ident, Symbol};
use rustc_span::{MultiSpan, Span};
@ -1096,33 +1096,37 @@ impl<'a, 'b> ImportResolver<'a, 'b> {
_ => None,
};
let resolutions = resolutions.as_ref().into_iter().flat_map(|r| r.iter());
let names = resolutions.filter_map(|(BindingKey { ident: i, .. }, resolution)| {
if *i == ident {
return None;
} // Never suggest the same name
match *resolution.borrow() {
NameResolution { binding: Some(name_binding), .. } => {
match name_binding.kind {
NameBindingKind::Import { binding, .. } => {
match binding.kind {
// Never suggest the name that has binding error
// i.e., the name that cannot be previously resolved
NameBindingKind::Res(Res::Err, _) => None,
_ => Some(&i.name),
let names = resolutions
.filter_map(|(BindingKey { ident: i, .. }, resolution)| {
if *i == ident {
return None;
} // Never suggest the same name
match *resolution.borrow() {
NameResolution { binding: Some(name_binding), .. } => {
match name_binding.kind {
NameBindingKind::Import { binding, .. } => {
match binding.kind {
// Never suggest the name that has binding error
// i.e., the name that cannot be previously resolved
NameBindingKind::Res(Res::Err, _) => None,
_ => Some(i.name),
}
}
_ => Some(i.name),
}
_ => Some(&i.name),
}
NameResolution { ref single_imports, .. }
if single_imports.is_empty() =>
{
None
}
_ => Some(i.name),
}
NameResolution { ref single_imports, .. } if single_imports.is_empty() => {
None
}
_ => Some(&i.name),
}
});
})
.collect::<Vec<Symbol>>();
let lev_suggestion =
find_best_match_for_name(names, ident.name, None).map(|suggestion| {
find_best_match_for_name(&names, ident.name, None).map(|suggestion| {
(
vec![(ident.span, suggestion.to_string())],
String::from("a similar name exists in the module"),

View File

@ -5,7 +5,6 @@ use crate::path_names_to_string;
use crate::{CrateLint, Module, ModuleKind, ModuleOrUniformRoot};
use crate::{PathResult, PathSource, Segment};
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_ast::visit::FnKind;
use rustc_ast::{self as ast, Expr, ExprKind, Item, ItemKind, NodeId, Path, Ty, TyKind};
use rustc_ast_pretty::pprust::path_segment_to_string;
@ -18,6 +17,7 @@ use rustc_hir::def_id::{DefId, CRATE_DEF_INDEX, LOCAL_CRATE};
use rustc_hir::PrimTy;
use rustc_session::parse::feature_err;
use rustc_span::hygiene::MacroKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_span::{BytePos, MultiSpan, Span, DUMMY_SP};
@ -1206,7 +1206,7 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
names.sort_by_cached_key(|suggestion| suggestion.candidate.as_str());
match find_best_match_for_name(
names.iter().map(|suggestion| &suggestion.candidate),
&names.iter().map(|suggestion| suggestion.candidate).collect::<Vec<Symbol>>(),
name,
None,
) {
@ -1592,9 +1592,10 @@ impl<'a: 'ast, 'ast> LateResolutionVisitor<'a, '_, 'ast> {
.bindings
.iter()
.filter(|(id, _)| id.span.ctxt() == label.span.ctxt())
.map(|(id, _)| &id.name);
.map(|(id, _)| id.name)
.collect::<Vec<Symbol>>();
find_best_match_for_name(names, label.name, None).map(|symbol| {
find_best_match_for_name(&names, label.name, None).map(|symbol| {
// Upon finding a similar name, get the ident that it was from - the span
// contained within helps make a useful diagnostic. In addition, determine
// whether this candidate is within scope.

View File

@ -1,6 +1,4 @@
// FIXME(Centril): Move to rustc_span?
use rustc_span::symbol::Symbol;
use crate::symbol::Symbol;
use std::cmp;
#[cfg(test)]
@ -45,17 +43,14 @@ pub fn lev_distance(a: &str, b: &str) -> usize {
///
/// Besides Levenshtein, we use case insensitive comparison to improve accuracy on an edge case with
/// a lower(upper)case letters mismatch.
pub fn find_best_match_for_name<'a, T>(
iter_names: T,
#[cold]
pub fn find_best_match_for_name(
name_vec: &[Symbol],
lookup: Symbol,
dist: Option<usize>,
) -> Option<Symbol>
where
T: Iterator<Item = &'a Symbol>,
{
) -> Option<Symbol> {
let lookup = &lookup.as_str();
let max_dist = dist.unwrap_or_else(|| cmp::max(lookup.len(), 3) / 3);
let name_vec: Vec<&Symbol> = iter_names.collect();
let (case_insensitive_match, levenshtein_match) = name_vec
.iter()
@ -83,18 +78,18 @@ where
// 2. Levenshtein distance match
// 3. Sorted word match
if let Some(candidate) = case_insensitive_match {
Some(*candidate)
Some(candidate)
} else if levenshtein_match.is_some() {
levenshtein_match.map(|(candidate, _)| *candidate)
levenshtein_match.map(|(candidate, _)| candidate)
} else {
find_match_by_sorted_words(name_vec, lookup)
}
}
fn find_match_by_sorted_words<'a>(iter_names: Vec<&'a Symbol>, lookup: &str) -> Option<Symbol> {
fn find_match_by_sorted_words(iter_names: &[Symbol], lookup: &str) -> Option<Symbol> {
iter_names.iter().fold(None, |result, candidate| {
if sort_by_words(&candidate.as_str()) == sort_by_words(lookup) {
Some(**candidate)
Some(*candidate)
} else {
result
}

View File

@ -21,38 +21,35 @@ fn test_lev_distance() {
#[test]
fn test_find_best_match_for_name() {
use rustc_span::with_default_session_globals;
use crate::with_default_session_globals;
with_default_session_globals(|| {
let input = vec![Symbol::intern("aaab"), Symbol::intern("aaabc")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None),
find_best_match_for_name(&input, Symbol::intern("aaaa"), None),
Some(Symbol::intern("aaab"))
);
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("1111111111"), None),
None
);
assert_eq!(find_best_match_for_name(&input, Symbol::intern("1111111111"), None), None);
let input = vec![Symbol::intern("aAAA")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("AAAA"), None),
find_best_match_for_name(&input, Symbol::intern("AAAA"), None),
Some(Symbol::intern("aAAA"))
);
let input = vec![Symbol::intern("AAAA")];
// Returns None because `lev_distance > max_dist / 3`
assert_eq!(find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), None), None);
assert_eq!(find_best_match_for_name(&input, Symbol::intern("aaaa"), None), None);
let input = vec![Symbol::intern("AAAA")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("aaaa"), Some(4)),
find_best_match_for_name(&input, Symbol::intern("aaaa"), Some(4)),
Some(Symbol::intern("AAAA"))
);
let input = vec![Symbol::intern("a_longer_variable_name")];
assert_eq!(
find_best_match_for_name(input.iter(), Symbol::intern("a_variable_longer_name"), None),
find_best_match_for_name(&input, Symbol::intern("a_variable_longer_name"), None),
Some(Symbol::intern("a_longer_variable_name"))
);
})

View File

@ -34,6 +34,7 @@ use hygiene::Transparency;
pub use hygiene::{DesugaringKind, ExpnData, ExpnId, ExpnKind, ForLoopLoc, MacroKind};
pub mod def_id;
use def_id::{CrateNum, DefId, LOCAL_CRATE};
pub mod lev_distance;
mod span_encoding;
pub use span_encoding::{Span, DUMMY_SP};

View File

@ -1,11 +1,11 @@
use crate::astconv::AstConv;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::{pluralize, struct_span_err, Applicability};
use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_middle::ty;
use rustc_session::parse::feature_err;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{sym, Ident};
use rustc_span::{Span, DUMMY_SP};
@ -180,7 +180,7 @@ impl<'o, 'tcx> dyn AstConv<'tcx> + 'o {
.collect();
if let (Some(suggested_name), true) = (
find_best_match_for_name(all_candidate_names.iter(), assoc_name.name, None),
find_best_match_for_name(&all_candidate_names, assoc_name.name, None),
assoc_name.span != DUMMY_SP,
) {
err.span_suggestion(

View File

@ -13,7 +13,6 @@ use crate::errors::{
};
use crate::middle::resolve_lifetime as rl;
use crate::require_c_abi_if_c_variadic;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_errors::{struct_span_err, Applicability, ErrorReported, FatalError};
use rustc_hir as hir;
@ -26,6 +25,7 @@ use rustc_middle::ty::subst::{self, InternalSubsts, Subst, SubstsRef};
use rustc_middle::ty::GenericParamDefKind;
use rustc_middle::ty::{self, Const, DefIdTree, Ty, TyCtxt, TypeFoldable};
use rustc_session::lint::builtin::AMBIGUOUS_ASSOCIATED_ITEMS;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::symbol::{Ident, Symbol};
use rustc_span::{Span, DUMMY_SP};
use rustc_target::spec::abi;
@ -1579,7 +1579,11 @@ impl<'o, 'tcx> dyn AstConv<'tcx> + 'o {
let adt_def = qself_ty.ty_adt_def().expect("enum is not an ADT");
if let Some(suggested_name) = find_best_match_for_name(
adt_def.variants.iter().map(|variant| &variant.ident.name),
&adt_def
.variants
.iter()
.map(|variant| variant.ident.name)
.collect::<Vec<Symbol>>(),
assoc_ident.name,
None,
) {

View File

@ -22,7 +22,6 @@ use crate::type_error_struct;
use crate::errors::{AddressOfTemporaryTaken, ReturnStmtOutsideOfFnBody, StructExprNonExhaustive};
use rustc_ast as ast;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stack::ensure_sufficient_stack;
use rustc_errors::ErrorReported;
@ -40,6 +39,7 @@ use rustc_middle::ty::Ty;
use rustc_middle::ty::TypeFoldable;
use rustc_middle::ty::{AdtKind, Visibility};
use rustc_span::hygiene::DesugaringKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::Span;
use rustc_span::symbol::{kw, sym, Ident, Symbol};
use rustc_trait_selection::traits::{self, ObligationCauseCode};
@ -1441,18 +1441,22 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
field: Symbol,
skip: Vec<Symbol>,
) -> Option<Symbol> {
let names = variant.fields.iter().filter_map(|field| {
// ignore already set fields and private fields from non-local crates
if skip.iter().any(|&x| x == field.ident.name)
|| (!variant.def_id.is_local() && field.vis != Visibility::Public)
{
None
} else {
Some(&field.ident.name)
}
});
let names = variant
.fields
.iter()
.filter_map(|field| {
// ignore already set fields and private fields from non-local crates
if skip.iter().any(|&x| x == field.ident.name)
|| (!variant.def_id.is_local() && field.vis != Visibility::Public)
{
None
} else {
Some(field.ident.name)
}
})
.collect::<Vec<Symbol>>();
find_best_match_for_name(names, field, None)
find_best_match_for_name(&names, field, None)
}
fn available_field_names(&self, variant: &'tcx ty::VariantDef) -> Vec<Symbol> {

View File

@ -9,7 +9,6 @@ use crate::hir::def::DefKind;
use crate::hir::def_id::DefId;
use rustc_ast as ast;
use rustc_ast::util::lev_distance::{find_best_match_for_name, lev_distance};
use rustc_data_structures::fx::FxHashSet;
use rustc_data_structures::sync::Lrc;
use rustc_hir as hir;
@ -27,6 +26,7 @@ use rustc_middle::ty::{
};
use rustc_session::lint;
use rustc_span::def_id::LocalDefId;
use rustc_span::lev_distance::{find_best_match_for_name, lev_distance};
use rustc_span::{symbol::Ident, Span, Symbol, DUMMY_SP};
use rustc_trait_selection::autoderef::{self, Autoderef};
use rustc_trait_selection::traits::query::evaluate_obligation::InferCtxtExt;
@ -1538,8 +1538,11 @@ impl<'a, 'tcx> ProbeContext<'a, 'tcx> {
Ok(None)
} else {
let best_name = {
let names = applicable_close_candidates.iter().map(|cand| &cand.ident.name);
find_best_match_for_name(names, self.method_name.unwrap().name, None)
let names = applicable_close_candidates
.iter()
.map(|cand| cand.ident.name)
.collect::<Vec<Symbol>>();
find_best_match_for_name(&names, self.method_name.unwrap().name, None)
}
.unwrap();
Ok(applicable_close_candidates

View File

@ -2,7 +2,6 @@
//! found or is otherwise invalid.
use crate::check::FnCtxt;
use rustc_ast::util::lev_distance;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_errors::{pluralize, struct_span_err, Applicability, DiagnosticBuilder};
use rustc_hir as hir;
@ -17,6 +16,7 @@ use rustc_middle::ty::print::with_crate_prefix;
use rustc_middle::ty::{
self, ToPolyTraitRef, ToPredicate, Ty, TyCtxt, TypeFoldable, WithConstness,
};
use rustc_span::lev_distance;
use rustc_span::symbol::{kw, sym, Ident};
use rustc_span::{source_map, FileName, Span};
use rustc_trait_selection::traits::query::evaluate_obligation::InferCtxtExt;
@ -744,7 +744,7 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
if actual.is_enum() {
let adt_def = actual.ty_adt_def().expect("enum is not an ADT");
if let Some(suggestion) = lev_distance::find_best_match_for_name(
adt_def.variants.iter().map(|s| &s.ident.name),
&adt_def.variants.iter().map(|s| s.ident.name).collect::<Vec<_>>(),
item_name.name,
None,
) {

View File

@ -1,7 +1,6 @@
use crate::check::FnCtxt;
use rustc_ast as ast;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::{pluralize, struct_span_err, Applicability, DiagnosticBuilder};
use rustc_hir as hir;
@ -13,6 +12,7 @@ use rustc_infer::infer::type_variable::{TypeVariableOrigin, TypeVariableOriginKi
use rustc_middle::ty::subst::GenericArg;
use rustc_middle::ty::{self, Adt, BindingMode, Ty, TypeFoldable};
use rustc_span::hygiene::DesugaringKind;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_span::source_map::{Span, Spanned};
use rustc_span::symbol::Ident;
use rustc_trait_selection::traits::{ObligationCause, Pattern};
@ -1302,8 +1302,9 @@ impl<'a, 'tcx> FnCtxt<'a, 'tcx> {
),
);
if plural == "" {
let input = unmentioned_fields.iter().map(|(_, field)| &field.name);
let suggested_name = find_best_match_for_name(input, ident.name, None);
let input =
unmentioned_fields.iter().map(|(_, field)| field.name).collect::<Vec<_>>();
let suggested_name = find_best_match_for_name(&input, ident.name, None);
if let Some(suggested_name) = suggested_name {
err.span_suggestion(
ident.span,

View File

@ -5,7 +5,7 @@ use crate::utils::{
span_lint_and_sugg, span_lint_and_then, without_block_comments,
};
use if_chain::if_chain;
use rustc_ast::util::lev_distance::find_best_match_for_name;
use rustc_span::lev_distance::find_best_match_for_name;
use rustc_ast::{AttrKind, AttrStyle, Attribute, Lit, LitKind, MetaItemKind, NestedMetaItem};
use rustc_errors::Applicability;
use rustc_hir::{
@ -427,7 +427,7 @@ fn check_clippy_lint_names(cx: &LateContext<'_>, ident: &str, items: &[NestedMet
.map(|l| Symbol::intern(&l.name_lower()))
.collect::<Vec<_>>();
let sugg = find_best_match_for_name(
symbols.iter(),
&symbols,
Symbol::intern(&format!("clippy::{}", name_lower)),
None,
);