Allow drivers to supply a list of extra symbols to intern

This commit is contained in:
Alex Macleod 2025-03-19 02:19:33 +00:00
parent 87e60a7d28
commit f740326216
23 changed files with 115 additions and 58 deletions

View File

@ -259,6 +259,7 @@ pub fn run_compiler(at_args: &[String], callbacks: &mut (dyn Callbacks + Send))
hash_untracked_state: None,
register_lints: None,
override_queries: None,
extra_symbols: Vec::new(),
make_codegen_backend: None,
registry: diagnostics_registry(),
using_internal_features: &USING_INTERNAL_FEATURES,

View File

@ -17,7 +17,7 @@ fn def_path_hash_depends_on_crate_id() {
// the crate by changing the crate disambiguator (e.g. via bumping the
// crate's version number).
create_session_globals_then(Edition::Edition2024, None, || {
create_session_globals_then(Edition::Edition2024, &[], None, || {
let id0 = StableCrateId::new(Symbol::intern("foo"), false, vec!["1".to_string()], "");
let id1 = StableCrateId::new(Symbol::intern("foo"), false, vec!["2".to_string()], "");

View File

@ -340,6 +340,10 @@ pub struct Config {
/// the list of queries.
pub override_queries: Option<fn(&Session, &mut Providers)>,
/// An extra set of symbols to add to the symbol interner. The symbol indices
/// will start at [`PREDEFINED_SYMBOLS_COUNT`](rustc_span::symbol::PREDEFINED_SYMBOLS_COUNT).
pub extra_symbols: Vec<&'static str>,
/// This is a callback from the driver that is called to create a codegen backend.
///
/// Has no uses within this repository, but is used by bjorn3 for "the
@ -401,6 +405,7 @@ pub fn run_compiler<R: Send>(config: Config, f: impl FnOnce(&Compiler) -> R + Se
&early_dcx,
config.opts.edition,
config.opts.unstable_opts.threads,
&config.extra_symbols,
SourceMapInputs { file_loader, path_mapping, hash_kind, checksum_hash_kind },
|current_gcx| {
// The previous `early_dcx` can't be reused here because it doesn't

View File

@ -53,7 +53,7 @@ where
checksum_hash_kind,
});
rustc_span::create_session_globals_then(DEFAULT_EDITION, sm_inputs, || {
rustc_span::create_session_globals_then(DEFAULT_EDITION, &[], sm_inputs, || {
let temps_dir = sessopts.unstable_opts.temps_dir.as_deref().map(PathBuf::from);
let io = CompilerIO {
input: Input::Str { name: FileName::Custom(String::new()), input: String::new() },

View File

@ -117,6 +117,7 @@ fn run_in_thread_with_globals<F: FnOnce(CurrentGcx) -> R + Send, R: Send>(
thread_stack_size: usize,
edition: Edition,
sm_inputs: SourceMapInputs,
extra_symbols: &[&'static str],
f: F,
) -> R {
// The "thread pool" is a single spawned thread in the non-parallel
@ -134,9 +135,12 @@ fn run_in_thread_with_globals<F: FnOnce(CurrentGcx) -> R + Send, R: Send>(
// name contains null bytes.
let r = builder
.spawn_scoped(s, move || {
rustc_span::create_session_globals_then(edition, Some(sm_inputs), || {
f(CurrentGcx::new())
})
rustc_span::create_session_globals_then(
edition,
extra_symbols,
Some(sm_inputs),
|| f(CurrentGcx::new()),
)
})
.unwrap()
.join();
@ -152,6 +156,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
thread_builder_diag: &EarlyDiagCtxt,
edition: Edition,
threads: usize,
extra_symbols: &[&'static str],
sm_inputs: SourceMapInputs,
f: F,
) -> R {
@ -168,12 +173,18 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
let registry = sync::Registry::new(std::num::NonZero::new(threads).unwrap());
if !sync::is_dyn_thread_safe() {
return run_in_thread_with_globals(thread_stack_size, edition, sm_inputs, |current_gcx| {
// Register the thread for use with the `WorkerLocal` type.
registry.register();
return run_in_thread_with_globals(
thread_stack_size,
edition,
sm_inputs,
extra_symbols,
|current_gcx| {
// Register the thread for use with the `WorkerLocal` type.
registry.register();
f(current_gcx)
});
f(current_gcx)
},
);
}
let current_gcx = FromDyn::from(CurrentGcx::new());
@ -217,7 +228,7 @@ pub(crate) fn run_in_thread_pool_with_globals<F: FnOnce(CurrentGcx) -> R + Send,
// pool. Upon creation, each worker thread created gets a copy of the
// session globals in TLS. This is possible because `SessionGlobals` impls
// `Send` in the parallel compiler.
rustc_span::create_session_globals_then(edition, Some(sm_inputs), || {
rustc_span::create_session_globals_then(edition, extra_symbols, Some(sm_inputs), || {
rustc_span::with_session_globals(|session_globals| {
let session_globals = FromDyn::from(session_globals);
builder

View File

@ -142,13 +142,13 @@ pub(super) fn symbols(input: TokenStream) -> TokenStream {
output
}
struct Preinterned {
struct Predefined {
idx: u32,
span_of_name: Span,
}
struct Entries {
map: HashMap<String, Preinterned>,
map: HashMap<String, Predefined>,
}
impl Entries {
@ -163,7 +163,7 @@ impl Entries {
prev.idx
} else {
let idx = self.len();
self.map.insert(s.to_string(), Preinterned { idx, span_of_name: span });
self.map.insert(s.to_string(), Predefined { idx, span_of_name: span });
idx
}
}
@ -295,10 +295,14 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
}
let symbol_digits_base = entries.map["0"].idx;
let preinterned_symbols_count = entries.len();
let predefined_symbols_count = entries.len();
let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #symbol_digits_base;
const PREINTERNED_SYMBOLS_COUNT: u32 = #preinterned_symbols_count;
/// The number of predefined symbols; this is the first index for
/// extra pre-interned symbols in an Interner created via
/// [`Interner::with_extra_symbols`].
pub const PREDEFINED_SYMBOLS_COUNT: u32 = #predefined_symbols_count;
#[doc(hidden)]
#[allow(non_upper_case_globals)]
@ -315,10 +319,13 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
}
impl Interner {
pub(crate) fn fresh() -> Self {
Interner::prefill(&[
#prefill_stream
])
/// Creates an `Interner` with the predefined symbols from the `symbols!` macro and
/// any extra symbols provided by external drivers such as Clippy.
pub(crate) fn with_extra_symbols(extra_symbols: &[&'static str]) -> Self {
Interner::prefill(
&[#prefill_stream],
extra_symbols,
)
}
}
};

View File

@ -562,9 +562,9 @@ impl<'a, 'tcx> SpanDecoder for DecodeContext<'a, 'tcx> {
Symbol::intern(s)
})
}
SYMBOL_PREINTERNED => {
SYMBOL_PREDEFINED => {
let symbol_index = self.read_u32();
Symbol::new_from_decoded(symbol_index)
Symbol::new(symbol_index)
}
_ => unreachable!(),
}

View File

@ -201,9 +201,9 @@ impl<'a, 'tcx> SpanEncoder for EncodeContext<'a, 'tcx> {
}
fn encode_symbol(&mut self, symbol: Symbol) {
// if symbol preinterned, emit tag and symbol index
if symbol.is_preinterned() {
self.opaque.emit_u8(SYMBOL_PREINTERNED);
// if symbol predefined, emit tag and symbol index
if symbol.is_predefined() {
self.opaque.emit_u8(SYMBOL_PREDEFINED);
self.opaque.emit_u32(symbol.as_u32());
} else {
// otherwise write it as string or as offset to it

View File

@ -576,7 +576,7 @@ impl SpanTag {
// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
const SYMBOL_PREDEFINED: u8 = 2;
pub fn provide(providers: &mut Providers) {
encoder::provide(providers);

View File

@ -45,7 +45,7 @@ const TAG_EXPN_DATA: u8 = 1;
// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
const SYMBOL_PREDEFINED: u8 = 2;
/// Provides an interface to incremental compilation data cached from the
/// previous compilation session. This data will eventually include the results
@ -673,9 +673,9 @@ impl<'a, 'tcx> SpanDecoder for CacheDecoder<'a, 'tcx> {
Symbol::intern(s)
})
}
SYMBOL_PREINTERNED => {
SYMBOL_PREDEFINED => {
let symbol_index = self.read_u32();
Symbol::new_from_decoded(symbol_index)
Symbol::new(symbol_index)
}
_ => unreachable!(),
}
@ -891,9 +891,9 @@ impl<'a, 'tcx> SpanEncoder for CacheEncoder<'a, 'tcx> {
// copy&paste impl from rustc_metadata
fn encode_symbol(&mut self, symbol: Symbol) {
// if symbol preinterned, emit tag and symbol index
if symbol.is_preinterned() {
self.encoder.emit_u8(SYMBOL_PREINTERNED);
// if symbol predefined, emit tag and symbol index
if symbol.is_predefined() {
self.encoder.emit_u8(SYMBOL_PREDEFINED);
self.encoder.emit_u32(symbol.as_u32());
} else {
// otherwise write it as string or as offset to it

View File

@ -116,9 +116,13 @@ pub struct SessionGlobals {
}
impl SessionGlobals {
pub fn new(edition: Edition, sm_inputs: Option<SourceMapInputs>) -> SessionGlobals {
pub fn new(
edition: Edition,
extra_symbols: &[&'static str],
sm_inputs: Option<SourceMapInputs>,
) -> SessionGlobals {
SessionGlobals {
symbol_interner: symbol::Interner::fresh(),
symbol_interner: symbol::Interner::with_extra_symbols(extra_symbols),
span_interner: Lock::new(span_encoding::SpanInterner::default()),
metavar_spans: Default::default(),
hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
@ -129,6 +133,7 @@ impl SessionGlobals {
pub fn create_session_globals_then<R>(
edition: Edition,
extra_symbols: &[&'static str],
sm_inputs: Option<SourceMapInputs>,
f: impl FnOnce() -> R,
) -> R {
@ -137,7 +142,7 @@ pub fn create_session_globals_then<R>(
"SESSION_GLOBALS should never be overwritten! \
Use another thread if you need another SessionGlobals"
);
let session_globals = SessionGlobals::new(edition, sm_inputs);
let session_globals = SessionGlobals::new(edition, extra_symbols, sm_inputs);
SESSION_GLOBALS.set(&session_globals, f)
}
@ -156,7 +161,7 @@ where
F: FnOnce(&SessionGlobals) -> R,
{
if !SESSION_GLOBALS.is_set() {
let session_globals = SessionGlobals::new(edition, None);
let session_globals = SessionGlobals::new(edition, &[], None);
SESSION_GLOBALS.set(&session_globals, || SESSION_GLOBALS.with(f))
} else {
SESSION_GLOBALS.with(f)
@ -172,7 +177,7 @@ where
/// Default edition, no source map.
pub fn create_default_session_globals_then<R>(f: impl FnOnce() -> R) -> R {
create_session_globals_then(edition::DEFAULT_EDITION, None, f)
create_session_globals_then(edition::DEFAULT_EDITION, &[], None, f)
}
// If this ever becomes non thread-local, `decode_syntax_context`

View File

@ -2506,15 +2506,10 @@ rustc_index::newtype_index! {
}
impl Symbol {
const fn new(n: u32) -> Self {
pub const fn new(n: u32) -> Self {
Symbol(SymbolIndex::from_u32(n))
}
/// for use in Decoder only
pub fn new_from_decoded(n: u32) -> Self {
Self::new(n)
}
/// Maps a string to its interned representation.
#[rustc_diagnostic_item = "SymbolIntern"]
pub fn intern(string: &str) -> Self {
@ -2600,11 +2595,14 @@ struct InternerInner {
}
impl Interner {
fn prefill(init: &[&'static str]) -> Self {
Interner(Lock::new(InternerInner {
arena: Default::default(),
strings: init.iter().copied().collect(),
}))
fn prefill(init: &[&'static str], extra: &[&'static str]) -> Self {
let strings = FxIndexSet::from_iter(init.iter().copied().chain(extra.iter().copied()));
assert_eq!(
strings.len(),
init.len() + extra.len(),
"`init` or `extra` contain duplicate symbols",
);
Interner(Lock::new(InternerInner { arena: Default::default(), strings }))
}
#[inline]
@ -2729,9 +2727,9 @@ impl Symbol {
self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword()
}
/// Is this symbol was interned in compiler's `symbols!` macro
pub fn is_preinterned(self) -> bool {
self.as_u32() < PREINTERNED_SYMBOLS_COUNT
/// Was this symbol predefined in the compiler's `symbols!` macro
pub fn is_predefined(self) -> bool {
self.as_u32() < PREDEFINED_SYMBOLS_COUNT
}
}

View File

@ -3,7 +3,7 @@ use crate::create_default_session_globals_then;
#[test]
fn interner_tests() {
let i = Interner::prefill(&[]);
let i = Interner::prefill(&[], &[]);
// first one is zero:
assert_eq!(i.intern("dog"), Symbol::new(0));
// re-use gets the same entry:

View File

@ -321,6 +321,7 @@ pub(crate) fn create_config(
(rustc_interface::DEFAULT_QUERY_PROVIDERS.typeck)(tcx, def_id)
};
}),
extra_symbols: Vec::new(),
make_codegen_backend: None,
registry: rustc_driver::diagnostics_registry(),
ice_file: None,

View File

@ -191,6 +191,7 @@ pub(crate) fn run(dcx: DiagCtxtHandle<'_>, input: Input, options: RustdocOptions
hash_untracked_state: None,
register_lints: Some(Box::new(crate::lint::register_lints)),
override_queries: None,
extra_symbols: Vec::new(),
make_codegen_backend: None,
registry: rustc_driver::diagnostics_registry(),
ice_file: None,

View File

@ -25,6 +25,7 @@ path = "src/driver.rs"
[dependencies]
clippy_config = { path = "clippy_config" }
clippy_lints = { path = "clippy_lints" }
clippy_utils = { path = "clippy_utils" }
rustc_tools_util = { path = "rustc_tools_util", version = "0.4.2" }
tempfile = { version = "3.3", optional = true }
termize = "0.1"

View File

@ -1,10 +1,10 @@
use super::{Attribute, DEPRECATED_CFG_ATTR, DEPRECATED_CLIPPY_CFG_ATTR, unnecessary_clippy_cfg};
use clippy_utils::diagnostics::span_lint_and_sugg;
use clippy_utils::msrvs::{self, MsrvStack};
use clippy_utils::sym;
use rustc_ast::AttrStyle;
use rustc_errors::Applicability;
use rustc_lint::EarlyContext;
use rustc_span::sym;
pub(super) fn check(cx: &EarlyContext<'_>, attr: &Attribute, msrv: &MsrvStack) {
// check cfg_attr
@ -18,7 +18,7 @@ pub(super) fn check(cx: &EarlyContext<'_>, attr: &Attribute, msrv: &MsrvStack) {
&& msrv.meets(msrvs::TOOL_ATTRIBUTES)
// check for `rustfmt_skip` and `rustfmt::skip`
&& let Some(skip_item) = &items[1].meta_item()
&& (skip_item.has_name(sym!(rustfmt_skip))
&& (skip_item.has_name(sym::rustfmt_skip)
|| skip_item
.path
.segments

View File

@ -2,10 +2,10 @@ use super::USELESS_ATTRIBUTE;
use super::utils::{is_lint_level, is_word, namespace_and_lint};
use clippy_utils::diagnostics::span_lint_and_then;
use clippy_utils::source::{SpanRangeExt, first_line_of_span};
use clippy_utils::sym;
use rustc_ast::{Attribute, Item, ItemKind};
use rustc_errors::Applicability;
use rustc_lint::{EarlyContext, LintContext};
use rustc_span::sym;
pub(super) fn check(cx: &EarlyContext<'_>, item: &Item, attrs: &[Attribute]) {
let skip_unused_imports = attrs.iter().any(|attr| attr.has_name(sym::macro_use));
@ -61,7 +61,7 @@ pub(super) fn check(cx: &EarlyContext<'_>, item: &Item, attrs: &[Attribute]) {
if is_word(lint, sym::unused_imports) && skip_unused_imports {
return;
}
if is_word(lint, sym!(unused_extern_crates)) {
if is_word(lint, sym::unused_extern_crates) {
return;
}
},

View File

@ -38,7 +38,7 @@ pub fn check(
// of all `#[test]` attributes in not ignored code examples
fn check_code_sample(code: String, edition: Edition, ignore: bool) -> (bool, Vec<Range<usize>>) {
rustc_driver::catch_fatal_errors(|| {
rustc_span::create_session_globals_then(edition, None, || {
rustc_span::create_session_globals_then(edition, &[], None, || {
let mut test_attr_spans = vec![];
let filename = FileName::anon_source_code(&code);

View File

@ -3,6 +3,7 @@
#![feature(f128)]
#![feature(f16)]
#![feature(if_let_guard)]
#![feature(macro_metavar_expr)]
#![feature(macro_metavar_expr_concat)]
#![feature(let_chains)]
#![feature(never_type)]
@ -74,6 +75,7 @@ pub mod qualify_min_const_fn;
pub mod source;
pub mod str_utils;
pub mod sugg;
pub mod sym;
pub mod ty;
pub mod usage;
pub mod visitors;
@ -125,7 +127,7 @@ use rustc_middle::ty::{
use rustc_span::hygiene::{ExpnKind, MacroKind};
use rustc_span::source_map::SourceMap;
use rustc_span::symbol::{Ident, Symbol, kw};
use rustc_span::{InnerSpan, Span, sym};
use rustc_span::{InnerSpan, Span};
use visitors::{Visitable, for_each_unconsumed_temporary};
use crate::consts::{ConstEvalCtxt, Constant, mir_to_const};

View File

@ -0,0 +1,23 @@
#![allow(non_upper_case_globals)]
use rustc_span::symbol::{Symbol, PREDEFINED_SYMBOLS_COUNT};
pub use rustc_span::sym::*;
// Expands a comma-terminated list of identifiers into two parallel items:
// a string table (`EXTRA_SYMBOLS`) holding each identifier's name, and one
// `Symbol` constant per identifier. Both are produced from the same list, in
// the order the identifiers are written.
macro_rules! generate {
($($sym:ident,)*) => {
/// To be supplied to `rustc_interface::Config`
pub const EXTRA_SYMBOLS: &[&str] = &[
$(stringify!($sym),)*
];
$(
// `${index()}` is the zero-based position of `$sym` in the repetition, so each
// constant's index is `PREDEFINED_SYMBOLS_COUNT` plus the position of its name
// in `EXTRA_SYMBOLS`.
pub const $sym: Symbol = Symbol::new(PREDEFINED_SYMBOLS_COUNT + ${index()});
)*
};
}
// The extra symbols defined by this module; each entry becomes both an
// `EXTRA_SYMBOLS` string and a `Symbol` constant of the same name.
// NOTE(review): entries are presumably expected not to duplicate a symbol that
// rustc_span already predefines — confirm before adding new names.
generate! {
rustfmt_skip,
unused_extern_crates,
}

View File

@ -160,6 +160,7 @@ impl rustc_driver::Callbacks for ClippyCallbacks {
clippy_lints::register_lints(lint_store, conf);
clippy_lints::register_pre_expansion_lints(lint_store, conf);
}));
config.extra_symbols = clippy_utils::sym::EXTRA_SYMBOLS.into();
// FIXME: #4825; This is required, because Clippy lints that are based on MIR have to be
// run on the unoptimized MIR. On the other hand this results in some false negatives. If

View File

@ -70,6 +70,7 @@ fn compile(code: String, output: PathBuf, sysroot: PathBuf, linker: Option<&Path
hash_untracked_state: None,
register_lints: None,
override_queries: None,
extra_symbols: Vec::new(),
make_codegen_backend: None,
registry: rustc_driver::diagnostics_registry(),
using_internal_features: &rustc_driver::USING_INTERNAL_FEATURES,