From 0016356f2b8b9c5e5dc99204175957b30470fb7f Mon Sep 17 00:00:00 2001 From: klensy Date: Sat, 20 Aug 2022 15:13:41 +0300 Subject: [PATCH 1/2] symbols: add `is_preinterned` fn to check if symbol was preinterned in compiler --- compiler/rustc_macros/src/symbols.rs | 2 +- compiler/rustc_span/src/symbol.rs | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/compiler/rustc_macros/src/symbols.rs b/compiler/rustc_macros/src/symbols.rs index 1b245f2a750..92590c33b9d 100644 --- a/compiler/rustc_macros/src/symbols.rs +++ b/compiler/rustc_macros/src/symbols.rs @@ -195,10 +195,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) { #n, }); } - let _ = counter; // for future use let output = quote! { const SYMBOL_DIGITS_BASE: u32 = #digits_base; + const PREINTERNED_SYMBOLS_COUNT: u32 = #counter; #[doc(hidden)] #[allow(non_upper_case_globals)] diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 156f53ac486..70e78682c65 100644 --- a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -2027,6 +2027,11 @@ impl Symbol { pub fn can_be_raw(self) -> bool { self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword() } + + /// Was this symbol interned in the compiler's `symbols!` macro? + pub fn is_preinterned(self) -> bool { + self.as_u32() < PREINTERNED_SYMBOLS_COUNT + } } impl Ident { From f6329485a83c1d241635e0dedbf62929e193b10a Mon Sep 17 00:00:00 2001 From: klensy Date: Sat, 20 Aug 2022 15:39:21 +0300 Subject: [PATCH 2/2] rmeta/query cache: don't write string values of preinterned symbols --- compiler/rustc_metadata/src/rmeta/decoder.rs | 4 +++ compiler/rustc_metadata/src/rmeta/encoder.rs | 29 ++++++++++------ compiler/rustc_metadata/src/rmeta/mod.rs | 1 + .../rustc_query_impl/src/on_disk_cache.rs | 34 +++++++++++++------ compiler/rustc_span/src/symbol.rs | 5 +++++ 5 files changed, 51 insertions(+), 22 deletions(-) diff --git 
a/compiler/rustc_metadata/src/rmeta/decoder.rs b/compiler/rustc_metadata/src/rmeta/decoder.rs index 889001d0a84..d0e0aa91480 100644 --- a/compiler/rustc_metadata/src/rmeta/decoder.rs +++ b/compiler/rustc_metadata/src/rmeta/decoder.rs @@ -631,6 +631,10 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol { sym } + SYMBOL_PREINTERNED => { + let symbol_index = d.read_u32(); + Symbol::new_from_decoded(symbol_index) + } _ => unreachable!(), } } diff --git a/compiler/rustc_metadata/src/rmeta/encoder.rs b/compiler/rustc_metadata/src/rmeta/encoder.rs index 0d3a3efb0d3..cd5da40150d 100644 --- a/compiler/rustc_metadata/src/rmeta/encoder.rs +++ b/compiler/rustc_metadata/src/rmeta/encoder.rs @@ -317,17 +317,24 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span { impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol { fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) { - match s.symbol_table.entry(*self) { - Entry::Vacant(o) => { - s.opaque.emit_u8(SYMBOL_STR); - let pos = s.opaque.position(); - o.insert(pos); - s.emit_str(self.as_str()); - } - Entry::Occupied(o) => { - let x = o.get().clone(); - s.emit_u8(SYMBOL_OFFSET); - s.emit_usize(x); + // if symbol preinterned, emit tag and symbol index + if self.is_preinterned() { + s.opaque.emit_u8(SYMBOL_PREINTERNED); + s.opaque.emit_u32(self.as_u32()); + } else { + // otherwise write it as string or as offset to it + match s.symbol_table.entry(*self) { + Entry::Vacant(o) => { + s.opaque.emit_u8(SYMBOL_STR); + let pos = s.opaque.position(); + o.insert(pos); + s.emit_str(self.as_str()); + } + Entry::Occupied(o) => { + let x = o.get().clone(); + s.emit_u8(SYMBOL_OFFSET); + s.emit_usize(x); + } } } } diff --git a/compiler/rustc_metadata/src/rmeta/mod.rs b/compiler/rustc_metadata/src/rmeta/mod.rs index 91d744879fd..e6cceaf29d5 100644 --- a/compiler/rustc_metadata/src/rmeta/mod.rs +++ b/compiler/rustc_metadata/src/rmeta/mod.rs @@ -448,6 +448,7 @@ const TAG_PARTIAL_SPAN: u8 = 2; // Tags for encoding Symbol's const SYMBOL_STR: u8 = 0; const SYMBOL_OFFSET: u8 = 1; +const 
SYMBOL_PREINTERNED: u8 = 2; pub fn provide(providers: &mut Providers) { encoder::provide(providers); diff --git a/compiler/rustc_query_impl/src/on_disk_cache.rs b/compiler/rustc_query_impl/src/on_disk_cache.rs index 6711dd3d5c5..5ef95911f56 100644 --- a/compiler/rustc_query_impl/src/on_disk_cache.rs +++ b/compiler/rustc_query_impl/src/on_disk_cache.rs @@ -42,6 +42,7 @@ const TAG_EXPN_DATA: u8 = 1; // Tags for encoding Symbol's const SYMBOL_STR: u8 = 0; const SYMBOL_OFFSET: u8 = 1; +const SYMBOL_PREINTERNED: u8 = 2; /// Provides an interface to incremental compilation data cached from the /// previous compilation session. This data will eventually include the results @@ -745,6 +746,10 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol { sym } + SYMBOL_PREINTERNED => { + let symbol_index = d.read_u32(); + Symbol::new_from_decoded(symbol_index) + } _ => unreachable!(), } } @@ -939,17 +944,24 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span { // copy&paste impl from rustc_metadata impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol { fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) { - match s.symbol_table.entry(*self) { - Entry::Vacant(o) => { - s.encoder.emit_u8(SYMBOL_STR); - let pos = s.encoder.position(); - o.insert(pos); - s.emit_str(self.as_str()); - } - Entry::Occupied(o) => { - let x = o.get().clone(); - s.emit_u8(SYMBOL_OFFSET); - s.emit_usize(x); + // if symbol preinterned, emit tag and symbol index + if self.is_preinterned() { + s.encoder.emit_u8(SYMBOL_PREINTERNED); + s.encoder.emit_u32(self.as_u32()); + } else { + // otherwise write it as string or as offset to it + match s.symbol_table.entry(*self) { + Entry::Vacant(o) => { + s.encoder.emit_u8(SYMBOL_STR); + let pos = s.encoder.position(); + o.insert(pos); + s.emit_str(self.as_str()); + } + Entry::Occupied(o) => { + let x = o.get().clone(); + s.emit_u8(SYMBOL_OFFSET); + s.emit_usize(x); + } } } } diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs index 70e78682c65..ac166e09843 100644 --- 
a/compiler/rustc_span/src/symbol.rs +++ b/compiler/rustc_span/src/symbol.rs @@ -1803,6 +1803,11 @@ impl Symbol { Symbol(SymbolIndex::from_u32(n)) } + /// for use in Decoder only + pub fn new_from_decoded(n: u32) -> Self { + Self::new(n) + } + /// Maps a string to its interned representation. pub fn intern(string: &str) -> Self { with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))