Auto merge of #100803 - klensy:do-not-encode-preinterned-symbols, r=bjorn3

Symbols: do not write string values of preinterned symbols into compiled artifacts

r? `@bjorn3`

Followup for #98851

https://github.com/rust-lang/rust/pull/98851#issuecomment-1215606291
This commit is contained in:
bors 2022-08-24 16:56:32 +00:00
commit ebfc7aa531
6 changed files with 57 additions and 23 deletions

View File

@ -195,10 +195,10 @@ fn symbols_with_errors(input: TokenStream) -> (TokenStream, Vec<syn::Error>) {
#n,
});
}
let _ = counter; // for future use
let output = quote! {
const SYMBOL_DIGITS_BASE: u32 = #digits_base;
const PREINTERNED_SYMBOLS_COUNT: u32 = #counter;
#[doc(hidden)]
#[allow(non_upper_case_globals)]

View File

@ -631,6 +631,10 @@ impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for Symbol {
sym
}
SYMBOL_PREINTERNED => {
let symbol_index = d.read_u32();
Symbol::new_from_decoded(symbol_index)
}
_ => unreachable!(),
}
}

View File

@ -317,17 +317,24 @@ impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Span {
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for Symbol {
fn encode(&self, s: &mut EncodeContext<'a, 'tcx>) {
match s.symbol_table.entry(*self) {
Entry::Vacant(o) => {
s.opaque.emit_u8(SYMBOL_STR);
let pos = s.opaque.position();
o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
// if symbol preinterned, emit tag and symbol index
if self.is_preinterned() {
s.opaque.emit_u8(SYMBOL_PREINTERNED);
s.opaque.emit_u32(self.as_u32());
} else {
// otherwise write it as string or as offset to it
match s.symbol_table.entry(*self) {
Entry::Vacant(o) => {
s.opaque.emit_u8(SYMBOL_STR);
let pos = s.opaque.position();
o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
}
}
}
}

View File

@ -448,6 +448,7 @@ const TAG_PARTIAL_SPAN: u8 = 2;
// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
pub fn provide(providers: &mut Providers) {
encoder::provide(providers);

View File

@ -42,6 +42,7 @@ const TAG_EXPN_DATA: u8 = 1;
// Tags for encoding Symbol's
const SYMBOL_STR: u8 = 0;
const SYMBOL_OFFSET: u8 = 1;
const SYMBOL_PREINTERNED: u8 = 2;
/// Provides an interface to incremental compilation data cached from the
/// previous compilation session. This data will eventually include the results
@ -745,6 +746,10 @@ impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Symbol {
sym
}
SYMBOL_PREINTERNED => {
let symbol_index = d.read_u32();
Symbol::new_from_decoded(symbol_index)
}
_ => unreachable!(),
}
}
@ -939,17 +944,24 @@ impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Span {
// copy&paste impl from rustc_metadata
impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx>> for Symbol {
fn encode(&self, s: &mut CacheEncoder<'a, 'tcx>) {
match s.symbol_table.entry(*self) {
Entry::Vacant(o) => {
s.encoder.emit_u8(SYMBOL_STR);
let pos = s.encoder.position();
o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
// if symbol preinterned, emit tag and symbol index
if self.is_preinterned() {
s.encoder.emit_u8(SYMBOL_PREINTERNED);
s.encoder.emit_u32(self.as_u32());
} else {
// otherwise write it as string or as offset to it
match s.symbol_table.entry(*self) {
Entry::Vacant(o) => {
s.encoder.emit_u8(SYMBOL_STR);
let pos = s.encoder.position();
o.insert(pos);
s.emit_str(self.as_str());
}
Entry::Occupied(o) => {
let x = o.get().clone();
s.emit_u8(SYMBOL_OFFSET);
s.emit_usize(x);
}
}
}
}

View File

@ -1804,6 +1804,11 @@ impl Symbol {
Symbol(SymbolIndex::from_u32(n))
}
/// Builds a `Symbol` from the `u32` value `n` by forwarding it to `Self::new`.
///
/// For use in `Decoder` implementations only: the `Decodable` impls shown in
/// this change call it with the symbol index they read for the
/// `SYMBOL_PREINTERNED` tag.
pub fn new_from_decoded(n: u32) -> Self {
Self::new(n)
}
/// Maps a string to its interned representation.
pub fn intern(string: &str) -> Self {
with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
@ -2028,6 +2033,11 @@ impl Symbol {
pub fn can_be_raw(self) -> bool {
self != kw::Empty && self != kw::Underscore && !self.is_path_segment_keyword()
}
/// Returns `true` if this symbol was interned by the compiler's `symbols!` macro,
/// i.e. if its `u32` index is strictly less than `PREINTERNED_SYMBOLS_COUNT`.
pub fn is_preinterned(self) -> bool {
self.as_u32() < PREINTERNED_SYMBOLS_COUNT
}
}
impl Ident {