mirror of
https://github.com/rust-lang/rust.git
synced 2025-06-05 11:48:30 +00:00
Auto merge of #88978 - bjorn3:move_symbol_interner_lock, r=Mark-Simulacrum
Move the Lock into symbol::Interner. This makes it easier to make the symbol interner (near) lock-free in case of concurrent accesses in the future. With https://github.com/rust-lang/rust/pull/87867 landed, this shouldn't affect performance anymore.
This commit is contained in:
commit
6c33a0a2ec
@ -78,7 +78,7 @@ mod tests;
|
|||||||
// threads within the compilation session, but is not accessible outside the
|
// threads within the compilation session, but is not accessible outside the
|
||||||
// session.
|
// session.
|
||||||
pub struct SessionGlobals {
|
pub struct SessionGlobals {
|
||||||
symbol_interner: Lock<symbol::Interner>,
|
symbol_interner: symbol::Interner,
|
||||||
span_interner: Lock<span_encoding::SpanInterner>,
|
span_interner: Lock<span_encoding::SpanInterner>,
|
||||||
hygiene_data: Lock<hygiene::HygieneData>,
|
hygiene_data: Lock<hygiene::HygieneData>,
|
||||||
source_map: Lock<Option<Lrc<SourceMap>>>,
|
source_map: Lock<Option<Lrc<SourceMap>>>,
|
||||||
@ -87,7 +87,7 @@ pub struct SessionGlobals {
|
|||||||
impl SessionGlobals {
|
impl SessionGlobals {
|
||||||
pub fn new(edition: Edition) -> SessionGlobals {
|
pub fn new(edition: Edition) -> SessionGlobals {
|
||||||
SessionGlobals {
|
SessionGlobals {
|
||||||
symbol_interner: Lock::new(symbol::Interner::fresh()),
|
symbol_interner: symbol::Interner::fresh(),
|
||||||
span_interner: Lock::new(span_encoding::SpanInterner::default()),
|
span_interner: Lock::new(span_encoding::SpanInterner::default()),
|
||||||
hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
|
hygiene_data: Lock::new(hygiene::HygieneData::new(edition)),
|
||||||
source_map: Lock::new(None),
|
source_map: Lock::new(None),
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
use rustc_arena::DroplessArena;
|
use rustc_arena::DroplessArena;
|
||||||
use rustc_data_structures::fx::FxHashMap;
|
use rustc_data_structures::fx::FxHashMap;
|
||||||
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
|
use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
|
||||||
|
use rustc_data_structures::sync::Lock;
|
||||||
use rustc_macros::HashStable_Generic;
|
use rustc_macros::HashStable_Generic;
|
||||||
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
||||||
|
|
||||||
@ -1623,14 +1624,15 @@ impl Symbol {
|
|||||||
|
|
||||||
/// Maps a string to its interned representation.
|
/// Maps a string to its interned representation.
|
||||||
pub fn intern(string: &str) -> Self {
|
pub fn intern(string: &str) -> Self {
|
||||||
with_interner(|interner| interner.intern(string))
|
with_session_globals(|session_globals| session_globals.symbol_interner.intern(string))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convert to a `SymbolStr`. This is a slowish operation because it
|
/// Convert to a `SymbolStr`. This is a slowish operation because it
|
||||||
/// requires locking the symbol interner.
|
/// requires locking the symbol interner.
|
||||||
pub fn as_str(self) -> SymbolStr {
|
pub fn as_str(self) -> SymbolStr {
|
||||||
with_interner(|interner| unsafe {
|
with_session_globals(|session_globals| {
|
||||||
SymbolStr { string: std::mem::transmute::<&str, &str>(interner.get(self)) }
|
let symbol_str = session_globals.symbol_interner.get(self);
|
||||||
|
unsafe { SymbolStr { string: std::mem::transmute::<&str, &str>(symbol_str) } }
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1639,7 +1641,7 @@ impl Symbol {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn len(self) -> usize {
|
pub fn len(self) -> usize {
|
||||||
with_interner(|interner| interner.get(self).len())
|
with_session_globals(|session_globals| session_globals.symbol_interner.get(self).len())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn is_empty(self) -> bool {
|
pub fn is_empty(self) -> bool {
|
||||||
@ -1696,6 +1698,9 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub(crate) struct Interner(Lock<InternerInner>);
|
||||||
|
|
||||||
// The `&'static str`s in this type actually point into the arena.
|
// The `&'static str`s in this type actually point into the arena.
|
||||||
//
|
//
|
||||||
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
|
// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
|
||||||
@ -1705,7 +1710,7 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
|
|||||||
// This type is private to prevent accidentally constructing more than one `Interner` on the same
|
// This type is private to prevent accidentally constructing more than one `Interner` on the same
|
||||||
// thread, which makes it easy to mixup `Symbol`s between `Interner`s.
|
// thread, which makes it easy to mixup `Symbol`s between `Interner`s.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub(crate) struct Interner {
|
struct InternerInner {
|
||||||
arena: DroplessArena,
|
arena: DroplessArena,
|
||||||
names: FxHashMap<&'static str, Symbol>,
|
names: FxHashMap<&'static str, Symbol>,
|
||||||
strings: Vec<&'static str>,
|
strings: Vec<&'static str>,
|
||||||
@ -1713,37 +1718,38 @@ pub(crate) struct Interner {
|
|||||||
|
|
||||||
impl Interner {
|
impl Interner {
|
||||||
fn prefill(init: &[&'static str]) -> Self {
|
fn prefill(init: &[&'static str]) -> Self {
|
||||||
Interner {
|
Interner(Lock::new(InternerInner {
|
||||||
strings: init.into(),
|
strings: init.into(),
|
||||||
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
|
names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn intern(&mut self, string: &str) -> Symbol {
|
fn intern(&self, string: &str) -> Symbol {
|
||||||
if let Some(&name) = self.names.get(string) {
|
let mut inner = self.0.lock();
|
||||||
|
if let Some(&name) = inner.names.get(string) {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
let name = Symbol::new(self.strings.len() as u32);
|
let name = Symbol::new(inner.strings.len() as u32);
|
||||||
|
|
||||||
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
|
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
|
||||||
// UTF-8.
|
// UTF-8.
|
||||||
let string: &str =
|
let string: &str =
|
||||||
unsafe { str::from_utf8_unchecked(self.arena.alloc_slice(string.as_bytes())) };
|
unsafe { str::from_utf8_unchecked(inner.arena.alloc_slice(string.as_bytes())) };
|
||||||
// It is safe to extend the arena allocation to `'static` because we only access
|
// It is safe to extend the arena allocation to `'static` because we only access
|
||||||
// these while the arena is still alive.
|
// these while the arena is still alive.
|
||||||
let string: &'static str = unsafe { &*(string as *const str) };
|
let string: &'static str = unsafe { &*(string as *const str) };
|
||||||
self.strings.push(string);
|
inner.strings.push(string);
|
||||||
self.names.insert(string, name);
|
inner.names.insert(string, name);
|
||||||
name
|
name
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the symbol as a string. `Symbol::as_str()` should be used in
|
// Get the symbol as a string. `Symbol::as_str()` should be used in
|
||||||
// preference to this function.
|
// preference to this function.
|
||||||
pub fn get(&self, symbol: Symbol) -> &str {
|
fn get(&self, symbol: Symbol) -> &str {
|
||||||
self.strings[symbol.0.as_usize()]
|
self.0.lock().strings[symbol.0.as_usize()]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1874,11 +1880,6 @@ impl Ident {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
|
||||||
fn with_interner<T, F: FnOnce(&mut Interner) -> T>(f: F) -> T {
|
|
||||||
with_session_globals(|session_globals| f(&mut *session_globals.symbol_interner.lock()))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An alternative to [`Symbol`], useful when the chars within the symbol need to
|
/// An alternative to [`Symbol`], useful when the chars within the symbol need to
|
||||||
/// be accessed. It deliberately has limited functionality and should only be
|
/// be accessed. It deliberately has limited functionality and should only be
|
||||||
/// used for temporary values.
|
/// used for temporary values.
|
||||||
|
@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn interner_tests() {
|
fn interner_tests() {
|
||||||
let mut i: Interner = Interner::default();
|
let i = Interner::default();
|
||||||
// first one is zero:
|
// first one is zero:
|
||||||
assert_eq!(i.intern("dog"), Symbol::new(0));
|
assert_eq!(i.intern("dog"), Symbol::new(0));
|
||||||
// re-use gets the same entry:
|
// re-use gets the same entry:
|
||||||
|
Loading…
Reference in New Issue
Block a user