From 94a36d2ce53f9991a529f4d8c3621f73e719de23 Mon Sep 17 00:00:00 2001
From: Nicholas Nethercote <n.nethercote@gmail.com>
Date: Thu, 2 Nov 2023 17:02:36 +1100
Subject: [PATCH] Use `FxIndexSet` in the symbol interner.

It makes the code a little nicer.

As part of this, the interner's `Default` impl is removed and `prefill`
is used in a test instead.
---
 compiler/rustc_span/src/symbol.rs       | 31 +++++++++----------------
 compiler/rustc_span/src/symbol/tests.rs |  2 +-
 2 files changed, 12 insertions(+), 21 deletions(-)

diff --git a/compiler/rustc_span/src/symbol.rs b/compiler/rustc_span/src/symbol.rs
index 3f99d2a4b1f..923ee630d95 100644
--- a/compiler/rustc_span/src/symbol.rs
+++ b/compiler/rustc_span/src/symbol.rs
@@ -3,7 +3,7 @@
 //! type, and vice versa.
 
 use rustc_arena::DroplessArena;
-use rustc_data_structures::fx::FxHashMap;
+use rustc_data_structures::fx::FxIndexSet;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher, ToStableHashKey};
 use rustc_data_structures::sync::Lock;
 use rustc_macros::HashStable_Generic;
@@ -2076,43 +2076,33 @@ impl<CTX> ToStableHashKey<CTX> for Symbol {
     }
 }
 
-#[derive(Default)]
 pub(crate) struct Interner(Lock<InternerInner>);
 
 // The `&'static str`s in this type actually point into the arena.
 //
-// The `FxHashMap`+`Vec` pair could be replaced by `FxIndexSet`, but #75278
-// found that to regress performance up to 2% in some cases. This might be
-// revisited after further improvements to `indexmap`.
-//
 // This type is private to prevent accidentally constructing more than one
 // `Interner` on the same thread, which makes it easy to mix up `Symbol`s
 // between `Interner`s.
-#[derive(Default)]
 struct InternerInner {
     arena: DroplessArena,
-    names: FxHashMap<&'static str, Symbol>,
-    strings: Vec<&'static str>,
+    strings: FxIndexSet<&'static str>,
 }
 
 impl Interner {
     fn prefill(init: &[&'static str]) -> Self {
         Interner(Lock::new(InternerInner {
-            strings: init.into(),
-            names: init.iter().copied().zip((0..).map(Symbol::new)).collect(),
-            ..Default::default()
+            arena: Default::default(),
+            strings: init.iter().copied().collect(),
         }))
     }
 
     #[inline]
     fn intern(&self, string: &str) -> Symbol {
         let mut inner = self.0.lock();
-        if let Some(&name) = inner.names.get(string) {
-            return name;
+        if let Some(idx) = inner.strings.get_index_of(string) {
+            return Symbol::new(idx as u32);
         }
 
-        let name = Symbol::new(inner.strings.len() as u32);
-
         // SAFETY: we convert from `&str` to `&[u8]`, clone it into the arena,
         // and immediately convert the clone back to `&[u8]`, all because there
         // is no `inner.arena.alloc_str()` method. This is clearly safe.
@@ -2122,20 +2112,21 @@ impl Interner {
         // SAFETY: we can extend the arena allocation to `'static` because we
         // only access these while the arena is still alive.
         let string: &'static str = unsafe { &*(string as *const str) };
-        inner.strings.push(string);
 
         // This second hash table lookup can be avoided by using `RawEntryMut`,
         // but this code path isn't hot enough for it to be worth it. See
         // #91445 for details.
-        inner.names.insert(string, name);
-        name
+        let (idx, is_new) = inner.strings.insert_full(string);
+        debug_assert!(is_new); // due to the get_index_of check above
+
+        Symbol::new(idx as u32)
     }
 
     /// Get the symbol as a string.
     ///
     /// [`Symbol::as_str()`] should be used in preference to this function.
     fn get(&self, symbol: Symbol) -> &str {
-        self.0.lock().strings[symbol.0.as_usize()]
+        self.0.lock().strings.get_index(symbol.0.as_usize()).unwrap()
     }
 }
 
diff --git a/compiler/rustc_span/src/symbol/tests.rs b/compiler/rustc_span/src/symbol/tests.rs
index 0958fce5fee..4366c5a2c26 100644
--- a/compiler/rustc_span/src/symbol/tests.rs
+++ b/compiler/rustc_span/src/symbol/tests.rs
@@ -4,7 +4,7 @@ use crate::create_default_session_globals_then;
 
 #[test]
 fn interner_tests() {
-    let i = Interner::default();
+    let i = Interner::prefill(&[]);
     // first one is zero:
     assert_eq!(i.intern("dog"), Symbol::new(0));
     // re-use gets the same entry: