Store DefPathHash->DefIndex map in on-disk-hash-table format in crate metadata.

This encoding allows for random access without an expensive upfront decoding
step, which in turn allows simplifying the DefPathHash -> DefIndex lookup logic
without regressing performance.
This commit is contained in:
Michael Woerister 2021-07-20 13:59:12 +02:00
parent d0be27c8ec
commit 960893c50a
9 changed files with 132 additions and 145 deletions

View File

@ -45,7 +45,7 @@ pub struct CStore {
/// This map is used to verify we get no hash conflicts between
/// `StableCrateId` values.
stable_crate_ids: FxHashMap<StableCrateId, CrateNum>,
pub(crate) stable_crate_ids: FxHashMap<StableCrateId, CrateNum>,
/// Unused externs of the crate
unused_externs: Vec<Symbol>,

View File

@ -95,10 +95,8 @@ crate struct CrateMetadata {
raw_proc_macros: Option<&'static [ProcMacro]>,
/// Source maps for code from the crate.
source_map_import_info: OnceCell<Vec<ImportedSourceFile>>,
/// For every definition in this crate, maps its `DefPathHash` to its
/// `DefIndex`. See `raw_def_id_to_def_id` for more details about how
/// this is used.
def_path_hash_map: OnceCell<UnhashMap<DefPathHash, DefIndex>>,
/// For every definition in this crate, maps its `DefPathHash` to its `DefIndex`.
def_path_hash_map: DefPathHashMap<'static>,
/// Likewise for ExpnHash.
expn_hash_map: OnceCell<UnhashMap<ExpnHash, ExpnIndex>>,
/// Used for decoding interpret::AllocIds in a cached & thread-safe manner.
@ -320,6 +318,11 @@ impl<'a, 'tcx> DecodeContext<'a, 'tcx> {
self.lazy_state = LazyState::Previous(NonZeroUsize::new(position + min_size).unwrap());
Ok(Lazy::from_position_and_meta(NonZeroUsize::new(position).unwrap(), meta))
}
/// Reads `len` raw bytes by delegating to the underlying opaque decoder and
/// returns them as a slice borrowed for `'a`.
#[inline]
pub fn read_raw_bytes(&mut self, len: usize) -> &'a [u8] {
self.opaque.read_raw_bytes(len)
}
}
impl<'a, 'tcx> TyDecoder<'tcx> for DecodeContext<'a, 'tcx> {
@ -1596,58 +1599,6 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
.or_insert_with(|| self.root.tables.def_keys.get(self, index).unwrap().decode(self))
}
/// Finds the corresponding `DefId` for the provided `DefPathHash`, if it exists.
/// This is used by incremental compilation to map a serialized `DefPathHash` to
/// its `DefId` in the current session.
/// Normally, only one 'main' crate will change between incremental compilation sessions:
/// all dependencies will be completely unchanged. In this case, we can avoid
/// decoding every `DefPathHash` in the crate, since the `DefIndex` from the previous
/// session will still be valid. If our 'guess' is wrong (the `DefIndex` no longer exists,
/// or has a different `DefPathHash`), then we need to decode all `DefPathHash`es to
/// determine the correct mapping.
///
/// Returns `None` when the fully decoded map has no entry for `hash`.
/// Panics if a `def_path_hashes` entry is missing while building that map and
/// this is not a proc-macro crate.
fn def_path_hash_to_def_id(
&self,
krate: CrateNum,
index_guess: u32,
hash: DefPathHash,
) -> Option<DefId> {
let def_index_guess = DefIndex::from_u32(index_guess);
let old_hash = self
.root
.tables
.def_path_hashes
.get(self, def_index_guess)
.map(|lazy| lazy.decode(self));
// Fast path: the definition and its index are unchanged from the
// previous compilation session. There is no need to decode anything
// else.
if old_hash == Some(hash) {
return Some(DefId { krate, index: def_index_guess });
}
let is_proc_macro = self.is_proc_macro_crate();
// Slow path: We need to find out the new `DefIndex` of the provided
// `DefPathHash`, if it still exists. This requires decoding every `DefPathHash`
// stored in this crate.
let map = self.cdata.def_path_hash_map.get_or_init(|| {
let end_id = self.root.tables.def_path_hashes.size() as u32;
let mut map = UnhashMap::with_capacity_and_hasher(end_id as usize, Default::default());
for i in 0..end_id {
let def_index = DefIndex::from_u32(i);
// There may be gaps in the encoded table if we're decoding a proc-macro crate
if let Some(hash) = self.root.tables.def_path_hashes.get(self, def_index) {
map.insert(hash.decode(self), def_index);
} else if !is_proc_macro {
panic!("Missing def_path_hashes entry for {:?}", def_index);
}
}
map
});
map.get(&hash).map(|index| DefId { krate, index: *index })
}
// Returns the path leading to the thing with this `id`.
fn def_path(&self, id: DefIndex) -> DefPath {
debug!("def_path(cnum={:?}, id={:?})", self.cnum, id);
@ -1670,6 +1621,11 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
self.def_path_hash_unlocked(index, &mut def_path_hashes)
}
/// Looks up `hash` in this crate's `def_path_hash_map` and returns the
/// result of that lookup as an `Option<DefIndex>`.
#[inline]
fn def_path_hash_to_def_index(&self, hash: DefPathHash) -> Option<DefIndex> {
self.def_path_hash_map.def_path_hash_to_def_index(&hash)
}
fn expn_hash_to_expn_id(&self, index_guess: u32, hash: ExpnHash) -> ExpnId {
debug_assert_eq!(ExpnId::from_hash(hash), None);
let index_guess = ExpnIndex::from_u32(index_guess);
@ -1936,13 +1892,18 @@ impl CrateMetadata {
let alloc_decoding_state =
AllocDecodingState::new(root.interpret_alloc_index.decode(&blob).collect());
let dependencies = Lock::new(cnum_map.iter().cloned().collect());
// Pre-decode the DefPathHash->DefIndex table. This is a cheap operation
// that does not copy any data. It just does some data verification.
let def_path_hash_map = root.def_path_hash_map.decode(&blob);
CrateMetadata {
blob,
root,
trait_impls,
raw_proc_macros,
source_map_import_info: OnceCell::new(),
def_path_hash_map: Default::default(),
def_path_hash_map,
expn_hash_map: Default::default(),
alloc_decoding_state,
cnum,

View File

@ -498,6 +498,10 @@ impl CrateStore for CStore {
self.get_crate_data(cnum).root.stable_crate_id
}
/// Maps a `StableCrateId` to its `CrateNum` using `self.stable_crate_ids`.
/// The map is indexed directly, so this panics if `stable_crate_id` has no
/// entry in it.
fn stable_crate_id_to_crate_num(&self, stable_crate_id: StableCrateId) -> CrateNum {
self.stable_crate_ids[&stable_crate_id]
}
/// Returns the `DefKey` for a given `DefId`. This indicates the
/// parent `DefId` as well as some idea of what kind of data the
/// `DefId` refers to.
@ -513,14 +517,10 @@ impl CrateStore for CStore {
self.get_crate_data(def.krate).def_path_hash(def.index)
}
// See `CrateMetadataRef::def_path_hash_to_def_id` for more details
fn def_path_hash_to_def_id(
&self,
cnum: CrateNum,
index_guess: u32,
hash: DefPathHash,
) -> Option<DefId> {
self.get_crate_data(cnum).def_path_hash_to_def_id(cnum, index_guess, hash)
fn def_path_hash_to_def_id(&self, cnum: CrateNum, hash: DefPathHash) -> Option<DefId> {
self.get_crate_data(cnum)
.def_path_hash_to_def_index(hash)
.map(|index| DefId { krate: cnum, index })
}
fn expn_hash_to_expn_id(&self, cnum: CrateNum, index_guess: u32, hash: ExpnHash) -> ExpnId {

View File

@ -0,0 +1,60 @@
use crate::rmeta::DecodeContext;
use crate::rmeta::EncodeContext;
use crate::rmeta::MetadataBlob;
use rustc_data_structures::owning_ref::OwningRef;
use rustc_hir::def_path_hash_map::{
Config as HashMapConfig, DefPathHashMap as DefPathHashMapInner,
};
use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder};
use rustc_span::def_id::{DefIndex, DefPathHash};
/// A `DefPathHash` -> `DefIndex` table in one of two forms, depending on
/// whether it is being written to or read from crate metadata.
crate enum DefPathHashMap<'tcx> {
/// Hash table whose bytes are held through an `OwningRef` into a
/// `MetadataBlob`. This is the form produced by decoding; encoding it panics.
OwnedFromMetadata(odht::HashTable<HashMapConfig, OwningRef<MetadataBlob, [u8]>>),
/// Borrowed reference to an existing table. This is the form that gets
/// encoded; performing a lookup on it panics.
BorrowedFromTcx(&'tcx DefPathHashMapInner),
}
impl DefPathHashMap<'tcx> {
#[inline]
pub fn def_path_hash_to_def_index(&self, def_path_hash: &DefPathHash) -> Option<DefIndex> {
match *self {
DefPathHashMap::OwnedFromMetadata(ref map) => map.get(def_path_hash),
DefPathHashMap::BorrowedFromTcx(_) => {
panic!("DefPathHashMap::BorrowedFromTcx variant only exists for serialization")
}
}
}
}
impl<'a, 'tcx> Encodable<EncodeContext<'a, 'tcx>> for DefPathHashMap<'tcx> {
    /// Writes the borrowed table as a `usize` length prefix followed by the
    /// table's raw bytes.
    ///
    /// Panics for `OwnedFromMetadata`, which is never meant to be re-encoded.
    fn encode(&self, e: &mut EncodeContext<'a, 'tcx>) -> opaque::EncodeResult {
        match self {
            DefPathHashMap::OwnedFromMetadata(_) => {
                panic!("DefPathHashMap::OwnedFromMetadata variant only exists for deserialization")
            }
            DefPathHashMap::BorrowedFromTcx(table) => {
                let raw = table.raw_bytes();
                // Length first, so the decoder knows how many bytes to slice out.
                e.emit_usize(raw.len())?;
                e.emit_raw_bytes(raw)
            }
        }
    }
}
impl<'a, 'tcx> Decodable<DecodeContext<'a, 'tcx>> for DefPathHashMap<'static> {
    /// Reads the `usize` length prefix, then builds a hash table over that many
    /// bytes of the metadata blob starting at the current position.
    ///
    /// Returns `Err` with the stringified error if the hash table rejects the
    /// bytes.
    fn decode(d: &mut DecodeContext<'a, 'tcx>) -> Result<DefPathHashMap<'static>, String> {
        // `DecodeContext::position()` comes from the `TyDecoder` trait, which
        // therefore has to be in scope here.
        use crate::rustc_middle::ty::codec::TyDecoder;

        let byte_count = d.read_usize()?;
        let start = d.position();
        let end = start + byte_count;
        let table_bytes = OwningRef::new(d.blob().clone()).map(|blob| &blob[start..end]);

        // The table data is already reachable through `table_bytes`, but the
        // decoder still has to be moved past it so that it is left in a valid
        // state; reading (and discarding) the raw bytes does exactly that.
        let _ = d.read_raw_bytes(byte_count);

        odht::HashTable::from_raw_bytes(table_bytes)
            .map(DefPathHashMap::OwnedFromMetadata)
            .map_err(|err| err.to_string())
    }
}

View File

@ -1,3 +1,4 @@
use crate::rmeta::def_path_hash_map::DefPathHashMap;
use crate::rmeta::table::{FixedSizeEncoding, TableBuilder};
use crate::rmeta::*;
@ -472,6 +473,12 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
}
}
/// Encodes the `DefPathHash` -> `DefIndex` table taken from the `tcx`'s
/// definitions, wrapped in the `BorrowedFromTcx` variant, and returns its
/// `Lazy` handle.
fn encode_def_path_hash_map(&mut self) -> Lazy<DefPathHashMap<'tcx>> {
    let table = self.tcx.resolutions(()).definitions.def_path_hash_to_def_index_map();
    self.lazy(DefPathHashMap::BorrowedFromTcx(table))
}
fn encode_source_map(&mut self) -> Lazy<[rustc_span::SourceFile]> {
let source_map = self.tcx.sess.source_map();
let all_source_files = source_map.files();
@ -675,6 +682,10 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let (syntax_contexts, expn_data, expn_hashes) = self.encode_hygiene();
let hygiene_bytes = self.position() - i;
i = self.position();
let def_path_hash_map = self.encode_def_path_hash_map();
let def_path_hash_map_bytes = self.position() - i;
// Encode source_map. This needs to be done last,
// since encoding `Span`s tells us which `SourceFiles` we actually
// need to encode.
@ -722,6 +733,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
syntax_contexts,
expn_data,
expn_hashes,
def_path_hash_map,
});
let total_bytes = self.position();
@ -744,6 +756,7 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
eprintln!(" impl bytes: {}", impl_bytes);
eprintln!(" exp. symbols bytes: {}", exported_symbols_bytes);
eprintln!(" def-path table bytes: {}", def_path_table_bytes);
eprintln!(" def-path hashes bytes: {}", def_path_hash_map_bytes);
eprintln!(" proc-macro-data-bytes: {}", proc_macro_data_bytes);
eprintln!(" mir bytes: {}", mir_bytes);
eprintln!(" item bytes: {}", item_bytes);

View File

@ -1,4 +1,5 @@
use decoder::Metadata;
use def_path_hash_map::DefPathHashMap;
use table::{Table, TableBuilder};
use rustc_ast::{self as ast, MacroDef};
@ -35,6 +36,7 @@ use encoder::EncodeContext;
use rustc_span::hygiene::SyntaxContextData;
mod decoder;
mod def_path_hash_map;
mod encoder;
mod table;
@ -231,6 +233,8 @@ crate struct CrateRoot<'tcx> {
expn_data: ExpnDataTable,
expn_hashes: ExpnHashTable,
def_path_hash_map: Lazy<DefPathHashMap<'tcx>>,
source_map: Lazy<[rustc_span::SourceFile]>,
compiler_builtins: bool,

View File

@ -199,14 +199,10 @@ pub trait CrateStore: std::fmt::Debug {
// incr. comp. uses to identify a CrateNum.
fn crate_name(&self, cnum: CrateNum) -> Symbol;
fn stable_crate_id(&self, cnum: CrateNum) -> StableCrateId;
fn stable_crate_id_to_crate_num(&self, stable_crate_id: StableCrateId) -> CrateNum;
/// Fetch a DefId from a DefPathHash for a foreign crate.
fn def_path_hash_to_def_id(
&self,
cnum: CrateNum,
index_guess: u32,
hash: DefPathHash,
) -> Option<DefId>;
fn def_path_hash_to_def_id(&self, cnum: CrateNum, hash: DefPathHash) -> Option<DefId>;
fn expn_hash_to_expn_id(&self, cnum: CrateNum, index_guess: u32, hash: ExpnHash) -> ExpnId;
// utility functions

View File

@ -1316,6 +1316,17 @@ impl<'tcx> TyCtxt<'tcx> {
}
}
/// Translates a `StableCrateId` into its `CrateNum`.
///
/// The local crate is answered directly with `LOCAL_CRATE`; every other id is
/// forwarded to the `CrateStore`, which is assumed to have loaded the crate
/// in question already.
#[inline]
pub fn stable_crate_id_to_crate_num(self, stable_crate_id: StableCrateId) -> CrateNum {
    if stable_crate_id != self.sess.local_stable_crate_id() {
        return self.untracked_resolutions.cstore.stable_crate_id_to_crate_num(stable_crate_id);
    }
    LOCAL_CRATE
}
pub fn def_path_debug_str(self, def_id: DefId) -> String {
// We are explicitly not going through queries here in order to get
// crate name and stable crate id since this code is called from debug!()

View File

@ -25,7 +25,6 @@ use rustc_span::hygiene::{
use rustc_span::source_map::{SourceMap, StableSourceFileId};
use rustc_span::CachingSourceMapView;
use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, SourceFile, Span};
use std::collections::hash_map::Entry;
use std::mem;
const TAG_FILE_FOOTER: u128 = 0xC0FFEE_C0FFEE_C0FFEE_C0FFEE_C0FFEE;
@ -414,80 +413,23 @@ impl<'sess> rustc_middle::ty::OnDiskCache<'sess> for OnDiskCache<'sess> {
}
fn def_path_hash_to_def_id(&self, tcx: TyCtxt<'tcx>, hash: DefPathHash) -> Option<DefId> {
let mut cache = self.def_path_hash_to_def_id_cache.lock();
match cache.entry(hash) {
Entry::Occupied(e) => *e.get(),
Entry::Vacant(e) => {
debug!("def_path_hash_to_def_id({:?})", hash);
// Check if the `DefPathHash` corresponds to a definition in the current
// crate
if let Some(def_id) =
tcx.definitions_untracked().local_def_path_hash_to_def_id(hash)
{
let def_id = def_id.to_def_id();
e.insert(Some(def_id));
return Some(def_id);
}
// This `raw_def_id` represents the `DefId` of this `DefPathHash` in
// the *previous* compilation session. The `DefPathHash` includes the
// owning crate, so if the corresponding definition still exists in the
// current compilation session, the crate is guaranteed to be the same
// (otherwise, we would compute a different `DefPathHash`).
let raw_def_id = self.get_raw_def_id(&hash)?;
debug!("def_path_hash_to_def_id({:?}): raw_def_id = {:?}", hash, raw_def_id);
// If the owning crate no longer exists, the corresponding definition definitely
// no longer exists.
let krate = self.try_remap_cnum(tcx, hash.stable_crate_id())?;
debug!("def_path_hash_to_def_id({:?}): krate = {:?}", hash, krate);
// If our `DefPathHash` corresponded to a definition in the local crate,
// we should have either found it in `local_def_path_hash_to_def_id`, or
// never attempted to load it in the first place. Any query result or `DepNode`
// that references a local `DefId` should depend on some HIR-related `DepNode`.
// If a local definition is removed/modified such that its old `DefPathHash`
// no longer has a corresponding definition, that HIR-related `DepNode` should
// end up red. This should prevent us from ever calling
// `tcx.def_path_hash_to_def_id`, since we'll end up recomputing any
// queries involved.
debug_assert_ne!(krate, LOCAL_CRATE);
// Try to find a definition in the current session, using the previous `DefIndex`
// as an initial guess.
let opt_def_id =
tcx.cstore_untracked().def_path_hash_to_def_id(krate, raw_def_id.index, hash);
debug!("def_path_to_def_id({:?}): opt_def_id = {:?}", hash, opt_def_id);
e.insert(opt_def_id);
opt_def_id
}
debug!("def_path_hash_to_def_id({:?})", hash);
let stable_crate_id = hash.stable_crate_id();
// If this is a DefPathHash from the local crate, we can look up the
// DefId in the tcx's `Definitions`.
if stable_crate_id == tcx.sess.local_stable_crate_id() {
tcx.definitions_untracked()
.local_def_path_hash_to_def_id(hash)
.map(LocalDefId::to_def_id)
} else {
// If this is a DefPathHash from an upstream crate, let the CrateStore map
// it to a DefId.
let cnum = tcx.cstore_untracked().stable_crate_id_to_crate_num(stable_crate_id);
tcx.cstore_untracked().def_path_hash_to_def_id(cnum, hash)
}
}
/// Records the `DefPathHash` mapping for a reused dep node, so the node's
/// query key can be reconstructed from the hash in the next session.
fn register_reused_dep_node(&self, tcx: TyCtxt<'sess>, dep_node: &DepNode) {
    // Only nodes whose query key can be rebuilt from the hash need the
    // mapping stored. The mapping is also used when decoding `DefId`s that
    // were written to the cache as `DefPathHash`es, but those get registered
    // while the `DefId` is being encoded.
    if !dep_node.kind.can_reconstruct_query_key() {
        return;
    }

    let hash = DefPathHash(dep_node.hash.into());

    // Copying the `RawDefId` over from `foreign_def_path_hashes` into
    // `latest_foreign_def_path_hashes` would not be correct: it may have
    // changed in this session (crates added/removed, or definitions
    // added/removed before/after the target definition). Resolve it afresh.
    match self.def_path_hash_to_def_id(tcx, hash) {
        Some(def_id) if !def_id.is_local() => self.store_foreign_def_id_hash(def_id, hash),
        _ => {}
    }
}
/// Stores `hash` -> raw (`krate`, `index`) for `def_id` in
/// `latest_foreign_def_path_hashes`.
fn store_foreign_def_id_hash(&self, def_id: DefId, hash: DefPathHash) {
    let mut latest = self.latest_foreign_def_path_hashes.lock();
    let raw = RawDefId { krate: def_id.krate.as_u32(), index: def_id.index.as_u32() };
    // An existing entry may be replaced here; that is harmless because the
    // replacement carries the same value.
    latest.insert(hash, raw);
}
}
impl<'sess> OnDiskCache<'sess> {