From cde09e7ca36aa2ee7d7677d038944cadbd65f40b Mon Sep 17 00:00:00 2001 From: Ariel Ben-Yehuda Date: Thu, 3 Sep 2015 01:22:31 +0300 Subject: [PATCH] rewrite metadata indexing this improves the compilation time for small crates by ~20% --- src/librustc/lib.rs | 1 - src/librustc/metadata/common.rs | 8 +- src/librustc/metadata/creader.rs | 3 +- src/librustc/metadata/csearch.rs | 36 +----- src/librustc/metadata/cstore.rs | 3 +- src/librustc/metadata/decoder.rs | 187 +++++++++++---------------- src/librustc/metadata/encoder.rs | 119 +++++------------- src/librustc/metadata/index.rs | 208 +++++++++++++++++++++++++++++++ src/librustc/metadata/mod.rs | 1 + 9 files changed, 322 insertions(+), 244 deletions(-) create mode 100644 src/librustc/metadata/index.rs diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs index f6a877bafce..755d83bf8ec 100644 --- a/src/librustc/lib.rs +++ b/src/librustc/lib.rs @@ -51,7 +51,6 @@ #![feature(rustc_diagnostic_macros)] #![feature(rustc_private)] #![feature(scoped_tls)] -#![feature(slice_bytes)] #![feature(slice_splits)] #![feature(slice_patterns)] #![feature(staged_api)] diff --git a/src/librustc/metadata/common.rs b/src/librustc/metadata/common.rs index 3e0cf30fe94..0fb55bd7907 100644 --- a/src/librustc/metadata/common.rs +++ b/src/librustc/metadata/common.rs @@ -45,13 +45,7 @@ pub const tag_items_data_item_is_tuple_struct_ctor: usize = 0x29; pub const tag_index: usize = 0x2a; -pub const tag_index_buckets: usize = 0x2b; - -pub const tag_index_buckets_bucket: usize = 0x2c; - -pub const tag_index_buckets_bucket_elt: usize = 0x2d; - -pub const tag_index_table: usize = 0x2e; +// GAP 0x2b, 0x2c, 0x2d, 0x2e pub const tag_meta_item_name_value: usize = 0x2f; diff --git a/src/librustc/metadata/creader.rs b/src/librustc/metadata/creader.rs index d758b253441..5ac84578369 100644 --- a/src/librustc/metadata/creader.rs +++ b/src/librustc/metadata/creader.rs @@ -304,6 +304,7 @@ impl<'a> CrateReader<'a> { let cmeta = Rc::new(cstore::crate_metadata { name: name.to_string(), local_path: RefCell::new(SmallVector::zero()), + index: decoder::load_index(metadata.as_slice()), data: metadata, cnum_map: RefCell::new(cnum_map), cnum: cnum, @@ -521,7 +522,7 @@ impl<'a> CrateReader<'a> { } let registrar = decoder::get_plugin_registrar_fn(ekrate.metadata.as_slice()) - .map(|id| decoder::get_symbol(ekrate.metadata.as_slice(), id)); + .map(|id| decoder::get_symbol_from_buf(ekrate.metadata.as_slice(), id)); match (ekrate.dylib.as_ref(), registrar) { (Some(dylib), Some(reg)) => Some((dylib.to_path_buf(), reg)), diff --git a/src/librustc/metadata/csearch.rs b/src/librustc/metadata/csearch.rs index 91c7ac48918..c539bfc5964 100644 --- a/src/librustc/metadata/csearch.rs +++ b/src/librustc/metadata/csearch.rs @@ -11,23 +11,18 @@ // Searching for information from the cstore use front::map as ast_map; -use metadata::common::*; use metadata::cstore; use metadata::decoder; use metadata::inline::InlinedItem; use middle::def_id::DefId; use middle::lang_items; use middle::ty; +use util::nodemap::FnvHashMap; -use rbml; -use rbml::reader; use std::rc::Rc; use syntax::ast; use rustc_front::attr; use rustc_front::hir; -use syntax::diagnostic::expect; - -use std::collections::hash_map::HashMap; #[derive(Copy, Clone)] pub struct MethodInfo { @@ -38,7 +33,7 @@ pub struct MethodInfo { pub fn get_symbol(cstore: &cstore::CStore, def: DefId) -> String { let cdata = cstore.get_crate_data(def.krate); - decoder::get_symbol(cdata.data(), def.node) + decoder::get_symbol(&cdata, def.node) } /// Iterates over all the language items in the given crate. @@ -201,7 +196,7 @@ pub fn get_struct_field_names(cstore: &cstore::CStore, def: DefId) -> Vec HashMap FnvHashMap> { let cdata = cstore.get_crate_data(def.krate); decoder::get_struct_field_attrs(&*cdata) @@ -243,31 +238,6 @@ pub fn get_super_predicates<'tcx>(tcx: &ty::ctxt<'tcx>, def: DefId) decoder::get_super_predicates(&*cdata, def.node, tcx) } -pub fn get_field_type<'tcx>(tcx: &ty::ctxt<'tcx>, class_id: DefId, - def: DefId) -> ty::TypeScheme<'tcx> { - let cstore = &tcx.sess.cstore; - let cdata = cstore.get_crate_data(class_id.krate); - let all_items = reader::get_doc(rbml::Doc::new(cdata.data()), tag_items); - let class_doc = expect(tcx.sess.diagnostic(), - decoder::maybe_find_item(class_id.node, all_items), - || { - (format!("get_field_type: class ID {:?} not found", - class_id)).to_string() - }); - let the_field = expect(tcx.sess.diagnostic(), - decoder::maybe_find_item(def.node, class_doc), - || { - (format!("get_field_type: in class {:?}, field ID {:?} not found", - class_id, - def)).to_string() - }); - let ty = decoder::item_type(def, the_field, tcx, &*cdata); - ty::TypeScheme { - generics: ty::Generics::empty(), - ty: ty, - } -} - pub fn get_impl_polarity<'tcx>(tcx: &ty::ctxt<'tcx>, def: DefId) -> Option diff --git a/src/librustc/metadata/cstore.rs b/src/librustc/metadata/cstore.rs index 838f78163f0..42f52b2d20b 100644 --- a/src/librustc/metadata/cstore.rs +++ b/src/librustc/metadata/cstore.rs @@ -18,7 +18,7 @@ pub use self::LinkagePreference::*; pub use self::NativeLibraryKind::*; use back::svh::Svh; -use metadata::{creader, decoder, loader}; +use metadata::{creader, decoder, index, loader}; use session::search_paths::PathKind; use util::nodemap::{FnvHashMap, NodeMap, NodeSet}; @@ -65,6 +65,7 @@ pub struct crate_metadata { pub codemap_import_info: RefCell>, pub span: codemap::Span, pub staged_api: bool, + pub index: index::Index, /// Flag if this crate is required by an rlib version of this crate, or in /// other words whether it was explicitly linked to. An example of a crate diff --git a/src/librustc/metadata/decoder.rs b/src/librustc/metadata/decoder.rs index 5991b79896b..309a18feb9a 100644 --- a/src/librustc/metadata/decoder.rs +++ b/src/librustc/metadata/decoder.rs @@ -26,6 +26,7 @@ use metadata::csearch::MethodInfo; use metadata::csearch; use metadata::cstore; use metadata::encoder::def_to_u64; +use metadata::index; use metadata::inline::InlinedItem; use metadata::tydecode::TyDecoder; use middle::def; @@ -37,12 +38,9 @@ use middle::ty::{self, RegionEscape, Ty}; use util::nodemap::FnvHashMap; use std::cell::{Cell, RefCell}; -use std::collections::HashMap; -use std::hash::{Hash, SipHasher, Hasher}; use std::io::prelude::*; use std::io; use std::rc::Rc; -use std::slice::bytes; use std::str; use rbml::reader; @@ -59,57 +57,24 @@ use syntax::ptr::P; pub type Cmd<'a> = &'a crate_metadata; -// A function that takes a def_id relative to the crate being searched and -// returns a def_id relative to the compilation environment, i.e. if we hit a -// def_id for an item defined in another crate, somebody needs to figure out -// what crate that's in and give us a def_id that makes sense for the current -// build. - -fn u32_from_be_bytes(bytes: &[u8]) -> u32 { - let mut b = [0; 4]; - bytes::copy_memory(&bytes[..4], &mut b); - unsafe { (*(b.as_ptr() as *const u32)).to_be() } -} - -fn lookup_hash<'a, F>(d: rbml::Doc<'a>, mut eq_fn: F, hash: u64) -> Option> where - F: FnMut(&[u8]) -> bool, -{ - let index = reader::get_doc(d, tag_index); - let table = reader::get_doc(index, tag_index_table); - let hash_pos = table.start + (hash % 256 * 4) as usize; - let pos = u32_from_be_bytes(&d.data[hash_pos..]) as usize; - let tagged_doc = reader::doc_at(d.data, pos).unwrap(); - - reader::tagged_docs(tagged_doc.doc, tag_index_buckets_bucket_elt).find(|elt| { - eq_fn(&elt.data[elt.start + 4 .. elt.end]) - }).map(|elt| { - let pos = u32_from_be_bytes(&elt.data[elt.start..]) as usize; - reader::doc_at(d.data, pos).unwrap().doc - }) -} - -pub fn maybe_find_item<'a>(item_id: ast::NodeId, - items: rbml::Doc<'a>) -> Option> { - fn eq_item(bytes: &[u8], item_id: ast::NodeId) -> bool { - u32_from_be_bytes(bytes) == item_id +impl crate_metadata { + fn get_item(&self, item_id: ast::NodeId) -> Option { + self.index.lookup_item(self.data(), item_id).map(|pos| { + reader::doc_at(self.data(), pos as usize).unwrap().doc + }) } - let mut s = SipHasher::new_with_keys(0, 0); - (item_id as i64).hash(&mut s); - lookup_hash(items, |a| eq_item(a, item_id), s.finish()) -} -fn find_item<'a>(item_id: ast::NodeId, items: rbml::Doc<'a>) -> rbml::Doc<'a> { - match maybe_find_item(item_id, items) { - None => panic!("lookup_item: id not found: {}", item_id), - Some(d) => d + fn lookup_item(&self, item_id: ast::NodeId) -> rbml::Doc { + match self.get_item(item_id) { + None => panic!("lookup_item: id not found: {}", item_id), + Some(d) => d + } } } -// Looks up an item in the given metadata and returns an rbml doc pointing -// to the item data. -fn lookup_item<'a>(item_id: ast::NodeId, data: &'a [u8]) -> rbml::Doc<'a> { - let items = reader::get_doc(rbml::Doc::new(data), tag_items); - find_item(item_id, items) +pub fn load_index(data: &[u8]) -> index::Index { + let index = reader::get_doc(rbml::Doc::new(data), tag_index); + index::Index::from_buf(index.data, index.start, index.end) } #[derive(Debug, PartialEq)] @@ -380,7 +345,7 @@ pub fn get_trait_def<'tcx>(cdata: Cmd, item_id: ast::NodeId, tcx: &ty::ctxt<'tcx>) -> ty::TraitDef<'tcx> { - let item_doc = lookup_item(item_id, cdata.data()); + let item_doc = cdata.lookup_item(item_id); let generics = doc_generics(item_doc, tcx, cdata, tag_item_generics); let unsafety = parse_unsafety(item_doc); let associated_type_names = parse_associated_type_names(item_doc); @@ -410,7 +375,7 @@ pub fn get_adt_def<'tcx>(intr: &IdentInterner, let mut disr_val = 0; reader::tagged_docs(doc, tag_items_data_item_variant).map(|p| { let did = translated_def_id(cdata, p); - let item = lookup_item(did.node, cdata.data()); + let item = cdata.lookup_item(did.node); if let Some(disr) = variant_disr_val(item) { disr_val = disr; @@ -459,7 +424,7 @@ pub fn get_adt_def<'tcx>(intr: &IdentInterner, } } - let doc = lookup_item(item_id, cdata.data()); + let doc = cdata.lookup_item(item_id); let did = DefId { krate: cdata.cnum, node: item_id }; let (kind, variants) = match item_family(doc) { Enum => (ty::AdtKind::Enum, @@ -516,7 +481,7 @@ pub fn get_predicates<'tcx>(cdata: Cmd, tcx: &ty::ctxt<'tcx>) -> ty::GenericPredicates<'tcx> { - let item_doc = lookup_item(item_id, cdata.data()); + let item_doc = cdata.lookup_item(item_id); doc_predicates(item_doc, tcx, cdata, tag_item_generics) } @@ -525,14 +490,14 @@ pub fn get_super_predicates<'tcx>(cdata: Cmd, tcx: &ty::ctxt<'tcx>) -> ty::GenericPredicates<'tcx> { - let item_doc = lookup_item(item_id, cdata.data()); + let item_doc = cdata.lookup_item(item_id); doc_predicates(item_doc, tcx, cdata, tag_item_super_predicates) } pub fn get_type<'tcx>(cdata: Cmd, id: ast::NodeId, tcx: &ty::ctxt<'tcx>) -> ty::TypeScheme<'tcx> { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); let t = item_type(DefId { krate: cdata.cnum, node: id }, item_doc, tcx, cdata); let generics = doc_generics(item_doc, tcx, cdata, tag_item_generics); @@ -543,7 +508,7 @@ pub fn get_type<'tcx>(cdata: Cmd, id: ast::NodeId, tcx: &ty::ctxt<'tcx>) } pub fn get_stability(cdata: Cmd, id: ast::NodeId) -> Option { - let item = lookup_item(id, cdata.data()); + let item = cdata.lookup_item(id); reader::maybe_get_doc(item, tag_items_data_item_stability).map(|doc| { let mut decoder = reader::Decoder::new(doc); Decodable::decode(&mut decoder).unwrap() @@ -551,7 +516,7 @@ pub fn get_stability(cdata: Cmd, id: ast::NodeId) -> Option { } pub fn get_repr_attrs(cdata: Cmd, id: ast::NodeId) -> Vec { - let item = lookup_item(id, cdata.data()); + let item = cdata.lookup_item(id); match reader::maybe_get_doc(item, tag_items_data_item_repr).map(|doc| { let mut decoder = reader::Decoder::new(doc); Decodable::decode(&mut decoder).unwrap() @@ -565,7 +530,7 @@ pub fn get_impl_polarity<'tcx>(cdata: Cmd, id: ast::NodeId) -> Option { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); let fam = item_family(item_doc); match fam { Family::Impl => { @@ -578,7 +543,7 @@ pub fn get_impl_polarity<'tcx>(cdata: Cmd, pub fn get_custom_coerce_unsized_kind<'tcx>(cdata: Cmd, id: ast::NodeId) -> Option { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); reader::maybe_get_doc(item_doc, tag_impl_coerce_unsized_kind).map(|kind_doc| { let mut decoder = reader::Decoder::new(kind_doc); Decodable::decode(&mut decoder).unwrap() @@ -590,7 +555,7 @@ pub fn get_impl_trait<'tcx>(cdata: Cmd, tcx: &ty::ctxt<'tcx>) -> Option> { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); let fam = item_family(item_doc); match fam { Family::Impl | Family::DefaultImpl => { @@ -602,8 +567,16 @@ pub fn get_impl_trait<'tcx>(cdata: Cmd, } } -pub fn get_symbol(data: &[u8], id: ast::NodeId) -> String { - return item_symbol(lookup_item(id, data)); +pub fn get_symbol(cdata: Cmd, id: ast::NodeId) -> String { + return item_symbol(cdata.lookup_item(id)); +} + +/// If you have a crate_metadata, call get_symbol instead +pub fn get_symbol_from_buf(data: &[u8], id: ast::NodeId) -> String { + let index = load_index(data); + let pos = index.lookup_item(data, id).unwrap(); + let doc = reader::doc_at(data, pos as usize).unwrap().doc; + item_symbol(doc) } // Something that a name can resolve to. @@ -655,10 +628,8 @@ fn each_child_of_item_or_crate(intr: Rc, None => cdata }; - let other_crates_items = reader::get_doc(rbml::Doc::new(crate_data.data()), tag_items); - // Get the item. - match maybe_find_item(child_def_id.node, other_crates_items) { + match crate_data.get_item(child_def_id.node) { None => {} Some(child_item_doc) => { // Hand off the item to the callback. @@ -676,13 +647,12 @@ fn each_child_of_item_or_crate(intr: Rc, for inherent_impl_def_id_doc in reader::tagged_docs(item_doc, tag_items_data_item_inherent_impl) { let inherent_impl_def_id = item_def_id(inherent_impl_def_id_doc, cdata); - let items = reader::get_doc(rbml::Doc::new(cdata.data()), tag_items); - if let Some(inherent_impl_doc) = maybe_find_item(inherent_impl_def_id.node, items) { + if let Some(inherent_impl_doc) = cdata.get_item(inherent_impl_def_id.node) { for impl_item_def_id_doc in reader::tagged_docs(inherent_impl_doc, tag_item_impl_item) { let impl_item_def_id = item_def_id(impl_item_def_id_doc, cdata); - if let Some(impl_method_doc) = maybe_find_item(impl_item_def_id.node, items) { + if let Some(impl_method_doc) = cdata.get_item(impl_item_def_id.node) { if let StaticMethod = item_family(impl_method_doc) { // Hand off the static method to the callback. let static_method_name = item_name(&*intr, impl_method_doc); @@ -717,10 +687,8 @@ fn each_child_of_item_or_crate(intr: Rc, None => cdata }; - let other_crates_items = reader::get_doc(rbml::Doc::new(crate_data.data()), tag_items); - // Get the item. - if let Some(child_item_doc) = maybe_find_item(child_def_id.node, other_crates_items) { + if let Some(child_item_doc) = crate_data.get_item(child_def_id.node) { // Hand off the item to the callback. let def_like = item_to_def_like(crate_data, child_item_doc, child_def_id); // These items have a public visibility because they're part of @@ -740,9 +708,7 @@ pub fn each_child_of_item(intr: Rc, G: FnMut(ast::CrateNum) -> Rc, { // Find the item. - let root_doc = rbml::Doc::new(cdata.data()); - let items = reader::get_doc(root_doc, tag_items); - let item_doc = match maybe_find_item(id, items) { + let item_doc = match cdata.get_item(id) { None => return, Some(item_doc) => item_doc, }; @@ -775,11 +741,11 @@ pub fn each_top_level_item_of_crate(intr: Rc, } pub fn get_item_path(cdata: Cmd, id: ast::NodeId) -> Vec { - item_path(lookup_item(id, cdata.data())) + item_path(cdata.lookup_item(id)) } pub fn get_item_name(intr: &IdentInterner, cdata: Cmd, id: ast::NodeId) -> ast::Name { - item_name(intr, lookup_item(id, cdata.data())) + item_name(intr, cdata.lookup_item(id)) } pub type DecodeInlinedItem<'a> = @@ -793,14 +759,14 @@ pub fn maybe_get_item_ast<'tcx>(cdata: Cmd, tcx: &ty::ctxt<'tcx>, id: ast::NodeI mut decode_inlined_item: DecodeInlinedItem) -> csearch::FoundAst<'tcx> { debug!("Looking up item: {}", id); - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); let path = item_path(item_doc).split_last().unwrap().1.to_vec(); match decode_inlined_item(cdata, tcx, path, item_doc) { Ok(ii) => csearch::FoundAst::Found(ii), Err(path) => { match item_parent_item(cdata, item_doc) { Some(did) => { - let parent_item = lookup_item(did.node, cdata.data()); + let parent_item = cdata.lookup_item(did.node); match decode_inlined_item(cdata, tcx, path, parent_item) { Ok(ii) => csearch::FoundAst::FoundParent(did, ii), Err(_) => csearch::FoundAst::NotFound @@ -842,7 +808,7 @@ fn get_explicit_self(item: rbml::Doc) -> ty::ExplicitSelfCategory { /// Returns the def IDs of all the items in the given implementation. pub fn get_impl_items(cdata: Cmd, impl_id: ast::NodeId) -> Vec { - reader::tagged_docs(lookup_item(impl_id, cdata.data()), tag_item_impl_item).map(|doc| { + reader::tagged_docs(cdata.lookup_item(impl_id), tag_item_impl_item).map(|doc| { let def_id = item_def_id(doc, cdata); match item_sort(doc) { Some('C') => ty::ConstTraitItemId(def_id), @@ -857,12 +823,12 @@ pub fn get_trait_name(intr: Rc, cdata: Cmd, id: ast::NodeId) -> ast::Name { - let doc = lookup_item(id, cdata.data()); + let doc = cdata.lookup_item(id); item_name(&*intr, doc) } pub fn is_static_method(cdata: Cmd, id: ast::NodeId) -> bool { - let doc = lookup_item(id, cdata.data()); + let doc = cdata.lookup_item(id); match item_sort(doc) { Some('r') | Some('p') => { get_explicit_self(doc) == ty::StaticExplicitSelfCategory @@ -876,12 +842,12 @@ pub fn get_impl_or_trait_item<'tcx>(intr: Rc, id: ast::NodeId, tcx: &ty::ctxt<'tcx>) -> ty::ImplOrTraitItem<'tcx> { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); let def_id = item_def_id(item_doc, cdata); let container_id = item_require_parent_item(cdata, item_doc); - let container_doc = lookup_item(container_id.node, cdata.data()); + let container_doc = cdata.lookup_item(container_id.node); let container = match item_family(container_doc) { Trait => TraitContainer(container_id), _ => ImplContainer(container_id), @@ -936,8 +902,7 @@ pub fn get_impl_or_trait_item<'tcx>(intr: Rc, pub fn get_trait_item_def_ids(cdata: Cmd, id: ast::NodeId) -> Vec { - let data = cdata.data(); - let item = lookup_item(id, data); + let item = cdata.lookup_item(id); reader::tagged_docs(item, tag_item_trait_item).map(|mth| { let def_id = item_def_id(mth, cdata); match item_sort(mth) { @@ -950,8 +915,7 @@ pub fn get_trait_item_def_ids(cdata: Cmd, id: ast::NodeId) } pub fn get_item_variances(cdata: Cmd, id: ast::NodeId) -> ty::ItemVariances { - let data = cdata.data(); - let item_doc = lookup_item(id, data); + let item_doc = cdata.lookup_item(id); let variance_doc = reader::get_doc(item_doc, tag_item_variances); let mut decoder = reader::Decoder::new(variance_doc); Decodable::decode(&mut decoder).unwrap() @@ -962,12 +926,11 @@ pub fn get_provided_trait_methods<'tcx>(intr: Rc, id: ast::NodeId, tcx: &ty::ctxt<'tcx>) -> Vec>> { - let data = cdata.data(); - let item = lookup_item(id, data); + let item = cdata.lookup_item(id); reader::tagged_docs(item, tag_item_trait_item).filter_map(|mth_id| { let did = item_def_id(mth_id, cdata); - let mth = lookup_item(did.node, data); + let mth = cdata.lookup_item(did.node); if item_sort(mth) == Some('p') { let trait_item = get_impl_or_trait_item(intr.clone(), @@ -990,13 +953,12 @@ pub fn get_associated_consts<'tcx>(intr: Rc, id: ast::NodeId, tcx: &ty::ctxt<'tcx>) -> Vec>> { - let data = cdata.data(); - let item = lookup_item(id, data); + let item = cdata.lookup_item(id); [tag_item_trait_item, tag_item_impl_item].iter().flat_map(|&tag| { reader::tagged_docs(item, tag).filter_map(|ac_id| { let did = item_def_id(ac_id, cdata); - let ac_doc = lookup_item(did.node, data); + let ac_doc = cdata.lookup_item(did.node); if item_sort(ac_doc) == Some('C') { let trait_item = get_impl_or_trait_item(intr.clone(), @@ -1017,7 +979,7 @@ pub fn get_associated_consts<'tcx>(intr: Rc, pub fn get_type_name_if_impl(cdata: Cmd, node_id: ast::NodeId) -> Option { - let item = lookup_item(node_id, cdata.data()); + let item = cdata.lookup_item(node_id); if item_family(item) != Impl { return None; } @@ -1031,7 +993,7 @@ pub fn get_methods_if_impl(intr: Rc, cdata: Cmd, node_id: ast::NodeId) -> Option > { - let item = lookup_item(node_id, cdata.data()); + let item = cdata.lookup_item(node_id); if item_family(item) != Impl { return None; } @@ -1046,7 +1008,7 @@ pub fn get_methods_if_impl(intr: Rc, let mut impl_methods = Vec::new(); for impl_method_id in impl_method_ids { - let impl_method_doc = lookup_item(impl_method_id.node, cdata.data()); + let impl_method_doc = cdata.lookup_item(impl_method_id.node); let family = item_family(impl_method_doc); match family { StaticMethod | Method => { @@ -1069,7 +1031,7 @@ pub fn get_tuple_struct_definition_if_ctor(cdata: Cmd, node_id: ast::NodeId) -> Option { - let item = lookup_item(node_id, cdata.data()); + let item = cdata.lookup_item(node_id); reader::tagged_docs(item, tag_items_data_item_is_tuple_struct_ctor).next().map(|_| { item_require_parent_item(cdata, item) }) @@ -1083,11 +1045,11 @@ pub fn get_item_attrs(cdata: Cmd, // look at the definition let node_id = get_tuple_struct_definition_if_ctor(cdata, orig_node_id); let node_id = node_id.map(|x| x.node).unwrap_or(orig_node_id); - let item = lookup_item(node_id, cdata.data()); + let item = cdata.lookup_item(node_id); get_attributes(item) } -pub fn get_struct_field_attrs(cdata: Cmd) -> HashMap> { +pub fn get_struct_field_attrs(cdata: Cmd) -> FnvHashMap> { let data = rbml::Doc::new(cdata.data()); let fields = reader::get_doc(data, tag_struct_fields); reader::tagged_docs(fields, tag_struct_field).map(|field| { @@ -1107,8 +1069,7 @@ fn struct_field_family_to_visibility(family: Family) -> hir::Visibility { pub fn get_struct_field_names(intr: &IdentInterner, cdata: Cmd, id: ast::NodeId) -> Vec { - let data = cdata.data(); - let item = lookup_item(id, data); + let item = cdata.lookup_item(id); reader::tagged_docs(item, tag_item_field).map(|an_item| { item_name(intr, an_item) }).chain(reader::tagged_docs(item, tag_item_unnamed_field).map(|_| { @@ -1299,7 +1260,7 @@ pub fn each_inherent_implementation_for_type(cdata: Cmd, mut callback: F) where F: FnMut(DefId), { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); for impl_doc in reader::tagged_docs(item_doc, tag_items_data_item_inherent_impl) { if reader::maybe_get_doc(impl_doc, tag_item_trait_ref).is_none() { callback(item_def_id(impl_doc, cdata)); @@ -1313,7 +1274,7 @@ pub fn each_implementation_for_trait(cdata: Cmd, F: FnMut(DefId), { if cdata.cnum == def_id.krate { - let item_doc = lookup_item(def_id.node, cdata.data()); + let item_doc = cdata.lookup_item(def_id.node); for impl_doc in reader::tagged_docs(item_doc, tag_items_data_item_extension_impl) { callback(item_def_id(impl_doc, cdata)); } @@ -1337,12 +1298,12 @@ pub fn each_implementation_for_trait(cdata: Cmd, pub fn get_trait_of_item(cdata: Cmd, id: ast::NodeId, tcx: &ty::ctxt) -> Option { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); let parent_item_id = match item_parent_item(cdata, item_doc) { None => return None, Some(item_id) => item_id, }; - let parent_item_doc = lookup_item(parent_item_id.node, cdata.data()); + let parent_item_doc = cdata.lookup_item(parent_item_id.node); match item_family(parent_item_doc) { Trait => Some(item_def_id(parent_item_doc, cdata)), Impl | DefaultImpl => { @@ -1423,7 +1384,7 @@ pub fn get_missing_lang_items(cdata: Cmd) } pub fn get_method_arg_names(cdata: Cmd, id: ast::NodeId) -> Vec { - let method_doc = lookup_item(id, cdata.data()); + let method_doc = cdata.lookup_item(id); match reader::maybe_get_doc(method_doc, tag_method_argument_names) { Some(args_doc) => { reader::tagged_docs(args_doc, tag_method_argument_name).map(|name_doc| { @@ -1446,7 +1407,7 @@ pub fn get_reachable_ids(cdata: Cmd) -> Vec { } pub fn is_typedef(cdata: Cmd, id: ast::NodeId) -> bool { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); match item_family(item_doc) { Type => true, _ => false, @@ -1454,7 +1415,7 @@ pub fn is_typedef(cdata: Cmd, id: ast::NodeId) -> bool { } pub fn is_const_fn(cdata: Cmd, id: ast::NodeId) -> bool { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); match fn_constness(item_doc) { hir::Constness::Const => true, hir::Constness::NotConst => false, @@ -1462,7 +1423,7 @@ pub fn is_const_fn(cdata: Cmd, id: ast::NodeId) -> bool { } pub fn is_impl(cdata: Cmd, id: ast::NodeId) -> bool { - let item_doc = lookup_item(id, cdata.data()); + let item_doc = cdata.lookup_item(id); match item_family(item_doc) { Impl => true, _ => false, @@ -1543,14 +1504,14 @@ fn doc_predicates<'tcx>(base_doc: rbml::Doc, } pub fn is_defaulted_trait(cdata: Cmd, trait_id: ast::NodeId) -> bool { - let trait_doc = lookup_item(trait_id, cdata.data()); + let trait_doc = cdata.lookup_item(trait_id); assert!(item_family(trait_doc) == Family::Trait); let defaulted_doc = reader::get_doc(trait_doc, tag_defaulted_trait); reader::doc_as_u8(defaulted_doc) != 0 } pub fn is_default_impl(cdata: Cmd, impl_id: ast::NodeId) -> bool { - let impl_doc = lookup_item(impl_id, cdata.data()); + let impl_doc = cdata.lookup_item(impl_id); item_family(impl_doc) == Family::DefaultImpl } @@ -1565,9 +1526,7 @@ pub fn get_imported_filemaps(metadata: &[u8]) -> Vec { } pub fn is_extern_fn(cdata: Cmd, id: ast::NodeId, tcx: &ty::ctxt) -> bool { - let root_doc = rbml::Doc::new(cdata.data()); - let items = reader::get_doc(root_doc, tag_items); - let item_doc = match maybe_find_item(id, items) { + let item_doc = match cdata.get_item(id) { Some(doc) => doc, None => return false, }; diff --git a/src/librustc/metadata/encoder.rs b/src/librustc/metadata/encoder.rs index 563cb1cbe73..ff24541692e 100644 --- a/src/librustc/metadata/encoder.rs +++ b/src/librustc/metadata/encoder.rs @@ -19,6 +19,7 @@ use metadata::common::*; use metadata::cstore; use metadata::decoder; use metadata::tyencode; +use metadata::index::{self, IndexEntry}; use metadata::inline::InlinedItemRef; use middle::def; use middle::def_id::{DefId, LOCAL_CRATE}; @@ -29,7 +30,6 @@ use util::nodemap::{FnvHashMap, NodeMap, NodeSet}; use serialize::Encodable; use std::cell::RefCell; -use std::hash::{Hash, Hasher, SipHasher}; use std::io::prelude::*; use std::io::{Cursor, SeekFrom}; use std::rc::Rc; @@ -86,12 +86,6 @@ fn encode_def_id(rbml_w: &mut Encoder, id: DefId) { rbml_w.wr_tagged_u64(tag_def_id, def_to_u64(id)); } -#[derive(Clone)] -struct entry { - val: T, - pos: u64 -} - fn encode_trait_ref<'a, 'tcx>(rbml_w: &mut Encoder, ecx: &EncodeContext<'a, 'tcx>, trait_ref: ty::TraitRef<'tcx>, @@ -279,7 +273,7 @@ fn encode_enum_variant_info(ecx: &EncodeContext, rbml_w: &mut Encoder, id: NodeId, vis: ast::Visibility, - index: &mut Vec>) { + index: &mut Vec) { debug!("encode_enum_variant_info(id={})", id); let mut disr_val = 0; @@ -296,8 +290,8 @@ fn encode_enum_variant_info(ecx: &EncodeContext, } } - index.push(entry { - val: vid.node as i64, + index.push(IndexEntry { + node: vid.node, pos: rbml_w.mark_stable_position(), }); rbml_w.start_tag(tag_items_data_item); @@ -625,13 +619,13 @@ fn encode_provided_source(rbml_w: &mut Encoder, fn encode_field<'a, 'tcx>(ecx: &EncodeContext<'a, 'tcx>, rbml_w: &mut Encoder, field: ty::FieldDef<'tcx>, - global_index: &mut Vec>) { + global_index: &mut Vec) { let nm = field.name; let id = field.did.node; let pos = rbml_w.mark_stable_position(); - global_index.push(entry { - val: id as i64, + global_index.push(IndexEntry { + node: id, pos: pos, }); rbml_w.start_tag(tag_items_data_item); @@ -651,10 +645,10 @@ fn encode_info_for_struct_ctor(ecx: &EncodeContext, rbml_w: &mut Encoder, name: Name, ctor_id: NodeId, - index: &mut Vec>, + index: &mut Vec, struct_id: NodeId) { - index.push(entry { - val: ctor_id as i64, + index.push(IndexEntry { + node: ctor_id, pos: rbml_w.mark_stable_position(), }); @@ -989,15 +983,15 @@ fn encode_stability(rbml_w: &mut Encoder, stab_opt: Option<&attr::Stability>) { fn encode_info_for_item(ecx: &EncodeContext, rbml_w: &mut Encoder, item: &ast::Item, - index: &mut Vec>, + index: &mut Vec, path: PathElems, vis: ast::Visibility) { let tcx = ecx.tcx; fn add_to_index(item: &ast::Item, rbml_w: &mut Encoder, - index: &mut Vec>) { - index.push(entry { - val: item.id as i64, + index: &mut Vec) { + index.push(IndexEntry { + node: item.id, pos: rbml_w.mark_stable_position(), }); } @@ -1261,8 +1255,8 @@ fn encode_info_for_item(ecx: &EncodeContext, None }; - index.push(entry { - val: trait_item_def_id.def_id().node as i64, + index.push(IndexEntry { + node: trait_item_def_id.def_id().node, pos: rbml_w.mark_stable_position(), }); @@ -1352,8 +1346,8 @@ fn encode_info_for_item(ecx: &EncodeContext, for (i, &item_def_id) in r.iter().enumerate() { assert_eq!(item_def_id.def_id().krate, LOCAL_CRATE); - index.push(entry { - val: item_def_id.def_id().node as i64, + index.push(IndexEntry { + node: item_def_id.def_id().node, pos: rbml_w.mark_stable_position(), }); @@ -1474,11 +1468,11 @@ fn encode_info_for_item(ecx: &EncodeContext, fn encode_info_for_foreign_item(ecx: &EncodeContext, rbml_w: &mut Encoder, nitem: &ast::ForeignItem, - index: &mut Vec>, + index: &mut Vec, path: PathElems, abi: abi::Abi) { - index.push(entry { - val: nitem.id as i64, + index.push(IndexEntry { + node: nitem.id, pos: rbml_w.mark_stable_position(), }); @@ -1522,7 +1516,7 @@ fn my_visit_expr(_e: &ast::Expr) { } fn my_visit_item(i: &ast::Item, rbml_w: &mut Encoder, ecx: &EncodeContext, - index: &mut Vec>) { + index: &mut Vec) { ecx.tcx.map.with_path(i.id, |path| { encode_info_for_item(ecx, rbml_w, i, index, path, i.vis); }); @@ -1531,7 +1525,7 @@ fn my_visit_item(i: &ast::Item, fn my_visit_foreign_item(ni: &ast::ForeignItem, rbml_w: &mut Encoder, ecx: &EncodeContext, - index: &mut Vec>) { + index: &mut Vec) { debug!("writing foreign item {}::{}", ecx.tcx.map.path_to_string(ni.id), ni.ident); @@ -1547,7 +1541,7 @@ fn my_visit_foreign_item(ni: &ast::ForeignItem, struct EncodeVisitor<'a, 'b:'a, 'c:'a, 'tcx:'c> { rbml_w_for_visit_item: &'a mut Encoder<'b>, ecx: &'a EncodeContext<'c,'tcx>, - index: &'a mut Vec>, + index: &'a mut Vec, } impl<'a, 'b, 'c, 'tcx, 'v> Visitor<'v> for EncodeVisitor<'a, 'b, 'c, 'tcx> { @@ -1574,11 +1568,11 @@ impl<'a, 'b, 'c, 'tcx, 'v> Visitor<'v> for EncodeVisitor<'a, 'b, 'c, 'tcx> { fn encode_info_for_items(ecx: &EncodeContext, rbml_w: &mut Encoder, krate: &ast::Crate) - -> Vec> { + -> Vec { let mut index = Vec::new(); rbml_w.start_tag(tag_items_data); - index.push(entry { - val: CRATE_NODE_ID as i64, + index.push(IndexEntry { + node: CRATE_NODE_ID, pos: rbml_w.mark_stable_position(), }); encode_info_for_mod(ecx, @@ -1601,62 +1595,13 @@ fn encode_info_for_items(ecx: &EncodeContext, } -// Path and definition ID indexing -fn encode_index(rbml_w: &mut Encoder, index: Vec>, mut write_fn: F) where - F: FnMut(&mut Cursor>, &T), - T: Hash, + +fn encode_index(rbml_w: &mut Encoder, index: Vec) { - let mut buckets: Vec>> = (0..256u16).map(|_| Vec::new()).collect(); - for elt in index { - let mut s = SipHasher::new(); - elt.val.hash(&mut s); - let h = s.finish() as usize; - (&mut buckets[h % 256]).push(elt); - } - rbml_w.start_tag(tag_index); - let mut bucket_locs = Vec::new(); - rbml_w.start_tag(tag_index_buckets); - for bucket in &buckets { - bucket_locs.push(rbml_w.mark_stable_position()); - rbml_w.start_tag(tag_index_buckets_bucket); - for elt in bucket { - rbml_w.start_tag(tag_index_buckets_bucket_elt); - assert!(elt.pos < 0xffff_ffff); - { - let wr: &mut Cursor> = rbml_w.writer; - write_be_u32(wr, elt.pos as u32); - } - write_fn(rbml_w.writer, &elt.val); - rbml_w.end_tag(); - } - rbml_w.end_tag(); - } + index::write_index(index, rbml_w.writer); rbml_w.end_tag(); - rbml_w.start_tag(tag_index_table); - for pos in &bucket_locs { - assert!(*pos < 0xffff_ffff); - let wr: &mut Cursor> = rbml_w.writer; - write_be_u32(wr, *pos as u32); - } - rbml_w.end_tag(); - rbml_w.end_tag(); -} - -fn write_i64(writer: &mut Cursor>, &n: &i64) { - let wr: &mut Cursor> = writer; - assert!(n < 0x7fff_ffff); - write_be_u32(wr, n as u32); -} - -fn write_be_u32(w: &mut Write, u: u32) { - w.write_all(&[ - (u >> 24) as u8, - (u >> 16) as u8, - (u >> 8) as u8, - (u >> 0) as u8, - ]); } fn encode_meta_item(rbml_w: &mut Encoder, mi: &ast::MetaItem) { @@ -2160,11 +2105,11 @@ fn encode_metadata_inner(wr: &mut Cursor>, i = rbml_w.writer.seek(SeekFrom::Current(0)).unwrap(); let items_index = encode_info_for_items(&ecx, &mut rbml_w, krate); stats.item_bytes = rbml_w.writer.seek(SeekFrom::Current(0)).unwrap() - i; + rbml_w.end_tag(); i = rbml_w.writer.seek(SeekFrom::Current(0)).unwrap(); - encode_index(&mut rbml_w, items_index, write_i64); + encode_index(&mut rbml_w, items_index); stats.index_bytes = rbml_w.writer.seek(SeekFrom::Current(0)).unwrap() - i; - rbml_w.end_tag(); encode_struct_field_attrs(&mut rbml_w, krate); diff --git a/src/librustc/metadata/index.rs b/src/librustc/metadata/index.rs new file mode 100644 index 00000000000..b02a9022a7a --- /dev/null +++ b/src/librustc/metadata/index.rs @@ -0,0 +1,208 @@ +// Copyright 2015 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::io::{Cursor, Write}; +use std::slice; +use std::u32; +use syntax::ast::NodeId; + +#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord)] +pub struct IndexEntry { + pub node: NodeId, + pub pos: u64 +} + +#[derive(Debug)] +pub struct IndexArrayEntry { + bits: u32, + first_pos: u32 +} + +impl IndexArrayEntry { + fn encode_to(&self, b: &mut W) { + write_be_u32(b, self.bits); + write_be_u32(b, self.first_pos); + } + + fn decode_from(b: &[u32]) -> Self { + IndexArrayEntry { + bits: b[0].to_be(), + first_pos: b[1].to_be() + } + } +} + +/// The Item Index +/// +/// This index maps the NodeId of each item to its location in the +/// metadata. +/// +/// The index is a sparse bit-vector consisting of a index-array +/// and a position-array. Each entry in the index-array handles 32 nodes. +/// The first word is a bit-array consisting of the nodes that hold items, +/// the second is the index of the first of the items in the position-array. +/// If there is a large set of non-item trailing nodes, they can be omitted +/// from the index-array. +/// +/// The index is serialized as an array of big-endian 32-bit words. +/// The first word is the number of items in the position-array. +/// Then, for each item, its position in the metadata follows. +/// After that the index-array is stored. +/// +/// struct index { +/// u32 item_count; +/// u32 items[self.item_count]; +/// struct { u32 bits; u32 offset; } positions[..]; +/// } +pub struct Index { + position_start: usize, + index_start: usize, + index_end: usize, +} + +pub fn write_index(mut entries: Vec, buf: &mut Cursor>) { + assert!(entries.len() < u32::MAX as usize); + entries.sort(); + + let mut last_entry = IndexArrayEntry { bits: 0, first_pos: 0 }; + + write_be_u32(buf, entries.len() as u32); + for &IndexEntry { pos, .. } in &entries { + assert!(pos < u32::MAX as u64); + write_be_u32(buf, pos as u32); + } + + let mut pos_in_index_array = 0; + for (i, &IndexEntry { node, .. }) in entries.iter().enumerate() { + let (x, s) = (node / 32 as u32, node % 32 as u32); + while x > pos_in_index_array { + pos_in_index_array += 1; + last_entry.encode_to(buf); + last_entry = IndexArrayEntry { bits: 0, first_pos: i as u32 }; + } + last_entry.bits |= 1< Option { + let ix = (i as usize)*2; + if ix >= index.len() { + None + } else { + Some(IndexArrayEntry::decode_from(&index[ix..ix+2])) + } + } + + fn item_from_pos(&self, positions: &[u32], pos: u32) -> u32 { + positions[pos as usize].to_be() + } + + #[inline(never)] + pub fn lookup_item(&self, buf: &[u8], node: NodeId) -> Option { + let index = bytes_to_words(&buf[self.index_start..self.index_end]); + let positions = bytes_to_words(&buf[self.position_start..self.index_start]); + let (x, s) = (node / 32 as u32, node % 32 as u32); + let result = match self.lookup_index(index, x) { + Some(IndexArrayEntry { bits, first_pos }) => { + let bit = 1< None // trailing zero + }; + debug!("lookup_item({:?}) = {:?}", node, result); + result + } + + pub fn from_buf(buf: &[u8], start: usize, end: usize) -> Self { + let buf = bytes_to_words(&buf[start..end]); + let position_count = buf[0].to_be() as usize; + let position_len = position_count*4; + info!("loaded index - position: {}-{}-{}", start, start+position_len, end); + debug!("index contents are {:?}", + buf.iter().map(|b| format!("{:08x}", b)).collect::>().concat()); + assert!(end-4-start >= position_len); + assert_eq!((end-4-start-position_len)%8, 0); + Index { + position_start: start+4, + index_start: start+position_len+4, + index_end: end + } + } +} + +fn write_be_u32(w: &mut W, u: u32) { + let _ = w.write_all(&[ + (u >> 24) as u8, + (u >> 16) as u8, + (u >> 8) as u8, + (u >> 0) as u8, + ]); +} + +fn bytes_to_words(b: &[u8]) -> &[u32] { + assert!(b.len() % 4 == 0); + unsafe { slice::from_raw_parts(b.as_ptr() as *const u32, b.len()/4) } +} + +#[test] +fn test_index() { + let entries = vec![ + IndexEntry { node: 0, pos: 17 }, + IndexEntry { node: 31, pos: 29 }, + IndexEntry { node: 32, pos: 1175 }, + IndexEntry { node: 191, pos: 21 }, + IndexEntry { node: 128, pos: 34 }, + IndexEntry { node: 145, pos: 70 }, + IndexEntry { node: 305, pos: 93214 }, + IndexEntry { node: 138, pos: 64 }, + IndexEntry { node: 129, pos: 53 }, + IndexEntry { node: 192, pos: 33334 }, + IndexEntry { node: 200, pos: 80123 }, + ]; + let mut c = Cursor::new(vec![]); + write_index(entries.clone(), &mut c); + let mut buf = c.into_inner(); + let expected: &[u8] = &[ + 0, 0, 0, 11, // # entries + // values: + 0,0,0,17, 0,0,0,29, 0,0,4,151, 0,0,0,34, + 0,0,0,53, 0,0,0,64, 0,0,0,70, 0,0,0,21, + 0,0,130,54, 0,1,56,251, 0,1,108,30, + // index: + 128,0,0,1,0,0,0,0, 0,0,0,1,0,0,0,2, + 0,0,0,0,0,0,0,3, 0,0,0,0,0,0,0,3, + 0,2,4,3,0,0,0,3, 128,0,0,0,0,0,0,7, + 0,0,1,1,0,0,0,8, 0,0,0,0,0,0,0,10, + 0,0,0,0,0,0,0,10, 0,2,0,0,0,0,0,10 + ]; + assert_eq!(buf, expected); + + // insert some junk padding + for i in 0..17 { buf.insert(0, i); buf.push(i) } + let index = Index::from_buf(&buf, 17, buf.len()-17); + + // test round-trip + for i in 0..4096 { + assert_eq!(index.lookup_item(&buf, i), + entries.iter().find(|e| e.node == i).map(|n| n.pos as u32)); + } +} diff --git a/src/librustc/metadata/mod.rs b/src/librustc/metadata/mod.rs index 44901eb0547..e532388d52e 100644 --- a/src/librustc/metadata/mod.rs +++ b/src/librustc/metadata/mod.rs @@ -16,6 +16,7 @@ pub mod decoder; pub mod creader; pub mod cstore; pub mod csearch; +pub mod index; pub mod loader; pub mod filesearch; pub mod macro_import;