Serialize all foreign SourceFiles into proc-macro crate metadata

Normally, we encode a `Span` that references a foreign `SourceFile` by
encoding information about the foreign crate. When we decode this
`Span`, we look up the foreign crate in order to decode the `SourceFile`.
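
To make this concrete, here is a toy sketch (made-up types, not rustc's real
metadata encoding): a foreign `Span` is written with the number of the crate
that owns its `SourceFile`, and decoding it only works if that crate's
metadata has been loaded.

    // Toy model of the existing scheme: `CrateNum` is just a `u32` here, and
    // the "loaded metadata" is a plain list of crate numbers.
    enum EncodedSpan {
        // Offsets into our own source map.
        Local { lo: u32, hi: u32 },
        // Offsets into the source map of the upstream crate `cnum`.
        Foreign { cnum: u32, lo: u32, hi: u32 },
    }

    fn decode(span: &EncodedSpan, loaded_crates: &[u32]) -> Result<(u32, u32), String> {
        match *span {
            EncodedSpan::Local { lo, hi } => Ok((lo, hi)),
            EncodedSpan::Foreign { cnum, lo, hi } => {
                // This lookup is exactly the step a proc-macro crate cannot
                // perform, since its dependencies are never loaded.
                if loaded_crates.contains(&cnum) {
                    Ok((lo, hi))
                } else {
                    Err(format!("metadata for crate {} is not loaded", cnum))
                }
            }
        }
    }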

However, this approach does not work for proc-macro crates. When we load
a proc-macro crate, we do not deserialize any of its dependencies (since
a proc-macro crate can only export proc-macros). This means that we
cannot serialize a reference to an upstream crate, since the associated
metadata will not be available when we try to deserialize it.

This commit modifies foreign span handling so that we treat all foreign
`SourceFile`s as local `SourceFile`s when serializing a proc-macro.
All `SourceFile`s will be stored into the metadata of a proc-macro
crate, allowing us to continue to deserialize a proc-macro crate without
needing to load any of its dependencies.
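
Concretely, the tag choice on the encode side becomes something like the
following (a hypothetical helper written out for illustration; the real
logic lives in the `SpecializedEncoder<Span>` impl changed below):

    // Summarizes the new rule: a proc-macro crate never emits a foreign tag,
    // so its `Span`s can always be resolved from its own serialized
    // `SourceFile`s.
    fn span_tag(source_file_is_imported: bool, is_proc_macro: bool) -> &'static str {
        if source_file_is_imported && !is_proc_macro {
            "TAG_VALID_SPAN_FOREIGN" // rebase onto the owning crate's source map
        } else {
            "TAG_VALID_SPAN_LOCAL" // the `SourceFile` is serialized into this crate
        }
    }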

Since the number of foreign `SourceFile`s that we load during a
compilation session may be very large, we only serialize a `SourceFile`
if we have also serialized a `Span` which requires it.
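
A standalone sketch of that bookkeeping (using a plain `BTreeSet` where the
real code uses a `GrowableBitSet` of source map indices):

    use std::collections::BTreeSet;

    #[derive(Default)]
    struct SourceMapEncoder {
        // Indices (into the source map's file list) that at least one
        // encoded `Span` referred to.
        required_source_files: BTreeSet<usize>,
    }

    impl SourceMapEncoder {
        // Called while encoding a `Span`: remember that its file is needed.
        fn record_span(&mut self, source_file_index: usize) {
            self.required_source_files.insert(source_file_index);
        }

        // Called at the very end: emit only the files some `Span` actually used.
        fn encode_source_map<'a>(&self, all_files: &'a [String]) -> Vec<&'a str> {
            all_files
                .iter()
                .enumerate()
                .filter(|(idx, _)| self.required_source_files.contains(idx))
                .map(|(_, name)| name.as_str())
                .collect()
        }
    }
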
Aaron Hill 2020-06-24 13:16:36 -04:00
parent a1528c432e
commit 334373324d
13 changed files with 220 additions and 43 deletions

@@ -450,19 +450,17 @@ impl<'a, 'tcx> SpecializedDecoder<Span> for DecodeContext<'a, 'tcx> {
let imported_source_files = if tag == TAG_VALID_SPAN_LOCAL {
self.cdata().imported_source_files(sess)
} else {
// FIXME: We don't decode dependencies of proc-macros.
// Remove this once #69976 is merged
// When we encode a proc-macro crate, all `Span`s should be encoded
// with `TAG_VALID_SPAN_LOCAL`
if self.cdata().root.is_proc_macro_crate() {
debug!(
"SpecializedDecoder<Span>::specialized_decode: skipping span for proc-macro crate {:?}",
self.cdata().cnum
);
// Decode `CrateNum` as u32 - using `CrateNum::decode` will ICE
// since we don't have `cnum_map` populated.
// This advances the decoder position so that we can continue
// to read metadata.
let _ = u32::decode(self)?;
return Ok(DUMMY_SP);
let cnum = u32::decode(self)?;
panic!(
"Decoding of crate {:?} tried to access proc-macro dep {:?}",
self.cdata().root.name,
cnum
);
}
// tag is TAG_VALID_SPAN_FOREIGN, checked by `debug_assert` above
let cnum = CrateNum::decode(self)?;
@@ -990,8 +988,13 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
DefKind::Macro(macro_kind(raw_macro)),
self.local_def_id(def_index),
);
let ident = Ident::from_str(raw_macro.name());
callback(Export { ident, res, vis: ty::Visibility::Public, span: DUMMY_SP });
let ident = self.item_ident(def_index, sess);
callback(Export {
ident,
res,
vis: ty::Visibility::Public,
span: self.get_span(def_index, sess),
});
}
}
return;

@@ -16,6 +16,7 @@ use rustc_hir::intravisit::{self, NestedVisitorMap, Visitor};
use rustc_hir::itemlikevisit::{ItemLikeVisitor, ParItemLikeVisitor};
use rustc_hir::lang_items;
use rustc_hir::{AnonConst, GenericParamKind};
use rustc_index::bit_set::GrowableBitSet;
use rustc_index::vec::Idx;
use rustc_middle::hir::map::Map;
use rustc_middle::middle::cstore::{EncodedMetadata, ForeignModule, LinkagePreference, NativeLib};
@@ -51,7 +52,20 @@ struct EncodeContext<'tcx> {
interpret_allocs_inverse: Vec<interpret::AllocId>,
// This is used to speed up Span encoding.
source_file_cache: Lrc<SourceFile>,
// The `usize` is an index into the `MonotonicVec`
// that stores the `SourceFile`
source_file_cache: (Lrc<SourceFile>, usize),
// The indices (into the `SourceMap`'s `MonotonicVec`)
// of all of the `SourceFiles` that we need to serialize.
// When we serialize a `Span`, we insert the index of its
// `SourceFile` into the `GrowableBitSet`.
//
// This needs to be a `GrowableBitSet` and not a
// regular `BitSet` because we may actually import new `SourceFiles`
// during metadata encoding, due to executing a query
// with a result containing a foreign `Span`.
required_source_files: Option<GrowableBitSet<usize>>,
is_proc_macro: bool,
}
macro_rules! encoder_methods {
@@ -154,18 +168,23 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
// The Span infrastructure should make sure that this invariant holds:
debug_assert!(span.lo <= span.hi);
if !self.source_file_cache.contains(span.lo) {
if !self.source_file_cache.0.contains(span.lo) {
let source_map = self.tcx.sess.source_map();
let source_file_index = source_map.lookup_source_file_idx(span.lo);
self.source_file_cache = source_map.files()[source_file_index].clone();
self.source_file_cache =
(source_map.files()[source_file_index].clone(), source_file_index);
}
if !self.source_file_cache.contains(span.hi) {
if !self.source_file_cache.0.contains(span.hi) {
// Unfortunately, macro expansion still sometimes generates Spans
// that are malformed in this way.
return TAG_INVALID_SPAN.encode(self);
}
let source_files = self.required_source_files.as_mut().expect("Already encoded SourceMap!");
// Record the fact that we need to encode the data for this `SourceFile`
source_files.insert(self.source_file_cache.1);
// There are two possible cases here:
// 1. This span comes from a 'foreign' crate - e.g. some crate upstream of the
// crate we are writing metadata for. When the metadata for *this* crate gets
@@ -176,7 +195,13 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
// 2. This span comes from our own crate. No special handling is needed - we just
// write `TAG_VALID_SPAN_LOCAL` to let the deserializer know that it should use
// our own source map information.
let (tag, lo, hi) = if self.source_file_cache.is_imported() {
//
// If we're a proc-macro crate, we always treat this as a local `Span`.
// In `encode_source_map`, we serialize foreign `SourceFile`s into our metadata
// if we're a proc-macro crate.
// This allows us to avoid loading the dependencies of proc-macro crates: all of
// the information we need to decode `Span`s is stored in the proc-macro crate.
let (tag, lo, hi) = if self.source_file_cache.0.is_imported() && !self.is_proc_macro {
// To simplify deserialization, we 'rebase' this span onto the crate it originally came from
// (the crate that 'owns' the file it references). These rebased 'lo' and 'hi' values
// are relative to the source map information for the 'foreign' crate whose CrateNum
@@ -188,13 +213,13 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
// Span that can be used without any additional trouble.
let external_start_pos = {
// Introduce a new scope so that we drop the 'lock()' temporary
match &*self.source_file_cache.external_src.lock() {
match &*self.source_file_cache.0.external_src.lock() {
ExternalSource::Foreign { original_start_pos, .. } => *original_start_pos,
src => panic!("Unexpected external source {:?}", src),
}
};
let lo = (span.lo - self.source_file_cache.start_pos) + external_start_pos;
let hi = (span.hi - self.source_file_cache.start_pos) + external_start_pos;
let lo = (span.lo - self.source_file_cache.0.start_pos) + external_start_pos;
let hi = (span.hi - self.source_file_cache.0.start_pos) + external_start_pos;
(TAG_VALID_SPAN_FOREIGN, lo, hi)
} else {
@@ -212,7 +237,7 @@ impl<'tcx> SpecializedEncoder<Span> for EncodeContext<'tcx> {
if tag == TAG_VALID_SPAN_FOREIGN {
// This needs to be two lines to avoid holding the `self.source_file_cache`
// while calling `cnum.encode(self)`
let cnum = self.source_file_cache.cnum;
let cnum = self.source_file_cache.0.cnum;
cnum.encode(self)?;
}
Ok(())
@@ -386,17 +411,24 @@ impl<'tcx> EncodeContext<'tcx> {
let all_source_files = source_map.files();
let (working_dir, _cwd_remapped) = self.tcx.sess.working_dir.clone();
// By replacing the `Option` with `None`, we ensure that we can't
// accidentally serialize any more `Span`s after the source map encoding
// is done.
let required_source_files = self.required_source_files.take().unwrap();
let adapted = all_source_files
.iter()
.filter(|source_file| {
// No need to re-export imported source_files, as any downstream
// crate will import them from their original source.
// FIXME(eddyb) the `Span` encoding should take that into account.
!source_file.is_imported()
.enumerate()
.filter(|(idx, source_file)| {
// Only serialize `SourceFile`s that were used
// during the encoding of a `Span`
required_source_files.contains(*idx) &&
// Don't serialize imported `SourceFile`s, unless
// we're in a proc-macro crate.
(!source_file.is_imported() || self.is_proc_macro)
})
.map(|source_file| {
match source_file.name {
.map(|(_, source_file)| {
let mut adapted = match source_file.name {
// This path of this SourceFile has been modified by
// path-remapping, so we use it verbatim (and avoid
// cloning the whole map in the process).
@@ -419,15 +451,30 @@ impl<'tcx> EncodeContext<'tcx> {
// expanded code, not from a file
_ => source_file.clone(),
};
// We're serializing this `SourceFile` into our crate metadata,
// so mark it as coming from this crate.
// This also ensures that we don't try to deserialize the
// `CrateNum` for a proc-macro dependency - since proc macro
// dependencies aren't loaded when we deserialize a proc-macro,
// trying to remap the `CrateNum` would fail.
if self.is_proc_macro {
Lrc::make_mut(&mut adapted).cnum = LOCAL_CRATE;
}
adapted
})
.collect::<Vec<_>>();
self.lazy(adapted.iter().map(|rc| &**rc))
}
fn is_proc_macro(&self) -> bool {
self.tcx.sess.crate_types().contains(&CrateType::ProcMacro)
}
fn encode_crate_root(&mut self) -> Lazy<CrateRoot<'tcx>> {
let is_proc_macro = self.tcx.sess.crate_types().contains(&CrateType::ProcMacro);
let is_proc_macro = self.is_proc_macro();
let mut i = self.position();
@@ -458,11 +505,6 @@ impl<'tcx> EncodeContext<'tcx> {
let foreign_modules = self.encode_foreign_modules();
// Encode source_map
i = self.position();
let source_map = self.encode_source_map();
let source_map_bytes = self.position() - i;
// Encode DefPathTable
i = self.position();
let def_path_table = self.encode_def_path_table();
@@ -514,12 +556,19 @@ impl<'tcx> EncodeContext<'tcx> {
let proc_macro_data_bytes = self.position() - i;
// Encode exported symbols info. This is prefetched in `encode_metadata` so we encode
// this last to give the prefetching as much time as possible to complete.
// this late to give the prefetching as much time as possible to complete.
i = self.position();
let exported_symbols = self.tcx.exported_symbols(LOCAL_CRATE);
let exported_symbols = self.encode_exported_symbols(&exported_symbols);
let exported_symbols_bytes = self.position() - i;
// Encode source_map. This needs to be done last,
// since encoding `Span`s tells us which `SourceFiles` we actually
// need to encode.
i = self.position();
let source_map = self.encode_source_map();
let source_map_bytes = self.position() - i;
let attrs = tcx.hir().krate_attrs();
let has_default_lib_allocator = attr::contains_name(&attrs, sym::default_lib_allocator);
@@ -1854,6 +1903,8 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
// Will be filled with the root position after encoding everything.
encoder.emit_raw_bytes(&[0, 0, 0, 0]);
let source_map_files = tcx.sess.source_map().files();
let mut ecx = EncodeContext {
opaque: encoder,
tcx,
@@ -1861,10 +1912,13 @@ fn encode_metadata_impl(tcx: TyCtxt<'_>) -> EncodedMetadata {
lazy_state: LazyState::NoNode,
type_shorthands: Default::default(),
predicate_shorthands: Default::default(),
source_file_cache: tcx.sess.source_map().files()[0].clone(),
source_file_cache: (source_map_files[0].clone(), 0),
interpret_allocs: Default::default(),
interpret_allocs_inverse: Default::default(),
required_source_files: Some(GrowableBitSet::with_capacity(source_map_files.len())),
is_proc_macro: tcx.sess.crate_types().contains(&CrateType::ProcMacro),
};
drop(source_map_files);
// Encode the rustc version string in a predictable location.
rustc_version().encode(&mut ecx).unwrap();

@@ -192,7 +192,6 @@ crate struct CrateRoot<'tcx> {
diagnostic_items: Lazy<[(Symbol, DefIndex)]>,
native_libraries: Lazy<[NativeLib]>,
foreign_modules: Lazy<[ForeignModule]>,
source_map: Lazy<[rustc_span::SourceFile]>,
def_path_table: Lazy<rustc_hir::definitions::DefPathTable>,
impls: Lazy<[TraitImpls]>,
interpret_alloc_index: Lazy<[u32]>,
@@ -203,6 +202,7 @@ crate struct CrateRoot<'tcx> {
proc_macro_data: Option<Lazy<[DefIndex]>>,
exported_symbols: Lazy!([(ExportedSymbol<'tcx>, SymbolExportLevel)]),
source_map: Lazy<[rustc_span::SourceFile]>,
compiler_builtins: bool,
needs_allocator: bool,

@@ -395,10 +395,11 @@ pub fn debug_hygiene_data(verbose: bool) -> String {
data.expn_data.iter().enumerate().for_each(|(id, expn_info)| {
let expn_info = expn_info.as_ref().expect("no expansion data for an expansion ID");
s.push_str(&format!(
"\n{}: parent: {:?}, call_site_ctxt: {:?}, kind: {:?}",
"\n{}: parent: {:?}, call_site_ctxt: {:?}, def_site_ctxt: {:?}, kind: {:?}",
id,
expn_info.parent,
expn_info.call_site.ctxt(),
expn_info.def_site.ctxt(),
expn_info.kind,
));
});

@@ -40,6 +40,41 @@ pub fn original_sp(sp: Span, enclosing_sp: Span) -> Span {
}
}
pub mod monotonic {
use std::ops::{Deref, DerefMut};
/// A `MonotonicVec` is a `Vec` which can only be grown.
/// Once inserted, an element can never be removed or swapped,
/// guaranteeing that any indices into a `MonotonicVec` are stable
// This is declared in its own module to ensure that the private
// field is inaccessible
pub struct MonotonicVec<T>(Vec<T>);
impl<T> MonotonicVec<T> {
pub fn new(val: Vec<T>) -> MonotonicVec<T> {
MonotonicVec(val)
}
pub fn push(&mut self, val: T) {
self.0.push(val);
}
}
impl<T> Default for MonotonicVec<T> {
fn default() -> Self {
MonotonicVec::new(vec![])
}
}
impl<T> Deref for MonotonicVec<T> {
type Target = Vec<T>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl<T> !DerefMut for MonotonicVec<T> {}
}
#[derive(Clone, RustcEncodable, RustcDecodable, Debug, Copy, HashStable_Generic)]
pub struct Spanned<T> {
pub node: T,
@@ -125,7 +160,7 @@ impl StableSourceFileId {
#[derive(Default)]
pub(super) struct SourceMapFiles {
source_files: Vec<Lrc<SourceFile>>,
source_files: monotonic::MonotonicVec<Lrc<SourceFile>>,
stable_id_to_source_file: FxHashMap<StableSourceFileId, Lrc<SourceFile>>,
}
@@ -199,7 +234,9 @@ impl SourceMap {
Ok(bytes)
}
pub fn files(&self) -> MappedLockGuard<'_, Vec<Lrc<SourceFile>>> {
// By returning a `MonotonicVec`, we ensure that consumers cannot invalidate
// any existing indices pointing into `files`.
pub fn files(&self) -> MappedLockGuard<'_, monotonic::MonotonicVec<Lrc<SourceFile>>> {
LockGuard::map(self.files.borrow(), |files| &mut files.source_files)
}
@@ -912,6 +949,8 @@ impl SourceMap {
}
// Returns the index of the `SourceFile` (in `self.files`) that contains `pos`.
// This index is guaranteed to be valid for the lifetime of this `SourceMap`,
// since `source_files` is a `MonotonicVec`
pub fn lookup_source_file_idx(&self, pos: BytePos) -> usize {
self.files
.borrow()

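As an aside, the append-only guarantee that `MonotonicVec` provides can be
pictured with a small standalone example (illustrative names only, not the
module above): an index handed out by `push` stays valid because elements
are never removed or reordered.

    struct AppendOnly<T>(Vec<T>);

    impl<T> AppendOnly<T> {
        // Growing is the only mutation, so the returned index stays valid.
        fn push(&mut self, val: T) -> usize {
            self.0.push(val);
            self.0.len() - 1
        }

        fn get(&self, idx: usize) -> &T {
            &self.0[idx]
        }
    }

    fn main() {
        let mut files = AppendOnly(Vec::new());
        let idx = files.push("lib.rs");
        files.push("main.rs"); // does not invalidate `idx`
        assert_eq!(*files.get(idx), "lib.rs");
    }
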
@@ -16,8 +16,8 @@ fn y /* 0#0 */() { }
/*
Expansions:
0: parent: ExpnId(0), call_site_ctxt: #0, kind: Root
1: parent: ExpnId(0), call_site_ctxt: #0, kind: Macro(Bang, "foo")
0: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Root
1: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Macro(Bang, "foo")
SyntaxContexts:
#0: parent: #0, outer_mark: (ExpnId(0), Opaque)

@@ -0,0 +1,10 @@
#[macro_export]
macro_rules! make_it {
($name:ident) => {
#[proc_macro]
pub fn $name(input: TokenStream) -> TokenStream {
println!("Def site: {:?}", Span::def_site());
input
}
};
}

@@ -0,0 +1,12 @@
// force-host
// no-prefer-dynamic
// edition:2018
#![feature(proc_macro_def_site)]
#![crate_type = "proc-macro"]
extern crate proc_macro;
extern crate make_macro;
use proc_macro::{TokenStream, Span};
make_macro::make_it!(print_def_site);

@@ -10,11 +10,16 @@ error[E0603]: derive macro import `Empty` is private
LL | use m::Empty;
| ^^^^^ private derive macro import
|
note: the derive macro import `Empty` is defined here
note: the derive macro import `Empty` is defined here...
--> $DIR/disappearing-resolution.rs:9:9
|
LL | use test_macros::Empty;
| ^^^^^^^^^^^^^^^^^^
note: ...and refers to the derive macro `Empty` which is defined here
--> $DIR/auxiliary/test-macros.rs:25:1
|
LL | pub fn empty_derive(_: TokenStream) -> TokenStream {
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ consider importing it directly
error: aborting due to 2 previous errors

@@ -0,0 +1,11 @@
// aux-build:make-macro.rs
// aux-build:meta-macro.rs
// edition:2018
// compile-flags: -Z span-debug -Z unpretty=expanded,hygiene
// check-pass
extern crate meta_macro;
fn main() {
meta_macro::print_def_site!();
}

@@ -0,0 +1,30 @@
Def site: $DIR/auxiliary/make-macro.rs:5:9: 8:10 (#3)
#![feature /* 280#0 */(prelude_import)]
#[prelude_import /* 527#1 */]
use std /* 687#1 */::prelude /* 526#1 */::v1 /* 783#1 */::*;
#[macro_use /* 404#1 */]
extern crate std /* 687#1 */;
// aux-build:make-macro.rs
// aux-build:meta-macro.rs
// edition:2018
// compile-flags: -Z span-debug -Z unpretty=expanded,hygiene
// check-pass
extern crate meta_macro /* 834#0 */;
fn main /* 406#0 */() { }
/*
Expansions:
0: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Root
1: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: AstPass(StdImports)
2: parent: ExpnId(0), call_site_ctxt: #0, def_site_ctxt: #0, kind: Macro(Bang, "meta_macro::print_def_site")
SyntaxContexts:
#0: parent: #0, outer_mark: (ExpnId(0), Opaque)
#1: parent: #0, outer_mark: (ExpnId(1), Opaque)
#2: parent: #0, outer_mark: (ExpnId(1), Transparent)
#3: parent: #0, outer_mark: (ExpnId(2), Opaque)
#4: parent: #0, outer_mark: (ExpnId(2), Transparent)
#5: parent: #0, outer_mark: (ExpnId(2), SemiTransparent)
*/

@@ -0,0 +1,11 @@
// aux-build:make-macro.rs
// aux-build:meta-macro.rs
// edition:2018
// compile-flags: -Z span-debug
// run-pass
extern crate meta_macro;
fn main() {
meta_macro::print_def_site!();
}

@@ -0,0 +1 @@
Def site: $DIR/auxiliary/make-macro.rs:5:9: 8:10 (#3)