Auto merge of #68277 - michaelwoerister:re-export-dylib-instances, r=alexcrichton

Make sure that all upstream generics get re-exported from Rust dylibs.

This PR contains a fix for #67276. Rust dylibs would not re-export all generic instances when compiling with `-Zshare-generics=on` (=default for debug builds) which could lead to situations where the compiler expected certain generic instances to be available but then the linker would not find them.

### TODO
- [x] Write a regression test based on the description [here](https://github.com/rust-lang/rust/issues/67276#issuecomment-574613457).
- [x] Find out if this also fixes other issues related to https://github.com/rust-lang/rust/issues/64319.

r? @alexcrichton ~~(once the TODOs are done)~~
cc @pnkfelix @AlexKornitzer
This commit is contained in:
bors 2020-01-20 13:37:03 +00:00
commit 66b0c97070
16 changed files with 270 additions and 51 deletions

View File

@ -32,7 +32,9 @@ pub enum ExportedSymbol<'tcx> {
}
impl<'tcx> ExportedSymbol<'tcx> {
pub fn symbol_name(&self, tcx: TyCtxt<'tcx>) -> ty::SymbolName {
/// This is the symbol name of an instance if it is instantiated in the
/// local crate.
pub fn symbol_name_for_local_instance(&self, tcx: TyCtxt<'tcx>) -> ty::SymbolName {
match *self {
ExportedSymbol::NonGeneric(def_id) => tcx.symbol_name(ty::Instance::mono(tcx, def_id)),
ExportedSymbol::Generic(def_id, substs) => {
@ -50,9 +52,22 @@ impl<'tcx> ExportedSymbol<'tcx> {
}
ExportedSymbol::Generic(..) | ExportedSymbol::NoDefId(_) => cmp::Ordering::Less,
},
ExportedSymbol::Generic(..) => match *other {
ExportedSymbol::Generic(self_def_id, self_substs) => match *other {
ExportedSymbol::NonGeneric(_) => cmp::Ordering::Greater,
ExportedSymbol::Generic(..) => self.symbol_name(tcx).cmp(&other.symbol_name(tcx)),
ExportedSymbol::Generic(other_def_id, other_substs) => {
// We compare the symbol names because they are cached as query
// results which makes them relatively cheap to access repeatedly.
//
// It might be even faster to build a local cache of stable IDs
// for sorting. Exported symbols are really only sorted once
// in order to make the `exported_symbols` query result stable.
let self_symbol_name =
tcx.symbol_name(ty::Instance::new(self_def_id, self_substs));
let other_symbol_name =
tcx.symbol_name(ty::Instance::new(other_def_id, other_substs));
self_symbol_name.cmp(&other_symbol_name)
}
ExportedSymbol::NoDefId(_) => cmp::Ordering::Less,
},
ExportedSymbol::NoDefId(self_symbol_name) => match *other {

View File

@ -557,6 +557,9 @@ rustc_queries! {
desc { |tcx| "generating MIR shim for `{}`", tcx.def_path_str(key.def_id()) }
}
/// The `symbol_name` query provides the symbol name for calling a
/// given instance from the local crate. In particular, it will also
/// look up the correct symbol name of instances from upstream crates.
query symbol_name(key: ty::Instance<'tcx>) -> ty::SymbolName {
no_force
desc { "computing the symbol for `{}`", key }
@ -971,6 +974,11 @@ rustc_queries! {
}
Linking {
/// The list of symbols exported from the given crate.
///
/// - All names contained in `exported_symbols(cnum)` are guaranteed to
/// correspond to a publicly visible symbol in `cnum` machine code.
/// - The `exported_symbols` sets of different crates do not intersect.
query exported_symbols(_: CrateNum)
-> Arc<Vec<(ExportedSymbol<'tcx>, SymbolExportLevel)>> {
desc { "exported_symbols" }

View File

@ -1103,7 +1103,11 @@ fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec<String> {
let export_threshold = symbol_export::crates_export_threshold(&[crate_type]);
for &(symbol, level) in tcx.exported_symbols(LOCAL_CRATE).iter() {
if level.is_below_threshold(export_threshold) {
symbols.push(symbol.symbol_name(tcx).to_string());
symbols.push(symbol_export::symbol_name_for_instance_in_crate(
tcx,
symbol,
LOCAL_CRATE,
));
}
}
@ -1124,12 +1128,7 @@ fn exported_symbols(tcx: TyCtxt<'_>, crate_type: CrateType) -> Vec<String> {
continue;
}
// FIXME rust-lang/rust#64319, rust-lang/rust#64872:
// We want to block export of generics from dylibs,
// but we must fix rust-lang/rust#65890 before we can
// do that robustly.
symbols.push(symbol.symbol_name(tcx).to_string());
symbols.push(symbol_export::symbol_name_for_instance_in_crate(tcx, symbol, cnum));
}
}
}

View File

@ -8,6 +8,7 @@ use rustc::ty::query::Providers;
use rustc::ty::subst::SubstsRef;
use rustc::ty::Instance;
use rustc::ty::{SymbolName, TyCtxt};
use rustc_codegen_utils::symbol_names;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_hir as hir;
@ -358,3 +359,32 @@ fn symbol_export_level(tcx: TyCtxt<'_>, sym_def_id: DefId) -> SymbolExportLevel
SymbolExportLevel::Rust
}
}
/// This is the symbol name of the given instance instantiated in a specific crate.
pub fn symbol_name_for_instance_in_crate<'tcx>(
tcx: TyCtxt<'tcx>,
symbol: ExportedSymbol<'tcx>,
instantiating_crate: CrateNum,
) -> String {
// If this is something instantiated in the local crate then we might
// already have cached the name as a query result.
if instantiating_crate == LOCAL_CRATE {
return symbol.symbol_name_for_local_instance(tcx).to_string();
}
// This is something instantiated in an upstream crate, so we have to use
// the slower (because uncached) version of computing the symbol name.
match symbol {
ExportedSymbol::NonGeneric(def_id) => symbol_names::symbol_name_for_instance_in_crate(
tcx,
Instance::mono(tcx, def_id),
instantiating_crate,
),
ExportedSymbol::Generic(def_id, substs) => symbol_names::symbol_name_for_instance_in_crate(
tcx,
Instance::new(def_id, substs),
instantiating_crate,
),
ExportedSymbol::NoDefId(symbol_name) => symbol_name.to_string(),
}
}

View File

@ -2,7 +2,7 @@ use super::command::Command;
use super::link::{self, get_linker, remove};
use super::linker::LinkerInfo;
use super::lto::{self, SerializedModule};
use super::symbol_export::ExportedSymbols;
use super::symbol_export::{symbol_name_for_instance_in_crate, ExportedSymbols};
use crate::{
CachedModuleCodegen, CodegenResults, CompiledModule, CrateInfo, ModuleCodegen, ModuleKind,
RLIB_BYTECODE_EXTENSION,
@ -956,7 +956,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
let symbols = tcx
.exported_symbols(cnum)
.iter()
.map(|&(s, lvl)| (s.symbol_name(tcx).to_string(), lvl))
.map(|&(s, lvl)| (symbol_name_for_instance_in_crate(tcx, s, cnum), lvl))
.collect();
Arc::new(symbols)
};

View File

@ -91,8 +91,9 @@ use rustc::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc::mir::mono::{InstantiationMode, MonoItem};
use rustc::session::config::SymbolManglingVersion;
use rustc::ty::query::Providers;
use rustc::ty::subst::SubstsRef;
use rustc::ty::{self, Instance, TyCtxt};
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_hir::def_id::{CrateNum, LOCAL_CRATE};
use rustc_hir::Node;
use rustc_span::symbol::Symbol;
@ -102,15 +103,70 @@ use log::debug;
mod legacy;
mod v0;
pub fn provide(providers: &mut Providers<'_>) {
*providers = Providers {
symbol_name: |tcx, instance| ty::SymbolName { name: symbol_name(tcx, instance) },
..*providers
};
/// This function computes the symbol name for the given `instance` and the
/// given instantiating crate. That is, if you know that instance X is
/// instantiated in crate Y, this is the symbol name this instance would have.
pub fn symbol_name_for_instance_in_crate(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
instantiating_crate: CrateNum,
) -> String {
compute_symbol_name(tcx, instance, || instantiating_crate)
}
fn symbol_name(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Symbol {
pub fn provide(providers: &mut Providers<'_>) {
*providers = Providers { symbol_name: symbol_name_provider, ..*providers };
}
// The `symbol_name` query provides the symbol name for calling a given
// instance from the local crate. In particular, it will also look up the
// correct symbol name of instances from upstream crates.
fn symbol_name_provider(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> ty::SymbolName {
let symbol_name = compute_symbol_name(tcx, instance, || {
// This closure determines the instantiating crate for instances that
// need an instantiating-crate-suffix for their symbol name, in order
// to differentiate between local copies.
//
// For generics we might find re-usable upstream instances. For anything
// else we rely on their being a local copy available.
if is_generic(instance.substs) {
let def_id = instance.def_id();
if !def_id.is_local() && tcx.sess.opts.share_generics() {
// If we are re-using a monomorphization from another crate,
// we have to compute the symbol hash accordingly.
let upstream_monomorphizations = tcx.upstream_monomorphizations_for(def_id);
upstream_monomorphizations
.and_then(|monos| monos.get(&instance.substs).cloned())
// If there is no instance available upstream, there'll be
// one in the current crate.
.unwrap_or(LOCAL_CRATE)
} else {
// For generic functions defined in the current crate, there
// can be no upstream instances. Also, if we don't share
// generics, we'll instantiate a local copy too.
LOCAL_CRATE
}
} else {
// For non-generic things that need to avoid naming conflicts, we
// always instantiate a copy in the local crate.
LOCAL_CRATE
}
});
ty::SymbolName { name: Symbol::intern(&symbol_name) }
}
/// Computes the symbol name for the given instance. This function will call
/// `compute_instantiating_crate` if it needs to factor the instantiating crate
/// into the symbol name.
fn compute_symbol_name(
tcx: TyCtxt<'tcx>,
instance: Instance<'tcx>,
compute_instantiating_crate: impl FnOnce() -> CrateNum,
) -> String {
let def_id = instance.def_id();
let substs = instance.substs;
@ -121,11 +177,11 @@ fn symbol_name(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Symbol {
if def_id.is_local() {
if tcx.plugin_registrar_fn(LOCAL_CRATE) == Some(def_id) {
let disambiguator = tcx.sess.local_crate_disambiguator();
return Symbol::intern(&tcx.sess.generate_plugin_registrar_symbol(disambiguator));
return tcx.sess.generate_plugin_registrar_symbol(disambiguator);
}
if tcx.proc_macro_decls_static(LOCAL_CRATE) == Some(def_id) {
let disambiguator = tcx.sess.local_crate_disambiguator();
return Symbol::intern(&tcx.sess.generate_proc_macro_decls_symbol(disambiguator));
return tcx.sess.generate_proc_macro_decls_symbol(disambiguator);
}
}
@ -162,29 +218,28 @@ fn symbol_name(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Symbol {
|| !tcx.wasm_import_module_map(def_id.krate).contains_key(&def_id)
{
if let Some(name) = attrs.link_name {
return name;
return name.to_string();
}
return tcx.item_name(def_id);
return tcx.item_name(def_id).to_string();
}
}
if let Some(name) = attrs.export_name {
// Use provided name
return name;
return name.to_string();
}
if attrs.flags.contains(CodegenFnAttrFlags::NO_MANGLE) {
// Don't mangle
return tcx.item_name(def_id);
return tcx.item_name(def_id).to_string();
}
let is_generic = substs.non_erasable_generics().next().is_some();
let avoid_cross_crate_conflicts =
// If this is an instance of a generic function, we also hash in
// the ID of the instantiating crate. This avoids symbol conflicts
// in case the same instances is emitted in two crates of the same
// project.
is_generic ||
is_generic(substs) ||
// If we're dealing with an instance of a function that's inlined from
// another crate but we're marking it as globally shared to our
@ -197,25 +252,8 @@ fn symbol_name(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Symbol {
_ => false,
};
let instantiating_crate = if avoid_cross_crate_conflicts {
Some(if is_generic {
if !def_id.is_local() && tcx.sess.opts.share_generics() {
// If we are re-using a monomorphization from another crate,
// we have to compute the symbol hash accordingly.
let upstream_monomorphizations = tcx.upstream_monomorphizations_for(def_id);
upstream_monomorphizations
.and_then(|monos| monos.get(&substs).cloned())
.unwrap_or(LOCAL_CRATE)
} else {
LOCAL_CRATE
}
} else {
LOCAL_CRATE
})
} else {
None
};
let instantiating_crate =
if avoid_cross_crate_conflicts { Some(compute_instantiating_crate()) } else { None };
// Pick the crate responsible for the symbol mangling version, which has to:
// 1. be stable for each instance, whether it's being defined or imported
@ -232,10 +270,12 @@ fn symbol_name(tcx: TyCtxt<'tcx>, instance: Instance<'tcx>) -> Symbol {
tcx.symbol_mangling_version(mangling_version_crate)
};
let mangled = match mangling_version {
match mangling_version {
SymbolManglingVersion::Legacy => legacy::mangle(tcx, instance, instantiating_crate),
SymbolManglingVersion::V0 => v0::mangle(tcx, instance, instantiating_crate),
};
Symbol::intern(&mangled)
}
}
fn is_generic(substs: SubstsRef<'_>) -> bool {
substs.non_erasable_generics().next().is_some()
}

View File

@ -0,0 +1,39 @@
-include ../../run-make-fulldeps/tools.mk
# Different optimization levels imply different values for `-Zshare-generics`,
# so try out a whole bunch of combinations to make sure everything is compatible
all:
# First up, try some defaults
$(RUSTC) --crate-type rlib foo.rs
$(RUSTC) --crate-type dylib bar.rs -C opt-level=3
# Next try mixing up some things explicitly
$(RUSTC) --crate-type rlib foo.rs -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -Z share-generics=no
$(RUSTC) --crate-type rlib foo.rs -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -Z share-generics=yes
$(RUSTC) --crate-type rlib foo.rs -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -Z share-generics=no
$(RUSTC) --crate-type rlib foo.rs -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -Z share-generics=yes
# Now combine a whole bunch of options together
$(RUSTC) --crate-type rlib foo.rs
$(RUSTC) --crate-type dylib bar.rs
$(RUSTC) --crate-type dylib bar.rs -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -C opt-level=1
$(RUSTC) --crate-type dylib bar.rs -C opt-level=1 -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -C opt-level=1 -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -C opt-level=2
$(RUSTC) --crate-type dylib bar.rs -C opt-level=2 -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -C opt-level=2 -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -C opt-level=3
$(RUSTC) --crate-type dylib bar.rs -C opt-level=3 -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -C opt-level=3 -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -C opt-level=s
$(RUSTC) --crate-type dylib bar.rs -C opt-level=s -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -C opt-level=s -Z share-generics=yes
$(RUSTC) --crate-type dylib bar.rs -C opt-level=z
$(RUSTC) --crate-type dylib bar.rs -C opt-level=z -Z share-generics=no
$(RUSTC) --crate-type dylib bar.rs -C opt-level=z -Z share-generics=yes

View File

@ -0,0 +1,5 @@
extern crate foo;
pub fn bar() {
foo::foo();
}

View File

@ -0,0 +1,9 @@
pub fn foo() {
bar::<usize>();
}
pub fn bar<T>() {
baz();
}
fn baz() {}

View File

@ -0,0 +1,22 @@
# This test makes sure all generic instances get re-exported from Rust dylibs for use by
# `-Zshare-generics`. There are two rlibs (`instance_provider_a` and `instance_provider_b`)
# which both provide an instance of `Cell<i32>::set`. There is `instance_user_dylib` which is
# supposed to re-export both these instances, and then there are `instance_user_a_rlib` and
# `instance_user_b_rlib` which each rely on a specific instance to be available.
#
# In the end everything is linked together into `linked_leaf`. If `instance_user_dylib` does
# not export both then we'll get an `undefined reference` error for one of the instances.
#
# This is regression test for https://github.com/rust-lang/rust/issues/67276.
-include ../../run-make-fulldeps/tools.mk
COMMON_ARGS=-Cprefer-dynamic -Zshare-generics=yes -Ccodegen-units=1 -Zsymbol-mangling-version=v0
all:
$(RUSTC) instance_provider_a.rs $(COMMON_ARGS) --crate-type=rlib
$(RUSTC) instance_provider_b.rs $(COMMON_ARGS) --crate-type=rlib
$(RUSTC) instance_user_dylib.rs $(COMMON_ARGS) --crate-type=dylib
$(RUSTC) instance_user_a_rlib.rs $(COMMON_ARGS) --crate-type=rlib
$(RUSTC) instance_user_b_rlib.rs $(COMMON_ARGS) --crate-type=rlib
$(RUSTC) linked_leaf.rs $(COMMON_ARGS) --crate-type=bin

View File

@ -0,0 +1,6 @@
use std::cell::Cell;
pub fn foo() {
let a: Cell<i32> = Cell::new(1);
a.set(123);
}

View File

@ -0,0 +1,6 @@
use std::cell::Cell;
pub fn foo() {
let b: Cell<i32> = Cell::new(1);
b.set(123);
}

View File

@ -0,0 +1,9 @@
extern crate instance_provider_a as upstream;
use std::cell::Cell;
pub fn foo() {
upstream::foo();
let b: Cell<i32> = Cell::new(1);
b.set(123);
}

View File

@ -0,0 +1,9 @@
extern crate instance_provider_b as upstream;
use std::cell::Cell;
pub fn foo() {
upstream::foo();
let b: Cell<i32> = Cell::new(1);
b.set(123);
}

View File

@ -0,0 +1,7 @@
extern crate instance_provider_a;
extern crate instance_provider_b;
pub fn foo() {
instance_provider_a::foo();
instance_provider_b::foo();
}

View File

@ -0,0 +1,15 @@
extern crate instance_user_dylib;
extern crate instance_user_a_rlib;
extern crate instance_user_b_rlib;
use std::cell::Cell;
fn main() {
instance_user_a_rlib::foo();
instance_user_b_rlib::foo();
instance_user_dylib::foo();
let a: Cell<i32> = Cell::new(1);
a.set(123);
}