rustc: Add _imp_ symbols later in compilation

On MSVC targets rustc will add symbols prefixed with `_imp_` to LLVM modules to
"emulate" dllexported statics; that workaround is still in place because #27438
hasn't been solved otherwise. These statics, however, were accidentally getting
gc'd by ThinLTO, which would later cause linking failures.

This commit moves the point at which we add such symbols to just before
codegen to ensure that (a) they're not eliminated by the optimizer and (b) the
optimizer doesn't even worry about them.

Closes #45347
This commit is contained in:
Alex Crichton 2017-10-17 13:08:13 -07:00
parent 0be6863a15
commit 3541ffb668
4 changed files with 115 additions and 50 deletions

View File

@ -12,6 +12,8 @@ use back::lto;
use back::link::{self, get_linker, remove};
use back::linker::LinkerInfo;
use back::symbol_export::ExportedSymbols;
use base;
use consts;
use rustc_incremental::{save_trans_partition, in_incr_comp_dir};
use rustc::dep_graph::DepGraph;
use rustc::middle::cstore::{LinkMeta, EncodedMetadata};
@ -35,12 +37,13 @@ use syntax::attr;
use syntax::ext::hygiene::Mark;
use syntax_pos::MultiSpan;
use syntax_pos::symbol::Symbol;
use type_::Type;
use context::{is_pie_binary, get_reloc_model};
use jobserver::{Client, Acquired};
use rustc_demangle;
use std::any::Any;
use std::ffi::CString;
use std::ffi::{CString, CStr};
use std::fs;
use std::io;
use std::io::Write;
@ -315,6 +318,8 @@ pub struct CodegenContext {
metadata_module_config: Arc<ModuleConfig>,
allocator_module_config: Arc<ModuleConfig>,
pub tm_factory: Arc<Fn() -> Result<TargetMachineRef, String> + Send + Sync>,
pub msvc_imps_needed: bool,
pub target_pointer_width: String,
// Number of cgus excluding the allocator/metadata modules
pub total_cgus: usize,
@ -586,6 +591,10 @@ unsafe fn codegen(cgcx: &CodegenContext,
let module_name = Some(&module_name[..]);
let handlers = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
if cgcx.msvc_imps_needed {
create_msvc_imps(cgcx, llcx, llmod);
}
// A codegen-specific pass manager is used to generate object
// files for an LLVM module.
//
@ -1300,6 +1309,8 @@ fn start_executing_work(tcx: TyCtxt,
allocator_module_config: allocator_config,
tm_factory: target_machine_factory(tcx.sess),
total_cgus,
msvc_imps_needed: msvc_imps_needed(tcx),
target_pointer_width: tcx.sess.target.target.target_pointer_width.clone(),
};
// This is the "main loop" of parallel work happening for parallel codegen.
@ -2133,3 +2144,51 @@ pub fn submit_translated_module_to_llvm(tcx: TyCtxt,
cost,
})));
}
// Decides whether `__imp_` globals have to be generated for this session:
// true only when the target is MSVC-like and an rlib is among the crate types
// being produced.
fn msvc_imps_needed(tcx: TyCtxt) -> bool {
    let sess = &tcx.sess;
    if !sess.target.target.options.is_like_msvc {
        return false
    }
    sess.crate_types.borrow().iter().any(|ct| *ct == config::CrateTypeRlib)
}
// Adds a `__imp_<symbol> = &symbol` global to `llmod` for every defined global
// that has external linkage. The MSVC linker needs these to resolve `dllimport`
// references to static data that lives in .rlibs; code references are left
// alone because the linker can fix those up on its own.
// See #26591, #27438
fn create_msvc_imps(cgcx: &CodegenContext, llcx: ContextRef, llmod: ModuleRef) {
    // Nothing to do unless the session asked for the import shims.
    if !cgcx.msvc_imps_needed {
        return
    }

    // The leading '\x01' turns off LLVM's own symbol mangling, so no extra
    // underscore is prepended behind our back. The x86 ABI seems to require a
    // leading underscore on symbol names, hence the extra one for 32-bit.
    let is_32bit = cgcx.target_pointer_width == "32";
    let prefix = if is_32bit { "\x01__imp__" } else { "\x01__imp_" };

    unsafe {
        let i8p_ty = Type::i8p_llcx(llcx);

        // First pass: record (shim name, target) for each qualifying global.
        // The shims are only added afterwards, presumably so that the walk over
        // the module's globals never sees the globals created below.
        let mut pending = Vec::new();
        for val in base::iter_globals(llmod) {
            if llvm::LLVMRustGetLinkage(val) != llvm::Linkage::ExternalLinkage {
                continue;
            }
            if llvm::LLVMIsDeclaration(val) != 0 {
                continue;
            }
            let name = CStr::from_ptr(llvm::LLVMGetValueName(val));
            let mut bytes = prefix.as_bytes().to_vec();
            bytes.extend(name.to_bytes());
            pending.push((CString::new(bytes).unwrap(), val));
        }

        // Second pass: emit one externally visible `i8*` global per entry,
        // initialized with the address of the global it stands in for.
        for (imp_name, val) in pending {
            let shim = llvm::LLVMAddGlobal(llmod,
                                           i8p_ty.to_ref(),
                                           imp_name.as_ptr() as *const _);
            llvm::LLVMSetInitializer(shim, consts::ptrcast(val, i8p_ty));
            llvm::LLVMRustSetLinkage(shim, llvm::Linkage::ExternalLinkage);
        }
    }
}

View File

@ -78,7 +78,7 @@ use rustc::util::nodemap::{NodeSet, FxHashMap, FxHashSet, DefIdSet};
use CrateInfo;
use std::any::Any;
use std::ffi::{CStr, CString};
use std::ffi::CString;
use std::str;
use std::sync::Arc;
use std::time::{Instant, Duration};
@ -812,47 +812,7 @@ fn write_metadata<'a, 'gcx>(tcx: TyCtxt<'a, 'gcx, 'gcx>,
return (metadata_llcx, metadata_llmod, metadata, hashes);
}
// Create a `__imp_<symbol> = &symbol` global for every public static `symbol`.
// This is required to satisfy `dllimport` references to static data in .rlibs
// when using the MSVC linker. We do this only for data, as the linker can fix
// up code references on its own.
// See #26591, #27438
fn create_imps(sess: &Session,
llvm_module: &ModuleLlvm) {
// The x86 ABI seems to require that leading underscores are added to symbol
// names, so we need an extra underscore on 32-bit. There's also a leading
// '\x01' here which disables LLVM's symbol mangling (e.g. no extra
// underscores added in front).
let prefix = if sess.target.target.target_pointer_width == "32" {
"\x01__imp__"
} else {
"\x01__imp_"
};
unsafe {
// Gather every defined (non-declaration) global with external linkage
// before adding anything, presumably so the walk over the module's
// globals is not affected by the globals created in the loop below.
let exported: Vec<_> = iter_globals(llvm_module.llmod)
.filter(|&val| {
llvm::LLVMRustGetLinkage(val) ==
llvm::Linkage::ExternalLinkage &&
llvm::LLVMIsDeclaration(val) == 0
})
.collect();
let i8p_ty = Type::i8p_llcx(llvm_module.llcx);
for val in exported {
// Shim name = prefix followed by the global's own name.
let name = CStr::from_ptr(llvm::LLVMGetValueName(val));
let mut imp_name = prefix.as_bytes().to_vec();
imp_name.extend(name.to_bytes());
let imp_name = CString::new(imp_name).unwrap();
// The shim is an externally visible `i8*` global whose initializer
// is the address of the original global.
let imp = llvm::LLVMAddGlobal(llvm_module.llmod,
i8p_ty.to_ref(),
imp_name.as_ptr() as *const _);
llvm::LLVMSetInitializer(imp, consts::ptrcast(val, i8p_ty));
llvm::LLVMRustSetLinkage(imp, llvm::Linkage::ExternalLinkage);
}
}
}
struct ValueIter {
pub struct ValueIter {
cur: ValueRef,
step: unsafe extern "C" fn(ValueRef) -> ValueRef,
}
@ -871,7 +831,7 @@ impl Iterator for ValueIter {
}
}
fn iter_globals(llmod: llvm::ModuleRef) -> ValueIter {
pub fn iter_globals(llmod: llvm::ModuleRef) -> ValueIter {
unsafe {
ValueIter {
cur: llvm::LLVMGetFirstGlobal(llmod),
@ -1437,12 +1397,6 @@ fn compile_codegen_unit<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
tm: create_target_machine(ccx.sess()),
};
// Adjust exported symbols for MSVC dllimport
if ccx.sess().target.target.options.is_like_msvc &&
ccx.sess().crate_types.borrow().iter().any(|ct| *ct == config::CrateTypeRlib) {
create_imps(ccx.sess(), &llvm_module);
}
ModuleTranslation {
name: cgu_name,
source: ModuleSource::Translated(llvm_module),

View File

@ -0,0 +1,21 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// no-prefer-dynamic
// compile-flags: -Z thinlto -C codegen-units=8 -C prefer-dynamic
#![crate_type = "rlib"]
#![crate_type = "dylib"]
// Public statics exported by this crate, one at the crate root and one inside
// a nested module. Presumably these are the items for which the MSVC import
// symbols are expected to be generated -- confirm against the test that links
// this crate.
pub static A: u32 = 43;
pub mod a {
pub static A: u32 = 43;
}

View File

@ -0,0 +1,31 @@
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// aux-build:msvc-imp-present.rs
// compile-flags: -Z thinlto -C codegen-units=8
// min-llvm-version: 4.0
// no-prefer-dynamic
// On MSVC we have a "hack" where we emit symbols that look like `__imp_$name`
// for all exported statics. This is done because we apply `dllimport` to all
// imported constants and this allows everything to actually link correctly.
//
// The ThinLTO passes aggressively remove symbols if they can, and this test
// asserts that the ThinLTO passes don't remove these compiler-generated
// `__imp_*` symbols. The external library that we link in here is compiled with
// ThinLTO and multiple codegen units and has a few exported constants. Note
// that we deliberately compile the library as both a dylib and an rlib, but we
// link the rlib to ensure that we assert those generated symbols exist.
extern crate msvc_imp_present as bar;
// Prints a static defined in the external crate `bar`, so that this binary
// holds a reference to that static which must be resolved when linking.
fn main() {
println!("{}", bar::A);
}