Auto merge of #84449 - alexcrichton:metadata-in-object, r=nagisa

rustc: Store metadata-in-rlibs in object files

This commit updates how rustc compiler metadata is stored in rlibs.
Previously metadata was stored as a raw file that has the same format as
`--emit metadata`. After this commit, however, the metadata is encoded
into a small object file which has one section which is the contents of
the metadata.

The motivation for this commit is to fix a common case where #83730
arises. The problem is that when rustc crates a `dylib` crate type it
needs to include entire rlib files into the dylib, so it passes
`--whole-archive` (or the equivalent) to the linker. The problem with
this, though, is that the linker will attempt to read all files in the
archive. If the metadata file were left as-is (today) then the linker
would generate an error saying it can't read the file. The previous
solution was to alter the rlib just before linking, creating a new
archive in a temporary directory which has the metadata file removed.

This problem from before this commit is now removed if the metadata file
is stored in an object file that the linker can read. The only caveat we
have to take care of is to ensure that the linker never actually
includes the contents of the object file into the final output. We apply
similar tricks as the `.llvmbc` bytecode sections to do this.

This involved changing the metadata loading code a bit, namely updating
some of the LLVM C APIs used to use non-deprecated ones and fiddling
with the lifetimes a bit to get everything to work out. Otherwise though
this isn't intended to be a functional change really, only that metadata
is stored differently in archives now.

This should end up fixing #83730 because by default dylibs will no
longer have their rlib dependencies "altered" meaning that
split-debuginfo will continue to have valid paths pointing at the
original rlibs. (note that we still "alter" rlibs if LTO is enabled to
remove Rust object files and we also "alter" for the #[link(cfg)]
feature, but that's rarely used).

Closes #83730
This commit is contained in:
bors 2021-06-04 19:29:50 +00:00
commit c79419af07
8 changed files with 212 additions and 182 deletions

View File

@ -2154,9 +2154,9 @@ dependencies = [
[[package]]
name = "memchr"
version = "2.3.3"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3728d817d99e5ac407411fa471ff9800a778d88a24685968b36824eaf4bee400"
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
[[package]]
name = "memmap2"
@ -2359,6 +2359,17 @@ dependencies = [
"rustc-std-workspace-core",
]
[[package]]
name = "object"
version = "0.25.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8bc1d42047cf336f0f939c99e97183cf31551bf0f2865a2ec9c8d91fd4ffb5e"
dependencies = [
"crc32fast",
"indexmap",
"memchr",
]
[[package]]
name = "once_cell"
version = "1.7.2"
@ -3701,7 +3712,7 @@ dependencies = [
"itertools 0.9.0",
"jobserver",
"libc",
"object",
"object 0.25.2",
"pathdiff",
"rustc_apfloat",
"rustc_ast",
@ -4912,7 +4923,7 @@ dependencies = [
"hermit-abi",
"libc",
"miniz_oxide",
"object",
"object 0.22.0",
"panic_abort",
"panic_unwind",
"profiler_builtins",

View File

@ -582,11 +582,6 @@ pub struct PassManager<'a>(InvariantOpaque<'a>);
extern "C" {
pub type PassManagerBuilder;
}
extern "C" {
pub type ObjectFile;
}
#[repr(C)]
pub struct SectionIterator<'a>(InvariantOpaque<'a>);
extern "C" {
pub type Pass;
}
@ -1703,35 +1698,6 @@ extern "C" {
pub fn LLVMDisposeMessage(message: *mut c_char);
// Stuff that's in llvm-wrapper/ because it's not upstream yet.
/// Opens an object file.
pub fn LLVMCreateObjectFile(
MemBuf: &'static mut MemoryBuffer,
) -> Option<&'static mut ObjectFile>;
/// Closes an object file.
pub fn LLVMDisposeObjectFile(ObjFile: &'static mut ObjectFile);
/// Enumerates the sections in an object file.
pub fn LLVMGetSections(ObjFile: &'a ObjectFile) -> &'a mut SectionIterator<'a>;
/// Destroys a section iterator.
pub fn LLVMDisposeSectionIterator(SI: &'a mut SectionIterator<'a>);
/// Returns `true` if the section iterator is at the end of the section
/// list:
pub fn LLVMIsSectionIteratorAtEnd(ObjFile: &'a ObjectFile, SI: &SectionIterator<'a>) -> Bool;
/// Moves the section iterator to point to the next section.
pub fn LLVMMoveToNextSection(SI: &SectionIterator<'_>);
/// Returns the current section size.
pub fn LLVMGetSectionSize(SI: &SectionIterator<'_>) -> c_ulonglong;
/// Returns the current section contents as a string buffer.
pub fn LLVMGetSectionContents(SI: &SectionIterator<'_>) -> *const c_char;
/// Reads the given file and returns it as a memory buffer. Use
/// LLVMDisposeMemoryBuffer() to get rid of it.
pub fn LLVMRustCreateMemoryBufferWithContentsOfFile(
Path: *const c_char,
) -> Option<&'static mut MemoryBuffer>;
pub fn LLVMStartMultithreaded() -> Bool;
/// Returns a string describing the last error caused by an LLVMRust* call.
@ -2236,12 +2202,6 @@ extern "C" {
pub fn LLVMRustArchiveIteratorFree(AIR: &'a mut ArchiveIterator<'a>);
pub fn LLVMRustDestroyArchive(AR: &'static mut Archive);
#[allow(improper_ctypes)]
pub fn LLVMRustGetSectionName(
SI: &SectionIterator<'_>,
data: &mut Option<std::ptr::NonNull<c_char>>,
) -> size_t;
#[allow(improper_ctypes)]
pub fn LLVMRustWriteTwineToString(T: &Twine, s: &RustString);

View File

@ -150,50 +150,6 @@ impl Attribute {
}
}
// Memory-managed interface to object files.
pub struct ObjectFile {
pub llof: &'static mut ffi::ObjectFile,
}
unsafe impl Send for ObjectFile {}
impl ObjectFile {
// This will take ownership of llmb
pub fn new(llmb: &'static mut MemoryBuffer) -> Option<ObjectFile> {
unsafe {
let llof = LLVMCreateObjectFile(llmb)?;
Some(ObjectFile { llof })
}
}
}
impl Drop for ObjectFile {
fn drop(&mut self) {
unsafe {
LLVMDisposeObjectFile(&mut *(self.llof as *mut _));
}
}
}
// Memory-managed interface to section iterators.
pub struct SectionIter<'a> {
pub llsi: &'a mut SectionIterator<'a>,
}
impl Drop for SectionIter<'a> {
fn drop(&mut self) {
unsafe {
LLVMDisposeSectionIterator(&mut *(self.llsi as *mut _));
}
}
}
pub fn mk_section_iter(llof: &ffi::ObjectFile) -> SectionIter<'_> {
unsafe { SectionIter { llsi: LLVMGetSections(llof) } }
}
pub fn set_section(llglobal: &Value, section_name: &str) {
let section_name_cstr = CString::new(section_name).expect("unexpected CString error");
unsafe {

View File

@ -35,6 +35,6 @@ rustc_target = { path = "../rustc_target" }
rustc_session = { path = "../rustc_session" }
[dependencies.object]
version = "0.22.0"
version = "0.25.2"
default-features = false
features = ["read_core", "elf", "macho", "pe", "unaligned", "archive"]
features = ["read_core", "elf", "macho", "pe", "unaligned", "archive", "write"]

View File

@ -3,7 +3,7 @@ use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_errors::Handler;
use rustc_fs_util::fix_windows_verbatim_for_gcc;
use rustc_hir::def_id::CrateNum;
use rustc_middle::middle::cstore::{EncodedMetadata, LibSource};
use rustc_middle::middle::cstore::LibSource;
use rustc_middle::middle::dependency_format::Linkage;
use rustc_session::config::{self, CFGuard, CrateType, DebugInfo};
use rustc_session::config::{OutputFilenames, OutputType, PrintRequest};
@ -14,6 +14,7 @@ use rustc_session::utils::NativeLibKind;
/// need out of the shared crate context before we get rid of it.
use rustc_session::{filesearch, Session};
use rustc_span::symbol::Symbol;
use rustc_target::abi::Endian;
use rustc_target::spec::crt_objects::{CrtObjects, CrtObjectsFallback};
use rustc_target::spec::{LinkOutputKind, LinkerFlavor, LldFlavor, SplitDebuginfo};
use rustc_target::spec::{PanicStrategy, RelocModel, RelroLevel, SanitizerSet, Target};
@ -28,6 +29,9 @@ use crate::{
};
use cc::windows_registry;
use object::elf;
use object::write::Object;
use object::{Architecture, BinaryFormat, Endianness, FileFlags, SectionFlags, SectionKind};
use tempfile::Builder as TempFileBuilder;
use std::ffi::OsString;
@ -278,9 +282,9 @@ pub fn each_linked_rlib(
/// building an `.rlib` (stomping over one another), or writing an `.rmeta` into a
/// directory being searched for `extern crate` (observing an incomplete file).
/// The returned path is the temporary file containing the complete metadata.
pub fn emit_metadata(sess: &Session, metadata: &EncodedMetadata, tmpdir: &MaybeTempDir) -> PathBuf {
pub fn emit_metadata(sess: &Session, metadata: &[u8], tmpdir: &MaybeTempDir) -> PathBuf {
let out_filename = tmpdir.as_ref().join(METADATA_FILENAME);
let result = fs::write(&out_filename, &metadata.raw_data);
let result = fs::write(&out_filename, metadata);
if let Err(e) = result {
sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e));
@ -366,9 +370,11 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
// code above.
match flavor {
RlibFlavor::Normal => {
// Instead of putting the metadata in an object file section, rlibs
// contain the metadata in a separate file.
ab.add_file(&emit_metadata(sess, &codegen_results.metadata, tmpdir));
// metadata in rlib files is wrapped in a "dummy" object file for
// the target platform so the rlib can be processed entirely by
// normal linkers for the platform.
let metadata = create_metadata_file(sess, &codegen_results.metadata.raw_data);
ab.add_file(&emit_metadata(sess, &metadata, tmpdir));
// After adding all files to the archive, we need to update the
// symbol table of the archive. This currently dies on macOS (see
@ -385,8 +391,137 @@ fn link_rlib<'a, B: ArchiveBuilder<'a>>(
}
}
}
return ab;
ab
// For rlibs we "pack" rustc metadata into a dummy object file. When rustc
// creates a dylib crate type it will pass `--whole-archive` (or the
// platform equivalent) to include all object files from an rlib into the
// final dylib itself. This causes linkers to iterate and try to include all
// files located in an archive, so if metadata is stored in an archive then
// it needs to be of a form that the linker will be able to process.
//
// Note, though, that we don't actually want this metadata to show up in any
// final output of the compiler. Instead this is purely for rustc's own
// metadata tracking purposes.
//
// With the above in mind, each "flavor" of object format gets special
// handling here depending on the target:
//
// * MachO - macos-like targets will insert the metadata into a section that
// is sort of fake dwarf debug info. Inspecting the source of the macos
// linker this causes these sections to be skipped automatically because
// it's not in an allowlist of otherwise well known dwarf section names to
// go into the final artifact.
//
// * WebAssembly - we actually don't have any container format for this
// target. WebAssembly doesn't support the `dylib` crate type anyway so
// there's no need for us to support this at this time. Consequently the
// metadata bytes are simply stored as-is into an rlib.
//
// * COFF - Windows-like targets create an object with a section that has
// the `IMAGE_SCN_LNK_REMOVE` flag set which ensures that if the linker
// ever sees the section it doesn't process it and it's removed.
//
// * ELF - All other targets are similar to Windows in that there's a
// `SHF_EXCLUDE` flag we can set on sections in an object file to get
// automatically removed from the final output.
//
// Note that this metdata format is kept in sync with
// `rustc_codegen_ssa/src/back/metadata.rs`.
fn create_metadata_file(sess: &Session, metadata: &[u8]) -> Vec<u8> {
let endianness = match sess.target.options.endian {
Endian::Little => Endianness::Little,
Endian::Big => Endianness::Big,
};
let architecture = match &sess.target.arch[..] {
"arm" => Architecture::Arm,
"aarch64" => Architecture::Aarch64,
"x86" => Architecture::I386,
"s390x" => Architecture::S390x,
"mips" => Architecture::Mips,
"mips64" => Architecture::Mips64,
"x86_64" => {
if sess.target.pointer_width == 32 {
Architecture::X86_64_X32
} else {
Architecture::X86_64
}
}
"powerpc" => Architecture::PowerPc,
"powerpc64" => Architecture::PowerPc64,
"riscv32" => Architecture::Riscv32,
"riscv64" => Architecture::Riscv64,
"sparc64" => Architecture::Sparc64,
// This is used to handle all "other" targets. This includes targets
// in two categories:
//
// * Some targets don't have support in the `object` crate just yet
// to write an object file. These targets are likely to get filled
// out over time.
//
// * Targets like WebAssembly don't support dylibs, so the purpose
// of putting metadata in object files, to support linking rlibs
// into dylibs, is moot.
//
// In both of these cases it means that linking into dylibs will
// not be supported by rustc. This doesn't matter for targets like
// WebAssembly and for targets not supported by the `object` crate
// yet it means that work will need to be done in the `object` crate
// to add a case above.
_ => return metadata.to_vec(),
};
if sess.target.is_like_osx {
let mut file = Object::new(BinaryFormat::MachO, architecture, endianness);
let section =
file.add_section(b"__DWARF".to_vec(), b".rmeta".to_vec(), SectionKind::Debug);
file.append_section_data(section, metadata, 1);
file.write().unwrap()
} else if sess.target.is_like_windows {
const IMAGE_SCN_LNK_REMOVE: u32 = 0;
let mut file = Object::new(BinaryFormat::Coff, architecture, endianness);
let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug);
file.section_mut(section).flags =
SectionFlags::Coff { characteristics: IMAGE_SCN_LNK_REMOVE };
file.append_section_data(section, metadata, 1);
file.write().unwrap()
} else {
const SHF_EXCLUDE: u64 = 0x80000000;
let mut file = Object::new(BinaryFormat::Elf, architecture, endianness);
match &sess.target.arch[..] {
// copied from `mipsel-linux-gnu-gcc foo.c -c` and
// inspecting the resulting `e_flags` field.
"mips" => {
let e_flags = elf::EF_MIPS_ARCH_32R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC;
file.flags = FileFlags::Elf { e_flags };
}
// copied from `mips64el-linux-gnuabi64-gcc foo.c -c`
"mips64" => {
let e_flags = elf::EF_MIPS_ARCH_64R2 | elf::EF_MIPS_CPIC | elf::EF_MIPS_PIC;
file.flags = FileFlags::Elf { e_flags };
}
// copied from `riscv64-linux-gnu-gcc foo.c -c`, note though
// that the `+d` target feature represents whether the double
// float abi is enabled.
"riscv64" if sess.target.options.features.contains("+d") => {
let e_flags = elf::EF_RISCV_RVC | elf::EF_RISCV_FLOAT_ABI_DOUBLE;
file.flags = FileFlags::Elf { e_flags };
}
_ => {}
}
let section = file.add_section(Vec::new(), b".rmeta".to_vec(), SectionKind::Debug);
file.section_mut(section).flags = SectionFlags::Elf { sh_flags: SHF_EXCLUDE };
file.append_section_data(section, metadata, 1);
file.write().unwrap()
}
}
}
/// Create a static archive.
@ -1964,11 +2099,8 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>(
}
// Adds the static "rlib" versions of all crates to the command line.
// There's a bit of magic which happens here specifically related to LTO and
// dynamic libraries. Specifically:
//
// * For LTO, we remove upstream object files.
// * For dylibs we remove metadata and bytecode from upstream rlibs
// There's a bit of magic which happens here specifically related to LTO,
// namely that we remove upstream object files.
//
// When performing LTO, almost(*) all of the bytecode from the upstream
// libraries has already been included in our object file output. As a
@ -1981,20 +2113,9 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>(
// their bytecode wasn't included. The object files in those libraries must
// still be passed to the linker.
//
// When making a dynamic library, linkers by default don't include any
// object files in an archive if they're not necessary to resolve the link.
// We basically want to convert the archive (rlib) to a dylib, though, so we
// *do* want everything included in the output, regardless of whether the
// linker thinks it's needed or not. As a result we must use the
// --whole-archive option (or the platform equivalent). When using this
// option the linker will fail if there are non-objects in the archive (such
// as our own metadata and/or bytecode). All in all, for rlibs to be
// entirely included in dylibs, we need to remove all non-object files.
//
// Note, however, that if we're not doing LTO or we're not producing a dylib
// (aka we're making an executable), we can just pass the rlib blindly to
// the linker (fast) because it's fine if it's not actually included as
// we're at the end of the dependency chain.
// Note, however, that if we're not doing LTO we can just pass the rlib
// blindly to the linker (fast) because it's fine if it's not actually
// included as we're at the end of the dependency chain.
fn add_static_crate<'a, B: ArchiveBuilder<'a>>(
cmd: &mut dyn Linker,
sess: &'a Session,
@ -2006,6 +2127,24 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>(
let src = &codegen_results.crate_info.used_crate_source[&cnum];
let cratepath = &src.rlib.as_ref().unwrap().0;
let mut link_upstream = |path: &Path| {
// If we're creating a dylib, then we need to include the
// whole of each object in our archive into that artifact. This is
// because a `dylib` can be reused as an intermediate artifact.
//
// Note, though, that we don't want to include the whole of a
// compiler-builtins crate (e.g., compiler-rt) because it'll get
// repeatedly linked anyway.
let path = fix_windows_verbatim_for_gcc(path);
if crate_type == CrateType::Dylib
&& codegen_results.crate_info.compiler_builtins != Some(cnum)
{
cmd.link_whole_rlib(&path);
} else {
cmd.link_rlib(&path);
}
};
// See the comment above in `link_staticlib` and `link_rlib` for why if
// there's a static library that's not relevant we skip all object
// files.
@ -2017,10 +2156,9 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>(
if (!are_upstream_rust_objects_already_included(sess)
|| ignored_for_lto(sess, &codegen_results.crate_info, cnum))
&& crate_type != CrateType::Dylib
&& !skip_native
{
cmd.link_rlib(&fix_windows_verbatim_for_gcc(cratepath));
link_upstream(cratepath);
return;
}
@ -2070,21 +2208,7 @@ fn add_upstream_rust_crates<'a, B: ArchiveBuilder<'a>>(
return;
}
archive.build();
// If we're creating a dylib, then we need to include the
// whole of each object in our archive into that artifact. This is
// because a `dylib` can be reused as an intermediate artifact.
//
// Note, though, that we don't want to include the whole of a
// compiler-builtins crate (e.g., compiler-rt) because it'll get
// repeatedly linked anyway.
if crate_type == CrateType::Dylib
&& codegen_results.crate_info.compiler_builtins != Some(cnum)
{
cmd.link_whole_rlib(&fix_windows_verbatim_for_gcc(&dst));
} else {
cmd.link_rlib(&fix_windows_verbatim_for_gcc(&dst));
}
link_upstream(&dst);
});
}

View File

@ -3,6 +3,7 @@
use std::fs::File;
use std::path::Path;
use object::{Object, ObjectSection};
use rustc_data_structures::memmap::Mmap;
use rustc_data_structures::owning_ref::OwningRef;
use rustc_data_structures::rustc_erase_owner;
@ -46,7 +47,10 @@ impl MetadataLoader for DefaultMetadataLoader {
let entry = entry_result
.map_err(|e| format!("failed to parse rlib '{}': {}", path.display(), e))?;
if entry.name() == METADATA_FILENAME.as_bytes() {
return Ok(entry.data());
let data = entry
.data(data)
.map_err(|e| format!("failed to parse rlib '{}': {}", path.display(), e))?;
return search_for_metadata(path, data, ".rmeta");
}
}
@ -55,17 +59,27 @@ impl MetadataLoader for DefaultMetadataLoader {
}
fn get_dylib_metadata(&self, _target: &Target, path: &Path) -> Result<MetadataRef, String> {
use object::{Object, ObjectSection};
load_metadata_with(path, |data| {
let file = object::File::parse(&data)
.map_err(|e| format!("failed to parse dylib '{}': {}", path.display(), e))?;
file.section_by_name(".rustc")
.ok_or_else(|| format!("no .rustc section in '{}'", path.display()))?
.data()
.map_err(|e| {
format!("failed to read .rustc section in '{}': {}", path.display(), e)
})
})
load_metadata_with(path, |data| search_for_metadata(path, data, ".rustc"))
}
}
fn search_for_metadata<'a>(
path: &Path,
bytes: &'a [u8],
section: &str,
) -> Result<&'a [u8], String> {
let file = match object::File::parse(bytes) {
Ok(f) => f,
// The parse above could fail for odd reasons like corruption, but for
// now we just interpret it as this target doesn't support metadata
// emission in object files so the entire byte slice itself is probably
// a metadata file. Ideally though if necessary we could at least check
// the prefix of bytes to see if it's an actual metadata object and if
// not forward the error along here.
Err(_) => return Ok(bytes),
};
file.section_by_name(section)
.ok_or_else(|| format!("no `{}` section in '{}'", section, path.display()))?
.data()
.map_err(|e| format!("failed to read {} section in '{}': {}", section, path.display(), e))
}

View File

@ -985,7 +985,7 @@ fn encode_and_write_metadata(
.tempdir_in(out_filename.parent().unwrap())
.unwrap_or_else(|err| tcx.sess.fatal(&format!("couldn't create a temp dir: {}", err)));
let metadata_tmpdir = MaybeTempDir::new(metadata_tmpdir, tcx.sess.opts.cg.save_temps);
let metadata_filename = emit_metadata(tcx.sess, &metadata, &metadata_tmpdir);
let metadata_filename = emit_metadata(tcx.sess, &metadata.raw_data, &metadata_tmpdir);
if let Err(e) = util::non_durable_rename(&metadata_filename, &out_filename) {
tcx.sess.fatal(&format!("failed to write {}: {}", out_filename.display(), e));
}

View File

@ -70,17 +70,6 @@ extern "C" void LLVMRustInstallFatalErrorHandler() {
install_fatal_error_handler(FatalErrorHandler);
}
extern "C" LLVMMemoryBufferRef
LLVMRustCreateMemoryBufferWithContentsOfFile(const char *Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufOr =
MemoryBuffer::getFile(Path, -1, false);
if (!BufOr) {
LLVMRustSetLastError(BufOr.getError().message().c_str());
return nullptr;
}
return wrap(BufOr.get().release());
}
extern "C" char *LLVMRustGetLastError(void) {
char *Ret = LastError;
LastError = nullptr;
@ -1077,30 +1066,6 @@ extern "C" void LLVMRustWriteValueToString(LLVMValueRef V,
}
}
// Note that the two following functions look quite similar to the
// LLVMGetSectionName function. Sadly, it appears that this function only
// returns a char* pointer, which isn't guaranteed to be null-terminated. The
// function provided by LLVM doesn't return the length, so we've created our own
// function which returns the length as well as the data pointer.
//
// For an example of this not returning a null terminated string, see
// lib/Object/COFFObjectFile.cpp in the getSectionName function. One of the
// branches explicitly creates a StringRef without a null terminator, and then
// that's returned.
inline section_iterator *unwrap(LLVMSectionIteratorRef SI) {
return reinterpret_cast<section_iterator *>(SI);
}
extern "C" size_t LLVMRustGetSectionName(LLVMSectionIteratorRef SI,
const char **Ptr) {
auto NameOrErr = (*unwrap(SI))->getName();
if (!NameOrErr)
report_fatal_error(NameOrErr.takeError());
*Ptr = NameOrErr->data();
return NameOrErr->size();
}
// LLVMArrayType function does not support 64-bit ElementCount
extern "C" LLVMTypeRef LLVMRustArrayType(LLVMTypeRef ElementTy,
uint64_t ElementCount) {