Auto merge of #97485 - bjorn3:new_archive_writer, r=wesleywiser

Rewrite LLVM's archive writer in Rust

This allows it to be used by other codegen backends.

Fixes https://github.com/bjorn3/rustc_codegen_cranelift/issues/1155
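
With the writer now living in rustc_codegen_ssa (backed by the new ar_archive_writer crate), a backend only has to hand the shared builder a callback that lists the symbols of an object file. A minimal sketch of the backend side, pieced together from the cranelift and GCC hunks below (the parameters those hunks elide are filled in on a best-effort basis and may differ slightly from the committed trait):

    use std::path::{Path, PathBuf};

    use rustc_codegen_ssa::back::archive::{
        get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
    };
    use rustc_session::cstore::DllImport;
    use rustc_session::Session;

    pub(crate) struct ArArchiveBuilderBuilder;

    impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
        fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder<'a> + 'a> {
            // The shared builder picks the archive kind (gnu/bsd/darwin/coff) from the target
            // spec and uses the callback to build the symbol table; cg_llvm passes its own
            // LLVM-backed callback so bitcode and COFF bigobj members are handled as well.
            Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols))
        }

        fn create_dll_import_lib(
            &self,
            _sess: &Session,
            _lib_name: &str,
            _dll_imports: &[DllImport],
            _tmpdir: &Path,
            _is_direct_dependency: bool,
        ) -> PathBuf {
            unimplemented!("creating dll imports is not yet supported");
        }
    }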
bors 2022-12-03 15:07:39 +00:00
commit cab4fd678c
24 changed files with 491 additions and 527 deletions

View File

@ -92,6 +92,15 @@ version = "1.0.65"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98161a4e3e2184da77bb14f02184cdd111e83bbbcc9979dfee3c44b9a85f5602"
[[package]]
name = "ar_archive_writer"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "276881980556fdadeb88aa1ffc667e4d2e8fe72531dfabcb7a82bb3c9ea9ba31"
dependencies = [
"object",
]
[[package]]
name = "array_tool"
version = "1.0.3"
@ -3495,6 +3504,7 @@ dependencies = [
name = "rustc_codegen_ssa"
version = "0.0.0"
dependencies = [
"ar_archive_writer",
"bitflags",
"cc",
"itertools",

View File

@ -0,0 +1,15 @@
---- LLVM Exceptions to the Apache 2.0 License ----
As an exception, if, as a result of your compiling your source code, portions
of this Software are embedded into an Object form of such source code, you
may redistribute such embedded portions in such Object form without complying
with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
In addition, if you combine or link compiled forms of this Software with
software that is licensed under the GPLv2 ("Combined Software") and if a
court of competent jurisdiction determines that the patent provision (Section
3), the indemnity provision (Section 9) or other Section of the License
conflicts with the conditions of the GPLv2, you may retroactively and
prospectively choose to deem waived or otherwise exclude such Section(s) of
the License, but only in their entirety and only with respect to the Combined
Software.

View File

@ -19,11 +19,6 @@ version = "1.0.60"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c794e162a5eff65c72ef524dfe393eb923c354e350bb78b9c7383df13f3bc142"
[[package]]
name = "ar"
version = "0.8.0"
source = "git+https://github.com/bjorn3/rust-ar.git?branch=do_not_remove_cg_clif_ranlib#de9ab0e56bf3a208381d342aa5b60f9ff2891648"
[[package]]
name = "arrayvec"
version = "0.7.2"
@ -324,7 +319,6 @@ dependencies = [
name = "rustc_codegen_cranelift"
version = "0.1.0"
dependencies = [
"ar",
"cranelift-codegen",
"cranelift-frontend",
"cranelift-jit",

View File

@ -18,7 +18,6 @@ target-lexicon = "0.12.0"
gimli = { version = "0.26.0", default-features = false, features = ["write"]}
object = { version = "0.29.0", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }
ar = { git = "https://github.com/bjorn3/rust-ar.git", branch = "do_not_remove_cg_clif_ranlib" }
indexmap = "1.9.1"
libloading = { version = "0.7.3", optional = true }
once_cell = "1.10.0"

View File

@ -1,35 +1,15 @@
//! Creation of ar archives like for the lib and staticlib crate type
use std::collections::BTreeMap;
use std::fs::File;
use std::io::{self, Read, Seek};
use std::path::{Path, PathBuf};
use rustc_codegen_ssa::back::archive::{ArchiveBuilder, ArchiveBuilderBuilder};
use rustc_codegen_ssa::back::archive::{
get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
};
use rustc_session::Session;
use object::read::archive::ArchiveFile;
use object::{Object, ObjectSymbol, ReadCache};
#[derive(Debug)]
enum ArchiveEntry {
FromArchive { archive_index: usize, file_range: (u64, u64) },
File(PathBuf),
}
pub(crate) struct ArArchiveBuilderBuilder;
impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder<'a> + 'a> {
Box::new(ArArchiveBuilder {
sess,
use_gnu_style_archive: sess.target.archive_format == "gnu",
// FIXME fix builtin ranlib on macOS
no_builtin_ranlib: sess.target.is_like_osx,
src_archives: vec![],
entries: vec![],
})
Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols))
}
fn create_dll_import_lib(
@ -40,200 +20,6 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
_tmpdir: &Path,
_is_direct_dependency: bool,
) -> PathBuf {
bug!("creating dll imports is not supported");
}
}
pub(crate) struct ArArchiveBuilder<'a> {
sess: &'a Session,
use_gnu_style_archive: bool,
no_builtin_ranlib: bool,
src_archives: Vec<File>,
// Don't use `HashMap` here, as the order is important. `rust.metadata.bin` must always be at
// the end of an archive for linkers to not get confused.
entries: Vec<(Vec<u8>, ArchiveEntry)>,
}
impl<'a> ArchiveBuilder<'a> for ArArchiveBuilder<'a> {
fn add_file(&mut self, file: &Path) {
self.entries.push((
file.file_name().unwrap().to_str().unwrap().to_string().into_bytes(),
ArchiveEntry::File(file.to_owned()),
));
}
fn add_archive(
&mut self,
archive_path: &Path,
mut skip: Box<dyn FnMut(&str) -> bool + 'static>,
) -> std::io::Result<()> {
let read_cache = ReadCache::new(std::fs::File::open(&archive_path)?);
let archive = ArchiveFile::parse(&read_cache).unwrap();
let archive_index = self.src_archives.len();
for entry in archive.members() {
let entry = entry.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
let file_name = String::from_utf8(entry.name().to_vec())
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
if !skip(&file_name) {
self.entries.push((
file_name.into_bytes(),
ArchiveEntry::FromArchive { archive_index, file_range: entry.file_range() },
));
}
}
self.src_archives.push(read_cache.into_inner());
Ok(())
}
fn build(mut self: Box<Self>, output: &Path) -> bool {
enum BuilderKind {
Bsd(ar::Builder<File>),
Gnu(ar::GnuBuilder<File>),
}
let sess = self.sess;
let mut symbol_table = BTreeMap::new();
let mut entries = Vec::new();
for (mut entry_name, entry) in self.entries {
// FIXME only read the symbol table of the object files to avoid having to keep all
// object files in memory at once, or read them twice.
let data = match entry {
ArchiveEntry::FromArchive { archive_index, file_range } => {
// FIXME read symbols from symtab
let src_read_cache = &mut self.src_archives[archive_index];
src_read_cache.seek(io::SeekFrom::Start(file_range.0)).unwrap();
let mut data = std::vec::from_elem(0, usize::try_from(file_range.1).unwrap());
src_read_cache.read_exact(&mut data).unwrap();
data
}
ArchiveEntry::File(file) => std::fs::read(file).unwrap_or_else(|err| {
sess.fatal(&format!(
"error while reading object file during archive building: {}",
err
));
}),
};
if !self.no_builtin_ranlib {
if symbol_table.contains_key(&entry_name) {
// The ar crate can't handle creating a symbol table in case of multiple archive
// members with the same name. Work around this by prepending a number until we
// get a unique name.
for i in 1.. {
let new_name = format!("{}_", i)
.into_bytes()
.into_iter()
.chain(entry_name.iter().copied())
.collect::<Vec<_>>();
if !symbol_table.contains_key(&new_name) {
entry_name = new_name;
break;
}
}
}
match object::File::parse(&*data) {
Ok(object) => {
symbol_table.insert(
entry_name.to_vec(),
object
.symbols()
.filter_map(|symbol| {
if symbol.is_undefined() || symbol.is_local() {
None
} else {
symbol.name().map(|name| name.as_bytes().to_vec()).ok()
}
})
.collect::<Vec<_>>(),
);
}
Err(err) => {
let err = err.to_string();
if err == "Unknown file magic" {
// Not an object file; skip it.
} else if object::read::archive::ArchiveFile::parse(&*data).is_ok() {
// Nested archive file; skip it.
} else {
sess.fatal(&format!(
"error parsing `{}` during archive creation: {}",
String::from_utf8_lossy(&entry_name),
err
));
}
}
}
}
entries.push((entry_name, data));
}
let mut builder = if self.use_gnu_style_archive {
BuilderKind::Gnu(
ar::GnuBuilder::new(
File::create(output).unwrap_or_else(|err| {
sess.fatal(&format!(
"error opening destination during archive building: {}",
err
));
}),
entries.iter().map(|(name, _)| name.clone()).collect(),
ar::GnuSymbolTableFormat::Size32,
symbol_table,
)
.unwrap(),
)
} else {
BuilderKind::Bsd(
ar::Builder::new(
File::create(output).unwrap_or_else(|err| {
sess.fatal(&format!(
"error opening destination during archive building: {}",
err
));
}),
symbol_table,
)
.unwrap(),
)
};
let any_members = !entries.is_empty();
// Add all files
for (entry_name, data) in entries.into_iter() {
let header = ar::Header::new(entry_name, data.len() as u64);
match builder {
BuilderKind::Bsd(ref mut builder) => builder.append(&header, &mut &*data).unwrap(),
BuilderKind::Gnu(ref mut builder) => builder.append(&header, &mut &*data).unwrap(),
}
}
// Finalize archive
std::mem::drop(builder);
if self.no_builtin_ranlib {
let ranlib = crate::toolchain::get_toolchain_binary(self.sess, "ranlib");
// Run ranlib to be able to link the archive
let status = std::process::Command::new(ranlib)
.arg(output)
.status()
.expect("Couldn't run ranlib");
if !status.success() {
self.sess.fatal(&format!("Ranlib exited with code {:?}", status.code()));
}
}
any_members
unimplemented!("creating dll imports is not yet supported");
}
}

View File

@ -11,12 +11,6 @@ dependencies = [
"memchr",
]
[[package]]
name = "ar"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "450575f58f7bee32816abbff470cbc47797397c2a81e0eaced4b98436daf52e1"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -212,10 +206,8 @@ dependencies = [
name = "rustc_codegen_gcc"
version = "0.1.0"
dependencies = [
"ar",
"gccjit",
"lang_tester",
"target-lexicon",
"tempfile",
]
@ -228,12 +220,6 @@ dependencies = [
"winapi-util",
]
[[package]]
name = "target-lexicon"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab0e7238dcc7b40a7be719a25365910f6807bd864f4cce6b2e6b873658e2b19d"
[[package]]
name = "tempfile"
version = "3.2.0"

View File

@ -27,10 +27,6 @@ gccjit = { git = "https://github.com/antoyo/gccjit.rs" }
# Local copy.
#gccjit = { path = "../gccjit.rs" }
target-lexicon = "0.10.0"
ar = "0.8.0"
[dev-dependencies]
lang_tester = "0.3.9"
tempfile = "3.1.0"

View File

@ -1,44 +1,17 @@
use std::fs::File;
use std::path::{Path, PathBuf};
use crate::errors::RanlibFailure;
use rustc_codegen_ssa::back::archive::{ArchiveBuilder, ArchiveBuilderBuilder};
use rustc_codegen_ssa::back::archive::{
get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
};
use rustc_session::Session;
use rustc_session::cstore::DllImport;
struct ArchiveConfig<'a> {
sess: &'a Session,
use_native_ar: bool,
use_gnu_style_archive: bool,
}
#[derive(Debug)]
enum ArchiveEntry {
FromArchive {
archive_index: usize,
entry_index: usize,
},
File(PathBuf),
}
pub struct ArArchiveBuilderBuilder;
pub(crate) struct ArArchiveBuilderBuilder;
impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder<'a> + 'a> {
let config = ArchiveConfig {
sess,
use_native_ar: false,
// FIXME test for linux and System V derivatives instead
use_gnu_style_archive: sess.target.options.archive_format == "gnu",
};
Box::new(ArArchiveBuilder {
config,
src_archives: vec![],
entries: vec![],
})
Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols))
}
fn create_dll_import_lib(
@ -49,144 +22,6 @@ impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
_tmpdir: &Path,
_is_direct_dependency: bool,
) -> PathBuf {
unimplemented!();
}
}
pub struct ArArchiveBuilder<'a> {
config: ArchiveConfig<'a>,
src_archives: Vec<(PathBuf, ar::Archive<File>)>,
// Don't use `HashMap` here, as the order is important. `rust.metadata.bin` must always be at
// the end of an archive for linkers to not get confused.
entries: Vec<(String, ArchiveEntry)>,
}
impl<'a> ArchiveBuilder<'a> for ArArchiveBuilder<'a> {
fn add_file(&mut self, file: &Path) {
self.entries.push((
file.file_name().unwrap().to_str().unwrap().to_string(),
ArchiveEntry::File(file.to_owned()),
));
}
fn add_archive(
&mut self,
archive_path: &Path,
mut skip: Box<dyn FnMut(&str) -> bool + 'static>,
) -> std::io::Result<()> {
let mut archive = ar::Archive::new(std::fs::File::open(&archive_path)?);
let archive_index = self.src_archives.len();
let mut i = 0;
while let Some(entry) = archive.next_entry() {
let entry = entry?;
let file_name = String::from_utf8(entry.header().identifier().to_vec())
.map_err(|err| std::io::Error::new(std::io::ErrorKind::InvalidData, err))?;
if !skip(&file_name) {
self.entries
.push((file_name, ArchiveEntry::FromArchive { archive_index, entry_index: i }));
}
i += 1;
}
self.src_archives.push((archive_path.to_owned(), archive));
Ok(())
}
fn build(mut self: Box<Self>, output: &Path) -> bool {
use std::process::Command;
fn add_file_using_ar(archive: &Path, file: &Path) {
Command::new("ar")
.arg("r") // add or replace file
.arg("-c") // silence created file message
.arg(archive)
.arg(&file)
.status()
.unwrap();
}
enum BuilderKind<'a> {
Bsd(ar::Builder<File>),
Gnu(ar::GnuBuilder<File>),
NativeAr(&'a Path),
}
let mut builder = if self.config.use_native_ar {
BuilderKind::NativeAr(output)
} else if self.config.use_gnu_style_archive {
BuilderKind::Gnu(ar::GnuBuilder::new(
File::create(output).unwrap(),
self.entries
.iter()
.map(|(name, _)| name.as_bytes().to_vec())
.collect(),
))
} else {
BuilderKind::Bsd(ar::Builder::new(File::create(output).unwrap()))
};
let any_members = !self.entries.is_empty();
// Add all files
for (entry_name, entry) in self.entries.into_iter() {
match entry {
ArchiveEntry::FromArchive {
archive_index,
entry_index,
} => {
let (ref src_archive_path, ref mut src_archive) =
self.src_archives[archive_index];
let entry = src_archive.jump_to_entry(entry_index).unwrap();
let header = entry.header().clone();
match builder {
BuilderKind::Bsd(ref mut builder) => {
builder.append(&header, entry).unwrap()
}
BuilderKind::Gnu(ref mut builder) => {
builder.append(&header, entry).unwrap()
}
BuilderKind::NativeAr(archive_file) => {
Command::new("ar")
.arg("x")
.arg(src_archive_path)
.arg(&entry_name)
.status()
.unwrap();
add_file_using_ar(archive_file, Path::new(&entry_name));
std::fs::remove_file(entry_name).unwrap();
}
}
}
ArchiveEntry::File(file) =>
match builder {
BuilderKind::Bsd(ref mut builder) => {
builder
.append_file(entry_name.as_bytes(), &mut File::open(file).expect("file for bsd builder"))
.unwrap()
},
BuilderKind::Gnu(ref mut builder) => {
builder
.append_file(entry_name.as_bytes(), &mut File::open(&file).expect(&format!("file {:?} for gnu builder", file)))
.unwrap()
},
BuilderKind::NativeAr(archive_file) => add_file_using_ar(archive_file, &file),
},
}
}
// Finalize archive
std::mem::drop(builder);
// Run ranlib to be able to link the archive
let status =
std::process::Command::new("ranlib").arg(output).status().expect("Couldn't run ranlib");
if !status.success() {
self.config.sess.emit_fatal(RanlibFailure::new(status.code()));
}
any_members
unimplemented!("creating dll imports is not yet supported");
}
}

View File

@ -16,18 +16,6 @@ impl IntoDiagnosticArg for ExitCode {
}
}
#[derive(Diagnostic)]
#[diag(codegen_gcc_ranlib_failure)]
pub(crate) struct RanlibFailure {
exit_code: ExitCode,
}
impl RanlibFailure {
pub fn new(exit_code: Option<i32>) -> Self {
RanlibFailure { exit_code: ExitCode(exit_code) }
}
}
#[derive(Diagnostic)]
#[diag(codegen_gcc_invalid_monomorphization_basic_integer, code = "E0511")]
pub(crate) struct InvalidMonomorphizationBasicInteger<'a> {
@ -227,7 +215,7 @@ pub(crate) struct InvalidMonomorphizationUnsupportedOperation<'a> {
#[diag(codegen_gcc_linkage_const_or_mut_type)]
pub(crate) struct LinkageConstOrMutType {
#[primary_span]
pub span: Span
pub span: Span,
}
#[derive(Diagnostic)]
@ -238,5 +226,5 @@ pub(crate) struct LTONotSupported;
#[diag(codegen_gcc_unwinding_inline_asm)]
pub(crate) struct UnwindingInlineAsm {
#[primary_span]
pub span: Span
pub span: Span,
}

View File

@ -11,7 +11,7 @@ bitflags = "1.0"
cstr = "0.2"
libc = "0.2"
measureme = "10.0.0"
object = { version = "0.29.0", default-features = false, features = ["std", "read_core", "archive", "coff", "elf", "macho", "pe"] }
object = { version = "0.29.0", default-features = false, features = ["std", "read"] }
tracing = "0.1"
rustc_middle = { path = "../rustc_middle" }
rustc-demangle = "0.1.21"

View File

@ -1,31 +1,30 @@
//! A helper class for dealing with static archives
use std::env;
use std::ffi::{CStr, CString, OsString};
use std::fs;
use std::io::{self, Write};
use std::ffi::{c_char, c_void, CStr, CString, OsString};
use std::io;
use std::mem;
use std::path::{Path, PathBuf};
use std::ptr;
use std::str;
use object::read::macho::FatArch;
use crate::common;
use crate::errors::{
ArchiveBuildFailure, DlltoolFailImportLibrary, ErrorCallingDllTool, ErrorCreatingImportLibrary,
ErrorWritingDEFFile, UnknownArchiveKind,
DlltoolFailImportLibrary, ErrorCallingDllTool, ErrorCreatingImportLibrary, ErrorWritingDEFFile,
};
use crate::llvm::archive_ro::{ArchiveRO, Child};
use crate::llvm::{self, ArchiveKind, LLVMMachineType, LLVMRustCOFFShortExport};
use rustc_codegen_ssa::back::archive::{ArchiveBuilder, ArchiveBuilderBuilder};
use rustc_data_structures::memmap::Mmap;
use rustc_codegen_ssa::back::archive::{
get_native_object_symbols, try_extract_macho_fat_archive, ArArchiveBuilder,
ArchiveBuildFailure, ArchiveBuilder, ArchiveBuilderBuilder, UnknownArchiveKind,
};
use rustc_session::cstore::DllImport;
use rustc_session::Session;
/// Helper for adding many files to an archive.
#[must_use = "must call build() to finish building the archive"]
pub struct LlvmArchiveBuilder<'a> {
pub(crate) struct LlvmArchiveBuilder<'a> {
sess: &'a Session,
additions: Vec<Addition>,
}
@ -61,57 +60,6 @@ fn llvm_machine_type(cpu: &str) -> LLVMMachineType {
}
}
fn try_filter_fat_archs(
archs: object::read::Result<&[impl FatArch]>,
target_arch: object::Architecture,
archive_path: &Path,
archive_map_data: &[u8],
) -> io::Result<Option<PathBuf>> {
let archs = archs.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
let desired = match archs.iter().filter(|a| a.architecture() == target_arch).next() {
Some(a) => a,
None => return Ok(None),
};
let (mut new_f, extracted_path) = tempfile::Builder::new()
.suffix(archive_path.file_name().unwrap())
.tempfile()?
.keep()
.unwrap();
new_f.write_all(
desired.data(archive_map_data).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?,
)?;
Ok(Some(extracted_path))
}
fn try_extract_macho_fat_archive(
sess: &Session,
archive_path: &Path,
) -> io::Result<Option<PathBuf>> {
let archive_map = unsafe { Mmap::map(fs::File::open(&archive_path)?)? };
let target_arch = match sess.target.arch.as_ref() {
"aarch64" => object::Architecture::Aarch64,
"x86_64" => object::Architecture::X86_64,
_ => return Ok(None),
};
match object::macho::FatHeader::parse(&*archive_map) {
Ok(h) if h.magic.get(object::endian::BigEndian) == object::macho::FAT_MAGIC => {
let archs = object::macho::FatHeader::parse_arch32(&*archive_map);
try_filter_fat_archs(archs, target_arch, archive_path, &*archive_map)
}
Ok(h) if h.magic.get(object::endian::BigEndian) == object::macho::FAT_MAGIC_64 => {
let archs = object::macho::FatHeader::parse_arch64(&*archive_map);
try_filter_fat_archs(archs, target_arch, archive_path, &*archive_map)
}
// Not a FatHeader at all, just return None.
_ => Ok(None),
}
}
impl<'a> ArchiveBuilder<'a> for LlvmArchiveBuilder<'a> {
fn add_archive(
&mut self,
@ -160,7 +108,11 @@ pub struct LlvmArchiveBuilderBuilder;
impl ArchiveBuilderBuilder for LlvmArchiveBuilderBuilder {
fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder<'a> + 'a> {
Box::new(LlvmArchiveBuilder { sess, additions: Vec::new() })
if sess.target.arch == "wasm32" || sess.target.arch == "wasm64" {
Box::new(LlvmArchiveBuilder { sess, additions: Vec::new() })
} else {
Box::new(ArArchiveBuilder::new(sess, get_llvm_object_symbols))
}
}
fn create_dll_import_lib(
@ -309,6 +261,61 @@ impl ArchiveBuilderBuilder for LlvmArchiveBuilderBuilder {
}
}
// The object crate doesn't know how to get symbols for LLVM bitcode and COFF bigobj files.
// As such we need to use LLVM for them.
#[deny(unsafe_op_in_unsafe_fn)]
fn get_llvm_object_symbols(
buf: &[u8],
f: &mut dyn FnMut(&[u8]) -> io::Result<()>,
) -> io::Result<bool> {
let is_bitcode = unsafe { llvm::LLVMRustIsBitcode(buf.as_ptr(), buf.len()) };
// COFF bigobj file, msvc LTO file or import library. See
// https://github.com/llvm/llvm-project/blob/453f27bc9/llvm/lib/BinaryFormat/Magic.cpp#L38-L51
let is_unsupported_windows_obj_file = buf.get(0..4) == Some(b"\0\0\xFF\xFF");
if is_bitcode || is_unsupported_windows_obj_file {
let mut state = Box::new(f);
let err = unsafe {
llvm::LLVMRustGetSymbols(
buf.as_ptr(),
buf.len(),
&mut *state as *mut &mut _ as *mut c_void,
callback,
error_callback,
)
};
if err.is_null() {
return Ok(true);
} else {
return Err(unsafe { *Box::from_raw(err as *mut io::Error) });
}
unsafe extern "C" fn callback(
state: *mut c_void,
symbol_name: *const c_char,
) -> *mut c_void {
let f = unsafe { &mut *(state as *mut &mut dyn FnMut(&[u8]) -> io::Result<()>) };
match f(unsafe { CStr::from_ptr(symbol_name) }.to_bytes()) {
Ok(()) => std::ptr::null_mut(),
Err(err) => Box::into_raw(Box::new(err)) as *mut c_void,
}
}
unsafe extern "C" fn error_callback(error: *const c_char) -> *mut c_void {
let error = unsafe { CStr::from_ptr(error) };
Box::into_raw(Box::new(io::Error::new(
io::ErrorKind::Other,
format!("LLVM error: {}", error.to_string_lossy()),
))) as *mut c_void
}
} else {
get_native_object_symbols(buf, f)
}
}
impl<'a> LlvmArchiveBuilder<'a> {
fn build_with_llvm(&mut self, output: &Path) -> io::Result<bool> {
let kind = &*self.sess.target.archive_format;

View File

@ -72,12 +72,6 @@ pub(crate) struct LinkageConstOrMutType {
#[diag(codegen_llvm_sanitizer_memtag_requires_mte)]
pub(crate) struct SanitizerMemtagRequiresMte;
#[derive(Diagnostic)]
#[diag(codegen_llvm_archive_build_failure)]
pub(crate) struct ArchiveBuildFailure {
pub error: std::io::Error,
}
#[derive(Diagnostic)]
#[diag(codegen_llvm_error_writing_def_file)]
pub(crate) struct ErrorWritingDEFFile {
@ -97,12 +91,6 @@ pub(crate) struct DlltoolFailImportLibrary<'a> {
pub stderr: Cow<'a, str>,
}
#[derive(Diagnostic)]
#[diag(codegen_llvm_unknown_archive_kind)]
pub(crate) struct UnknownArchiveKind<'a> {
pub kind: &'a str,
}
#[derive(Diagnostic)]
#[diag(codegen_llvm_dynamic_linking_with_lto)]
#[note]

View File

@ -983,6 +983,9 @@ pub type SelfProfileBeforePassCallback =
unsafe extern "C" fn(*mut c_void, *const c_char, *const c_char);
pub type SelfProfileAfterPassCallback = unsafe extern "C" fn(*mut c_void);
pub type GetSymbolsCallback = unsafe extern "C" fn(*mut c_void, *const c_char) -> *mut c_void;
pub type GetSymbolsErrorCallback = unsafe extern "C" fn(*const c_char) -> *mut c_void;
extern "C" {
pub fn LLVMRustInstallFatalErrorHandler();
pub fn LLVMRustDisableSystemDialogsOnCrash();
@ -2474,4 +2477,14 @@ extern "C" {
pub fn LLVMRustGetMangledName(V: &Value, out: &RustString);
pub fn LLVMRustGetElementTypeArgIndex(CallSite: &Value) -> i32;
pub fn LLVMRustIsBitcode(ptr: *const u8, len: usize) -> bool;
pub fn LLVMRustGetSymbols(
buf_ptr: *const u8,
buf_len: usize,
state: *mut c_void,
callback: GetSymbolsCallback,
error_callback: GetSymbolsErrorCallback,
) -> *mut c_void;
}

View File

@ -7,6 +7,7 @@ edition = "2021"
test = false
[dependencies]
ar_archive_writer = "0.1.1"
bitflags = "1.2.1"
cc = "1.0.69"
itertools = "0.10.1"

View File

@ -6,14 +6,19 @@ use rustc_span::symbol::Symbol;
use super::metadata::search_for_section;
pub use ar_archive_writer::get_native_object_symbols;
use ar_archive_writer::{write_archive_to_stream, ArchiveKind, NewArchiveMember};
use object::read::archive::ArchiveFile;
use object::read::macho::FatArch;
use tempfile::Builder as TempFileBuilder;
use std::error::Error;
use std::fs::File;
use std::io;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use crate::errors::ExtractBundledLibsError;
// Re-exporting for rustc_codegen_llvm::back::archive
pub use crate::errors::{ArchiveBuildFailure, ExtractBundledLibsError, UnknownArchiveKind};
pub trait ArchiveBuilderBuilder {
fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder<'a> + 'a>;
@ -80,3 +85,225 @@ pub trait ArchiveBuilder<'a> {
fn build(self: Box<Self>, output: &Path) -> bool;
}
#[must_use = "must call build() to finish building the archive"]
pub struct ArArchiveBuilder<'a> {
sess: &'a Session,
get_object_symbols:
fn(buf: &[u8], f: &mut dyn FnMut(&[u8]) -> io::Result<()>) -> io::Result<bool>,
src_archives: Vec<(PathBuf, Mmap)>,
// Don't use a `HashMap` here, as the order is important. `lib.rmeta` needs
// to be at the end of an archive in some cases for linkers to not get confused.
entries: Vec<(Vec<u8>, ArchiveEntry)>,
}
#[derive(Debug)]
enum ArchiveEntry {
FromArchive { archive_index: usize, file_range: (u64, u64) },
File(PathBuf),
}
impl<'a> ArArchiveBuilder<'a> {
pub fn new(
sess: &'a Session,
get_object_symbols: fn(
buf: &[u8],
f: &mut dyn FnMut(&[u8]) -> io::Result<()>,
) -> io::Result<bool>,
) -> ArArchiveBuilder<'a> {
ArArchiveBuilder { sess, get_object_symbols, src_archives: vec![], entries: vec![] }
}
}
fn try_filter_fat_archs(
archs: object::read::Result<&[impl FatArch]>,
target_arch: object::Architecture,
archive_path: &Path,
archive_map_data: &[u8],
) -> io::Result<Option<PathBuf>> {
let archs = archs.map_err(|e| io::Error::new(io::ErrorKind::Other, e))?;
let desired = match archs.iter().filter(|a| a.architecture() == target_arch).next() {
Some(a) => a,
None => return Ok(None),
};
let (mut new_f, extracted_path) = tempfile::Builder::new()
.suffix(archive_path.file_name().unwrap())
.tempfile()?
.keep()
.unwrap();
new_f.write_all(
desired.data(archive_map_data).map_err(|e| io::Error::new(io::ErrorKind::Other, e))?,
)?;
Ok(Some(extracted_path))
}
pub fn try_extract_macho_fat_archive(
sess: &Session,
archive_path: &Path,
) -> io::Result<Option<PathBuf>> {
let archive_map = unsafe { Mmap::map(File::open(&archive_path)?)? };
let target_arch = match sess.target.arch.as_ref() {
"aarch64" => object::Architecture::Aarch64,
"x86_64" => object::Architecture::X86_64,
_ => return Ok(None),
};
match object::macho::FatHeader::parse(&*archive_map) {
Ok(h) if h.magic.get(object::endian::BigEndian) == object::macho::FAT_MAGIC => {
let archs = object::macho::FatHeader::parse_arch32(&*archive_map);
try_filter_fat_archs(archs, target_arch, archive_path, &*archive_map)
}
Ok(h) if h.magic.get(object::endian::BigEndian) == object::macho::FAT_MAGIC_64 => {
let archs = object::macho::FatHeader::parse_arch64(&*archive_map);
try_filter_fat_archs(archs, target_arch, archive_path, &*archive_map)
}
// Not a FatHeader at all, just return None.
_ => Ok(None),
}
}
impl<'a> ArchiveBuilder<'a> for ArArchiveBuilder<'a> {
fn add_archive(
&mut self,
archive_path: &Path,
mut skip: Box<dyn FnMut(&str) -> bool + 'static>,
) -> io::Result<()> {
let mut archive_path = archive_path.to_path_buf();
if self.sess.target.llvm_target.contains("-apple-macosx") {
if let Some(new_archive_path) =
try_extract_macho_fat_archive(&self.sess, &archive_path)?
{
archive_path = new_archive_path
}
}
if self.src_archives.iter().any(|archive| archive.0 == archive_path) {
return Ok(());
}
let archive_map = unsafe { Mmap::map(File::open(&archive_path)?)? };
let archive = ArchiveFile::parse(&*archive_map)
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
let archive_index = self.src_archives.len();
for entry in archive.members() {
let entry = entry.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
let file_name = String::from_utf8(entry.name().to_vec())
.map_err(|err| io::Error::new(io::ErrorKind::InvalidData, err))?;
if !skip(&file_name) {
self.entries.push((
file_name.into_bytes(),
ArchiveEntry::FromArchive { archive_index, file_range: entry.file_range() },
));
}
}
self.src_archives.push((archive_path.to_owned(), archive_map));
Ok(())
}
/// Adds an arbitrary file to this archive
fn add_file(&mut self, file: &Path) {
self.entries.push((
file.file_name().unwrap().to_str().unwrap().to_string().into_bytes(),
ArchiveEntry::File(file.to_owned()),
));
}
/// Combine the provided files, rlibs, and native libraries into a single
/// `Archive`.
fn build(self: Box<Self>, output: &Path) -> bool {
let sess = self.sess;
match self.build_inner(output) {
Ok(any_members) => any_members,
Err(e) => sess.emit_fatal(ArchiveBuildFailure { error: e }),
}
}
}
impl<'a> ArArchiveBuilder<'a> {
fn build_inner(self, output: &Path) -> io::Result<bool> {
let archive_kind = match &*self.sess.target.archive_format {
"gnu" => ArchiveKind::Gnu,
"bsd" => ArchiveKind::Bsd,
"darwin" => ArchiveKind::Darwin,
"coff" => ArchiveKind::Coff,
kind => {
self.sess.emit_fatal(UnknownArchiveKind { kind });
}
};
let mut entries = Vec::new();
for (entry_name, entry) in self.entries {
let data =
match entry {
ArchiveEntry::FromArchive { archive_index, file_range } => {
let src_archive = &self.src_archives[archive_index];
let data = &src_archive.1
[file_range.0 as usize..file_range.0 as usize + file_range.1 as usize];
Box::new(data) as Box<dyn AsRef<[u8]>>
}
ArchiveEntry::File(file) => unsafe {
Box::new(
Mmap::map(File::open(file).map_err(|err| {
io_error_context("failed to open object file", err)
})?)
.map_err(|err| io_error_context("failed to map object file", err))?,
) as Box<dyn AsRef<[u8]>>
},
};
entries.push(NewArchiveMember {
buf: data,
get_symbols: self.get_object_symbols,
member_name: String::from_utf8(entry_name).unwrap(),
mtime: 0,
uid: 0,
gid: 0,
perms: 0o644,
})
}
// Write to a temporary file first before atomically renaming to the final name.
// This prevents programs (including rustc) from attempting to read a partial archive.
// It also enables writing an archive with the same filename as a dependency on Windows as
// required by a test.
let mut archive_tmpfile = TempFileBuilder::new()
.suffix(".temp-archive")
.tempfile_in(output.parent().unwrap_or_else(|| Path::new("")))
.map_err(|err| io_error_context("couldn't create a temp file", err))?;
write_archive_to_stream(
archive_tmpfile.as_file_mut(),
&entries,
true,
archive_kind,
true,
false,
)?;
let any_entries = !entries.is_empty();
drop(entries);
// Drop src_archives to unmap all input archives, which is necessary if we want to write the
// output archive to the same location as an input archive on Windows.
drop(self.src_archives);
archive_tmpfile
.persist(output)
.map_err(|err| io_error_context("failed to rename archive file", err.error))?;
Ok(any_entries)
}
}
fn io_error_context(context: &str, err: io::Error) -> io::Error {
io::Error::new(io::ErrorKind::Other, format!("{context}: {err}"))
}

View File

@ -534,3 +534,17 @@ pub struct ReadFileError {
#[derive(Diagnostic)]
#[diag(codegen_ssa_unsupported_link_self_contained)]
pub struct UnsupportedLinkSelfContained;
#[derive(Diagnostic)]
#[diag(codegen_ssa_archive_build_failure)]
// Public for rustc_codegen_llvm::back::archive
pub struct ArchiveBuildFailure {
pub error: std::io::Error,
}
#[derive(Diagnostic)]
#[diag(codegen_ssa_unknown_archive_kind)]
// Public for rustc_codegen_llvm::back::archive
pub struct UnknownArchiveKind<'a> {
pub kind: &'a str,
}

View File

@ -40,6 +40,12 @@ impl Deref for Mmap {
}
}
impl AsRef<[u8]> for Mmap {
fn as_ref(&self) -> &[u8] {
&*self.0
}
}
// SAFETY: On architectures other than WASM, mmap is used as backing storage. The address of this
// memory map is stable. On WASM, `Vec<u8>` is used as backing storage. The `Mmap` type doesn't
// export any function that can cause the `Vec` to be re-allocated. As such the address of the

View File

@ -1,6 +1,3 @@
codegen_gcc_ranlib_failure =
Ranlib exited with code {$exit_code}
codegen_gcc_linkage_const_or_mut_type =
must have type `*const T` or `*mut T` due to `#[linkage]` attribute

View File

@ -29,9 +29,6 @@ codegen_llvm_linkage_const_or_mut_type =
codegen_llvm_sanitizer_memtag_requires_mte =
`-Zsanitizer=memtag` requires `-Ctarget-feature=+mte`
codegen_llvm_archive_build_failure =
failed to build archive: {$error}
codegen_llvm_error_writing_def_file =
Error writing .DEF file: {$error}
@ -41,9 +38,6 @@ codegen_llvm_error_calling_dlltool =
codegen_llvm_dlltool_fail_import_library =
Dlltool could not create import library: {$stdout}\n{$stderr}
codegen_llvm_unknown_archive_kind =
Don't know how to build archive of type: {$kind}
codegen_llvm_target_feature_disable_or_enable =
the target features {$features} must all be either enabled or disabled together

View File

@ -186,3 +186,9 @@ codegen_ssa_apple_sdk_error_sdk_path = failed to get {$sdk_name} SDK path: {erro
codegen_ssa_read_file = failed to read file: {message}
codegen_ssa_unsupported_link_self_contained = option `-C link-self-contained` is not supported on this target
codegen_ssa_archive_build_failure =
failed to build archive: {$error}
codegen_ssa_unknown_archive_kind =
Don't know how to build archive of type: {$kind}

View File

@ -222,6 +222,7 @@ fn main() {
.file("llvm-wrapper/RustWrapper.cpp")
.file("llvm-wrapper/ArchiveWrapper.cpp")
.file("llvm-wrapper/CoverageMappingWrapper.cpp")
.file("llvm-wrapper/SymbolWrapper.cpp")
.file("llvm-wrapper/Linker.cpp")
.cpp(true)
.cpp_link_stdlib(None) // we handle this below

View File

@ -1967,3 +1967,7 @@ extern "C" int32_t LLVMRustGetElementTypeArgIndex(LLVMValueRef CallSite) {
#endif
return -1;
}
extern "C" bool LLVMRustIsBitcode(char *ptr, size_t len) {
return identify_magic(StringRef(ptr, len)) == file_magic::bitcode;
}

View File

@ -0,0 +1,96 @@
// Derived from code in LLVM, which is:
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Derived from:
// * https://github.com/llvm/llvm-project/blob/8ef3e895ad8ab1724e2b87cabad1dacdc7a397a3/llvm/include/llvm/Object/ArchiveWriter.h
// * https://github.com/llvm/llvm-project/blob/8ef3e895ad8ab1724e2b87cabad1dacdc7a397a3/llvm/lib/Object/ArchiveWriter.cpp
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/ADT/Optional.h"
using namespace llvm;
using namespace llvm::sys;
using namespace llvm::object;
static bool isArchiveSymbol(const object::BasicSymbolRef &S) {
Expected<uint32_t> SymFlagsOrErr = S.getFlags();
if (!SymFlagsOrErr)
// FIXME: Actually report errors helpfully.
report_fatal_error(SymFlagsOrErr.takeError());
if (*SymFlagsOrErr & object::SymbolRef::SF_FormatSpecific)
return false;
if (!(*SymFlagsOrErr & object::SymbolRef::SF_Global))
return false;
if (*SymFlagsOrErr & object::SymbolRef::SF_Undefined)
return false;
return true;
}
typedef void *(*LLVMRustGetSymbolsCallback)(void *, const char *);
typedef void *(*LLVMRustGetSymbolsErrorCallback)(const char *);
// Note: This is implemented in C++ instead of using the C api from Rust as IRObjectFile doesn't
// implement getSymbolName, only printSymbolName, which is inaccessible from the C api.
extern "C" void *LLVMRustGetSymbols(
char *BufPtr, size_t BufLen, void *State, LLVMRustGetSymbolsCallback Callback,
LLVMRustGetSymbolsErrorCallback ErrorCallback) {
std::unique_ptr<MemoryBuffer> Buf =
MemoryBuffer::getMemBuffer(StringRef(BufPtr, BufLen), StringRef("LLVMRustGetSymbolsObject"),
false);
SmallString<0> SymNameBuf;
raw_svector_ostream SymName(SymNameBuf);
// In the scenario when LLVMContext is populated SymbolicFile will contain a
// reference to it, thus SymbolicFile should be destroyed first.
LLVMContext Context;
std::unique_ptr<object::SymbolicFile> Obj;
const file_magic Type = identify_magic(Buf->getBuffer());
if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) {
return 0;
}
if (Type == file_magic::bitcode) {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
Buf->getMemBufferRef(), file_magic::bitcode, &Context);
if (!ObjOrErr) {
Error E = ObjOrErr.takeError();
SmallString<0> ErrorBuf;
raw_svector_ostream Error(ErrorBuf);
Error << E << '\0';
return ErrorCallback(Error.str().data());
}
Obj = std::move(*ObjOrErr);
} else {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf->getMemBufferRef());
if (!ObjOrErr) {
Error E = ObjOrErr.takeError();
SmallString<0> ErrorBuf;
raw_svector_ostream Error(ErrorBuf);
Error << E << '\0';
return ErrorCallback(Error.str().data());
}
Obj = std::move(*ObjOrErr);
}
for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
continue;
if (Error E = S.printName(SymName)) {
SmallString<0> ErrorBuf;
raw_svector_ostream Error(ErrorBuf);
Error << E << '\0';
return ErrorCallback(Error.str().data());
}
SymName << '\0';
if (void *E = Callback(State, SymNameBuf.str().data())) {
return E;
}
SymNameBuf.clear();
}
return 0;
}

View File

@ -31,18 +31,19 @@ const LICENSES: &[&str] = &[
/// tooling. It is _crucial_ that no exception crates be dependencies
/// of the Rust runtime (std/test).
const EXCEPTIONS: &[(&str, &str)] = &[
("mdbook", "MPL-2.0"), // mdbook
("openssl", "Apache-2.0"), // cargo, mdbook
("colored", "MPL-2.0"), // rustfmt
("ryu", "Apache-2.0 OR BSL-1.0"), // cargo/... (because of serde)
("bytesize", "Apache-2.0"), // cargo
("im-rc", "MPL-2.0+"), // cargo
("sized-chunks", "MPL-2.0+"), // cargo via im-rc
("bitmaps", "MPL-2.0+"), // cargo via im-rc
("instant", "BSD-3-Clause"), // rustc_driver/tracing-subscriber/parking_lot
("snap", "BSD-3-Clause"), // rustc
("ar_archive_writer", "Apache-2.0 WITH LLVM-exception"), // rustc
("mdbook", "MPL-2.0"), // mdbook
("openssl", "Apache-2.0"), // cargo, mdbook
("colored", "MPL-2.0"), // rustfmt
("ryu", "Apache-2.0 OR BSL-1.0"), // cargo/... (because of serde)
("bytesize", "Apache-2.0"), // cargo
("im-rc", "MPL-2.0+"), // cargo
("sized-chunks", "MPL-2.0+"), // cargo via im-rc
("bitmaps", "MPL-2.0+"), // cargo via im-rc
("instant", "BSD-3-Clause"), // rustc_driver/tracing-subscriber/parking_lot
("snap", "BSD-3-Clause"), // rustc
("fluent-langneg", "Apache-2.0"), // rustc (fluent translations)
("self_cell", "Apache-2.0"), // rustc (fluent translations)
("self_cell", "Apache-2.0"), // rustc (fluent translations)
// FIXME: this dependency violates the documentation comment above:
("fortanix-sgx-abi", "MPL-2.0"), // libstd but only for `sgx` target
("dunce", "CC0-1.0"), // cargo (dev dependency)
@ -86,6 +87,7 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
"aho-corasick",
"annotate-snippets",
"ansi_term",
"ar_archive_writer",
"arrayvec",
"atty",
"autocfg",
@ -276,7 +278,6 @@ const PERMITTED_RUSTC_DEPENDENCIES: &[&str] = &[
const PERMITTED_CRANELIFT_DEPENDENCIES: &[&str] = &[
"ahash",
"anyhow",
"ar",
"arrayvec",
"autocfg",
"bumpalo",