Rollup merge of #124033 - bjorn3:ar_archive_writer_0_3_0, r=davidtwco

Sync ar_archive_writer to LLVM 18.1.3

From LLVM 15.0.0-rc3. This adds support for COFF archives containing Arm64EC object files and has various fixes for AIX big archive files.
This commit is contained in:
Trevor Gross 2024-07-16 16:15:13 -05:00 committed by GitHub
commit 63f239c89f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 209 additions and 98 deletions

View File

@ -236,11 +236,11 @@ checksum = "d67af77d68a931ecd5cbd8a3b5987d63a1d1d1278f7f6a60ae33db485cdebb69"
[[package]]
name = "ar_archive_writer"
version = "0.2.0"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a"
checksum = "f8412a2d690663356cba5a2532f3ed55d1e8090743bc6695b88403b27df67733"
dependencies = [
"object 0.32.2",
"object 0.35.0",
]
[[package]]
@ -2648,6 +2648,15 @@ dependencies = [
"ruzstd 0.6.0",
]
[[package]]
name = "object"
version = "0.35.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e"
dependencies = [
"memchr",
]
[[package]]
name = "object"
version = "0.36.0"

View File

@ -1,7 +1,7 @@
use std::path::{Path, PathBuf};
use rustc_codegen_ssa::back::archive::{
get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER,
};
use rustc_session::Session;
@ -9,7 +9,7 @@ pub(crate) struct ArArchiveBuilderBuilder;
impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder + 'a> {
Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols))
Box::new(ArArchiveBuilder::new(sess, &DEFAULT_OBJECT_READER))
}
fn create_dll_import_lib(

View File

@ -1,7 +1,7 @@
use std::path::{Path, PathBuf};
use rustc_codegen_ssa::back::archive::{
get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder,
ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER,
};
use rustc_session::Session;
@ -11,7 +11,7 @@ pub(crate) struct ArArchiveBuilderBuilder;
impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder {
fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box<dyn ArchiveBuilder + 'a> {
Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols))
Box::new(ArArchiveBuilder::new(sess, &DEFAULT_OBJECT_READER))
}
fn create_dll_import_lib(

View File

@ -15,8 +15,8 @@ use crate::errors::{
use crate::llvm::archive_ro::{ArchiveRO, Child};
use crate::llvm::{self, ArchiveKind, LLVMMachineType, LLVMRustCOFFShortExport};
use rustc_codegen_ssa::back::archive::{
get_native_object_symbols, try_extract_macho_fat_archive, ArArchiveBuilder,
ArchiveBuildFailure, ArchiveBuilder, ArchiveBuilderBuilder, UnknownArchiveKind,
try_extract_macho_fat_archive, ArArchiveBuilder, ArchiveBuildFailure, ArchiveBuilder,
ArchiveBuilderBuilder, ObjectReader, UnknownArchiveKind, DEFAULT_OBJECT_READER,
};
use tracing::trace;
@ -115,7 +115,7 @@ impl ArchiveBuilderBuilder for LlvmArchiveBuilderBuilder {
if true {
Box::new(LlvmArchiveBuilder { sess, additions: Vec::new() })
} else {
Box::new(ArArchiveBuilder::new(sess, get_llvm_object_symbols))
Box::new(ArArchiveBuilder::new(sess, &LLVM_OBJECT_READER))
}
}
@ -291,18 +291,33 @@ impl ArchiveBuilderBuilder for LlvmArchiveBuilderBuilder {
// The object crate doesn't know how to get symbols for LLVM bitcode and COFF bigobj files.
// As such we need to use LLVM for them.
#[deny(unsafe_op_in_unsafe_fn)]
fn get_llvm_object_symbols(
buf: &[u8],
f: &mut dyn FnMut(&[u8]) -> io::Result<()>,
) -> io::Result<bool> {
static LLVM_OBJECT_READER: ObjectReader = ObjectReader {
get_symbols: get_llvm_object_symbols,
is_64_bit_object_file: llvm_is_64_bit_object_file,
is_ec_object_file: llvm_is_ec_object_file,
get_xcoff_member_alignment: DEFAULT_OBJECT_READER.get_xcoff_member_alignment,
};
fn should_use_llvm_reader(buf: &[u8]) -> bool {
let is_bitcode = unsafe { llvm::LLVMRustIsBitcode(buf.as_ptr(), buf.len()) };
// COFF bigobj file, msvc LTO file or import library. See
// https://github.com/llvm/llvm-project/blob/453f27bc9/llvm/lib/BinaryFormat/Magic.cpp#L38-L51
let is_unsupported_windows_obj_file = buf.get(0..4) == Some(b"\0\0\xFF\xFF");
if is_bitcode || is_unsupported_windows_obj_file {
is_bitcode || is_unsupported_windows_obj_file
}
#[deny(unsafe_op_in_unsafe_fn)]
fn get_llvm_object_symbols(
buf: &[u8],
f: &mut dyn FnMut(&[u8]) -> io::Result<()>,
) -> io::Result<bool> {
if !should_use_llvm_reader(buf) {
return (DEFAULT_OBJECT_READER.get_symbols)(buf, f);
}
let mut state = Box::new(f);
let err = unsafe {
@ -321,10 +336,7 @@ fn get_llvm_object_symbols(
return Err(unsafe { *Box::from_raw(err as *mut io::Error) });
}
unsafe extern "C" fn callback(
state: *mut c_void,
symbol_name: *const c_char,
) -> *mut c_void {
unsafe extern "C" fn callback(state: *mut c_void, symbol_name: *const c_char) -> *mut c_void {
let f = unsafe { &mut *(state as *mut &mut dyn FnMut(&[u8]) -> io::Result<()>) };
match f(unsafe { CStr::from_ptr(symbol_name) }.to_bytes()) {
Ok(()) => std::ptr::null_mut(),
@ -339,9 +351,22 @@ fn get_llvm_object_symbols(
format!("LLVM error: {}", error.to_string_lossy()),
))) as *mut c_void
}
} else {
get_native_object_symbols(buf, f)
}
fn llvm_is_64_bit_object_file(buf: &[u8]) -> bool {
if !should_use_llvm_reader(buf) {
return (DEFAULT_OBJECT_READER.is_64_bit_object_file)(buf);
}
unsafe { llvm::LLVMRustIs64BitSymbolicFile(buf.as_ptr(), buf.len()) }
}
fn llvm_is_ec_object_file(buf: &[u8]) -> bool {
if !should_use_llvm_reader(buf) {
return (DEFAULT_OBJECT_READER.is_ec_object_file)(buf);
}
unsafe { llvm::LLVMRustIsECObject(buf.as_ptr(), buf.len()) }
}
impl<'a> LlvmArchiveBuilder<'a> {

View File

@ -2440,4 +2440,8 @@ extern "C" {
callback: GetSymbolsCallback,
error_callback: GetSymbolsErrorCallback,
) -> *mut c_void;
pub fn LLVMRustIs64BitSymbolicFile(buf_ptr: *const u8, buf_len: usize) -> bool;
pub fn LLVMRustIsECObject(buf_ptr: *const u8, buf_len: usize) -> bool;
}

View File

@ -5,7 +5,7 @@ edition = "2021"
[dependencies]
# tidy-alphabetical-start
ar_archive_writer = "0.2.0"
ar_archive_writer = "0.3.0"
arrayvec = { version = "0.7", default-features = false }
bitflags = "2.4.1"
cc = "1.0.90"

View File

@ -6,8 +6,8 @@ use rustc_span::symbol::Symbol;
use super::metadata::search_for_section;
pub use ar_archive_writer::get_native_object_symbols;
use ar_archive_writer::{write_archive_to_stream, ArchiveKind, NewArchiveMember};
pub use ar_archive_writer::{ObjectReader, DEFAULT_OBJECT_READER};
use object::read::archive::ArchiveFile;
use object::read::macho::FatArch;
use tempfile::Builder as TempFileBuilder;
@ -89,8 +89,7 @@ pub trait ArchiveBuilder {
#[must_use = "must call build() to finish building the archive"]
pub struct ArArchiveBuilder<'a> {
sess: &'a Session,
get_object_symbols:
fn(buf: &[u8], f: &mut dyn FnMut(&[u8]) -> io::Result<()>) -> io::Result<bool>,
object_reader: &'static ObjectReader,
src_archives: Vec<(PathBuf, Mmap)>,
// Don't use an `HashMap` here, as the order is important. `lib.rmeta` needs
@ -105,14 +104,8 @@ enum ArchiveEntry {
}
impl<'a> ArArchiveBuilder<'a> {
pub fn new(
sess: &'a Session,
get_object_symbols: fn(
buf: &[u8],
f: &mut dyn FnMut(&[u8]) -> io::Result<()>,
) -> io::Result<bool>,
) -> ArArchiveBuilder<'a> {
ArArchiveBuilder { sess, get_object_symbols, src_archives: vec![], entries: vec![] }
pub fn new(sess: &'a Session, object_reader: &'static ObjectReader) -> ArArchiveBuilder<'a> {
ArArchiveBuilder { sess, object_reader, src_archives: vec![], entries: vec![] }
}
}
@ -267,7 +260,7 @@ impl<'a> ArArchiveBuilder<'a> {
entries.push(NewArchiveMember {
buf: data,
get_symbols: self.get_object_symbols,
object_reader: self.object_reader,
member_name: String::from_utf8(entry_name).unwrap(),
mtime: 0,
uid: 0,
@ -294,7 +287,13 @@ impl<'a> ArArchiveBuilder<'a> {
let mut archive_tmpfile = File::create_new(&archive_tmpfile_path)
.map_err(|err| io_error_context("couldn't create the temp file", err))?;
write_archive_to_stream(&mut archive_tmpfile, &entries, archive_kind, false)?;
write_archive_to_stream(
&mut archive_tmpfile,
&entries,
archive_kind,
false,
/* is_ec = */ self.sess.target.arch == "arm64ec",
)?;
let any_entries = !entries.is_empty();
drop(entries);

View File

@ -4,12 +4,16 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
// Derived from:
// * https://github.com/llvm/llvm-project/blob/8ef3e895ad8ab1724e2b87cabad1dacdc7a397a3/llvm/include/llvm/Object/ArchiveWriter.h
// * https://github.com/llvm/llvm-project/blob/8ef3e895ad8ab1724e2b87cabad1dacdc7a397a3/llvm/lib/Object/ArchiveWriter.cpp
// * https://github.com/llvm/llvm-project/blob/ef6d1ec07c693352c4a60dd58db08d2d8558f6ea/llvm/include/llvm/Object/ArchiveWriter.h
// * https://github.com/llvm/llvm-project/blob/ef6d1ec07c693352c4a60dd58db08d2d8558f6ea/llvm/lib/Object/ArchiveWriter.cpp
#include "LLVMWrapper.h"
#include "SuppressLLVMWarnings.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ObjectFile.h"
#include <llvm/Support/raw_ostream.h>
@ -34,6 +38,27 @@ static bool isArchiveSymbol(const object::BasicSymbolRef &S) {
typedef void *(*LLVMRustGetSymbolsCallback)(void *, const char *);
typedef void *(*LLVMRustGetSymbolsErrorCallback)(const char *);
// This function is copied from ArchiveWriter.cpp.
static Expected<std::unique_ptr<SymbolicFile>>
getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) {
const file_magic Type = identify_magic(Buf.getBuffer());
// Don't attempt to read non-symbolic file types.
if (!object::SymbolicFile::isSymbolicFile(Type, &Context))
return nullptr;
if (Type == file_magic::bitcode) {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
Buf, file_magic::bitcode, &Context);
if (!ObjOrErr)
return ObjOrErr.takeError();
return std::move(*ObjOrErr);
} else {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf);
if (!ObjOrErr)
return ObjOrErr.takeError();
return std::move(*ObjOrErr);
}
}
// Note: This is implemented in C++ instead of using the C api from Rust as
// IRObjectFile doesn't implement getSymbolName, only printSymbolName, which is
// inaccessible from the C api.
@ -49,16 +74,8 @@ LLVMRustGetSymbols(char *BufPtr, size_t BufLen, void *State,
// In the scenario when LLVMContext is populated SymbolicFile will contain a
// reference to it, thus SymbolicFile should be destroyed first.
LLVMContext Context;
std::unique_ptr<object::SymbolicFile> Obj;
const file_magic Type = identify_magic(Buf->getBuffer());
if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) {
return 0;
}
if (Type == file_magic::bitcode) {
auto ObjOrErr = object::SymbolicFile::createSymbolicFile(
Buf->getMemBufferRef(), file_magic::bitcode, &Context);
Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
getSymbolicFile(Buf->getMemBufferRef(), Context);
if (!ObjOrErr) {
Error E = ObjOrErr.takeError();
SmallString<0> ErrorBuf;
@ -66,19 +83,7 @@ LLVMRustGetSymbols(char *BufPtr, size_t BufLen, void *State,
Error << E << '\0';
return ErrorCallback(Error.str().data());
}
Obj = std::move(*ObjOrErr);
} else {
auto ObjOrErr =
object::SymbolicFile::createSymbolicFile(Buf->getMemBufferRef());
if (!ObjOrErr) {
Error E = ObjOrErr.takeError();
SmallString<0> ErrorBuf;
auto Error = raw_svector_ostream(ErrorBuf);
Error << E << '\0';
return ErrorCallback(Error.str().data());
}
Obj = std::move(*ObjOrErr);
}
std::unique_ptr<object::SymbolicFile> Obj = std::move(*ObjOrErr);
for (const object::BasicSymbolRef &S : Obj->symbols()) {
if (!isArchiveSymbol(S))
@ -97,3 +102,72 @@ LLVMRustGetSymbols(char *BufPtr, size_t BufLen, void *State,
}
return 0;
}
// Encoding true and false as invalid pointer values
#define TRUE_PTR (void *)1
#define FALSE_PTR (void *)0
extern "C" bool LLVMRustIs64BitSymbolicFile(char *BufPtr, size_t BufLen) {
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(
StringRef(BufPtr, BufLen), StringRef("LLVMRustGetSymbolsObject"), false);
SmallString<0> SymNameBuf;
auto SymName = raw_svector_ostream(SymNameBuf);
// Code starting from this line is copied from s64BitSymbolicFile in
// ArchiveWriter.cpp.
// In the scenario when LLVMContext is populated SymbolicFile will contain a
// reference to it, thus SymbolicFile should be destroyed first.
LLVMContext Context;
Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
getSymbolicFile(Buf->getMemBufferRef(), Context);
if (!ObjOrErr) {
return false;
}
std::unique_ptr<object::SymbolicFile> Obj = std::move(*ObjOrErr);
return Obj != nullptr ? Obj->is64Bit() : false;
}
extern "C" bool LLVMRustIsECObject(char *BufPtr, size_t BufLen) {
std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(
StringRef(BufPtr, BufLen), StringRef("LLVMRustGetSymbolsObject"), false);
SmallString<0> SymNameBuf;
auto SymName = raw_svector_ostream(SymNameBuf);
// In the scenario when LLVMContext is populated SymbolicFile will contain a
// reference to it, thus SymbolicFile should be destroyed first.
LLVMContext Context;
Expected<std::unique_ptr<object::SymbolicFile>> ObjOrErr =
getSymbolicFile(Buf->getMemBufferRef(), Context);
if (!ObjOrErr) {
return false;
}
std::unique_ptr<object::SymbolicFile> Obj = std::move(*ObjOrErr);
if (Obj == nullptr) {
return false;
}
// Code starting from this line is copied from isECObject in
// ArchiveWriter.cpp with an extra #if to work with LLVM 17.
if (Obj->isCOFF())
return cast<llvm::object::COFFObjectFile>(&*Obj)->getMachine() !=
COFF::IMAGE_FILE_MACHINE_ARM64;
#if LLVM_VERSION_GE(18, 0)
if (Obj->isCOFFImportFile())
return cast<llvm::object::COFFImportFile>(&*Obj)->getMachine() !=
COFF::IMAGE_FILE_MACHINE_ARM64;
#endif
if (Obj->isIR()) {
Expected<std::string> TripleStr =
getBitcodeTargetTriple(Obj->getMemoryBufferRef());
if (!TripleStr)
return false;
Triple T(*TripleStr);
return T.isWindowsArm64EC() || T.getArch() == Triple::x86_64;
}
return false;
}