From 58e551433d692579faf4ad59fb7f192aa9001677 Mon Sep 17 00:00:00 2001 From: bjorn3 <17426603+bjorn3@users.noreply.github.com> Date: Tue, 16 Apr 2024 18:31:43 +0000 Subject: [PATCH] Sync ar_archive_writer to LLVM 18.1.3 From LLVM 15.0.0-rc3. This adds support for COFF archives containing Arm64EC object files and has various fixes for AIX big archive files. --- Cargo.lock | 15 +- .../rustc_codegen_cranelift/src/archive.rs | 4 +- compiler/rustc_codegen_gcc/src/archive.rs | 4 +- .../rustc_codegen_llvm/src/back/archive.rs | 117 +++++++++------- compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 4 + compiler/rustc_codegen_ssa/Cargo.toml | 2 +- .../rustc_codegen_ssa/src/back/archive.rs | 25 ++-- .../rustc_llvm/llvm-wrapper/SymbolWrapper.cpp | 128 +++++++++++++----- 8 files changed, 201 insertions(+), 98 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afeb9faec09..f3c6a1d5eb4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -236,11 +236,11 @@ checksum = "d67af77d68a931ecd5cbd8a3b5987d63a1d1d1278f7f6a60ae33db485cdebb69" [[package]] name = "ar_archive_writer" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0c269894b6fe5e9d7ada0cf69b5bf847ff35bc25fc271f08e1d080fce80339a" +checksum = "f8412a2d690663356cba5a2532f3ed55d1e8090743bc6695b88403b27df67733" dependencies = [ - "object 0.32.2", + "object 0.35.0", ] [[package]] @@ -2637,6 +2637,15 @@ dependencies = [ "ruzstd 0.6.0", ] +[[package]] +name = "object" +version = "0.35.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8ec7ab813848ba4522158d5517a6093db1ded27575b070f4177b8d12b41db5e" +dependencies = [ + "memchr", +] + [[package]] name = "object" version = "0.36.0" diff --git a/compiler/rustc_codegen_cranelift/src/archive.rs b/compiler/rustc_codegen_cranelift/src/archive.rs index 414d3db1c51..26db93a7579 100644 --- a/compiler/rustc_codegen_cranelift/src/archive.rs +++ b/compiler/rustc_codegen_cranelift/src/archive.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use rustc_codegen_ssa::back::archive::{ - get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, + ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER, }; use rustc_session::Session; @@ -9,7 +9,7 @@ pub(crate) struct ArArchiveBuilderBuilder; impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box { - Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols)) + Box::new(ArArchiveBuilder::new(sess, &DEFAULT_OBJECT_READER)) } fn create_dll_import_lib( diff --git a/compiler/rustc_codegen_gcc/src/archive.rs b/compiler/rustc_codegen_gcc/src/archive.rs index 73ff0c37b66..21676f5dbb6 100644 --- a/compiler/rustc_codegen_gcc/src/archive.rs +++ b/compiler/rustc_codegen_gcc/src/archive.rs @@ -1,7 +1,7 @@ use std::path::{Path, PathBuf}; use rustc_codegen_ssa::back::archive::{ - get_native_object_symbols, ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, + ArArchiveBuilder, ArchiveBuilder, ArchiveBuilderBuilder, DEFAULT_OBJECT_READER, }; use rustc_session::Session; @@ -11,7 +11,7 @@ pub(crate) struct ArArchiveBuilderBuilder; impl ArchiveBuilderBuilder for ArArchiveBuilderBuilder { fn new_archive_builder<'a>(&self, sess: &'a Session) -> Box { - Box::new(ArArchiveBuilder::new(sess, get_native_object_symbols)) + Box::new(ArArchiveBuilder::new(sess, &DEFAULT_OBJECT_READER)) } fn create_dll_import_lib( diff --git a/compiler/rustc_codegen_llvm/src/back/archive.rs b/compiler/rustc_codegen_llvm/src/back/archive.rs index a354f3d3536..f46c6b1c498 100644 --- a/compiler/rustc_codegen_llvm/src/back/archive.rs +++ b/compiler/rustc_codegen_llvm/src/back/archive.rs @@ -15,8 +15,8 @@ use crate::errors::{ use crate::llvm::archive_ro::{ArchiveRO, Child}; use crate::llvm::{self, ArchiveKind, LLVMMachineType, LLVMRustCOFFShortExport}; use rustc_codegen_ssa::back::archive::{ - get_native_object_symbols, try_extract_macho_fat_archive, ArArchiveBuilder, - ArchiveBuildFailure, ArchiveBuilder, ArchiveBuilderBuilder, UnknownArchiveKind, + try_extract_macho_fat_archive, ArArchiveBuilder, ArchiveBuildFailure, ArchiveBuilder, + ArchiveBuilderBuilder, ObjectReader, UnknownArchiveKind, DEFAULT_OBJECT_READER, }; use tracing::trace; @@ -115,7 +115,7 @@ impl ArchiveBuilderBuilder for LlvmArchiveBuilderBuilder { if true { Box::new(LlvmArchiveBuilder { sess, additions: Vec::new() }) } else { - Box::new(ArArchiveBuilder::new(sess, get_llvm_object_symbols)) + Box::new(ArArchiveBuilder::new(sess, &LLVM_OBJECT_READER)) } } @@ -291,57 +291,82 @@ impl ArchiveBuilderBuilder for LlvmArchiveBuilderBuilder { // The object crate doesn't know how to get symbols for LLVM bitcode and COFF bigobj files. // As such we need to use LLVM for them. -#[deny(unsafe_op_in_unsafe_fn)] -fn get_llvm_object_symbols( - buf: &[u8], - f: &mut dyn FnMut(&[u8]) -> io::Result<()>, -) -> io::Result { + +static LLVM_OBJECT_READER: ObjectReader = ObjectReader { + get_symbols: get_llvm_object_symbols, + is_64_bit_object_file: llvm_is_64_bit_object_file, + is_ec_object_file: llvm_is_ec_object_file, + get_xcoff_member_alignment: DEFAULT_OBJECT_READER.get_xcoff_member_alignment, +}; + +fn should_use_llvm_reader(buf: &[u8]) -> bool { let is_bitcode = unsafe { llvm::LLVMRustIsBitcode(buf.as_ptr(), buf.len()) }; // COFF bigobj file, msvc LTO file or import library. See // https://github.com/llvm/llvm-project/blob/453f27bc9/llvm/lib/BinaryFormat/Magic.cpp#L38-L51 let is_unsupported_windows_obj_file = buf.get(0..4) == Some(b"\0\0\xFF\xFF"); - if is_bitcode || is_unsupported_windows_obj_file { - let mut state = Box::new(f); + is_bitcode || is_unsupported_windows_obj_file +} - let err = unsafe { - llvm::LLVMRustGetSymbols( - buf.as_ptr(), - buf.len(), - std::ptr::addr_of_mut!(*state) as *mut c_void, - callback, - error_callback, - ) - }; - - if err.is_null() { - return Ok(true); - } else { - return Err(unsafe { *Box::from_raw(err as *mut io::Error) }); - } - - unsafe extern "C" fn callback( - state: *mut c_void, - symbol_name: *const c_char, - ) -> *mut c_void { - let f = unsafe { &mut *(state as *mut &mut dyn FnMut(&[u8]) -> io::Result<()>) }; - match f(unsafe { CStr::from_ptr(symbol_name) }.to_bytes()) { - Ok(()) => std::ptr::null_mut(), - Err(err) => Box::into_raw(Box::new(err)) as *mut c_void, - } - } - - unsafe extern "C" fn error_callback(error: *const c_char) -> *mut c_void { - let error = unsafe { CStr::from_ptr(error) }; - Box::into_raw(Box::new(io::Error::new( - io::ErrorKind::Other, - format!("LLVM error: {}", error.to_string_lossy()), - ))) as *mut c_void - } - } else { - get_native_object_symbols(buf, f) +#[deny(unsafe_op_in_unsafe_fn)] +fn get_llvm_object_symbols( + buf: &[u8], + f: &mut dyn FnMut(&[u8]) -> io::Result<()>, +) -> io::Result { + if !should_use_llvm_reader(buf) { + return (DEFAULT_OBJECT_READER.get_symbols)(buf, f); } + + let mut state = Box::new(f); + + let err = unsafe { + llvm::LLVMRustGetSymbols( + buf.as_ptr(), + buf.len(), + std::ptr::addr_of_mut!(*state) as *mut c_void, + callback, + error_callback, + ) + }; + + if err.is_null() { + return Ok(true); + } else { + return Err(unsafe { *Box::from_raw(err as *mut io::Error) }); + } + + unsafe extern "C" fn callback(state: *mut c_void, symbol_name: *const c_char) -> *mut c_void { + let f = unsafe { &mut *(state as *mut &mut dyn FnMut(&[u8]) -> io::Result<()>) }; + match f(unsafe { CStr::from_ptr(symbol_name) }.to_bytes()) { + Ok(()) => std::ptr::null_mut(), + Err(err) => Box::into_raw(Box::new(err)) as *mut c_void, + } + } + + unsafe extern "C" fn error_callback(error: *const c_char) -> *mut c_void { + let error = unsafe { CStr::from_ptr(error) }; + Box::into_raw(Box::new(io::Error::new( + io::ErrorKind::Other, + format!("LLVM error: {}", error.to_string_lossy()), + ))) as *mut c_void + } +} + +fn llvm_is_64_bit_object_file(buf: &[u8]) -> bool { + if !should_use_llvm_reader(buf) { + return (DEFAULT_OBJECT_READER.is_64_bit_object_file)(buf); + } + + unsafe { llvm::LLVMRustIs64BitSymbolicFile(buf.as_ptr(), buf.len()) } +} + +fn llvm_is_ec_object_file(buf: &[u8]) -> bool { + if !should_use_llvm_reader(buf) { + return (DEFAULT_OBJECT_READER.is_ec_object_file)(buf); + } + + unsafe { llvm::LLVMRustIsECObject(buf.as_ptr(), buf.len()) } } impl<'a> LlvmArchiveBuilder<'a> { diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 08e9e312827..f56a8036c65 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2440,4 +2440,8 @@ extern "C" { callback: GetSymbolsCallback, error_callback: GetSymbolsErrorCallback, ) -> *mut c_void; + + pub fn LLVMRustIs64BitSymbolicFile(buf_ptr: *const u8, buf_len: usize) -> bool; + + pub fn LLVMRustIsECObject(buf_ptr: *const u8, buf_len: usize) -> bool; } diff --git a/compiler/rustc_codegen_ssa/Cargo.toml b/compiler/rustc_codegen_ssa/Cargo.toml index 3771fc6b0a2..f7b5b0f310b 100644 --- a/compiler/rustc_codegen_ssa/Cargo.toml +++ b/compiler/rustc_codegen_ssa/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" [dependencies] # tidy-alphabetical-start -ar_archive_writer = "0.2.0" +ar_archive_writer = "0.3.0" arrayvec = { version = "0.7", default-features = false } bitflags = "2.4.1" cc = "1.0.90" diff --git a/compiler/rustc_codegen_ssa/src/back/archive.rs b/compiler/rustc_codegen_ssa/src/back/archive.rs index c99118f5156..f673675bdbb 100644 --- a/compiler/rustc_codegen_ssa/src/back/archive.rs +++ b/compiler/rustc_codegen_ssa/src/back/archive.rs @@ -6,8 +6,8 @@ use rustc_span::symbol::Symbol; use super::metadata::search_for_section; -pub use ar_archive_writer::get_native_object_symbols; use ar_archive_writer::{write_archive_to_stream, ArchiveKind, NewArchiveMember}; +pub use ar_archive_writer::{ObjectReader, DEFAULT_OBJECT_READER}; use object::read::archive::ArchiveFile; use object::read::macho::FatArch; use tempfile::Builder as TempFileBuilder; @@ -89,8 +89,7 @@ pub trait ArchiveBuilder { #[must_use = "must call build() to finish building the archive"] pub struct ArArchiveBuilder<'a> { sess: &'a Session, - get_object_symbols: - fn(buf: &[u8], f: &mut dyn FnMut(&[u8]) -> io::Result<()>) -> io::Result, + object_reader: &'static ObjectReader, src_archives: Vec<(PathBuf, Mmap)>, // Don't use an `HashMap` here, as the order is important. `lib.rmeta` needs @@ -105,14 +104,8 @@ enum ArchiveEntry { } impl<'a> ArArchiveBuilder<'a> { - pub fn new( - sess: &'a Session, - get_object_symbols: fn( - buf: &[u8], - f: &mut dyn FnMut(&[u8]) -> io::Result<()>, - ) -> io::Result, - ) -> ArArchiveBuilder<'a> { - ArArchiveBuilder { sess, get_object_symbols, src_archives: vec![], entries: vec![] } + pub fn new(sess: &'a Session, object_reader: &'static ObjectReader) -> ArArchiveBuilder<'a> { + ArArchiveBuilder { sess, object_reader, src_archives: vec![], entries: vec![] } } } @@ -267,7 +260,7 @@ impl<'a> ArArchiveBuilder<'a> { entries.push(NewArchiveMember { buf: data, - get_symbols: self.get_object_symbols, + object_reader: self.object_reader, member_name: String::from_utf8(entry_name).unwrap(), mtime: 0, uid: 0, @@ -294,7 +287,13 @@ impl<'a> ArArchiveBuilder<'a> { let mut archive_tmpfile = File::create_new(&archive_tmpfile_path) .map_err(|err| io_error_context("couldn't create the temp file", err))?; - write_archive_to_stream(&mut archive_tmpfile, &entries, archive_kind, false)?; + write_archive_to_stream( + &mut archive_tmpfile, + &entries, + archive_kind, + false, + self.sess.target.arch == "arm64ec", + )?; let any_entries = !entries.is_empty(); drop(entries); diff --git a/compiler/rustc_llvm/llvm-wrapper/SymbolWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/SymbolWrapper.cpp index a9d1362a338..d13dcb137a1 100644 --- a/compiler/rustc_llvm/llvm-wrapper/SymbolWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/SymbolWrapper.cpp @@ -4,12 +4,15 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // Derived from: -// * https://github.com/llvm/llvm-project/blob/8ef3e895ad8ab1724e2b87cabad1dacdc7a397a3/llvm/include/llvm/Object/ArchiveWriter.h -// * https://github.com/llvm/llvm-project/blob/8ef3e895ad8ab1724e2b87cabad1dacdc7a397a3/llvm/lib/Object/ArchiveWriter.cpp +// * https://github.com/llvm/llvm-project/blob/ef6d1ec07c693352c4a60dd58db08d2d8558f6ea/llvm/include/llvm/Object/ArchiveWriter.h +// * https://github.com/llvm/llvm-project/blob/ef6d1ec07c693352c4a60dd58db08d2d8558f6ea/llvm/lib/Object/ArchiveWriter.cpp #include "SuppressLLVMWarnings.h" #include "llvm/ADT/SmallString.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/Object/COFF.h" +#include "llvm/Object/COFFImportFile.h" +#include "llvm/Object/IRObjectFile.h" #include "llvm/Object/ObjectFile.h" #include @@ -34,6 +37,26 @@ static bool isArchiveSymbol(const object::BasicSymbolRef &S) { typedef void *(*LLVMRustGetSymbolsCallback)(void *, const char *); typedef void *(*LLVMRustGetSymbolsErrorCallback)(const char *); +static Expected> +getSymbolicFile(MemoryBufferRef Buf, LLVMContext &Context) { + const file_magic Type = identify_magic(Buf.getBuffer()); + // Don't attempt to read non-symbolic file types. + if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) + return nullptr; + if (Type == file_magic::bitcode) { + auto ObjOrErr = object::SymbolicFile::createSymbolicFile( + Buf, file_magic::bitcode, &Context); + if (!ObjOrErr) + return ObjOrErr.takeError(); + return std::move(*ObjOrErr); + } else { + auto ObjOrErr = object::SymbolicFile::createSymbolicFile(Buf); + if (!ObjOrErr) + return ObjOrErr.takeError(); + return std::move(*ObjOrErr); + } +} + // Note: This is implemented in C++ instead of using the C api from Rust as // IRObjectFile doesn't implement getSymbolName, only printSymbolName, which is // inaccessible from the C api. @@ -49,36 +72,16 @@ LLVMRustGetSymbols(char *BufPtr, size_t BufLen, void *State, // In the scenario when LLVMContext is populated SymbolicFile will contain a // reference to it, thus SymbolicFile should be destroyed first. LLVMContext Context; - std::unique_ptr Obj; - - const file_magic Type = identify_magic(Buf->getBuffer()); - if (!object::SymbolicFile::isSymbolicFile(Type, &Context)) { - return 0; - } - - if (Type == file_magic::bitcode) { - auto ObjOrErr = object::SymbolicFile::createSymbolicFile( - Buf->getMemBufferRef(), file_magic::bitcode, &Context); - if (!ObjOrErr) { - Error E = ObjOrErr.takeError(); - SmallString<0> ErrorBuf; - auto Error = raw_svector_ostream(ErrorBuf); - Error << E << '\0'; - return ErrorCallback(Error.str().data()); - } - Obj = std::move(*ObjOrErr); - } else { - auto ObjOrErr = - object::SymbolicFile::createSymbolicFile(Buf->getMemBufferRef()); - if (!ObjOrErr) { - Error E = ObjOrErr.takeError(); - SmallString<0> ErrorBuf; - auto Error = raw_svector_ostream(ErrorBuf); - Error << E << '\0'; - return ErrorCallback(Error.str().data()); - } - Obj = std::move(*ObjOrErr); + Expected> ObjOrErr = + getSymbolicFile(Buf->getMemBufferRef(), Context); + if (!ObjOrErr) { + Error E = ObjOrErr.takeError(); + SmallString<0> ErrorBuf; + auto Error = raw_svector_ostream(ErrorBuf); + Error << E << '\0'; + return ErrorCallback(Error.str().data()); } + std::unique_ptr Obj = std::move(*ObjOrErr); for (const object::BasicSymbolRef &S : Obj->symbols()) { if (!isArchiveSymbol(S)) @@ -97,3 +100,66 @@ LLVMRustGetSymbols(char *BufPtr, size_t BufLen, void *State, } return 0; } + +// Encoding true and false as invalid pointer values +#define TRUE_PTR (void *)1 +#define FALSE_PTR (void *)0 + +extern "C" bool LLVMRustIs64BitSymbolicFile(char *BufPtr, size_t BufLen) { + std::unique_ptr Buf = MemoryBuffer::getMemBuffer( + StringRef(BufPtr, BufLen), StringRef("LLVMRustGetSymbolsObject"), false); + SmallString<0> SymNameBuf; + auto SymName = raw_svector_ostream(SymNameBuf); + + // In the scenario when LLVMContext is populated SymbolicFile will contain a + // reference to it, thus SymbolicFile should be destroyed first. + LLVMContext Context; + Expected> ObjOrErr = + getSymbolicFile(Buf->getMemBufferRef(), Context); + if (!ObjOrErr) { + return false; + } + std::unique_ptr Obj = std::move(*ObjOrErr); + + return Obj != nullptr ? Obj->is64Bit() : false; +} + +extern "C" bool LLVMRustIsECObject(char *BufPtr, size_t BufLen) { + std::unique_ptr Buf = MemoryBuffer::getMemBuffer( + StringRef(BufPtr, BufLen), StringRef("LLVMRustGetSymbolsObject"), false); + SmallString<0> SymNameBuf; + auto SymName = raw_svector_ostream(SymNameBuf); + + // In the scenario when LLVMContext is populated SymbolicFile will contain a + // reference to it, thus SymbolicFile should be destroyed first. + LLVMContext Context; + Expected> ObjOrErr = + getSymbolicFile(Buf->getMemBufferRef(), Context); + if (!ObjOrErr) { + return false; + } + std::unique_ptr Obj = std::move(*ObjOrErr); + + if (Obj == nullptr) { + return false; + } + + if (Obj->isCOFF()) + return cast(&*Obj)->getMachine() != + COFF::IMAGE_FILE_MACHINE_ARM64; + + if (Obj->isCOFFImportFile()) + return cast(&*Obj)->getMachine() != + COFF::IMAGE_FILE_MACHINE_ARM64; + + if (Obj->isIR()) { + Expected TripleStr = + getBitcodeTargetTriple(Obj->getMemoryBufferRef()); + if (!TripleStr) + return false; + Triple T(*TripleStr); + return T.isWindowsArm64EC() || T.getArch() == Triple::x86_64; + } + + return false; +}