Auto merge of #132549 - Zalathar:rust-string, r=cuviper

Make `RustString` an extern type to avoid `improper_ctypes` warnings

Currently, any FFI function that uses `&RustString` needs to also add `#[allow(improper_ctypes)]` to silence a warning.

The warning is not _completely_ bogus, because `RustString` contains `Vec<u8>` and therefore does not have a guaranteed layout. But we have no way of telling the lint that this doesn't matter, because the C++ code only uses that pointer opaquely and never relies on its underlying layout.

Ideally there would be some way to silence `improper_ctypes` at the type-definition site. But because there isn't, casting to and from a separate extern type is better than having to annotate every single use site.
This commit is contained in:
bors 2024-11-09 04:43:51 +00:00
commit 012ae13d6a
6 changed files with 62 additions and 49 deletions

View File

@ -1766,11 +1766,9 @@ unsafe extern "C" {
pub fn LLVMRustGetLastError() -> *const c_char;
/// Prints the timing information collected by `-Ztime-llvm-passes`.
#[expect(improper_ctypes)]
pub(crate) fn LLVMRustPrintPassTimings(OutStr: &RustString);
/// Prints the statistics collected by `-Zprint-codegen-stats`.
#[expect(improper_ctypes)]
pub(crate) fn LLVMRustPrintStatistics(OutStr: &RustString);
/// Prepares inline assembly.
@ -1791,7 +1789,6 @@ unsafe extern "C" {
ConstraintsLen: size_t,
) -> bool;
#[allow(improper_ctypes)]
pub(crate) fn LLVMRustCoverageWriteFilenamesToBuffer(
Filenames: *const *const c_char,
FilenamesLen: size_t,
@ -1800,7 +1797,6 @@ unsafe extern "C" {
BufferOut: &RustString,
);
#[allow(improper_ctypes)]
pub(crate) fn LLVMRustCoverageWriteFunctionMappingsToBuffer(
VirtualFileMappingIDs: *const c_uint,
NumVirtualFileMappingIDs: size_t,
@ -1824,13 +1820,10 @@ unsafe extern "C" {
) -> &Value;
pub(crate) fn LLVMRustCoverageHashBytes(Bytes: *const c_char, NumBytes: size_t) -> u64;
#[allow(improper_ctypes)]
pub(crate) fn LLVMRustCoverageWriteCovmapSectionNameToString(M: &Module, OutStr: &RustString);
#[allow(improper_ctypes)]
pub(crate) fn LLVMRustCoverageWriteCovfunSectionNameToString(M: &Module, OutStr: &RustString);
#[allow(improper_ctypes)]
pub(crate) fn LLVMRustCoverageWriteCovmapVarNameToString(OutStr: &RustString);
pub(crate) fn LLVMRustCoverageMappingVersion() -> u32;
@ -2185,14 +2178,11 @@ unsafe extern "C" {
pub fn LLVMRustDIBuilderCreateOpPlusUconst() -> u64;
pub fn LLVMRustDIBuilderCreateOpLLVMFragment() -> u64;
#[allow(improper_ctypes)]
pub fn LLVMRustWriteTypeToString(Type: &Type, s: &RustString);
#[allow(improper_ctypes)]
pub fn LLVMRustWriteValueToString(value_ref: &Value, s: &RustString);
pub fn LLVMRustHasFeature(T: &TargetMachine, s: *const c_char) -> bool;
#[allow(improper_ctypes)]
pub(crate) fn LLVMRustPrintTargetCPUs(TM: &TargetMachine, OutStr: &RustString);
pub fn LLVMRustGetTargetFeaturesCount(T: &TargetMachine) -> size_t;
pub fn LLVMRustGetTargetFeature(
@ -2297,10 +2287,8 @@ unsafe extern "C" {
pub fn LLVMRustArchiveIteratorFree<'a>(AIR: &'a mut ArchiveIterator<'a>);
pub fn LLVMRustDestroyArchive(AR: &'static mut Archive);
#[allow(improper_ctypes)]
pub fn LLVMRustWriteTwineToString(T: &Twine, s: &RustString);
#[allow(improper_ctypes)]
pub fn LLVMRustUnpackOptimizationDiagnostic<'a>(
DI: &'a DiagnosticInfo,
pass_name_out: &RustString,
@ -2318,7 +2306,6 @@ unsafe extern "C" {
message_out: &mut Option<&'a Twine>,
);
#[allow(improper_ctypes)]
pub fn LLVMRustWriteDiagnosticInfoToString(DI: &DiagnosticInfo, s: &RustString);
pub fn LLVMRustGetDiagInfoKind(DI: &DiagnosticInfo) -> DiagnosticKind;
@ -2327,7 +2314,6 @@ unsafe extern "C" {
cookie_out: &mut c_uint,
) -> &'a SMDiagnostic;
#[allow(improper_ctypes)]
pub fn LLVMRustUnpackSMDiagnostic(
d: &SMDiagnostic,
message_out: &RustString,
@ -2374,7 +2360,6 @@ unsafe extern "C" {
pub fn LLVMRustModuleBufferLen(p: &ModuleBuffer) -> usize;
pub fn LLVMRustModuleBufferFree(p: &'static mut ModuleBuffer);
pub fn LLVMRustModuleCost(M: &Module) -> u64;
#[allow(improper_ctypes)]
pub fn LLVMRustModuleInstructionStats(M: &Module, Str: &RustString);
pub fn LLVMRustThinLTOBufferCreate(
@ -2427,7 +2412,6 @@ unsafe extern "C" {
bytecode_len: usize,
) -> bool;
pub fn LLVMRustLinkerFree<'a>(linker: &'a mut Linker<'a>);
#[allow(improper_ctypes)]
pub fn LLVMRustComputeLTOCacheKey(
key_out: &RustString,
mod_id: *const c_char,
@ -2450,7 +2434,6 @@ unsafe extern "C" {
pgo_available: bool,
);
#[allow(improper_ctypes)]
pub fn LLVMRustGetMangledName(V: &Value, out: &RustString);
pub fn LLVMRustGetElementTypeArgIndex(CallSite: &Value) -> i32;

View File

@ -1,6 +1,5 @@
#![allow(non_snake_case)]
use std::cell::RefCell;
use std::ffi::{CStr, CString};
use std::ops::Deref;
use std::ptr;
@ -301,15 +300,11 @@ pub fn set_value_name(value: &Value, name: &[u8]) {
}
pub fn build_string(f: impl FnOnce(&RustString)) -> Result<String, FromUtf8Error> {
let sr = RustString { bytes: RefCell::new(Vec::new()) };
f(&sr);
String::from_utf8(sr.bytes.into_inner())
String::from_utf8(RustString::build_byte_buffer(f))
}
pub fn build_byte_buffer(f: impl FnOnce(&RustString)) -> Vec<u8> {
let sr = RustString { bytes: RefCell::new(Vec::new()) };
f(&sr);
sr.bytes.into_inner()
RustString::build_byte_buffer(f)
}
pub fn twine_to_string(tr: &Twine) -> String {

View File

@ -104,8 +104,9 @@ typedef struct OpaqueRustString *RustStringRef;
typedef struct LLVMOpaqueTwine *LLVMTwineRef;
typedef struct LLVMOpaqueSMDiagnostic *LLVMSMDiagnosticRef;
extern "C" void LLVMRustStringWriteImpl(RustStringRef Str, const char *Ptr,
size_t Size);
extern "C" void LLVMRustStringWriteImpl(RustStringRef buf,
const char *slice_ptr,
size_t slice_len);
class RawRustStringOstream : public llvm::raw_ostream {
RustStringRef Str;

View File

@ -1624,5 +1624,6 @@ extern "C" void LLVMRustComputeLTOCacheKey(RustStringRef KeyOut,
CfiFunctionDefs, CfiFunctionDecls);
#endif
LLVMRustStringWriteImpl(KeyOut, Key.c_str(), Key.size());
auto OS = RawRustStringOstream(KeyOut);
OS << Key.str();
}

View File

@ -1510,8 +1510,8 @@ LLVMRustUnpackSMDiagnostic(LLVMSMDiagnosticRef DRef, RustStringRef MessageOut,
const SourceMgr &LSM = *D.getSourceMgr();
const MemoryBuffer *LBuf =
LSM.getMemoryBuffer(LSM.FindBufferContainingLoc(D.getLoc()));
LLVMRustStringWriteImpl(BufferOut, LBuf->getBufferStart(),
LBuf->getBufferSize());
auto BufferOS = RawRustStringOstream(BufferOut);
BufferOS << LBuf->getBuffer();
*LocOut = D.getLoc().getPointer() - LBuf->getBufferStart();

View File

@ -2,42 +2,75 @@
#![allow(internal_features)]
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
#![doc(rust_logo)]
#![feature(extern_types)]
#![feature(rustdoc_internals)]
#![warn(unreachable_pub)]
// tidy-alphabetical-end
// NOTE: This crate only exists to allow linking on mingw targets.
use std::cell::RefCell;
use std::slice;
use std::{ptr, slice};
use libc::{c_char, size_t};
use libc::size_t;
#[repr(C)]
pub struct RustString {
pub bytes: RefCell<Vec<u8>>,
unsafe extern "C" {
/// Opaque type that allows C++ code to write bytes to a Rust-side buffer,
/// in conjunction with `RawRustStringOstream`. Use this as `&RustString`
/// (Rust) and `RustStringRef` (C++) in FFI signatures.
pub type RustString;
}
impl RustString {
pub fn len(&self) -> usize {
self.bytes.borrow().len()
}
pub fn is_empty(&self) -> bool {
self.bytes.borrow().is_empty()
pub fn build_byte_buffer(closure: impl FnOnce(&Self)) -> Vec<u8> {
let buf = RustStringInner::default();
closure(buf.as_opaque());
buf.into_inner()
}
}
/// Appending to a Rust string -- used by RawRustStringOstream.
/// Underlying implementation of [`RustString`].
///
/// Having two separate types makes it possible to use the opaque [`RustString`]
/// in FFI signatures without `improper_ctypes` warnings. This is a workaround
/// for the fact that there is no way to opt out of `improper_ctypes` when
/// _declaring_ a type (as opposed to using that type).
#[derive(Default)]
struct RustStringInner {
bytes: RefCell<Vec<u8>>,
}
/// Conversions between the private [`RustStringInner`] buffer and the opaque
/// [`RustString`] handle that is used in FFI signatures.
impl RustStringInner {
/// Reborrows this buffer as an opaque [`RustString`] reference.
///
/// The returned reference points at `self`; only the pointee type changes.
fn as_opaque(&self) -> &RustString {
let ptr: *const RustStringInner = ptr::from_ref(self);
// We can't use `ptr::cast` here because extern types are `!Sized`.
let ptr = ptr as *const RustString;
// SAFETY: `ptr` was derived from `&self` just above, so it is non-null and
// refers to a live `RustStringInner` for as long as the returned borrow
// (whose lifetime is tied to `&self`) is in use.
unsafe { &*ptr }
}
/// Recovers the underlying buffer from an opaque [`RustString`] reference.
///
/// This is the inverse of [`RustStringInner::as_opaque`].
fn from_opaque(opaque: &RustString) -> &Self {
// SAFETY: A valid `&RustString` must have been created via `as_opaque`.
let ptr: *const RustString = ptr::from_ref(opaque);
// Unlike in `as_opaque`, `ptr::cast` is usable here because the target
// type `RustStringInner` is `Sized`.
let ptr: *const RustStringInner = ptr.cast();
unsafe { &*ptr }
}
/// Consumes the buffer and returns the bytes accumulated in it.
fn into_inner(self) -> Vec<u8> {
self.bytes.into_inner()
}
}
/// Appends the contents of a byte slice to a [`RustString`].
///
/// This function is implemented in `rustc_llvm` so that the C++ code in this
/// crate can link to it directly, without an implied link-time dependency on
/// `rustc_codegen_llvm`.
#[unsafe(no_mangle)]
pub unsafe extern "C" fn LLVMRustStringWriteImpl(
sr: &RustString,
ptr: *const c_char,
size: size_t,
buf: &RustString,
slice_ptr: *const u8, // Same ABI as `*const c_char`
slice_len: size_t,
) {
let slice = unsafe { slice::from_raw_parts(ptr as *const u8, size) };
sr.bytes.borrow_mut().extend_from_slice(slice);
let slice = unsafe { slice::from_raw_parts(slice_ptr, slice_len) };
RustStringInner::from_opaque(buf).bytes.borrow_mut().extend_from_slice(slice);
}
/// Initialize targets enabled by the build script via `cfg(llvm_component = "...")`.