copy byval argument to alloca if alignment is insufficient

This commit is contained in:
Erik Desjardins 2024-03-08 17:11:27 -05:00
parent e919669d42
commit 207fe38630
3 changed files with 220 additions and 72 deletions

View File

@ -203,57 +203,63 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
val: &'ll Value,
dst: PlaceRef<'tcx, &'ll Value>,
) {
if self.is_ignore() {
return;
}
if self.is_sized_indirect() {
OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst)
} else if self.is_unsized_indirect() {
bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
} else if let PassMode::Cast { cast, pad_i32: _ } = &self.mode {
// FIXME(eddyb): Figure out when the simpler Store is safe, clang
// uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
let can_store_through_cast_ptr = false;
if can_store_through_cast_ptr {
bx.store(val, dst.llval, self.layout.align.abi);
} else {
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.
// We instead thus allocate some scratch space...
let scratch_size = cast.size(bx);
let scratch_align = cast.align(bx);
let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
bx.lifetime_start(llscratch, scratch_size);
// ... where we first store the value...
bx.store(val, llscratch, scratch_align);
// ... and then memcpy it to the intended destination.
bx.memcpy(
dst.llval,
self.layout.align.abi,
llscratch,
scratch_align,
bx.const_usize(self.layout.size.bytes()),
MemFlags::empty(),
);
bx.lifetime_end(llscratch, scratch_size);
match &self.mode {
PassMode::Ignore => {}
// Sized indirect arguments
PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
let align = attrs.pointee_align.unwrap_or(self.layout.align.abi);
OperandValue::Ref(val, None, align).store(bx, dst);
}
// Unsized indirect qrguments
PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
bug!("unsized `ArgAbi` must be handled through `store_fn_arg`");
}
PassMode::Cast { cast, pad_i32: _ } => {
// FIXME(eddyb): Figure out when the simpler Store is safe, clang
// uses it for i16 -> {i8, i8}, but not for i24 -> {i8, i8, i8}.
let can_store_through_cast_ptr = false;
if can_store_through_cast_ptr {
bx.store(val, dst.llval, self.layout.align.abi);
} else {
// The actual return type is a struct, but the ABI
// adaptation code has cast it into some scalar type. The
// code that follows is the only reliable way I have
// found to do a transform like i64 -> {i32,i32}.
// Basically we dump the data onto the stack then memcpy it.
//
// Other approaches I tried:
// - Casting rust ret pointer to the foreign type and using Store
// is (a) unsafe if size of foreign type > size of rust type and
// (b) runs afoul of strict aliasing rules, yielding invalid
// assembly under -O (specifically, the store gets removed).
// - Truncating foreign type to correct integral type and then
// bitcasting to the struct type yields invalid cast errors.
// We instead thus allocate some scratch space...
let scratch_size = cast.size(bx);
let scratch_align = cast.align(bx);
let llscratch = bx.alloca(cast.llvm_type(bx), scratch_align);
bx.lifetime_start(llscratch, scratch_size);
// ... where we first store the value...
bx.store(val, llscratch, scratch_align);
// ... and then memcpy it to the intended destination.
bx.memcpy(
dst.llval,
self.layout.align.abi,
llscratch,
scratch_align,
bx.const_usize(self.layout.size.bytes()),
MemFlags::empty(),
);
bx.lifetime_end(llscratch, scratch_size);
}
}
_ => {
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
}
} else {
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
}
}

View File

@ -367,29 +367,45 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
}
}
if arg.is_sized_indirect() {
// Don't copy an indirect argument to an alloca, the caller
// already put it in a temporary alloca and gave it up.
// FIXME: lifetimes
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
} else if arg.is_unsized_indirect() {
// As the storage for the indirect argument lives during
// the whole function call, we just copy the fat pointer.
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
let llextra = bx.get_param(llarg_idx);
llarg_idx += 1;
let indirect_operand = OperandValue::Pair(llarg, llextra);
match arg.mode {
// Sized indirect arguments
PassMode::Indirect { attrs, meta_attrs: None, on_stack: _ } => {
// Don't copy an indirect argument to an alloca, the caller already put it
// in a temporary alloca and gave it up.
// FIXME: lifetimes
if let Some(pointee_align) = attrs.pointee_align
&& pointee_align < arg.layout.align.abi
{
// ...unless the argument is underaligned, then we need to copy it to
// a higher-aligned alloca.
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
} else {
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
LocalRef::Place(PlaceRef::new_sized(llarg, arg.layout))
}
}
// Unsized indirect qrguments
PassMode::Indirect { attrs: _, meta_attrs: Some(_), on_stack: _ } => {
// As the storage for the indirect argument lives during
// the whole function call, we just copy the fat pointer.
let llarg = bx.get_param(llarg_idx);
llarg_idx += 1;
let llextra = bx.get_param(llarg_idx);
llarg_idx += 1;
let indirect_operand = OperandValue::Pair(llarg, llextra);
let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
indirect_operand.store(bx, tmp);
LocalRef::UnsizedPlace(tmp)
} else {
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
let tmp = PlaceRef::alloca_unsized_indirect(bx, arg.layout);
indirect_operand.store(bx, tmp);
LocalRef::UnsizedPlace(tmp)
}
_ => {
let tmp = PlaceRef::alloca(bx, arg.layout);
bx.store_fn_arg(arg, &mut llarg_idx, tmp);
LocalRef::Place(tmp)
}
}
})
.collect::<Vec<_>>();

View File

@ -0,0 +1,126 @@
// ignore-tidy-linelength
//@ revisions:i686-linux x86_64-linux
//@[i686-linux] compile-flags: --target i686-unknown-linux-gnu
//@[i686-linux] needs-llvm-components: x86
//@[x86_64-linux] compile-flags: --target x86_64-unknown-linux-gnu
//@[x86_64-linux] needs-llvm-components: x86
// Tests that we correctly copy arguments into allocas when the alignment of the byval argument
// is different from the alignment of the Rust type.
// For the following test cases:
// All of the `*_decreases_alignment` functions should codegen to a direct call, since the
// alignment is already sufficient.
// All off the `*_increases_alignment` functions should copy the argument to an alloca
// on i686-unknown-linux-gnu, since the alignment needs to be increased, and should codegen
// to a direct call on x86_64-unknown-linux-gnu, where byval alignment matches Rust alignment.
#![feature(no_core, lang_items)]
#![crate_type = "lib"]
#![no_std]
#![no_core]
#![allow(non_camel_case_types)]
#[lang = "sized"]
trait Sized {}
#[lang = "freeze"]
trait Freeze {}
#[lang = "copy"]
trait Copy {}
// This type has align 1 in Rust, but as a byval argument on i686-linux, it will have align 4.
#[repr(C)]
#[repr(packed)]
struct Align1 {
x: u128,
y: u128,
z: u128,
}
// This type has align 16 in Rust, but as a byval argument on i686-linux, it will have align 4.
#[repr(C)]
#[repr(align(16))]
struct Align16 {
x: u128,
y: u128,
z: u128,
}
extern "C" {
fn extern_c_align1(x: Align1);
fn extern_c_align16(x: Align16);
}
// CHECK-LABEL: @rust_to_c_increases_alignment
#[no_mangle]
pub unsafe fn rust_to_c_increases_alignment(x: Align1) {
// i686-linux: start:
// i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align1, align 4
// i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 4 {{.*}}[[ALLOCA]], ptr {{.*}}align 1 {{.*}}%x
// i686-linux-NEXT: call void @extern_c_align1({{.+}} [[ALLOCA]])
// x86_64-linux: start:
// x86_64-linux-NEXT: call void @extern_c_align1
extern_c_align1(x);
}
// CHECK-LABEL: @rust_to_c_decreases_alignment
#[no_mangle]
pub unsafe fn rust_to_c_decreases_alignment(x: Align16) {
// CHECK: start:
// CHECK-NEXT: call void @extern_c_align16
extern_c_align16(x);
}
extern "Rust" {
fn extern_rust_align1(x: Align1);
fn extern_rust_align16(x: Align16);
}
// CHECK-LABEL: @c_to_rust_decreases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_decreases_alignment(x: Align1) {
// CHECK: start:
// CHECK-NEXT: call void @extern_rust_align1
extern_rust_align1(x);
}
// CHECK-LABEL: @c_to_rust_increases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_increases_alignment(x: Align16) {
// i686-linux: start:
// i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
// i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
// i686-linux-NEXT: call void @extern_rust_align16({{.+}} [[ALLOCA]])
// x86_64-linux: start:
// x86_64-linux-NEXT: call void @extern_rust_align16
extern_rust_align16(x);
}
extern "Rust" {
fn extern_rust_ref_align1(x: &Align1);
fn extern_rust_ref_align16(x: &Align16);
}
// CHECK-LABEL: @c_to_rust_ref_decreases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_ref_decreases_alignment(x: Align1) {
// CHECK: start:
// CHECK-NEXT: call void @extern_rust_ref_align1
extern_rust_ref_align1(&x);
}
// CHECK-LABEL: @c_to_rust_ref_increases_alignment
#[no_mangle]
pub unsafe extern "C" fn c_to_rust_ref_increases_alignment(x: Align16) {
// i686-linux: start:
// i686-linux-NEXT: [[ALLOCA:%[0-9a-z]+]] = alloca %Align16, align 16
// i686-linux-NEXT: call void @llvm.memcpy.{{.+}}(ptr {{.*}}align 16 {{.*}}[[ALLOCA]], ptr {{.*}}align 4 {{.*}}%0
// i686-linux-NEXT: call void @extern_rust_ref_align16({{.+}} [[ALLOCA]])
// x86_64-linux: start:
// x86_64-linux-NEXT: call void @extern_rust_ref_align16
extern_rust_ref_align16(&x);
}