From 86158f581d20948507bef65e6162e3bbf5f4fa91 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski
Date: Mon, 20 May 2024 01:09:29 -0400
Subject: [PATCH 1/3] Make repr(packed) vectors work with SIMD intrinsics

---
 compiler/rustc_codegen_llvm/src/intrinsic.rs | 49 +++++++++++++++++++-
 tests/ui/simd/repr_packed.rs                 | 18 ++-----
 2 files changed, 52 insertions(+), 15 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index c0a1208a8c7..83a71752ffd 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -480,8 +480,55 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             }
 
             _ if name.as_str().starts_with("simd_") => {
+                // Unpack non-power-of-2 #[repr(packed)]
+                let mut loaded_args = Vec::new();
+                for (ty, arg) in arg_tys.iter().zip(args) {
+                    loaded_args.push(
+                        if ty.is_simd()
+                            && let OperandValue::Ref(place) = arg.val
+                        {
+                            let (size, elem_ty) = ty.simd_size_and_type(self.tcx());
+                            let elem_ll_ty = match elem_ty.kind() {
+                                ty::Float(f) => self.type_float_from_ty(*f),
+                                ty::Int(i) => self.type_int_from_ty(*i),
+                                ty::Uint(u) => self.type_uint_from_ty(*u),
+                                ty::RawPtr(_, _) => self.type_ptr(),
+                                _ => unreachable!(),
+                            };
+                            let loaded =
+                                self.load_from_place(self.type_vector(elem_ll_ty, size), place);
+                            OperandRef::from_immediate_or_packed_pair(self, loaded, arg.layout)
+                        } else {
+                            *arg
+                        },
+                    );
+                }
+
+                let llret_ty = if ret_ty.is_simd()
+                    && let abi::Abi::Aggregate { .. } = self.layout_of(ret_ty).layout.abi
+                {
+                    let (size, elem_ty) = ret_ty.simd_size_and_type(self.tcx());
+                    let elem_ll_ty = match elem_ty.kind() {
+                        ty::Float(f) => self.type_float_from_ty(*f),
+                        ty::Int(i) => self.type_int_from_ty(*i),
+                        ty::Uint(u) => self.type_uint_from_ty(*u),
+                        ty::RawPtr(_, _) => self.type_ptr(),
+                        _ => unreachable!(),
+                    };
+                    self.type_vector(elem_ll_ty, size)
+                } else {
+                    llret_ty
+                };
+
                 match generic_simd_intrinsic(
-                    self, name, callee_ty, fn_args, args, ret_ty, llret_ty, span,
+                    self,
+                    name,
+                    callee_ty,
+                    fn_args,
+                    &loaded_args,
+                    ret_ty,
+                    llret_ty,
+                    span,
                 ) {
                     Ok(llval) => llval,
                     Err(()) => return Ok(()),
diff --git a/tests/ui/simd/repr_packed.rs b/tests/ui/simd/repr_packed.rs
index 411bba3454e..52c794563de 100644
--- a/tests/ui/simd/repr_packed.rs
+++ b/tests/ui/simd/repr_packed.rs
@@ -6,9 +6,6 @@
 #[repr(simd, packed)]
 struct Simd<T, const N: usize>([T; N]);
 
-#[repr(simd)]
-struct FullSimd<T, const N: usize>([T; N]);
-
 fn check_size_align<T, const N: usize>() {
     use std::mem;
     assert_eq!(mem::size_of::<Simd<T, N>>(), mem::size_of::<[T; N]>());
@@ -44,16 +41,9 @@ fn main() {
             simd_add(Simd::<f64, 4>([0., 1., 2., 3.]), Simd::<f64, 4>([2., 2., 2., 2.]));
         assert_eq!(std::mem::transmute::<_, [f64; 4]>(x), [2., 3., 4., 5.]);
 
-        // non-powers-of-two have padding and need to be expanded to full vectors
-        fn load<T, const N: usize>(v: Simd<T, N>) -> FullSimd<T, N> {
-            unsafe {
-                let mut tmp = core::mem::MaybeUninit::<FullSimd<T, N>>::uninit();
-                std::ptr::copy_nonoverlapping(&v as *const _, tmp.as_mut_ptr().cast(), 1);
-                tmp.assume_init()
-            }
-        }
-        let x: FullSimd<f64, 3> =
-            simd_add(load(Simd::<f64, 3>([0., 1., 2.])), load(Simd::<f64, 3>([2., 2., 2.])));
-        assert_eq!(x.0, [2., 3., 4.]);
+        // non-powers-of-two have padding and lesser alignment, but the intrinsic handles it
+        let x: Simd<f64, 3> = simd_add(Simd::<f64, 3>([0., 1., 2.]), Simd::<f64, 3>([2., 2., 2.]));
+        let arr: [f64; 3] = x.0;
+        assert_eq!(arr, [2., 3., 4.]);
     }
 }

From 9bdc5b2455bbd8d71e912b5ceaeb390abb987c91 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski
Date: Sat, 1 Jun 2024 14:17:16 -0400
Subject: [PATCH 2/3] Improve documentation

---
 compiler/rustc_codegen_llvm/src/intrinsic.rs |  7 ++++++-
 tests/ui/simd/repr_packed.rs                 |  5 +++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 83a71752ffd..87098566f6b 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -480,10 +480,15 @@ impl<'ll, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'_, 'll, 'tcx> {
             }
 
             _ if name.as_str().starts_with("simd_") => {
-                // Unpack non-power-of-2 #[repr(packed)]
+                // Unpack non-power-of-2 #[repr(packed, simd)] arguments.
+                // This gives them the expected layout of a regular #[repr(simd)] vector.
                 let mut loaded_args = Vec::new();
                 for (ty, arg) in arg_tys.iter().zip(args) {
                     loaded_args.push(
+                        // #[repr(packed, simd)] vectors are passed like arrays (as references,
+                        // with reduced alignment and no padding) rather than as immediates.
+                        // We can use a vector load to fix the layout and turn the argument
+                        // into an immediate.
                         if ty.is_simd()
                             && let OperandValue::Ref(place) = arg.val
                         {
diff --git a/tests/ui/simd/repr_packed.rs b/tests/ui/simd/repr_packed.rs
index 52c794563de..1ba15bda98d 100644
--- a/tests/ui/simd/repr_packed.rs
+++ b/tests/ui/simd/repr_packed.rs
@@ -36,12 +36,13 @@ fn main() {
     check_ty::<f64>();
 
     unsafe {
-        // powers-of-two have no padding and work as usual
+        // powers-of-two have no padding and have the same layout as #[repr(simd)]
        let x: Simd<f64, 4> =
             simd_add(Simd::<f64, 4>([0., 1., 2., 3.]), Simd::<f64, 4>([2., 2., 2., 2.]));
         assert_eq!(std::mem::transmute::<_, [f64; 4]>(x), [2., 3., 4., 5.]);
 
-        // non-powers-of-two have padding and lesser alignment, but the intrinsic handles it
+        // non-powers-of-two should have padding (which is removed by #[repr(packed)]),
+        // but the intrinsic handles it
         let x: Simd<f64, 3> = simd_add(Simd::<f64, 3>([0., 1., 2.]), Simd::<f64, 3>([2., 2., 2.]));
         let arr: [f64; 3] = x.0;
         assert_eq!(arr, [2., 3., 4.]);

From 5c32f84048f12b4cdbb404c0c543d38dec622d26 Mon Sep 17 00:00:00 2001
From: Jubilee Young
Date: Sun, 2 Jun 2024 01:03:51 -0700
Subject: [PATCH 3/3] Test codegen for repr(packed,simd)

---
 tests/codegen/simd/packed-simd-alignment.rs | 44 +++++++++++++++++++++
 1 file changed, 44 insertions(+)
 create mode 100644 tests/codegen/simd/packed-simd-alignment.rs

diff --git a/tests/codegen/simd/packed-simd-alignment.rs b/tests/codegen/simd/packed-simd-alignment.rs
new file mode 100644
index 00000000000..53e88d8e5cf
--- /dev/null
+++ b/tests/codegen/simd/packed-simd-alignment.rs
@@ -0,0 +1,44 @@
+//@ compile-flags: -Cno-prepopulate-passes
+
+#![crate_type = "lib"]
+#![feature(repr_simd, core_intrinsics)]
+// make sure that codegen emits correctly-aligned loads and stores for repr(packed, simd) types
+// the alignment of a load should be no less than T, and no more than the size of the vector type
+use std::intrinsics::simd as intrinsics;
+
+#[derive(Copy, Clone)]
+#[repr(packed, simd)]
+struct f32x3([f32; 3]);
+
+#[derive(Copy, Clone)]
+#[repr(packed, simd)]
+struct f32x4([f32; 4]);
+
+// CHECK-LABEL: load_f32x3
+#[no_mangle]
+pub fn load_f32x3(floats: &f32x3) -> f32x3 {
+    // FIXME: Is a memcpy really the best we can do?
+    // CHECK: @llvm.memcpy.{{.*}}ptr align 4 {{.*}}ptr align 4
+    *floats
+}
+
+// CHECK-LABEL: load_f32x4
+#[no_mangle]
+pub fn load_f32x4(floats: &f32x4) -> f32x4 {
+    // CHECK: load <4 x float>, ptr %{{[a-z0-9_]*}}, align {{4|8|16}}
+    *floats
+}
+
+// CHECK-LABEL: add_f32x3
+#[no_mangle]
+pub fn add_f32x3(x: f32x3, y: f32x3) -> f32x3 {
+    // CHECK: load <3 x float>, ptr %{{[a-z0-9_]*}}, align 4
+    unsafe { intrinsics::simd_add(x, y) }
+}
+
+// CHECK-LABEL: add_f32x4
+#[no_mangle]
+pub fn add_f32x4(x: f32x4, y: f32x4) -> f32x4 {
+    // CHECK: load <4 x float>, ptr %{{[a-z0-9_]*}}, align {{4|8|16}}
+    unsafe { intrinsics::simd_add(x, y) }
+}
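
Note (not part of the patches): taken together, the series lets a
non-power-of-two #[repr(packed, simd)] vector be passed straight to a SIMD
intrinsic. The following minimal sketch, adapted from the tests above, shows
the usage that previously required the removed `load` helper from patch 1 to
widen the value into a full #[repr(simd)] vector. It assumes a nightly
toolchain with the `repr_simd` and `core_intrinsics` features; the names
`a`, `b`, `c` and the `main` wrapper are illustrative only:

    #![feature(repr_simd, core_intrinsics)]
    use std::intrinsics::simd::simd_add;

    #[derive(Copy, Clone)]
    #[repr(packed, simd)]
    struct f32x3([f32; 3]); // size 12, align 4: no tail padding to a power of two

    fn main() {
        let a = f32x3([1.0, 2.0, 3.0]);
        let b = f32x3([4.0, 5.0, 6.0]);
        // The packed arguments reach codegen as references (OperandValue::Ref);
        // per patch 1, the intrinsic lowering now loads each one as a
        // <3 x float> immediate before performing the add.
        let c = unsafe { simd_add(a, b) };
        // Copy the field out by value (taking a reference into a packed struct
        // could be unaligned), then compare element-wise.
        let arr: [f32; 3] = c.0;
        assert_eq!(arr, [5.0, 7.0, 9.0]);
    }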