mirror of
https://github.com/rust-lang/rust.git
synced 2025-04-14 04:56:49 +00:00
Ensure swap_nonoverlapping
is really always untyped
This commit is contained in:
parent
f06e5c1e35
commit
50d0ce1b42
@ -398,6 +398,7 @@ use crate::cmp::Ordering;
|
||||
use crate::intrinsics::const_eval_select;
|
||||
use crate::marker::FnPtr;
|
||||
use crate::mem::{self, MaybeUninit, SizedTypeProperties};
|
||||
use crate::num::NonZero;
|
||||
use crate::{fmt, hash, intrinsics, ub_checks};
|
||||
|
||||
mod alignment;
|
||||
@ -1094,51 +1095,25 @@ pub const unsafe fn swap_nonoverlapping<T>(x: *mut T, y: *mut T, count: usize) {
|
||||
// are pointers inside `T` we will copy them in one go rather than trying to copy a part
|
||||
// of a pointer (which would not work).
|
||||
// SAFETY: Same preconditions as this function
|
||||
unsafe { swap_nonoverlapping_simple_untyped(x, y, count) }
|
||||
unsafe { swap_nonoverlapping_const(x, y, count) }
|
||||
} else {
|
||||
macro_rules! attempt_swap_as_chunks {
|
||||
($ChunkTy:ty) => {
|
||||
if align_of::<T>() >= align_of::<$ChunkTy>()
|
||||
&& size_of::<T>() % size_of::<$ChunkTy>() == 0
|
||||
{
|
||||
let x: *mut $ChunkTy = x.cast();
|
||||
let y: *mut $ChunkTy = y.cast();
|
||||
let count = count * (size_of::<T>() / size_of::<$ChunkTy>());
|
||||
// SAFETY: these are the same bytes that the caller promised were
|
||||
// ok, just typed as `MaybeUninit<ChunkTy>`s instead of as `T`s.
|
||||
// The `if` condition above ensures that we're not violating
|
||||
// alignment requirements, and that the division is exact so
|
||||
// that we don't lose any bytes off the end.
|
||||
return unsafe { swap_nonoverlapping_simple_untyped(x, y, count) };
|
||||
}
|
||||
};
|
||||
// Going though a slice here helps codegen know the size fits in `isize`
|
||||
let slice = slice_from_raw_parts_mut(x, count);
|
||||
// SAFETY: This is all readable from the pointer, meaning it's one
|
||||
// allocated object, and thus cannot be more than isize::MAX bytes.
|
||||
let bytes = unsafe { mem::size_of_val_raw::<[T]>(slice) };
|
||||
if let Some(bytes) = NonZero::new(bytes) {
|
||||
// SAFETY: These are the same ranges, just expressed in a different
|
||||
// type, so they're still non-overlapping.
|
||||
unsafe { swap_nonoverlapping_bytes(x.cast(), y.cast(), bytes) };
|
||||
}
|
||||
|
||||
// Split up the slice into small power-of-two-sized chunks that LLVM is able
|
||||
// to vectorize (unless it's a special type with more-than-pointer alignment,
|
||||
// because we don't want to pessimize things like slices of SIMD vectors.)
|
||||
if align_of::<T>() <= size_of::<usize>()
|
||||
&& (!size_of::<T>().is_power_of_two()
|
||||
|| size_of::<T>() > size_of::<usize>() * 2)
|
||||
{
|
||||
attempt_swap_as_chunks!(usize);
|
||||
attempt_swap_as_chunks!(u8);
|
||||
}
|
||||
|
||||
// SAFETY: Same preconditions as this function
|
||||
unsafe { swap_nonoverlapping_simple_untyped(x, y, count) }
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/// Same behavior and safety conditions as [`swap_nonoverlapping`]
|
||||
///
|
||||
/// LLVM can vectorize this (at least it can for the power-of-two-sized types
|
||||
/// `swap_nonoverlapping` tries to use) so no need to manually SIMD it.
|
||||
#[inline]
|
||||
const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, count: usize) {
|
||||
let x = x.cast::<MaybeUninit<T>>();
|
||||
let y = y.cast::<MaybeUninit<T>>();
|
||||
const unsafe fn swap_nonoverlapping_const<T>(x: *mut T, y: *mut T, count: usize) {
|
||||
let mut i = 0;
|
||||
while i < count {
|
||||
// SAFETY: By precondition, `i` is in-bounds because it's below `n`
|
||||
@ -1147,26 +1122,91 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
|
||||
// and it's distinct from `x` since the ranges are non-overlapping
|
||||
let y = unsafe { y.add(i) };
|
||||
|
||||
// If we end up here, it's because we're using a simple type -- like
|
||||
// a small power-of-two-sized thing -- or a special type with particularly
|
||||
// large alignment, particularly SIMD types.
|
||||
// Thus, we're fine just reading-and-writing it, as either it's small
|
||||
// and that works well anyway or it's special and the type's author
|
||||
// presumably wanted things to be done in the larger chunk.
|
||||
|
||||
// SAFETY: we're only ever given pointers that are valid to read/write,
|
||||
// including being aligned, and nothing here panics so it's drop-safe.
|
||||
unsafe {
|
||||
let a: MaybeUninit<T> = read(x);
|
||||
let b: MaybeUninit<T> = read(y);
|
||||
write(x, b);
|
||||
write(y, a);
|
||||
// Note that it's critical that these use `copy_nonoverlapping`,
|
||||
// rather than `read`/`write`, to avoid #134713 if T has padding.
|
||||
let mut temp = MaybeUninit::<T>::uninit();
|
||||
copy_nonoverlapping(x, temp.as_mut_ptr(), 1);
|
||||
copy_nonoverlapping(y, x, 1);
|
||||
copy_nonoverlapping(temp.as_ptr(), y, 1);
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Don't let MIR inline this, because we really want it to keep its noalias metadata
|
||||
#[rustc_no_mir_inline]
|
||||
#[inline]
|
||||
fn swap_chunk<const N: usize>(x: &mut MaybeUninit<[u8; N]>, y: &mut MaybeUninit<[u8; N]>) {
|
||||
let a = *x;
|
||||
let b = *y;
|
||||
*x = b;
|
||||
*y = a;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
unsafe fn swap_nonoverlapping_bytes(x: *mut u8, y: *mut u8, bytes: NonZero<usize>) {
|
||||
// Same as `swap_nonoverlapping::<[u8; N]>`.
|
||||
unsafe fn swap_nonoverlapping_chunks<const N: usize>(
|
||||
x: *mut MaybeUninit<[u8; N]>,
|
||||
y: *mut MaybeUninit<[u8; N]>,
|
||||
chunks: NonZero<usize>,
|
||||
) {
|
||||
let chunks = chunks.get();
|
||||
for i in 0..chunks {
|
||||
// SAFETY: i is in [0, chunks) so the adds and dereferences are in-bounds.
|
||||
unsafe { swap_chunk(&mut *x.add(i), &mut *y.add(i)) };
|
||||
}
|
||||
}
|
||||
|
||||
// Same as `swap_nonoverlapping_bytes`, but accepts at most 1+2+4=7 bytes
|
||||
#[inline]
|
||||
unsafe fn swap_nonoverlapping_short(x: *mut u8, y: *mut u8, bytes: NonZero<usize>) {
|
||||
// Tail handling for auto-vectorized code sometimes has element-at-a-time behaviour,
|
||||
// see <https://github.com/rust-lang/rust/issues/134946>.
|
||||
// By swapping as different sizes, rather than as a loop over bytes,
|
||||
// we make sure not to end up with, say, seven byte-at-a-time copies.
|
||||
|
||||
let bytes = bytes.get();
|
||||
let mut i = 0;
|
||||
macro_rules! swap_prefix {
|
||||
($($n:literal)+) => {$(
|
||||
if (bytes & $n) != 0 {
|
||||
// SAFETY: `i` can only have the same bits set as those in bytes,
|
||||
// so these `add`s are in-bounds of `bytes`. But the bit for
|
||||
// `$n` hasn't been set yet, so the `$n` bytes that `swap_chunk`
|
||||
// will read and write are within the usable range.
|
||||
unsafe { swap_chunk::<$n>(&mut*x.add(i).cast(), &mut*y.add(i).cast()) };
|
||||
i |= $n;
|
||||
}
|
||||
)+};
|
||||
}
|
||||
swap_prefix!(4 2 1);
|
||||
debug_assert_eq!(i, bytes);
|
||||
}
|
||||
|
||||
const CHUNK_SIZE: usize = size_of::<*const ()>();
|
||||
let bytes = bytes.get();
|
||||
|
||||
let chunks = bytes / CHUNK_SIZE;
|
||||
let tail = bytes % CHUNK_SIZE;
|
||||
if let Some(chunks) = NonZero::new(chunks) {
|
||||
// SAFETY: this is bytes/CHUNK_SIZE*CHUNK_SIZE bytes, which is <= bytes,
|
||||
// so it's within the range of our non-overlapping bytes.
|
||||
unsafe { swap_nonoverlapping_chunks::<CHUNK_SIZE>(x.cast(), y.cast(), chunks) };
|
||||
}
|
||||
if let Some(tail) = NonZero::new(tail) {
|
||||
const { assert!(CHUNK_SIZE <= 8) };
|
||||
let delta = chunks * CHUNK_SIZE;
|
||||
// SAFETY: the tail length is below CHUNK SIZE because of the remainder,
|
||||
// and CHUNK_SIZE is at most 8 by the const assert, so tail <= 7
|
||||
unsafe { swap_nonoverlapping_short(x.add(delta), y.add(delta), tail) };
|
||||
}
|
||||
}
|
||||
|
||||
/// Moves `src` into the pointed `dst`, returning the previous `dst` value.
|
||||
///
|
||||
/// Neither value is dropped.
|
||||
|
@ -984,3 +984,39 @@ fn test_ptr_metadata_in_const() {
|
||||
assert_eq!(SLICE_META, 3);
|
||||
assert_eq!(DYN_META.size_of(), 42);
|
||||
}
|
||||
|
||||
// See <https://github.com/rust-lang/rust/issues/134713>
|
||||
const fn ptr_swap_nonoverlapping_is_untyped_inner() {
|
||||
#[repr(C)]
|
||||
struct HasPadding(usize, u8);
|
||||
|
||||
let buf1: [usize; 2] = [1000, 2000];
|
||||
let buf2: [usize; 2] = [3000, 4000];
|
||||
|
||||
// HasPadding and [usize; 2] have the same size and alignment,
|
||||
// so swap_nonoverlapping should treat them the same
|
||||
assert!(size_of::<HasPadding>() == size_of::<[usize; 2]>());
|
||||
assert!(align_of::<HasPadding>() == align_of::<[usize; 2]>());
|
||||
|
||||
let mut b1 = buf1;
|
||||
let mut b2 = buf2;
|
||||
// Safety: b1 and b2 are distinct local variables,
|
||||
// with the same size and alignment as HasPadding.
|
||||
unsafe {
|
||||
std::ptr::swap_nonoverlapping(
|
||||
b1.as_mut_ptr().cast::<HasPadding>(),
|
||||
b2.as_mut_ptr().cast::<HasPadding>(),
|
||||
1,
|
||||
);
|
||||
}
|
||||
assert!(b1[0] == buf2[0]);
|
||||
assert!(b1[1] == buf2[1]);
|
||||
assert!(b2[0] == buf1[0]);
|
||||
assert!(b2[1] == buf1[1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ptr_swap_nonoverlapping_is_untyped() {
|
||||
ptr_swap_nonoverlapping_is_untyped_inner();
|
||||
const { ptr_swap_nonoverlapping_is_untyped_inner() };
|
||||
}
|
||||
|
@ -51,3 +51,31 @@ pub fn swap_simd(x: &mut __m128, y: &mut __m128) {
|
||||
// CHECK-NEXT: retq
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: swap_string:
|
||||
#[no_mangle]
|
||||
pub fn swap_string(x: &mut String, y: &mut String) {
|
||||
// CHECK-NOT: mov
|
||||
// CHECK-COUNT-4: movups
|
||||
// CHECK-NOT: mov
|
||||
// CHECK-COUNT-4: movq
|
||||
// CHECK-NOT: mov
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: swap_44_bytes:
|
||||
#[no_mangle]
|
||||
pub fn swap_44_bytes(x: &mut [u8; 44], y: &mut [u8; 44]) {
|
||||
// Ensure we do better than a long run of byte copies,
|
||||
// see <https://github.com/rust-lang/rust/issues/134946>
|
||||
|
||||
// CHECK-NOT: movb
|
||||
// CHECK-COUNT-8: movups{{.+}}xmm
|
||||
// CHECK-NOT: movb
|
||||
// CHECK-COUNT-4: movq
|
||||
// CHECK-NOT: movb
|
||||
// CHECK-COUNT-4: movl
|
||||
// CHECK-NOT: movb
|
||||
// CHECK: retq
|
||||
swap(x, y)
|
||||
}
|
||||
|
@ -23,8 +23,8 @@ pub fn swap_single_m256(x: &mut __m256, y: &mut __m256) {
|
||||
#[no_mangle]
|
||||
pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <8 x float>{{.+}}align 32
|
||||
// CHECK: store <8 x float>{{.+}}align 32
|
||||
// CHECK-COUNT-2: load <4 x i64>{{.+}}align 32
|
||||
// CHECK-COUNT-2: store <4 x i64>{{.+}}align 32
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
@ -34,7 +34,7 @@ pub fn swap_m256_slice(x: &mut [__m256], y: &mut [__m256]) {
|
||||
#[no_mangle]
|
||||
pub fn swap_bytes32(x: &mut [u8; 32], y: &mut [u8; 32]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <32 x i8>{{.+}}align 1
|
||||
// CHECK: store <32 x i8>{{.+}}align 1
|
||||
// CHECK-COUNT-2: load <4 x i64>{{.+}}align 1
|
||||
// CHECK-COUNT-2: store <4 x i64>{{.+}}align 1
|
||||
swap(x, y)
|
||||
}
|
||||
|
@ -12,6 +12,16 @@ type KeccakBuffer = [[u64; 5]; 5];
|
||||
// to stack for large types, which is completely unnecessary as the lack of
|
||||
// overlap means we can just do whatever fits in registers at a time.
|
||||
|
||||
// The tests here (after the first one showing that the problem still exists)
|
||||
// are less about testing *exactly* what the codegen is, and more about testing
|
||||
// 1) That things are swapped directly from one argument to the other,
|
||||
// never going through stack along the way, and
|
||||
// 2) That we're doing the swapping for big things using large vector types,
|
||||
// rather then `i64` or `<8 x i8>` (or, even worse, `i8`) at a time.
|
||||
//
|
||||
// (There are separate tests for intrinsics::typed_swap_nonoverlapping that
|
||||
// check that it, as an intrinsic, are emitting exactly what it should.)
|
||||
|
||||
// CHECK-LABEL: @swap_basic
|
||||
#[no_mangle]
|
||||
pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
|
||||
@ -26,55 +36,55 @@ pub fn swap_basic(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
|
||||
}
|
||||
}
|
||||
|
||||
// This test verifies that the library does something smarter, and thus
|
||||
// doesn't need any scratch space on the stack.
|
||||
|
||||
// CHECK-LABEL: @swap_std
|
||||
#[no_mangle]
|
||||
pub fn swap_std(x: &mut KeccakBuffer, y: &mut KeccakBuffer) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
// CHECK: load <{{2|4}} x i64>
|
||||
// CHECK: store <{{2|4}} x i64>
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// Verify that types with usize alignment are swapped via vectored usizes,
|
||||
// not falling back to byte-level code.
|
||||
|
||||
// CHECK-LABEL: @swap_slice
|
||||
#[no_mangle]
|
||||
pub fn swap_slice(x: &mut [KeccakBuffer], y: &mut [KeccakBuffer]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
// CHECK: load <{{2|4}} x i64>
|
||||
// CHECK: store <{{2|4}} x i64>
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
}
|
||||
|
||||
// But for a large align-1 type, vectorized byte copying is what we want.
|
||||
|
||||
type OneKilobyteBuffer = [u8; 1024];
|
||||
|
||||
// CHECK-LABEL: @swap_1kb_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_1kb_slices(x: &mut [OneKilobyteBuffer], y: &mut [OneKilobyteBuffer]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i8>
|
||||
// CHECK: store <{{[0-9]+}} x i8>
|
||||
|
||||
// CHECK-NOT: load i32
|
||||
// CHECK-NOT: store i32
|
||||
// CHECK-NOT: load i16
|
||||
// CHECK-NOT: store i16
|
||||
// CHECK-NOT: load i8
|
||||
// CHECK-NOT: store i8
|
||||
|
||||
// CHECK: load <{{2|4}} x i64>{{.+}}align 1,
|
||||
// CHECK: store <{{2|4}} x i64>{{.+}}align 1,
|
||||
|
||||
// CHECK-NOT: load i32
|
||||
// CHECK-NOT: store i32
|
||||
// CHECK-NOT: load i16
|
||||
// CHECK-NOT: store i16
|
||||
// CHECK-NOT: load i8
|
||||
// CHECK-NOT: store i8
|
||||
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
}
|
||||
|
||||
// This verifies that the 2×read + 2×write optimizes to just 3 memcpys
|
||||
// for an unusual type like this. It's not clear whether we should do anything
|
||||
// smarter in Rust for these, so for now it's fine to leave these up to the backend.
|
||||
// That's not as bad as it might seem, as for example, LLVM will lower the
|
||||
// memcpys below to VMOVAPS on YMMs if one enables the AVX target feature.
|
||||
// Eventually we'll be able to pass `align_of::<T>` to a const generic and
|
||||
// thus pick a smarter chunk size ourselves without huge code duplication.
|
||||
|
||||
#[repr(align(64))]
|
||||
pub struct BigButHighlyAligned([u8; 64 * 3]);
|
||||
|
||||
@ -82,9 +92,25 @@ pub struct BigButHighlyAligned([u8; 64 * 3]);
|
||||
#[no_mangle]
|
||||
pub fn swap_big_aligned(x: &mut BigButHighlyAligned, y: &mut BigButHighlyAligned) {
|
||||
// CHECK-NOT: call void @llvm.memcpy
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr noundef nonnull align 64 dereferenceable(192)
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr noundef nonnull align 64 dereferenceable(192)
|
||||
// CHECK: call void @llvm.memcpy.{{.+}}(ptr noundef nonnull align 64 dereferenceable(192)
|
||||
// CHECK-NOT: load i32
|
||||
// CHECK-NOT: store i32
|
||||
// CHECK-NOT: load i16
|
||||
// CHECK-NOT: store i16
|
||||
// CHECK-NOT: load i8
|
||||
// CHECK-NOT: store i8
|
||||
|
||||
// CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 64,
|
||||
// CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 64,
|
||||
|
||||
// CHECK-COUNT-2: load <{{2|4}} x i64>{{.+}}align 32,
|
||||
// CHECK-COUNT-2: store <{{2|4}} x i64>{{.+}}align 32,
|
||||
|
||||
// CHECK-NOT: load i32
|
||||
// CHECK-NOT: store i32
|
||||
// CHECK-NOT: load i16
|
||||
// CHECK-NOT: store i16
|
||||
// CHECK-NOT: load i8
|
||||
// CHECK-NOT: store i8
|
||||
// CHECK-NOT: call void @llvm.memcpy
|
||||
swap(x, y)
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
//@ compile-flags: -Copt-level=3 -Z merge-functions=disabled
|
||||
//@ only-x86_64
|
||||
//@ min-llvm-version: 20
|
||||
|
||||
#![crate_type = "lib"]
|
||||
|
||||
@ -27,13 +28,19 @@ pub fn swap_rgb48_manually(x: &mut RGB48, y: &mut RGB48) {
|
||||
pub fn swap_rgb48(x: &mut RGB48, y: &mut RGB48) {
|
||||
// CHECK-NOT: alloca
|
||||
|
||||
// Whether `i8` is the best for this is unclear, but
|
||||
// might as well record what's actually happening right now.
|
||||
// Swapping `i48` might be cleaner in LLVM-IR here, but `i32`+`i16` isn't bad,
|
||||
// and is closer to the assembly it generates anyway.
|
||||
|
||||
// CHECK: load i8
|
||||
// CHECK: load i8
|
||||
// CHECK: store i8
|
||||
// CHECK: store i8
|
||||
// CHECK-NOT: load{{ }}
|
||||
// CHECK: load i32{{.+}}align 2
|
||||
// CHECK-NEXT: load i32{{.+}}align 2
|
||||
// CHECK-NEXT: store i32{{.+}}align 2
|
||||
// CHECK-NEXT: store i32{{.+}}align 2
|
||||
// CHECK: load i16{{.+}}align 2
|
||||
// CHECK-NEXT: load i16{{.+}}align 2
|
||||
// CHECK-NEXT: store i16{{.+}}align 2
|
||||
// CHECK-NEXT: store i16{{.+}}align 2
|
||||
// CHECK-NOT: store{{ }}
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
@ -76,30 +83,49 @@ pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// LLVM doesn't vectorize a loop over 3-byte elements,
|
||||
// so we chunk it down to bytes and loop over those instead.
|
||||
type RGB24 = [u8; 3];
|
||||
|
||||
// CHECK-LABEL: @swap_rgb24_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_rgb24_slices(x: &mut [RGB24], y: &mut [RGB24]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i8>
|
||||
// CHECK: store <{{[0-9]+}} x i8>
|
||||
|
||||
// CHECK: mul nuw nsw i64 %{{x|y}}.1, 3
|
||||
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
|
||||
// CHECK-COUNT-2: load i32
|
||||
// CHECK-COUNT-2: store i32
|
||||
// CHECK-COUNT-2: load i16
|
||||
// CHECK-COUNT-2: store i16
|
||||
// CHECK-COUNT-2: load i8
|
||||
// CHECK-COUNT-2: store i8
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
}
|
||||
|
||||
// This one has a power-of-two size, so we iterate over it directly
|
||||
type RGBA32 = [u8; 4];
|
||||
|
||||
// CHECK-LABEL: @swap_rgba32_slices
|
||||
#[no_mangle]
|
||||
pub fn swap_rgba32_slices(x: &mut [RGBA32], y: &mut [RGBA32]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i32>
|
||||
// CHECK: store <{{[0-9]+}} x i32>
|
||||
|
||||
// Because the size in bytes in a multiple of 4, we can skip the smallest sizes.
|
||||
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
|
||||
// CHECK-COUNT-2: load i32
|
||||
// CHECK-COUNT-2: store i32
|
||||
|
||||
// CHECK-NOT: load i16
|
||||
// CHECK-NOT: store i16
|
||||
// CHECK-NOT: load i8
|
||||
// CHECK-NOT: store i8
|
||||
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
@ -113,8 +139,8 @@ const _: () = assert!(!std::mem::size_of::<String>().is_power_of_two());
|
||||
#[no_mangle]
|
||||
pub fn swap_string_slices(x: &mut [String], y: &mut [String]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load <{{[0-9]+}} x i64>
|
||||
// CHECK: store <{{[0-9]+}} x i64>
|
||||
// CHECK: load <{{[0-9]+}} x i64>{{.+}}, align 8,
|
||||
// CHECK: store <{{[0-9]+}} x i64>{{.+}}, align 8,
|
||||
if x.len() == y.len() {
|
||||
x.swap_with_slice(y);
|
||||
}
|
||||
@ -130,6 +156,26 @@ pub struct Packed {
|
||||
#[no_mangle]
|
||||
pub fn swap_packed_structs(x: &mut Packed, y: &mut Packed) {
|
||||
// CHECK-NOT: alloca
|
||||
|
||||
// CHECK-NOT: load
|
||||
// CHECK-NOT: store
|
||||
|
||||
// CHECK: %[[A:.+]] = load i64, ptr %x, align 1,
|
||||
// CHECK-NEXT: %[[B:.+]] = load i64, ptr %y, align 1,
|
||||
// CHECK-NEXT: store i64 %[[B]], ptr %x, align 1,
|
||||
// CHECK-NEXT: store i64 %[[A]], ptr %y, align 1,
|
||||
|
||||
// CHECK-NOT: load
|
||||
// CHECK-NOT: store
|
||||
|
||||
// CHECK: %[[C:.+]] = load i8, ptr %[[X8:.+]], align 1,
|
||||
// CHECK-NEXT: %[[D:.+]] = load i8, ptr %[[Y8:.+]], align 1,
|
||||
// CHECK-NEXT: store i8 %[[D]], ptr %[[X8]], align 1,
|
||||
// CHECK-NEXT: store i8 %[[C]], ptr %[[Y8]], align 1,
|
||||
|
||||
// CHECK-NOT: load
|
||||
// CHECK-NOT: store
|
||||
|
||||
// CHECK: ret void
|
||||
swap(x, y)
|
||||
}
|
||||
|
@ -12,10 +12,10 @@ note: inside `swap_nonoverlapping::<MaybeUninit<u8>>`
|
||||
--> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
|
||||
note: inside `swap_nonoverlapping::compiletime::<MaybeUninit<u8>>`
|
||||
--> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
|
||||
note: inside `std::ptr::swap_nonoverlapping_simple_untyped::<MaybeUninit<u8>>`
|
||||
--> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
|
||||
note: inside `std::ptr::read::<MaybeUninit<MaybeUninit<u8>>>`
|
||||
note: inside `std::ptr::swap_nonoverlapping_const::<MaybeUninit<u8>>`
|
||||
--> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
|
||||
note: inside `copy_nonoverlapping::<MaybeUninit<u8>>`
|
||||
--> $SRC_DIR/core/src/intrinsics/mod.rs:LL:COL
|
||||
= help: this code performed an operation that depends on the underlying bytes representing a pointer
|
||||
= help: the absolute address of a pointer is not known at compile-time, so such operations are not supported
|
||||
= note: this error originates in the macro `$crate::intrinsics::const_eval_select` which comes from the expansion of the macro `const_eval_select` (in Nightly builds, run with -Z macro-backtrace for more info)
|
||||
|
Loading…
Reference in New Issue
Block a user