mirror of
https://github.com/rust-lang/rust.git
synced 2024-10-30 14:01:51 +00:00
Auto merge of #122582 - scottmcm:swap-intrinsic-v2, r=oli-obk
Let codegen decide when to `mem::swap` with immediates. Making `libcore` decide this is silly; the backend has so much better information about when it's a good idea. Thus this PR introduces a new `typed_swap` intrinsic with a fallback body, and replaces that fallback implementation when swapping immediates or scalar pairs. r? oli-obk Replaces #111744, and means we'll never need more libs PRs like #111803 or #107140.
This commit is contained in:
commit
d6eb0f5a09
@ -9,6 +9,7 @@ use crate::traits::*;
|
||||
use crate::MemFlags;
|
||||
|
||||
use rustc_middle::ty::{self, Ty, TyCtxt};
|
||||
use rustc_session::config::OptLevel;
|
||||
use rustc_span::{sym, Span};
|
||||
use rustc_target::abi::{
|
||||
call::{FnAbi, PassMode},
|
||||
@ -75,6 +76,29 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
|
||||
let name = bx.tcx().item_name(def_id);
|
||||
let name_str = name.as_str();
|
||||
|
||||
// If we're swapping something that's *not* an `OperandValue::Ref`,
|
||||
// then we can do it directly and avoid the alloca.
|
||||
// Otherwise, we'll let the fallback MIR body take care of it.
|
||||
if let sym::typed_swap = name {
|
||||
let pointee_ty = fn_args.type_at(0);
|
||||
let pointee_layout = bx.layout_of(pointee_ty);
|
||||
if !bx.is_backend_ref(pointee_layout)
|
||||
// But if we're not going to optimize, trying to use the fallback
|
||||
// body just makes things worse, so don't bother.
|
||||
|| bx.sess().opts.optimize == OptLevel::No
|
||||
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
|
||||
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
|
||||
// block optimization in `ptr::swap_nonoverlapping` is hard to rewrite back
|
||||
// into the (unoptimized) direct swapping implementation, so we disable it.
|
||||
|| bx.sess().target.arch == "spirv"
|
||||
{
|
||||
let x_place = PlaceRef::new_sized(args[0].immediate(), pointee_layout);
|
||||
let y_place = PlaceRef::new_sized(args[1].immediate(), pointee_layout);
|
||||
bx.typed_place_swap(x_place, y_place);
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
|
||||
let llret_ty = bx.backend_type(bx.layout_of(ret_ty));
|
||||
let result = PlaceRef::new_sized(llresult, fn_abi.ret.layout);
|
||||
|
||||
|
@ -1,22 +1,24 @@
|
||||
use super::abi::AbiBuilderMethods;
|
||||
use super::asm::AsmBuilderMethods;
|
||||
use super::consts::ConstMethods;
|
||||
use super::coverageinfo::CoverageInfoBuilderMethods;
|
||||
use super::debuginfo::DebugInfoBuilderMethods;
|
||||
use super::intrinsic::IntrinsicCallMethods;
|
||||
use super::misc::MiscMethods;
|
||||
use super::type_::{ArgAbiMethods, BaseTypeMethods};
|
||||
use super::type_::{ArgAbiMethods, BaseTypeMethods, LayoutTypeMethods};
|
||||
use super::{HasCodegen, StaticBuilderMethods};
|
||||
|
||||
use crate::common::{
|
||||
AtomicOrdering, AtomicRmwBinOp, IntPredicate, RealPredicate, SynchronizationScope, TypeKind,
|
||||
};
|
||||
use crate::mir::operand::OperandRef;
|
||||
use crate::mir::operand::{OperandRef, OperandValue};
|
||||
use crate::mir::place::PlaceRef;
|
||||
use crate::MemFlags;
|
||||
|
||||
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrs;
|
||||
use rustc_middle::ty::layout::{HasParamEnv, TyAndLayout};
|
||||
use rustc_middle::ty::Ty;
|
||||
use rustc_session::config::OptLevel;
|
||||
use rustc_span::Span;
|
||||
use rustc_target::abi::call::FnAbi;
|
||||
use rustc_target::abi::{Abi, Align, Scalar, Size, WrappingRange};
|
||||
@ -267,6 +269,54 @@ pub trait BuilderMethods<'a, 'tcx>:
|
||||
flags: MemFlags,
|
||||
);
|
||||
|
||||
/// *Typed* copy for non-overlapping places.
|
||||
///
|
||||
/// Has a default implementation in terms of `memcpy`, but specific backends
|
||||
/// can override to do something smarter if possible.
|
||||
///
|
||||
/// (For example, typed load-stores with alias metadata.)
|
||||
fn typed_place_copy(
|
||||
&mut self,
|
||||
dst: PlaceRef<'tcx, Self::Value>,
|
||||
src: PlaceRef<'tcx, Self::Value>,
|
||||
) {
|
||||
debug_assert!(src.llextra.is_none());
|
||||
debug_assert!(dst.llextra.is_none());
|
||||
debug_assert_eq!(dst.layout.size, src.layout.size);
|
||||
if self.sess().opts.optimize == OptLevel::No && self.is_backend_immediate(dst.layout) {
|
||||
// If we're not optimizing, the aliasing information from `memcpy`
|
||||
// isn't useful, so just load-store the value for smaller code.
|
||||
let temp = self.load_operand(src);
|
||||
temp.val.store(self, dst);
|
||||
} else if !dst.layout.is_zst() {
|
||||
let bytes = self.const_usize(dst.layout.size.bytes());
|
||||
self.memcpy(dst.llval, dst.align, src.llval, src.align, bytes, MemFlags::empty());
|
||||
}
|
||||
}
|
||||
|
||||
/// *Typed* swap for non-overlapping places.
|
||||
///
|
||||
/// Avoids `alloca`s for Immediates and ScalarPairs.
|
||||
///
|
||||
/// FIXME: Maybe do something smarter for Ref types too?
|
||||
/// For now, the `typed_swap` intrinsic just doesn't call this for those
|
||||
/// cases (in non-debug), preferring the fallback body instead.
|
||||
fn typed_place_swap(
|
||||
&mut self,
|
||||
left: PlaceRef<'tcx, Self::Value>,
|
||||
right: PlaceRef<'tcx, Self::Value>,
|
||||
) {
|
||||
let mut temp = self.load_operand(left);
|
||||
if let OperandValue::Ref(..) = temp.val {
|
||||
// The SSA value isn't stand-alone, so we need to copy it elsewhere
|
||||
let alloca = PlaceRef::alloca(self, left.layout);
|
||||
self.typed_place_copy(alloca, left);
|
||||
temp = self.load_operand(alloca);
|
||||
}
|
||||
self.typed_place_copy(left, right);
|
||||
temp.val.store(self, right);
|
||||
}
|
||||
|
||||
fn select(
|
||||
&mut self,
|
||||
cond: Self::Value,
|
||||
|
@ -120,6 +120,20 @@ pub trait LayoutTypeMethods<'tcx>: Backend<'tcx> {
|
||||
immediate: bool,
|
||||
) -> Self::Type;
|
||||
|
||||
/// A type that produces an [`OperandValue::Ref`] when loaded.
|
||||
///
|
||||
/// AKA one that's not a ZST, not `is_backend_immediate`, and
|
||||
/// not `is_backend_scalar_pair`. For such a type, a
|
||||
/// [`load_operand`] doesn't actually `load` anything.
|
||||
///
|
||||
/// [`OperandValue::Ref`]: crate::mir::operand::OperandValue::Ref
|
||||
/// [`load_operand`]: super::BuilderMethods::load_operand
|
||||
fn is_backend_ref(&self, layout: TyAndLayout<'tcx>) -> bool {
    // A layout is a "ref" exactly when it falls into none of the other
    // three categories; the checks run in the same order as before and
    // stop at the first category that matches.
    !layout.is_zst()
        && !self.is_backend_immediate(layout)
        && !self.is_backend_scalar_pair(layout)
}
|
||||
|
||||
/// A type that can be used in a [`super::BuilderMethods::load`] +
|
||||
/// [`super::BuilderMethods::store`] pair to implement a *typed* copy,
|
||||
/// such as a MIR `*_0 = *_1`.
|
||||
|
@ -21,8 +21,8 @@ use rustc_span::symbol::{sym, Symbol};
|
||||
use rustc_target::abi::Size;
|
||||
|
||||
use super::{
|
||||
util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx, MPlaceTy, Machine, OpTy,
|
||||
Pointer,
|
||||
memory::MemoryKind, util::ensure_monomorphic_enough, CheckInAllocMsg, ImmTy, InterpCx,
|
||||
MPlaceTy, Machine, OpTy, Pointer,
|
||||
};
|
||||
|
||||
use crate::fluent_generated as fluent;
|
||||
@ -414,6 +414,9 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
|
||||
let result = self.raw_eq_intrinsic(&args[0], &args[1])?;
|
||||
self.write_scalar(result, dest)?;
|
||||
}
|
||||
sym::typed_swap => {
|
||||
self.typed_swap_intrinsic(&args[0], &args[1])?;
|
||||
}
|
||||
|
||||
sym::vtable_size => {
|
||||
let ptr = self.read_pointer(&args[0])?;
|
||||
@ -607,6 +610,24 @@ impl<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> InterpCx<'mir, 'tcx, M> {
|
||||
self.mem_copy(src, dst, size, nonoverlapping)
|
||||
}
|
||||
|
||||
/// Does a *typed* swap of `*left` and `*right`.
|
||||
fn typed_swap_intrinsic(
|
||||
&mut self,
|
||||
left: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
|
||||
right: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
|
||||
) -> InterpResult<'tcx> {
|
||||
let left = self.deref_pointer(left)?;
|
||||
let right = self.deref_pointer(right)?;
|
||||
debug_assert_eq!(left.layout, right.layout);
|
||||
let kind = MemoryKind::Stack;
|
||||
let temp = self.allocate(left.layout, kind)?;
|
||||
self.copy_op(&left, &temp)?;
|
||||
self.copy_op(&right, &left)?;
|
||||
self.copy_op(&temp, &right)?;
|
||||
self.deallocate_ptr(temp.ptr(), None, kind)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub(crate) fn write_bytes_intrinsic(
|
||||
&mut self,
|
||||
dst: &OpTy<'tcx, <M as Machine<'mir, 'tcx>>::Provenance>,
|
||||
|
@ -484,6 +484,8 @@ pub fn check_intrinsic_type(
|
||||
(1, 0, vec![Ty::new_mut_ptr(tcx, param(0)), param(0)], Ty::new_unit(tcx))
|
||||
}
|
||||
|
||||
sym::typed_swap => (1, 1, vec![Ty::new_mut_ptr(tcx, param(0)); 2], Ty::new_unit(tcx)),
|
||||
|
||||
sym::discriminant_value => {
|
||||
let assoc_items = tcx.associated_item_def_ids(
|
||||
tcx.require_lang_item(hir::LangItem::DiscriminantKind, None),
|
||||
|
@ -1836,6 +1836,7 @@ symbols! {
|
||||
type_macros,
|
||||
type_name,
|
||||
type_privacy_lints,
|
||||
typed_swap,
|
||||
u128,
|
||||
u128_legacy_const_max,
|
||||
u128_legacy_const_min,
|
||||
|
@ -66,6 +66,7 @@
|
||||
use crate::marker::DiscriminantKind;
|
||||
use crate::marker::Tuple;
|
||||
use crate::mem::align_of;
|
||||
use crate::ptr;
|
||||
|
||||
pub mod mir;
|
||||
pub mod simd;
|
||||
@ -2638,6 +2639,27 @@ pub const fn is_val_statically_known<T: Copy>(_arg: T) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
/// Non-overlapping *typed* swap of a single value.
|
||||
///
|
||||
/// The codegen backends will replace this with a better implementation when
|
||||
/// `T` is a simple type that can be loaded and stored as an immediate.
|
||||
///
|
||||
/// The stabilized form of this intrinsic is [`crate::mem::swap`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// `x` and `y` are readable and writable as `T`, and non-overlapping.
|
||||
#[rustc_nounwind]
|
||||
#[inline]
|
||||
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
|
||||
// This has fallback `const fn` MIR, so shouldn't need stability, see #122652
|
||||
#[rustc_const_unstable(feature = "const_typed_swap", issue = "none")]
|
||||
pub const unsafe fn typed_swap<T>(x: *mut T, y: *mut T) {
|
||||
// SAFETY: The caller provided single non-overlapping items behind
|
||||
// pointers, so swapping them with `count: 1` is fine.
|
||||
unsafe { ptr::swap_nonoverlapping(x, y, 1) };
|
||||
}
|
||||
|
||||
/// Returns whether we should check for library UB. This evaluates to the value of `cfg!(debug_assertions)`
|
||||
/// during monomorphization.
|
||||
///
|
||||
|
@ -170,6 +170,7 @@
|
||||
#![feature(const_try)]
|
||||
#![feature(const_type_id)]
|
||||
#![feature(const_type_name)]
|
||||
#![feature(const_typed_swap)]
|
||||
#![feature(const_unicode_case_lookup)]
|
||||
#![feature(const_unsafecell_get_mut)]
|
||||
#![feature(const_waker)]
|
||||
|
@ -726,63 +726,9 @@ pub unsafe fn uninitialized<T>() -> T {
|
||||
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
|
||||
#[rustc_diagnostic_item = "mem_swap"]
|
||||
pub const fn swap<T>(x: &mut T, y: &mut T) {
|
||||
// NOTE(eddyb) SPIR-V's Logical addressing model doesn't allow for arbitrary
|
||||
// reinterpretation of values as (chunkable) byte arrays, and the loop in the
|
||||
// block optimization in `swap_slice` is hard to rewrite back
|
||||
// into the (unoptimized) direct swapping implementation, so we disable it.
|
||||
#[cfg(not(any(target_arch = "spirv")))]
|
||||
{
|
||||
// For types that are larger multiples of their alignment, the simple way
|
||||
// tends to copy the whole thing to stack rather than doing it one part
|
||||
// at a time, so instead treat them as one-element slices and piggy-back
|
||||
// the slice optimizations that will split up the swaps.
|
||||
if const { size_of::<T>() / align_of::<T>() > 2 } {
|
||||
// SAFETY: exclusive references always point to one non-overlapping
|
||||
// element and are non-null and properly aligned.
|
||||
return unsafe { ptr::swap_nonoverlapping(x, y, 1) };
|
||||
}
|
||||
}
|
||||
|
||||
// If a scalar consists of just a small number of alignment units, let
|
||||
// the codegen just swap those pieces directly, as it's likely just a
|
||||
// few instructions and anything else is probably overcomplicated.
|
||||
//
|
||||
// Most importantly, this covers primitives and simd types that tend to
|
||||
// have size=align where doing anything else can be a pessimization.
|
||||
// (This will also be used for ZSTs, though any solution works for them.)
|
||||
swap_simple(x, y);
|
||||
}
|
||||
|
||||
/// Same as [`swap`] semantically, but always uses the simple implementation.
|
||||
///
|
||||
/// Used elsewhere in `mem` and `ptr` at the bottom layer of calls.
|
||||
#[rustc_const_unstable(feature = "const_swap", issue = "83163")]
|
||||
#[inline]
|
||||
pub(crate) const fn swap_simple<T>(x: &mut T, y: &mut T) {
|
||||
// We arrange for this to typically be called with small types,
|
||||
// so this reads-and-writes approach is actually better than using
|
||||
// copy_nonoverlapping as it easily puts things in LLVM registers
|
||||
// directly and doesn't end up inlining allocas.
|
||||
// And LLVM actually optimizes it to 3×memcpy if called with
|
||||
// a type larger than it's willing to keep in a register.
|
||||
// Having typed reads and writes in MIR here is also good as
|
||||
// it lets Miri and CTFE understand them better, including things
|
||||
// like enforcing type validity for them.
|
||||
// Importantly, read+copy_nonoverlapping+write introduces confusing
|
||||
// asymmetry to the behaviour where one value went through read+write
|
||||
// whereas the other was copied over by the intrinsic (see #94371).
|
||||
// Furthermore, using only read+write here benefits limited backends
|
||||
// such as SPIR-V that work on an underlying *typed* view of memory,
|
||||
// and thus have trouble with Rust's untyped memory operations.
|
||||
|
||||
// SAFETY: exclusive references are always valid to read/write,
|
||||
// including being aligned, and nothing here panics so it's drop-safe.
|
||||
unsafe {
|
||||
let a = ptr::read(x);
|
||||
let b = ptr::read(y);
|
||||
ptr::write(x, b);
|
||||
ptr::write(y, a);
|
||||
}
|
||||
// SAFETY: `&mut` guarantees these are typed readable and writable
|
||||
// as well as non-overlapping.
|
||||
unsafe { intrinsics::typed_swap(x, y) }
|
||||
}
|
||||
|
||||
/// Replaces `dest` with the default value of `T`, returning the previous `dest` value.
|
||||
|
@ -1062,11 +1062,26 @@ const unsafe fn swap_nonoverlapping_simple_untyped<T>(x: *mut T, y: *mut T, coun
|
||||
let mut i = 0;
|
||||
while i < count {
|
||||
// SAFETY: By precondition, `i` is in-bounds because it's below `count`
|
||||
let x = unsafe { &mut *x.add(i) };
|
||||
let x = unsafe { x.add(i) };
|
||||
// SAFETY: By precondition, `i` is in-bounds because it's below `count`
|
||||
// and it's distinct from `x` since the ranges are non-overlapping
|
||||
let y = unsafe { &mut *y.add(i) };
|
||||
mem::swap_simple::<MaybeUninit<T>>(x, y);
|
||||
let y = unsafe { y.add(i) };
|
||||
|
||||
// If we end up here, it's because we're using a simple type -- like
|
||||
// a small power-of-two-sized thing -- or a special type with particularly
|
||||
// large alignment, particularly SIMD types.
|
||||
// Thus we're fine just reading-and-writing it, as either it's small
|
||||
// and that works well anyway or it's special and the type's author
|
||||
// presumably wanted things to be done in the larger chunk.
|
||||
|
||||
// SAFETY: we're only ever given pointers that are valid to read/write,
|
||||
// including being aligned, and nothing here panics so it's drop-safe.
|
||||
unsafe {
|
||||
let a: MaybeUninit<T> = read(x);
|
||||
let b: MaybeUninit<T> = read(y);
|
||||
write(x, b);
|
||||
write(y, a);
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
@ -0,0 +1,19 @@
|
||||
#![feature(core_intrinsics)]
|
||||
#![feature(rustc_attrs)]
|
||||
|
||||
use std::intrinsics::typed_swap;
|
||||
use std::ptr::addr_of_mut;
|
||||
|
||||
fn invalid_array() {
// Fills two 100-byte arrays with 1s and 2s, reinterprets both as
// `[bool; 100]` through raw pointers, and swaps them with `typed_swap`.
// The swap line is annotated as the place where the error is expected.
|
||||
let mut a = [1_u8; 100];
|
||||
let mut b = [2_u8; 100];
|
||||
unsafe {
|
||||
// Shadow the arrays with raw pointers typed as arrays of `bool`.
let a = addr_of_mut!(a).cast::<[bool; 100]>();
|
||||
let b = addr_of_mut!(b).cast::<[bool; 100]>();
|
||||
typed_swap(a, b); //~ERROR: constructing invalid value
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
// Runs the single scenario of this test file.
|
||||
invalid_array();
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
error: Undefined Behavior: constructing invalid value at [0]: encountered 0x02, but expected a boolean
|
||||
--> $DIR/typed-swap-invalid-array.rs:LL:CC
|
||||
|
|
||||
LL | typed_swap(a, b);
|
||||
| ^^^^^^^^^^^^^^^^ constructing invalid value at [0]: encountered 0x02, but expected a boolean
|
||||
|
|
||||
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
|
||||
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
|
||||
= note: BACKTRACE:
|
||||
= note: inside `invalid_array` at $DIR/typed-swap-invalid-array.rs:LL:CC
|
||||
note: inside `main`
|
||||
--> $DIR/typed-swap-invalid-array.rs:LL:CC
|
||||
|
|
||||
LL | invalid_array();
|
||||
| ^^^^^^^^^^^^^^^
|
||||
|
||||
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
|
||||
|
||||
error: aborting due to 1 previous error
|
||||
|
@ -0,0 +1,19 @@
|
||||
#![feature(core_intrinsics)]
|
||||
#![feature(rustc_attrs)]
|
||||
|
||||
use std::intrinsics::typed_swap;
|
||||
use std::ptr::addr_of_mut;
|
||||
|
||||
fn invalid_scalar() {
// Scalar counterpart of the array test: two `u8` values (1 and 2) are
// reinterpreted as `bool` through raw pointers and swapped with
// `typed_swap`. The swap line is annotated as the expected error site.
|
||||
let mut a = 1_u8;
|
||||
let mut b = 2_u8;
|
||||
unsafe {
|
||||
// Shadow the integers with raw pointers typed as `bool`.
let a = addr_of_mut!(a).cast::<bool>();
|
||||
let b = addr_of_mut!(b).cast::<bool>();
|
||||
typed_swap(a, b); //~ERROR: constructing invalid value
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
// Runs the single scenario of this test file.
|
||||
invalid_scalar();
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
error: Undefined Behavior: constructing invalid value: encountered 0x02, but expected a boolean
|
||||
--> $DIR/typed-swap-invalid-scalar.rs:LL:CC
|
||||
|
|
||||
LL | typed_swap(a, b);
|
||||
| ^^^^^^^^^^^^^^^^ constructing invalid value: encountered 0x02, but expected a boolean
|
||||
|
|
||||
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
|
||||
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
|
||||
= note: BACKTRACE:
|
||||
= note: inside `invalid_scalar` at $DIR/typed-swap-invalid-scalar.rs:LL:CC
|
||||
note: inside `main`
|
||||
--> $DIR/typed-swap-invalid-scalar.rs:LL:CC
|
||||
|
|
||||
LL | invalid_scalar();
|
||||
| ^^^^^^^^^^^^^^^^
|
||||
|
||||
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
|
||||
|
||||
error: aborting due to 1 previous error
|
||||
|
53
tests/assembly/x86_64-typed-swap.rs
Normal file
53
tests/assembly/x86_64-typed-swap.rs
Normal file
@ -0,0 +1,53 @@
|
||||
//@ revisions: WIN LIN
|
||||
//@ [WIN] only-windows
|
||||
//@ [LIN] only-linux
|
||||
//@ only-x86_64
|
||||
//@ assembly-output: emit-asm
|
||||
//@ compile-flags: --crate-type=lib -O
|
||||
|
||||
use std::arch::x86_64::__m128;
|
||||
use std::mem::swap;
|
||||
|
||||
// CHECK-LABEL: swap_i32:
|
||||
#[no_mangle]
|
||||
pub fn swap_i32(x: &mut i32, y: &mut i32) {
// Expected assembly: two 32-bit loads followed by two crossed 32-bit
// stores, then the return. The argument and temporary registers are
// captured by the first two directives and reused by the later ones.
|
||||
// CHECK: movl (%[[ARG1:.+]]), %[[T1:.+]]
|
||||
// CHECK: movl (%[[ARG2:.+]]), %[[T2:.+]]
|
||||
// CHECK: movl %[[T2]], (%[[ARG1]])
|
||||
// CHECK: movl %[[T1]], (%[[ARG2]])
|
||||
// CHECK: retq
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: swap_pair:
|
||||
#[no_mangle]
|
||||
pub fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
// Expected assembly: the 8-byte pair is moved with single 64-bit loads
// and stores.
// NOTE(review): the argument-register variables are used here without
// being re-captured, so this appears to rely on the bindings made in
// `swap_i32` above -- confirm the variable scoping of the check tool.
|
||||
// CHECK: movq (%[[ARG1]]), %[[T1:.+]]
|
||||
// CHECK: movq (%[[ARG2]]), %[[T2:.+]]
|
||||
// CHECK: movq %[[T2]], (%[[ARG1]])
|
||||
// CHECK: movq %[[T1]], (%[[ARG2]])
|
||||
// CHECK: retq
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: swap_str:
|
||||
#[no_mangle]
|
||||
pub fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
// Expected assembly: each `&str` is moved through an xmm register with
// unaligned (`movups`) loads and stores.
// NOTE(review): as in `swap_pair`, the argument-register variables are
// reused from an earlier capture rather than re-captured -- confirm.
|
||||
// CHECK: movups (%[[ARG1]]), %[[T1:xmm.]]
|
||||
// CHECK: movups (%[[ARG2]]), %[[T2:xmm.]]
|
||||
// CHECK: movups %[[T2]], (%[[ARG1]])
|
||||
// CHECK: movups %[[T1]], (%[[ARG2]])
|
||||
// CHECK: retq
|
||||
swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: swap_simd:
|
||||
#[no_mangle]
|
||||
pub fn swap_simd(x: &mut __m128, y: &mut __m128) {
// Expected assembly: same shape as `swap_str`, but with aligned
// (`movaps`) loads and stores for the `__m128` values.
// NOTE(review): the argument-register variables are reused from an
// earlier capture rather than re-captured -- confirm.
|
||||
// CHECK: movaps (%[[ARG1]]), %[[T1:xmm.]]
|
||||
// CHECK: movaps (%[[ARG2]]), %[[T2:xmm.]]
|
||||
// CHECK: movaps %[[T2]], (%[[ARG1]])
|
||||
// CHECK: movaps %[[T1]], (%[[ARG2]])
|
||||
// CHECK: retq
|
||||
swap(x, y)
|
||||
}
|
78
tests/codegen/intrinsics/typed_swap.rs
Normal file
78
tests/codegen/intrinsics/typed_swap.rs
Normal file
@ -0,0 +1,78 @@
|
||||
//@ revisions: OPT0 OPT3
|
||||
//@ [OPT0] compile-flags: -Copt-level=0
|
||||
//@ [OPT3] compile-flags: -Copt-level=3
|
||||
//@ compile-flags: -C no-prepopulate-passes
|
||||
//@ only-64bit (so I don't need to worry about usize)
|
||||
// ignore-tidy-linelength (the memcpy calls get long)
|
||||
|
||||
#![crate_type = "lib"]
|
||||
#![feature(core_intrinsics)]
|
||||
|
||||
use std::intrinsics::typed_swap;
|
||||
|
||||
// CHECK-LABEL: @swap_unit(
|
||||
#[no_mangle]
|
||||
pub unsafe fn swap_unit(x: &mut (), y: &mut ()) {
// Swapping `()` values: the directives below expect `ret void` to
// follow the entry label directly.
|
||||
// CHECK: start
|
||||
// CHECK-NEXT: ret void
|
||||
typed_swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @swap_i32(
|
||||
#[no_mangle]
|
||||
pub unsafe fn swap_i32(x: &mut i32, y: &mut i32) {
// Both revisions expect no stack slot, an initial `noundef` load of
// `x`, and a final store of that value into `y`. They differ in how
// `y` is copied into `x`: the opt-level 0 revision expects a load/store
// pair and no `memcpy`, while the opt-level 3 revision expects a 4-byte
// `memcpy` and no further load.
|
||||
// CHECK-NOT: alloca
|
||||
|
||||
// CHECK: %[[TEMP:.+]] = load i32, ptr %x, align 4
|
||||
// CHECK-SAME: !noundef
|
||||
// OPT0: %[[TEMP2:.+]] = load i32, ptr %y, align 4
|
||||
// OPT0-SAME: !noundef
|
||||
// OPT0: store i32 %[[TEMP2]], ptr %x, align 4
|
||||
// OPT0-NOT: memcpy
|
||||
// OPT3-NOT: load
|
||||
// OPT3: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %x, ptr align 4 %y, i64 4, i1 false)
|
||||
// CHECK: store i32 %[[TEMP]], ptr %y, align 4
|
||||
// CHECK: ret void
|
||||
typed_swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @swap_pair(
|
||||
#[no_mangle]
|
||||
pub unsafe fn swap_pair(x: &mut (i32, u32), y: &mut (i32, u32)) {
// Expected in both revisions: no stack slot, two `noundef` `i32` loads,
// an 8-byte `memcpy` from `y` to `x`, then two `i32` stores.
|
||||
// CHECK-NOT: alloca
|
||||
|
||||
// CHECK: load i32
|
||||
// CHECK-SAME: !noundef
|
||||
// CHECK: load i32
|
||||
// CHECK-SAME: !noundef
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 4 %x, ptr align 4 %y, i64 8, i1 false)
|
||||
// CHECK: store i32
|
||||
// CHECK: store i32
|
||||
typed_swap(x, y)
|
||||
}
|
||||
|
||||
// CHECK-LABEL: @swap_str(
|
||||
#[no_mangle]
|
||||
pub unsafe fn swap_str<'a>(x: &mut &'a str, y: &mut &'a str) {
// Expected in both revisions: no stack slot, a `nonnull`/`noundef`
// pointer load and a `noundef` `i64` load, a 16-byte `memcpy` from `y`
// to `x`, then a pointer store and an `i64` store.
|
||||
// CHECK-NOT: alloca
|
||||
|
||||
// CHECK: load ptr
|
||||
// CHECK-SAME: !nonnull
|
||||
// CHECK-SAME: !noundef
|
||||
// CHECK: load i64
|
||||
// CHECK-SAME: !noundef
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %x, ptr align 8 %y, i64 16, i1 false)
|
||||
// CHECK: store ptr
|
||||
// CHECK: store i64
|
||||
typed_swap(x, y)
|
||||
}
|
||||
|
||||
// OPT0-LABEL: @swap_string(
|
||||
#[no_mangle]
|
||||
pub unsafe fn swap_string(x: &mut String, y: &mut String) {
// Only the opt-level 0 revision has directives here: it expects a stack
// temporary and three 24-byte `memcpy` calls (x -> temp, y -> x,
// temp -> y).
|
||||
// OPT0: %[[TEMP:.+]] = alloca {{.+}}, align 8
|
||||
// OPT0: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %[[TEMP]], ptr align 8 %x, i64 24, i1 false)
|
||||
// OPT0: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %x, ptr align 8 %y, i64 24, i1 false)
|
||||
// OPT0: call void @llvm.memcpy.p0.p0.i64(ptr align 8 %y, ptr align 8 %[[TEMP]], i64 24, i1 false)
|
||||
typed_swap(x, y)
|
||||
}
|
@ -70,10 +70,7 @@ pub fn swap_slices<'a>(x: &mut &'a [u32], y: &mut &'a [u32]) {
|
||||
// CHECK-NOT: alloca
|
||||
// CHECK: load ptr
|
||||
// CHECK: load i64
|
||||
// CHECK: load ptr
|
||||
// CHECK: load i64
|
||||
// CHECK: store ptr
|
||||
// CHECK: store i64
|
||||
// CHECK: call void @llvm.memcpy.p0.p0.i64({{.+}}, i64 16, i1 false)
|
||||
// CHECK: store ptr
|
||||
// CHECK: store i64
|
||||
swap(x, y)
|
||||
|
@ -5,8 +5,6 @@ error[E0080]: evaluation of constant value failed
|
||||
|
|
||||
note: inside `std::ptr::read::<MaybeUninit<MaybeUninit<u8>>>`
|
||||
--> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
|
||||
note: inside `mem::swap_simple::<MaybeUninit<MaybeUninit<u8>>>`
|
||||
--> $SRC_DIR/core/src/mem/mod.rs:LL:COL
|
||||
note: inside `std::ptr::swap_nonoverlapping_simple_untyped::<MaybeUninit<u8>>`
|
||||
--> $SRC_DIR/core/src/ptr/mod.rs:LL:COL
|
||||
note: inside `swap_nonoverlapping::<MaybeUninit<u8>>`
|
||||
|
Loading…
Reference in New Issue
Block a user