mirror of
https://github.com/rust-lang/rust.git
synced 2025-01-31 17:12:53 +00:00
Auto merge of #90755 - scottmcm:spec-array-clone, r=jackh726
Specialize array cloning for Copy types

After PR 86041, the optimizer no longer load-merges at the LLVM IR level, which might be part of the perf loss. (I'll run perf and see if this makes a difference.)

Also I added a codegen test so this hopefully won't regress in future -- it passes on stable and with my change here, but not on the 2021-11-09 nightly.

Example on current nightly: <https://play.rust-lang.org/?version=nightly&mode=release&edition=2021&gist=1f52d46fb8fc3ca3ac9f097390085ffa>

```rust
type T = u8;
const N: usize = 3;
pub fn demo_clone(x: &[T; N]) -> [T; N] { x.clone() }
pub fn demo_copy(x: &[T; N]) -> [T; N] { *x }
```

```llvm-ir
; playground::demo_clone
; Function Attrs: mustprogress nofree nosync nounwind nonlazybind uwtable willreturn
define i24 @_ZN10playground10demo_clone17h98a4f11453d1a753E([3 x i8]* noalias nocapture readonly align 1 dereferenceable(3) %x) unnamed_addr #0 personality i32 (i32, i32, i64, %"unwind::libunwind::_Unwind_Exception"*, %"unwind::libunwind::_Unwind_Context"*)* @rust_eh_personality {
start:
  %0 = getelementptr [3 x i8], [3 x i8]* %x, i64 0, i64 0
  %1 = getelementptr inbounds [3 x i8], [3 x i8]* %x, i64 0, i64 1
  %.val.i.i.i.i.i.i.i.i.i = load i8, i8* %0, align 1, !alias.scope !2, !noalias !9
  %2 = getelementptr inbounds [3 x i8], [3 x i8]* %x, i64 0, i64 2
  %.val.i.i.i.i.i.1.i.i.i.i = load i8, i8* %1, align 1, !alias.scope !2, !noalias !20
  %.val.i.i.i.i.i.2.i.i.i.i = load i8, i8* %2, align 1, !alias.scope !2, !noalias !23
  %array.sroa.6.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.2.i.i.i.i to i32
  %array.sroa.6.0.insert.shift.i.i.i.i = shl nuw nsw i32 %array.sroa.6.0.insert.ext.i.i.i.i, 16
  %array.sroa.5.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.1.i.i.i.i to i32
  %array.sroa.5.0.insert.shift.i.i.i.i = shl nuw nsw i32 %array.sroa.5.0.insert.ext.i.i.i.i, 8
  %array.sroa.0.0.insert.ext.i.i.i.i = zext i8 %.val.i.i.i.i.i.i.i.i.i to i32
  %array.sroa.5.0.insert.insert.i.i.i.i = or i32 %array.sroa.5.0.insert.shift.i.i.i.i, %array.sroa.0.0.insert.ext.i.i.i.i
  %array.sroa.0.0.insert.insert.i.i.i.i = or i32 %array.sroa.5.0.insert.insert.i.i.i.i, %array.sroa.6.0.insert.shift.i.i.i.i
  %.sroa.4.0.extract.trunc.i.i.i.i = trunc i32 %array.sroa.0.0.insert.insert.i.i.i.i to i24
  ret i24 %.sroa.4.0.extract.trunc.i.i.i.i
}

; playground::demo_copy
; Function Attrs: mustprogress nofree norecurse nosync nounwind nonlazybind readonly uwtable willreturn
define i24 @_ZN10playground9demo_copy17h7817453f9291d746E([3 x i8]* noalias nocapture readonly align 1 dereferenceable(3) %x) unnamed_addr #1 {
start:
  %.sroa.0.0..sroa_cast = bitcast [3 x i8]* %x to i24*
  %.sroa.0.0.copyload = load i24, i24* %.sroa.0.0..sroa_cast, align 1
  ret i24 %.sroa.0.0.copyload
}
```
This commit is contained in:
commit
62efba8a05
@ -339,9 +339,7 @@ impl<T: Copy, const N: usize> Copy for [T; N] {}
|
||||
impl<T: Clone, const N: usize> Clone for [T; N] {
|
||||
#[inline]
|
||||
fn clone(&self) -> Self {
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut self.iter().cloned()) }
|
||||
SpecArrayClone::clone(self)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -350,6 +348,29 @@ impl<T: Clone, const N: usize> Clone for [T; N] {
|
||||
}
|
||||
}
|
||||
|
||||
// Private helper trait used to pick an array-cloning strategy per element
// type: the `Clone` impl for `[T; N]` in this file forwards to
// `SpecArrayClone::clone`.
// Only compiled when the `bootstrap` cfg is not set.
#[cfg(not(bootstrap))]
|
||||
trait SpecArrayClone: Clone {
|
||||
// Produces a new `[Self; N]` from a borrowed array of the same length `N`.
fn clone<const N: usize>(array: &[Self; N]) -> [Self; N];
|
||||
}
|
||||
|
||||
// Blanket implementation of `SpecArrayClone` for every `T: Clone`.
// The method is declared `default fn`, so a more specific impl may override it.
// Only compiled when the `bootstrap` cfg is not set.
#[cfg(not(bootstrap))]
|
||||
impl<T: Clone> SpecArrayClone for T {
|
||||
#[inline]
|
||||
// Clones each element through `array.iter().cloned()` and gathers the
// results with `collect_into_array_unchecked` (defined outside this view).
default fn clone<const N: usize>(array: &[T; N]) -> [T; N] {
|
||||
// SAFETY: we know for certain that this iterator will yield exactly `N`
|
||||
// items.
|
||||
unsafe { collect_into_array_unchecked(&mut array.iter().cloned()) }
|
||||
}
|
||||
}
|
||||
|
||||
// Implementation of `SpecArrayClone` for `T: Copy` element types.
// Per the commit description, the goal is for cloning an array of `Copy`
// elements to generate the same code as copying the array with `*x`.
// Only compiled when the `bootstrap` cfg is not set.
#[cfg(not(bootstrap))]
|
||||
impl<T: Copy> SpecArrayClone for T {
|
||||
#[inline]
|
||||
// Returns the array by dereferencing the reference, i.e. a plain copy of
// the whole `[T; N]` value instead of per-element `clone` calls.
fn clone<const N: usize>(array: &[T; N]) -> [T; N] {
|
||||
*array
|
||||
}
|
||||
}
|
||||
|
||||
// The Default impls cannot be done with const generics because `[T; 0]` doesn't
|
||||
// require Default to be implemented, and having different impl blocks for
|
||||
// different numbers isn't supported yet.
|
||||
|
15
src/test/codegen/array-clone.rs
Normal file
15
src/test/codegen/array-clone.rs
Normal file
@ -0,0 +1,15 @@
|
||||
// compile-flags: -O
|
||||
|
||||
#![crate_type = "lib"]
|
||||
|
||||
// CHECK-LABEL: @array_clone
|
||||
// Codegen regression test: clones a `[u8; 2]` via `Clone::clone`.
// The FileCheck directives inside the body expect the optimized IR to reach a
// `load i16` directly followed by `ret i16`, with no element-pointer
// arithmetic, byte-sized loads, zero-extensions or shifts before that load.
// `#[no_mangle]` keeps the symbol name `array_clone` so the label directive
// above can find the function in the emitted IR.
#[no_mangle]
|
||||
pub fn array_clone(a: &[u8; 2]) -> [u8; 2] {
|
||||
// CHECK-NOT: getelementptr
|
||||
// CHECK-NOT: load i8
|
||||
// CHECK-NOT: zext
|
||||
// CHECK-NOT: shl
|
||||
// CHECK: load i16
|
||||
// CHECK-NEXT: ret i16
|
||||
a.clone()
|
||||
}
|
Loading…
Reference in New Issue
Block a user