rust/library
Joshua Wong c585541e67 optimize in-place collection of Vec
LLVM does not know that the multiplication never overflows, which causes
it to generate unnecessary instructions. Use `usize::unchecked_mul`, so
that it can fold the `dst_cap` calculation when `size_of::<I::SRC>() ==
size_of::<T>()`.

Running:

```
rustc -C llvm-args=-x86-asm-syntax=intel -O src/lib.rs --emit asm`
```

```rust

pub struct Foo([usize; 3]);

pub fn unwrap_copy(v: Vec<Foo>) -> Vec<[usize; 3]> {
    v.into_iter().map(|f| f.0).collect()
}
```

Before this commit:

```
define void @unwrap_copy(ptr noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
  %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
  %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
  %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
  %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
  %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
  %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
  %0 = udiv i64 %_19.i.idx, 24
  %_16.i.i = mul i64 %me.sroa.0.0.copyload.i, 24
  %dst_cap.i.i = udiv i64 %_16.i.i, 24
  store i64 %dst_cap.i.i, ptr %_0, align 8
  %1 = getelementptr inbounds i8, ptr %_0, i64 8
  store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
  %2 = getelementptr inbounds i8, ptr %_0, i64 16
  store i64 %0, ptr %2, align 8
  ret void
}
```

After:

```
define void @unwrap_copy(ptr noalias nocapture noundef writeonly sret([24 x i8]) align 8 dereferenceable(24) %_0, ptr noalias nocapture noundef readonly align 8 dereferenceable(24) %iter) {
start:
  %me.sroa.0.0.copyload.i = load i64, ptr %iter, align 8
  %me.sroa.4.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 8
  %me.sroa.4.0.copyload.i = load ptr, ptr %me.sroa.4.0.self.sroa_idx.i, align 8
  %me.sroa.5.0.self.sroa_idx.i = getelementptr inbounds i8, ptr %iter, i64 16
  %me.sroa.5.0.copyload.i = load i64, ptr %me.sroa.5.0.self.sroa_idx.i, align 8
  %_19.i.idx = mul nsw i64 %me.sroa.5.0.copyload.i, 24
  %0 = udiv i64 %_19.i.idx, 24
  store i64 %me.sroa.0.0.copyload.i, ptr %_0, align 8
  %1 = getelementptr inbounds i8, ptr %_0, i64 8
  store ptr %me.sroa.4.0.copyload.i, ptr %1, align 8
  %2 = getelementptr inbounds i8, ptr %_0, i64 16
  store i64 %0, ptr %2, align 8, !alias.scope !9, !noalias !14
  ret void
}
```

Note that there is still one more `mul,udiv` pair that I couldn't get
rid of. The root cause is the same issue as #121239, the `nuw` gets
stripped off of `ptr::sub_ptr`.
2024-05-18 18:30:20 -05:00
..
alloc optimize in-place collection of Vec 2024-05-18 18:30:20 -05:00
backtrace@e151306182 Update backtrace submodule 2024-04-12 16:28:19 -07:00
core Inline Duration construction into Duration::from_{millis,micros,nanos} 2024-05-17 18:37:59 -05:00
panic_abort Add support for Arm64EC to the Standard Library 2024-04-15 16:05:16 -07:00
panic_unwind Replace libc::c_int with core::ffi::c_int 2024-04-14 07:11:51 +00:00
portable-simd Stabilise inline_const 2024-04-24 13:12:25 +01:00
proc_macro Replace version placeholders for 1.79 2024-05-01 21:01:51 -04:00
profiler_builtins Update cc crate to v1.0.97 2024-05-08 15:06:35 +00:00
rtstartup library: Fix warnings in rtstartup 2024-01-06 01:32:03 +03:00
rustc-std-workspace-alloc Replace libstd, libcore, liballoc in line comments. 2022-12-30 14:00:42 +01:00
rustc-std-workspace-core
rustc-std-workspace-std
std android: use posix_memalign for aligned allocations 2024-05-18 12:49:01 +02:00
stdarch@df3618d9f3 feat: update stdarch submodule for intrinsics on ARM 2024-05-15 15:38:58 -04:00
sysroot Expose compiler-builtins-weak-intrinsics feature for -Zbuild-std 2023-06-23 11:15:34 +01:00
test emit fractional benchmark nanoseconds in libtest's JSON output format 2024-05-06 00:25:00 +02:00
unwind Fix unwinding on 32-bit watchOS ARM 2024-05-05 15:41:55 +02:00