From 86d445eda8ca2d884551fd0ac00a89e4bfe1cc81 Mon Sep 17 00:00:00 2001
From: AngelicosPhosphoros <xuzin.timur@gmail.com>
Date: Tue, 31 May 2022 15:14:55 +0300
Subject: [PATCH] Support vec zero-alloc optimization for tuples and byte
 arrays

* Implement the IsZero trait for tuples of up to 8 IsZero elements;
* Implement IsZero for u8/i8, which also makes it available for arrays and tuples of them;
* Add more codegen tests for this optimization;
* Lower the maximum array length for IsZero from 32 to 16 because LLVM fails to const-fold the checks for longer arrays.
---
 library/alloc/src/vec/is_zero.rs        |  36 ++++++-
 library/alloc/src/vec/spec_from_elem.rs |  27 ++---
 src/test/codegen/vec-calloc.rs          | 136 +++++++++++++++++++++---
 3 files changed, 170 insertions(+), 29 deletions(-)

diff --git a/library/alloc/src/vec/is_zero.rs b/library/alloc/src/vec/is_zero.rs
index edf270db81d..92a32779b8e 100644
--- a/library/alloc/src/vec/is_zero.rs
+++ b/library/alloc/src/vec/is_zero.rs
@@ -17,12 +17,14 @@ macro_rules! impl_is_zero {
     };
 }
 
+impl_is_zero!(i8, |x| x == 0); // Needed so that arrays and tuples of i8 implement IsZero.
 impl_is_zero!(i16, |x| x == 0);
 impl_is_zero!(i32, |x| x == 0);
 impl_is_zero!(i64, |x| x == 0);
 impl_is_zero!(i128, |x| x == 0);
 impl_is_zero!(isize, |x| x == 0);
 
+impl_is_zero!(u8, |x| x == 0); // Needed so that arrays and tuples of u8 implement IsZero.
 impl_is_zero!(u16, |x| x == 0);
 impl_is_zero!(u32, |x| x == 0);
 impl_is_zero!(u64, |x| x == 0);
@@ -54,15 +56,41 @@ unsafe impl<T: IsZero, const N: usize> IsZero for [T; N] {
     fn is_zero(&self) -> bool {
         // Because this is generated as a runtime check, it's not obvious that
         // it's worth doing if the array is really long.  The threshold here
-        // is largely arbitrary, but was picked because as of 2022-05-01 LLVM
-        // can const-fold the check in `vec![[0; 32]; n]` but not in
-        // `vec![[0; 64]; n]`: https://godbolt.org/z/WTzjzfs5b
+        // is largely arbitrary, but was picked because as of 2022-07-01 LLVM
+        // fails to const-fold the check in `vec![[1; 32]; n]`.
+        // See https://github.com/rust-lang/rust/pull/97581#issuecomment-1166628022
         // Feel free to tweak if you have better evidence.
 
-        N <= 32 && self.iter().all(IsZero::is_zero)
+        N <= 16 && self.iter().all(IsZero::is_zero)
     }
 }
 
+// This is a recursive macro: each expansion handles one tuple size, then recurses on the rest.
+macro_rules! impl_for_tuples {
+    // Base case: stops the recursion.
+    () => {
+        // No need to implement it for the empty tuple because it is a ZST.
+    };
+    ($first_arg:ident $(,$rest:ident)*) => {
+        unsafe impl <$first_arg: IsZero, $($rest: IsZero,)*> IsZero for ($first_arg, $($rest,)*){
+            #[inline]
+            fn is_zero(&self) -> bool{
+                // Destructure the tuple into references to its fields.
+                // Local variables may reuse the names of the generic type parameters.
+                #[allow(non_snake_case)]
+                let ($first_arg, $($rest,)*) = self;
+
+                $first_arg.is_zero()
+                    $( && $rest.is_zero() )*
+            }
+        }
+
+        impl_for_tuples!($($rest),*);
+    }
+}
+
+impl_for_tuples!(A, B, C, D, E, F, G, H);
+
 // `Option<&T>` and `Option<Box<T>>` are guaranteed to represent `None` as null.
 // For fat pointers, the bytes that would be the pointer metadata in the `Some`
 // variant are padding in the `None` variant, so ignoring them and
diff --git a/library/alloc/src/vec/spec_from_elem.rs b/library/alloc/src/vec/spec_from_elem.rs
index de610174783..ff364c033ee 100644
--- a/library/alloc/src/vec/spec_from_elem.rs
+++ b/library/alloc/src/vec/spec_from_elem.rs
@@ -1,6 +1,7 @@
+use core::ptr;
+
 use crate::alloc::Allocator;
 use crate::raw_vec::RawVec;
-use core::ptr::{self};
 
 use super::{ExtendElement, IsZero, Vec};
 
@@ -17,6 +18,18 @@ impl<T: Clone> SpecFromElem for T {
     }
 }
 
+impl<T: Clone + IsZero> SpecFromElem for T {
+    #[inline]
+    default fn from_elem<A: Allocator>(elem: T, n: usize, alloc: A) -> Vec<T, A> {
+        if elem.is_zero() {
+            return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
+        }
+        let mut v = Vec::with_capacity_in(n, alloc);
+        v.extend_with(n, ExtendElement(elem));
+        v
+    }
+}
+
 impl SpecFromElem for i8 {
     #[inline]
     fn from_elem<A: Allocator>(elem: i8, n: usize, alloc: A) -> Vec<i8, A> {
@@ -46,15 +59,3 @@ impl SpecFromElem for u8 {
         }
     }
 }
-
-impl<T: Clone + IsZero> SpecFromElem for T {
-    #[inline]
-    fn from_elem<A: Allocator>(elem: T, n: usize, alloc: A) -> Vec<T, A> {
-        if elem.is_zero() {
-            return Vec { buf: RawVec::with_capacity_zeroed_in(n, alloc), len: n };
-        }
-        let mut v = Vec::with_capacity_in(n, alloc);
-        v.extend_with(n, ExtendElement(elem));
-        v
-    }
-}
diff --git a/src/test/codegen/vec-calloc.rs b/src/test/codegen/vec-calloc.rs
index c616e9f1145..08302796c41 100644
--- a/src/test/codegen/vec-calloc.rs
+++ b/src/test/codegen/vec-calloc.rs
@@ -4,29 +4,141 @@
 
 #![crate_type = "lib"]
 
+// CHECK-LABEL: @vec_zero_bytes
+#[no_mangle]
+pub fn vec_zero_bytes(n: usize) -> Vec<u8> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+    // CHECK-NOT: call {{.*}}llvm.memset
+
+    // CHECK: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+    // CHECK-NOT: call {{.*}}llvm.memset
+
+    // CHECK: ret void
+    vec![0; n]
+}
+
+// CHECK-LABEL: @vec_one_bytes
+#[no_mangle]
+pub fn vec_one_bytes(n: usize) -> Vec<u8> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK: call {{.*}}__rust_alloc(
+    // CHECK: call {{.*}}llvm.memset
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK: ret void
+    vec![1; n]
+}
+
 // CHECK-LABEL: @vec_zero_scalar
 #[no_mangle]
 pub fn vec_zero_scalar(n: usize) -> Vec<i32> {
-    // CHECK-NOT: __rust_alloc(
-    // CHECK: __rust_alloc_zeroed(
-    // CHECK-NOT: __rust_alloc(
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: ret void
     vec![0; n]
 }
 
+// CHECK-LABEL: @vec_one_scalar
+#[no_mangle]
+pub fn vec_one_scalar(n: usize) -> Vec<i32> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK: call {{.*}}__rust_alloc(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK: ret void
+    vec![1; n]
+}
+
 // CHECK-LABEL: @vec_zero_rgb48
 #[no_mangle]
 pub fn vec_zero_rgb48(n: usize) -> Vec<[u16; 3]> {
-    // CHECK-NOT: __rust_alloc(
-    // CHECK: __rust_alloc_zeroed(
-    // CHECK-NOT: __rust_alloc(
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: ret void
     vec![[0, 0, 0]; n]
 }
 
-// CHECK-LABEL: @vec_zero_array_32
+// CHECK-LABEL: @vec_zero_array_16
 #[no_mangle]
-pub fn vec_zero_array_32(n: usize) -> Vec<[i64; 32]> {
-    // CHECK-NOT: __rust_alloc(
-    // CHECK: __rust_alloc_zeroed(
-    // CHECK-NOT: __rust_alloc(
-    vec![[0_i64; 32]; n]
+pub fn vec_zero_array_16(n: usize) -> Vec<[i64; 16]> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: ret void
+    vec![[0_i64; 16]; n]
+}
+
+// CHECK-LABEL: @vec_zero_tuple
+#[no_mangle]
+pub fn vec_zero_tuple(n: usize) -> Vec<(i16, u8, char)> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc(
+
+    // CHECK: ret void
+    vec![(0, 0, '\0'); n]
+}
+
+// CHECK-LABEL: @vec_non_zero_tuple
+#[no_mangle]
+pub fn vec_non_zero_tuple(n: usize) -> Vec<(i16, u8, char)> {
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK: call {{.*}}__rust_alloc(
+
+    // CHECK-NOT: call {{.*}}alloc::vec::from_elem
+    // CHECK-NOT: call {{.*}}reserve
+    // CHECK-NOT: call {{.*}}__rust_alloc_zeroed(
+
+    // CHECK: ret void
+    vec![(0, 0, 'A'); n]
 }