From d721c1f9e3caf1f4a0e0afb0151ec127d4fd2771 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Wed, 26 Jul 2017 16:23:07 +0200
Subject: [PATCH 1/5] trans: Reorder basic blocks in slice_for_each

This is mainly for readability of the generated LLVM IR and, subsequently,
the assembly. There is a slight positive performance impact, likely due to
I-cache effects.
---
 src/librustc_trans/tvec.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/librustc_trans/tvec.rs b/src/librustc_trans/tvec.rs
index 4216a73a8dd..de4d217c735 100644
--- a/src/librustc_trans/tvec.rs
+++ b/src/librustc_trans/tvec.rs
@@ -30,8 +30,8 @@ pub fn slice_for_each<'a, 'tcx, F>(
     };
 
     let body_bcx = bcx.build_sibling_block("slice_loop_body");
-    let next_bcx = bcx.build_sibling_block("slice_loop_next");
     let header_bcx = bcx.build_sibling_block("slice_loop_header");
+    let next_bcx = bcx.build_sibling_block("slice_loop_next");
 
     let start = if zst {
         C_uint(bcx.ccx, 0usize)

From ac43d58d3aca4b578864ec6dbb24d68a9f9c201c Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Wed, 26 Jul 2017 16:27:25 +0200
Subject: [PATCH 2/5] trans: Optimize initialization using repeat expressions

This elides initialization for zero-sized arrays:
* for zero-sized elements we previously emitted an empty loop
* for arrays with a length of zero we previously emitted a loop with zero
  iterations

This emits llvm.memset() instead of a loop over each element when:
* all elements are zero integers
* elements are byte-sized
---
 src/librustc_trans/common.rs     |  2 +-
 src/librustc_trans/mir/rvalue.rs | 41 ++++++++++++++++--
 src/test/codegen/slice-init.rs   | 74 ++++++++++++++++++++++++++++++++
 3 files changed, 113 insertions(+), 4 deletions(-)
 create mode 100644 src/test/codegen/slice-init.rs

diff --git a/src/librustc_trans/common.rs b/src/librustc_trans/common.rs
index 9b0803908b1..c5f69bd638f 100644
--- a/src/librustc_trans/common.rs
+++ b/src/librustc_trans/common.rs
@@ -372,7 +372,7 @@ pub fn const_to_uint(v: ValueRef) -> u64 {
     }
 }
 
-fn is_const_integral(v: ValueRef) -> bool {
+pub fn is_const_integral(v: ValueRef) -> bool {
     unsafe {
         !llvm::LLVMIsAConstantInt(v).is_null()
     }
diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs
index 4bd5091a4f3..2cae2150885 100644
--- a/src/librustc_trans/mir/rvalue.rs
+++ b/src/librustc_trans/mir/rvalue.rs
@@ -11,7 +11,7 @@
 use llvm::{self, ValueRef};
 use rustc::ty::{self, Ty};
 use rustc::ty::cast::{CastTy, IntTy};
-use rustc::ty::layout::{Layout, LayoutTyper};
+use rustc::ty::layout::{self, Layout, LayoutTyper, Primitive};
 use rustc::mir::tcx::LvalueTy;
 use rustc::mir;
 use rustc::middle::lang_items::ExchangeMallocFnLangItem;
@@ -20,7 +20,7 @@ use base;
 use builder::Builder;
 use callee;
 use common::{self, val_ty, C_bool, C_null, C_uint};
-use common::{C_integral};
+use common::{C_integral, C_i32};
 use adt;
 use machine;
 use monomorphize;
@@ -93,12 +93,47 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> {
             }
 
             mir::Rvalue::Repeat(ref elem, ref count) => {
+                let dest_ty = dest.ty.to_ty(bcx.tcx());
+
+                // No need to initialize memory of a zero-sized slice
+                if common::type_is_zero_size(bcx.ccx, dest_ty) {
+                    return bcx;
+                }
+
                 let tr_elem = self.trans_operand(&bcx, elem);
                 let size = count.as_u64(bcx.tcx().sess.target.uint_type);
                 let size = C_uint(bcx.ccx, size);
                 let base = base::get_dataptr(&bcx, dest.llval);
+                let align = dest.alignment.to_align();
+
+                if let OperandValue::Immediate(v) = tr_elem.val {
+                    if common::is_const_integral(v) && common::const_to_uint(v) == 0 {
+                        let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
+                        let align = C_i32(bcx.ccx, align as i32);
+                        let ty = type_of::type_of(bcx.ccx, dest_ty);
+                        let size = machine::llsize_of(bcx.ccx, ty);
+                        let fill = C_integral(Type::i8(bcx.ccx), 0, false);
+                        base::call_memset(&bcx, base, fill, size, align, false);
+                        return bcx;
+                    }
+                }
+
+                // Use llvm.memset.p0i8.* to initialize byte arrays
+                let elem_layout = bcx.ccx.layout_of(tr_elem.ty).layout;
+                match *elem_layout {
+                    Layout::Scalar { value: Primitive::Int(layout::I8), .. } |
+                    Layout::CEnum { discr: layout::I8, .. } => {
+                        let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
+                        let align = C_i32(bcx.ccx, align as i32);
+                        let fill = tr_elem.immediate();
+                        base::call_memset(&bcx, base, fill, size, align, false);
+                        return bcx;
+                    }
+                    _ => ()
+                }
+
                 tvec::slice_for_each(&bcx, base, tr_elem.ty, size, |bcx, llslot, loop_bb| {
-                    self.store_operand(bcx, llslot, dest.alignment.to_align(), tr_elem);
+                    self.store_operand(bcx, llslot, align, tr_elem);
                     bcx.br(loop_bb);
                 })
             }
diff --git a/src/test/codegen/slice-init.rs b/src/test/codegen/slice-init.rs
new file mode 100644
index 00000000000..cb684af3953
--- /dev/null
+++ b/src/test/codegen/slice-init.rs
@@ -0,0 +1,74 @@
+// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
+// file at the top-level directory of this distribution and at
+// http://rust-lang.org/COPYRIGHT.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// compile-flags: -C no-prepopulate-passes
+
+#![crate_type = "lib"]
+
+// CHECK-LABEL: @zero_sized_elem
+#[no_mangle]
+pub fn zero_sized_elem() {
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    // CHECK-NOT: call void @llvm.memset.p0i8
+    let x = [(); 4];
+    drop(&x);
+}
+
+// CHECK-LABEL: @zero_len_array
+#[no_mangle]
+pub fn zero_len_array() {
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    // CHECK-NOT: call void @llvm.memset.p0i8
+    let x = [4; 0];
+    drop(&x);
+}
+
+// CHECK-LABEL: @byte_array
+#[no_mangle]
+pub fn byte_array() {
+    // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 7, i64 4
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    let x = [7u8; 4];
+    drop(&x);
+}
+
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+enum Init {
+    Loop,
+    Memset,
+}
+
+// CHECK-LABEL: @byte_enum_array
+#[no_mangle]
+pub fn byte_enum_array() {
+    // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 {{.*}}, i64 4
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    let x = [Init::Memset; 4];
+    drop(&x);
+}
+
+// CHECK-LABEL: @zeroed_integer_array
+#[no_mangle]
+pub fn zeroed_integer_array() {
+    // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 0, i64 16
+    // CHECK-NOT: br label %slice_loop_header{{.*}}
+    let x = [0u32; 4];
+    drop(&x);
+}
+
+// CHECK-LABEL: @nonzero_integer_array
+#[no_mangle]
+pub fn nonzero_integer_array() {
+    // CHECK: br label %slice_loop_header{{.*}}
+    // CHECK-NOT: call void @llvm.memset.p0i8
+    let x = [0x1a_2b_3c_4d_u32; 4];
+    drop(&x);
+}

From c3603f3ec669f8370fbe4bf98032527ee8f1c489 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Wed, 2 Aug 2017 00:32:14 +0200
Subject: [PATCH 3/5] trans: Check LLVM type instead of Layout

---
 src/librustc_trans/mir/rvalue.rs | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs
index 2cae2150885..0485054a12a 100644
--- a/src/librustc_trans/mir/rvalue.rs
+++ b/src/librustc_trans/mir/rvalue.rs
@@ -11,7 +11,7 @@
 use llvm::{self, ValueRef};
 use rustc::ty::{self, Ty};
 use rustc::ty::cast::{CastTy, IntTy};
-use rustc::ty::layout::{self, Layout, LayoutTyper, Primitive};
+use rustc::ty::layout::{Layout, LayoutTyper};
 use rustc::mir::tcx::LvalueTy;
 use rustc::mir;
 use rustc::middle::lang_items::ExchangeMallocFnLangItem;
@@ -107,6 +107,7 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> {
                 let align = dest.alignment.to_align();
 
                 if let OperandValue::Immediate(v) = tr_elem.val {
+                    // Use llvm.memset.p0i8.* to initialize all-zero arrays
                     if common::is_const_integral(v) && common::const_to_uint(v) == 0 {
                         let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
                         let align = C_i32(bcx.ccx, align as i32);
@@ -116,20 +117,15 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> {
                         base::call_memset(&bcx, base, fill, size, align, false);
                         return bcx;
                     }
-                }
 
-                // Use llvm.memset.p0i8.* to initialize byte arrays
-                let elem_layout = bcx.ccx.layout_of(tr_elem.ty).layout;
-                match *elem_layout {
-                    Layout::Scalar { value: Primitive::Int(layout::I8), .. } |
-                    Layout::CEnum { discr: layout::I8, .. } => {
+                    // Use llvm.memset.p0i8.* to initialize byte arrays
+                    if common::val_ty(v) == Type::i8(bcx.ccx) {
                         let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
                         let align = C_i32(bcx.ccx, align as i32);
                         let fill = tr_elem.immediate();
                         base::call_memset(&bcx, base, fill, size, align, false);
                         return bcx;
                     }
-                    _ => ()
                 }
 
                 tvec::slice_for_each(&bcx, base, tr_elem.ty, size, |bcx, llslot, loop_bb| {

From 67044501bc851f36327e429c0b7ab935d375d71a Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Fri, 4 Aug 2017 02:27:30 +0200
Subject: [PATCH 4/5] trans: Reuse immediate value in call to call_memset()

---
 src/librustc_trans/mir/rvalue.rs | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs
index 0485054a12a..a23e1a0684b 100644
--- a/src/librustc_trans/mir/rvalue.rs
+++ b/src/librustc_trans/mir/rvalue.rs
@@ -122,8 +122,7 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> {
                     if common::val_ty(v) == Type::i8(bcx.ccx) {
                         let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty));
                         let align = C_i32(bcx.ccx, align as i32);
-                        let fill = tr_elem.immediate();
-                        base::call_memset(&bcx, base, fill, size, align, false);
+                        base::call_memset(&bcx, base, v, size, align, false);
                         return bcx;
                     }
                 }

From 11d6312abd614fca3970902f137225e0437d0a09 Mon Sep 17 00:00:00 2001
From: Florian Zeitz <florob@babelmonkeys.de>
Date: Fri, 4 Aug 2017 16:58:12 +0200
Subject: [PATCH 5/5] codegen tests: Check type of `len` argument to
 `llvm.memset.*` based on the exact intrinsic used

---
 src/test/codegen/slice-init.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/test/codegen/slice-init.rs b/src/test/codegen/slice-init.rs
index cb684af3953..569d937c812 100644
--- a/src/test/codegen/slice-init.rs
+++ b/src/test/codegen/slice-init.rs
@@ -33,7 +33,7 @@ pub fn zero_len_array() {
 // CHECK-LABEL: @byte_array
 #[no_mangle]
 pub fn byte_array() {
-    // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 7, i64 4
+    // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 7, i[[WIDTH]] 4
     // CHECK-NOT: br label %slice_loop_header{{.*}}
     let x = [7u8; 4];
     drop(&x);
@@ -49,7 +49,7 @@ enum Init {
 // CHECK-LABEL: @byte_enum_array
 #[no_mangle]
 pub fn byte_enum_array() {
-    // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 {{.*}}, i64 4
+    // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 {{.*}}, i[[WIDTH]] 4
     // CHECK-NOT: br label %slice_loop_header{{.*}}
     let x = [Init::Memset; 4];
     drop(&x);
@@ -58,7 +58,7 @@ pub fn byte_enum_array() {
 // CHECK-LABEL: @zeroed_integer_array
 #[no_mangle]
 pub fn zeroed_integer_array() {
-    // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 0, i64 16
+    // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 0, i[[WIDTH]] 16
     // CHECK-NOT: br label %slice_loop_header{{.*}}
     let x = [0u32; 4];
     drop(&x);