From d721c1f9e3caf1f4a0e0afb0151ec127d4fd2771 Mon Sep 17 00:00:00 2001 From: Florian Zeitz <florob@babelmonkeys.de> Date: Wed, 26 Jul 2017 16:23:07 +0200 Subject: [PATCH 1/5] trans: Reorder basic blocks in slice_for_each This is mainly for readability of the generated LLVM IR and subsequently assembly. There is a slight positive performance impact, likely due to I-cache effects. --- src/librustc_trans/tvec.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/librustc_trans/tvec.rs b/src/librustc_trans/tvec.rs index 4216a73a8dd..de4d217c735 100644 --- a/src/librustc_trans/tvec.rs +++ b/src/librustc_trans/tvec.rs @@ -30,8 +30,8 @@ pub fn slice_for_each<'a, 'tcx, F>( }; let body_bcx = bcx.build_sibling_block("slice_loop_body"); - let next_bcx = bcx.build_sibling_block("slice_loop_next"); let header_bcx = bcx.build_sibling_block("slice_loop_header"); + let next_bcx = bcx.build_sibling_block("slice_loop_next"); let start = if zst { C_uint(bcx.ccx, 0usize) From ac43d58d3aca4b578864ec6dbb24d68a9f9c201c Mon Sep 17 00:00:00 2001 From: Florian Zeitz <florob@babelmonkeys.de> Date: Wed, 26 Jul 2017 16:27:25 +0200 Subject: [PATCH 2/5] trans: Optimize initialization using repeat expressions This elides initialization for zero-sized arrays: * for zero-sized elements we previously emitted an empty loop * for arrays with a length of zero we previously emitted a loop with zero iterations This emits llvm.memset() instead of a loop over each element when: * all elements are zero integers * elements are byte sized --- src/librustc_trans/common.rs | 2 +- src/librustc_trans/mir/rvalue.rs | 41 ++++++++++++++++-- src/test/codegen/slice-init.rs | 74 ++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 4 deletions(-) create mode 100644 src/test/codegen/slice-init.rs diff --git a/src/librustc_trans/common.rs b/src/librustc_trans/common.rs index 9b0803908b1..c5f69bd638f 100644 --- a/src/librustc_trans/common.rs +++ b/src/librustc_trans/common.rs @@ -372,7 
+372,7 @@ pub fn const_to_uint(v: ValueRef) -> u64 { } } -fn is_const_integral(v: ValueRef) -> bool { +pub fn is_const_integral(v: ValueRef) -> bool { unsafe { !llvm::LLVMIsAConstantInt(v).is_null() } diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs index 4bd5091a4f3..2cae2150885 100644 --- a/src/librustc_trans/mir/rvalue.rs +++ b/src/librustc_trans/mir/rvalue.rs @@ -11,7 +11,7 @@ use llvm::{self, ValueRef}; use rustc::ty::{self, Ty}; use rustc::ty::cast::{CastTy, IntTy}; -use rustc::ty::layout::{Layout, LayoutTyper}; +use rustc::ty::layout::{self, Layout, LayoutTyper, Primitive}; use rustc::mir::tcx::LvalueTy; use rustc::mir; use rustc::middle::lang_items::ExchangeMallocFnLangItem; @@ -20,7 +20,7 @@ use base; use builder::Builder; use callee; use common::{self, val_ty, C_bool, C_null, C_uint}; -use common::{C_integral}; +use common::{C_integral, C_i32}; use adt; use machine; use monomorphize; @@ -93,12 +93,47 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> { } mir::Rvalue::Repeat(ref elem, ref count) => { + let dest_ty = dest.ty.to_ty(bcx.tcx()); + + // No need to initialize memory of a zero-sized slice + if common::type_is_zero_size(bcx.ccx, dest_ty) { + return bcx; + } + let tr_elem = self.trans_operand(&bcx, elem); let size = count.as_u64(bcx.tcx().sess.target.uint_type); let size = C_uint(bcx.ccx, size); let base = base::get_dataptr(&bcx, dest.llval); + let align = dest.alignment.to_align(); + + if let OperandValue::Immediate(v) = tr_elem.val { + if common::is_const_integral(v) && common::const_to_uint(v) == 0 { + let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty)); + let align = C_i32(bcx.ccx, align as i32); + let ty = type_of::type_of(bcx.ccx, dest_ty); + let size = machine::llsize_of(bcx.ccx, ty); + let fill = C_integral(Type::i8(bcx.ccx), 0, false); + base::call_memset(&bcx, base, fill, size, align, false); + return bcx; + } + } + + // Use llvm.memset.p0i8.* to initialize byte arrays + let elem_layout = 
bcx.ccx.layout_of(tr_elem.ty).layout; + match *elem_layout { + Layout::Scalar { value: Primitive::Int(layout::I8), .. } | + Layout::CEnum { discr: layout::I8, .. } => { + let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty)); + let align = C_i32(bcx.ccx, align as i32); + let fill = tr_elem.immediate(); + base::call_memset(&bcx, base, fill, size, align, false); + return bcx; + } + _ => () + } + tvec::slice_for_each(&bcx, base, tr_elem.ty, size, |bcx, llslot, loop_bb| { - self.store_operand(bcx, llslot, dest.alignment.to_align(), tr_elem); + self.store_operand(bcx, llslot, align, tr_elem); bcx.br(loop_bb); }) } diff --git a/src/test/codegen/slice-init.rs b/src/test/codegen/slice-init.rs new file mode 100644 index 00000000000..cb684af3953 --- /dev/null +++ b/src/test/codegen/slice-init.rs @@ -0,0 +1,74 @@ +// Copyright 2017 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +// compile-flags: -C no-prepopulate-passes + +#![crate_type = "lib"] + +// CHECK-LABEL: @zero_sized_elem +#[no_mangle] +pub fn zero_sized_elem() { + // CHECK-NOT: br label %slice_loop_header{{.*}} + // CHECK-NOT: call void @llvm.memset.p0i8 + let x = [(); 4]; + drop(&x); +} + +// CHECK-LABEL: @zero_len_array +#[no_mangle] +pub fn zero_len_array() { + // CHECK-NOT: br label %slice_loop_header{{.*}} + // CHECK-NOT: call void @llvm.memset.p0i8 + let x = [4; 0]; + drop(&x); +} + +// CHECK-LABEL: @byte_array +#[no_mangle] +pub fn byte_array() { + // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 7, i64 4 + // CHECK-NOT: br label %slice_loop_header{{.*}} + let x = [7u8; 4]; + drop(&x); +} + +#[allow(dead_code)] +#[derive(Copy, Clone)] +enum Init { + Loop, + Memset, +} + +// CHECK-LABEL: @byte_enum_array +#[no_mangle] +pub fn byte_enum_array() { + // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 {{.*}}, i64 4 + // CHECK-NOT: br label %slice_loop_header{{.*}} + let x = [Init::Memset; 4]; + drop(&x); +} + +// CHECK-LABEL: @zeroed_integer_array +#[no_mangle] +pub fn zeroed_integer_array() { + // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 0, i64 16 + // CHECK-NOT: br label %slice_loop_header{{.*}} + let x = [0u32; 4]; + drop(&x); +} + +// CHECK-LABEL: @nonzero_integer_array +#[no_mangle] +pub fn nonzero_integer_array() { + // CHECK: br label %slice_loop_header{{.*}} + // CHECK-NOT: call void @llvm.memset.p0i8 + let x = [0x1a_2b_3c_4d_u32; 4]; + drop(&x); +} From c3603f3ec669f8370fbe4bf98032527ee8f1c489 Mon Sep 17 00:00:00 2001 From: Florian Zeitz <florob@babelmonkeys.de> Date: Wed, 2 Aug 2017 00:32:14 +0200 Subject: [PATCH 3/5] trans: Check LLVM type instead of Layout --- src/librustc_trans/mir/rvalue.rs | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs index 2cae2150885..0485054a12a 100644 --- a/src/librustc_trans/mir/rvalue.rs 
+++ b/src/librustc_trans/mir/rvalue.rs @@ -11,7 +11,7 @@ use llvm::{self, ValueRef}; use rustc::ty::{self, Ty}; use rustc::ty::cast::{CastTy, IntTy}; -use rustc::ty::layout::{self, Layout, LayoutTyper, Primitive}; +use rustc::ty::layout::{Layout, LayoutTyper}; use rustc::mir::tcx::LvalueTy; use rustc::mir; use rustc::middle::lang_items::ExchangeMallocFnLangItem; @@ -107,6 +107,7 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> { let align = dest.alignment.to_align(); if let OperandValue::Immediate(v) = tr_elem.val { + // Use llvm.memset.p0i8.* to initialize all zero arrays if common::is_const_integral(v) && common::const_to_uint(v) == 0 { let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty)); let align = C_i32(bcx.ccx, align as i32); @@ -116,20 +117,15 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> { base::call_memset(&bcx, base, fill, size, align, false); return bcx; } - } - // Use llvm.memset.p0i8.* to initialize byte arrays - let elem_layout = bcx.ccx.layout_of(tr_elem.ty).layout; - match *elem_layout { - Layout::Scalar { value: Primitive::Int(layout::I8), .. } | - Layout::CEnum { discr: layout::I8, .. 
} => { + // Use llvm.memset.p0i8.* to initialize byte arrays + if common::val_ty(v) == Type::i8(bcx.ccx) { let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty)); let align = C_i32(bcx.ccx, align as i32); let fill = tr_elem.immediate(); base::call_memset(&bcx, base, fill, size, align, false); return bcx; } - _ => () } tvec::slice_for_each(&bcx, base, tr_elem.ty, size, |bcx, llslot, loop_bb| { From 67044501bc851f36327e429c0b7ab935d375d71a Mon Sep 17 00:00:00 2001 From: Florian Zeitz <florob@babelmonkeys.de> Date: Fri, 4 Aug 2017 02:27:30 +0200 Subject: [PATCH 4/5] trans: Reuse immediate value in call to call_memset() --- src/librustc_trans/mir/rvalue.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/librustc_trans/mir/rvalue.rs b/src/librustc_trans/mir/rvalue.rs index 0485054a12a..a23e1a0684b 100644 --- a/src/librustc_trans/mir/rvalue.rs +++ b/src/librustc_trans/mir/rvalue.rs @@ -122,8 +122,7 @@ impl<'a, 'tcx> MirContext<'a, 'tcx> { if common::val_ty(v) == Type::i8(bcx.ccx) { let align = align.unwrap_or_else(|| bcx.ccx.align_of(tr_elem.ty)); let align = C_i32(bcx.ccx, align as i32); - let fill = tr_elem.immediate(); - base::call_memset(&bcx, base, fill, size, align, false); + base::call_memset(&bcx, base, v, size, align, false); return bcx; } } From 11d6312abd614fca3970902f137225e0437d0a09 Mon Sep 17 00:00:00 2001 From: Florian Zeitz <florob@babelmonkeys.de> Date: Fri, 4 Aug 2017 16:58:12 +0200 Subject: [PATCH 5/5] codegen tests: Check type of `len` argument to `llvm.memset.*` based on the exact intrinsic used --- src/test/codegen/slice-init.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/test/codegen/slice-init.rs b/src/test/codegen/slice-init.rs index cb684af3953..569d937c812 100644 --- a/src/test/codegen/slice-init.rs +++ b/src/test/codegen/slice-init.rs @@ -33,7 +33,7 @@ pub fn zero_len_array() { // CHECK-LABEL: @byte_array #[no_mangle] pub fn byte_array() { - // CHECK: call void 
@llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 7, i64 4 + // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 7, i[[WIDTH]] 4 // CHECK-NOT: br label %slice_loop_header{{.*}} let x = [7u8; 4]; drop(&x); @@ -49,7 +49,7 @@ enum Init { // CHECK-LABEL: @byte_enum_array #[no_mangle] pub fn byte_enum_array() { - // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 {{.*}}, i64 4 + // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 {{.*}}, i[[WIDTH]] 4 // CHECK-NOT: br label %slice_loop_header{{.*}} let x = [Init::Memset; 4]; drop(&x); @@ -58,7 +58,7 @@ pub fn byte_enum_array() { // CHECK-LABEL: @zeroed_integer_array #[no_mangle] pub fn zeroed_integer_array() { - // CHECK: call void @llvm.memset.p0i8.i{{[0-9]+}}(i8* {{.*}}, i8 0, i64 16 + // CHECK: call void @llvm.memset.p0i8.i[[WIDTH:[0-9]+]](i8* {{.*}}, i8 0, i[[WIDTH]] 16 // CHECK-NOT: br label %slice_loop_header{{.*}} let x = [0u32; 4]; drop(&x);