Avoid multiplications by 1

```
Benchmark #1: ./raytracer_cg_clif_pre
  Time (mean ± σ):      9.553 s ±  0.129 s    [User: 9.543 s, System: 0.008 s]
  Range (min … max):    9.438 s …  9.837 s    10 runs

Benchmark #2: ./raytracer_cg_clif_post
  Time (mean ± σ):      9.463 s ±  0.055 s    [User: 9.452 s, System: 0.008 s]
  Range (min … max):    9.387 s …  9.518 s    10 runs

Summary
  './raytracer_cg_clif_post' ran
    1.01 ± 0.01 times faster than './raytracer_cg_clif_pre'
```
This commit is contained in:
bjorn3 2020-11-12 11:09:58 +01:00
parent 4700926e54
commit 96c4542dc3

View File

@ -497,12 +497,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
};
copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
let elem_size = fx
.bcx
.ins()
.iconst(fx.pointer_type, elem_size as i64);
assert_eq!(args.len(), 3);
let byte_amount = fx.bcx.ins().imul(count, elem_size);
let byte_amount = if elem_size != 1 {
fx.bcx.ins().imul_imm(count, elem_size as i64)
} else {
count
};
if intrinsic.contains("nonoverlapping") {
// FIXME emit_small_memcpy
@ -515,12 +515,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
// NOTE: the volatile variants have src and dst swapped
volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
let elem_size = fx
.bcx
.ins()
.iconst(fx.pointer_type, elem_size as i64);
assert_eq!(args.len(), 3);
let byte_amount = fx.bcx.ins().imul(count, elem_size);
let byte_amount = if elem_size != 1 {
fx.bcx.ins().imul_imm(count, elem_size as i64)
} else {
count
};
// FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
if intrinsic.contains("nonoverlapping") {
@ -676,7 +676,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
offset | arith_offset, (c base, v offset) {
let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty;
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
let ptr_diff = if pointee_size != 1 {
fx.bcx.ins().imul_imm(offset, pointee_size as i64)
} else {
offset
};
let base_val = base.load_scalar(fx);
let res = fx.bcx.ins().iadd(base_val, ptr_diff);
ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
@ -688,7 +692,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
write_bytes | volatile_set_memory, (c dst, v val, v count) {
let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty;
let pointee_size = fx.layout_of(pointee_ty).size.bytes();
let count = fx.bcx.ins().imul_imm(count, pointee_size as i64);
let count = if pointee_size != 1 {
fx.bcx.ins().imul_imm(count, pointee_size as i64)
} else {
count
};
let dst_ptr = dst.load_scalar(fx);
// FIXME make the memset actually volatile when switching to emit_small_memset
// FIXME use emit_small_memset