Avoid multiplications by 1
```
Benchmark #1: ./raytracer_cg_clif_pre
  Time (mean ± σ):      9.553 s ±  0.129 s    [User: 9.543 s, System: 0.008 s]
  Range (min … max):    9.438 s …  9.837 s    10 runs

Benchmark #2: ./raytracer_cg_clif_post
  Time (mean ± σ):      9.463 s ±  0.055 s    [User: 9.452 s, System: 0.008 s]
  Range (min … max):    9.387 s …  9.518 s    10 runs

Summary
  './raytracer_cg_clif_post' ran
    1.01 ± 0.01 times faster than './raytracer_cg_clif_pre'
```
This commit is contained in: parent 4700926e54 · commit 96c4542dc3
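The change is a small strength reduction applied at four intrinsic call sites: when the element or pointee size is statically known to be 1, the byte count equals the element count and no multiply is emitted at all; otherwise the constant factor is folded directly into a single `imul_imm` instead of materializing it with `iconst` and multiplying with `imul`. A minimal sketch of the pattern as a standalone helper, assuming Cranelift's `cranelift-frontend` builder API (the helper name and the factoring into a function are hypothetical, not part of this commit):

```rust
use cranelift_codegen::ir::{InstBuilder, Value};
use cranelift_frontend::FunctionBuilder;

/// Hypothetical helper: scale a dynamic `count` by a compile-time
/// constant `size`, emitting as little IR as possible.
fn scale_by_const_size(bcx: &mut FunctionBuilder<'_>, count: Value, size: u64) -> Value {
    if size != 1 {
        // The constant is encoded as an immediate operand of `imul_imm`,
        // so no separate `iconst` instruction is needed.
        bcx.ins().imul_imm(count, size as i64)
    } else {
        // Multiplying by 1 is the identity; emit nothing.
        count
    }
}
```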
```diff
@@ -497,12 +497,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         };
         copy | copy_nonoverlapping, <elem_ty> (v src, v dst, v count) {
             let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
-            let elem_size = fx
-                .bcx
-                .ins()
-                .iconst(fx.pointer_type, elem_size as i64);
             assert_eq!(args.len(), 3);
-            let byte_amount = fx.bcx.ins().imul(count, elem_size);
+            let byte_amount = if elem_size != 1 {
+                fx.bcx.ins().imul_imm(count, elem_size as i64)
+            } else {
+                count
+            };

             if intrinsic.contains("nonoverlapping") {
                 // FIXME emit_small_memcpy
@@ -515,12 +515,12 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         // NOTE: the volatile variants have src and dst swapped
         volatile_copy_memory | volatile_copy_nonoverlapping_memory, <elem_ty> (v dst, v src, v count) {
             let elem_size: u64 = fx.layout_of(elem_ty).size.bytes();
-            let elem_size = fx
-                .bcx
-                .ins()
-                .iconst(fx.pointer_type, elem_size as i64);
             assert_eq!(args.len(), 3);
-            let byte_amount = fx.bcx.ins().imul(count, elem_size);
+            let byte_amount = if elem_size != 1 {
+                fx.bcx.ins().imul_imm(count, elem_size as i64)
+            } else {
+                count
+            };

             // FIXME make the copy actually volatile when using emit_small_mem{cpy,move}
             if intrinsic.contains("nonoverlapping") {
@@ -676,7 +676,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         offset | arith_offset, (c base, v offset) {
             let pointee_ty = base.layout().ty.builtin_deref(true).unwrap().ty;
             let pointee_size = fx.layout_of(pointee_ty).size.bytes();
-            let ptr_diff = fx.bcx.ins().imul_imm(offset, pointee_size as i64);
+            let ptr_diff = if pointee_size != 1 {
+                fx.bcx.ins().imul_imm(offset, pointee_size as i64)
+            } else {
+                offset
+            };
             let base_val = base.load_scalar(fx);
             let res = fx.bcx.ins().iadd(base_val, ptr_diff);
             ret.write_cvalue(fx, CValue::by_val(res, base.layout()));
@@ -688,7 +692,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
         write_bytes | volatile_set_memory, (c dst, v val, v count) {
             let pointee_ty = dst.layout().ty.builtin_deref(true).unwrap().ty;
             let pointee_size = fx.layout_of(pointee_ty).size.bytes();
-            let count = fx.bcx.ins().imul_imm(count, pointee_size as i64);
+            let count = if pointee_size != 1 {
+                fx.bcx.ins().imul_imm(count, pointee_size as i64)
+            } else {
+                count
+            };
             let dst_ptr = dst.load_scalar(fx);
             // FIXME make the memset actually volatile when switching to emit_small_memset
             // FIXME use emit_small_memset
```
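With a helper like the `scale_by_const_size` sketch above, each of the four call sites in this diff would shrink to a one-liner such as `let byte_amount = scale_by_const_size(&mut fx.bcx, count, elem_size);`. That refactor is hypothetical; the commit itself inlines the `if`/`else` at each site.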