mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-25 16:24:46 +00:00
Rollup merge of #130325 - workingjubilee:plus-minus-zero-redux, r=RalfJung,jieyouxu
Use -0.0 in `intrinsics::simd::reduce_add_unordered`. -0.0, not +0.0, is the actual additive identity for floats (x + -0.0 == x for every x, whereas -0.0 + +0.0 == +0.0), and this matters to codegen. try-job: aarch64-gnu
This commit is contained in:
commit
68758c0560
@ -2066,14 +2066,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
|
||||
};
|
||||
}
|
||||
|
||||
arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
|
||||
arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
|
||||
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
|
||||
arith_red!(
|
||||
simd_reduce_add_unordered: vector_reduce_add,
|
||||
vector_reduce_fadd_reassoc,
|
||||
false,
|
||||
add,
|
||||
0.0
|
||||
-0.0
|
||||
);
|
||||
arith_red!(
|
||||
simd_reduce_mul_unordered: vector_reduce_mul,
|
||||
|
29
tests/assembly/simd/reduce-fadd-unordered.rs
Normal file
29
tests/assembly/simd/reduce-fadd-unordered.rs
Normal file
@ -0,0 +1,29 @@
|
||||
//@ revisions: x86_64 aarch64
|
||||
//@ assembly-output: emit-asm
|
||||
//@ compile-flags: --crate-type=lib -O
|
||||
//@[aarch64] only-aarch64
|
||||
//@[x86_64] only-x86_64
|
||||
//@[x86_64] compile-flags: -Ctarget-feature=+sse3
|
||||
#![feature(portable_simd)]
|
||||
#![feature(core_intrinsics)]
|
||||
use std::intrinsics::simd as intrinsics;
|
||||
use std::simd::*;
|
||||
// Regression test for https://github.com/rust-lang/rust/issues/130028
|
||||
// This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
|
||||
// +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
|
||||
// With +0.0 it would emit roughly one extra fadd, depending on the architecture.
|
||||
|
||||
// CHECK-LABEL: reduce_fadd_negative_zero
|
||||
/// Sums the lanes of `v` through the `simd_reduce_add_unordered` intrinsic.
///
/// The FileCheck directives inside the body pin the assembly expected for each
/// revision: on x86_64 an `addps`, `movshdup`, `addss` sequence with no `xorps`
/// before the return, and on aarch64 two back-to-back `faddp` instructions.
/// For both revisions no further add-like instruction may appear before `ret`.
///
/// NOTE(review): presumably `unsafe` only because the intrinsic itself is
/// unsafe to call; confirm against the intrinsic's declaration.
pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
|
||||
// x86_64: addps
|
||||
// x86_64-NEXT: movshdup
|
||||
// x86_64-NEXT: addss
|
||||
// x86_64-NOT: xorps
|
||||
|
||||
// aarch64: faddp
|
||||
// aarch64-NEXT: faddp
|
||||
|
||||
// Shared by both revisions: after the sequences above, reject any extra
// add-style instruction before the function returns.
// CHECK-NOT: {{f?}}add{{p?s*}}
|
||||
// CHECK: ret
|
||||
intrinsics::simd_reduce_add_unordered(v)
|
||||
}
|
Loading…
Reference in New Issue
Block a user