diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs
index 9705dd506b9..15c519dfcb4 100644
--- a/compiler/rustc_codegen_llvm/src/intrinsic.rs
+++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs
@@ -2066,14 +2066,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
         };
     }

-    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, 0.0);
+    arith_red!(simd_reduce_add_ordered: vector_reduce_add, vector_reduce_fadd, true, add, -0.0);
     arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
     arith_red!(
         simd_reduce_add_unordered: vector_reduce_add,
         vector_reduce_fadd_reassoc,
         false,
         add,
-        0.0
+        -0.0
     );
     arith_red!(
         simd_reduce_mul_unordered: vector_reduce_mul,
diff --git a/tests/assembly/simd/reduce-fadd-unordered.rs b/tests/assembly/simd/reduce-fadd-unordered.rs
new file mode 100644
index 00000000000..fa9ce6bd35e
--- /dev/null
+++ b/tests/assembly/simd/reduce-fadd-unordered.rs
@@ -0,0 +1,29 @@
+//@ revisions: x86_64 aarch64
+//@ assembly-output: emit-asm
+//@ compile-flags: --crate-type=lib -O
+//@[aarch64] only-aarch64
+//@[x86_64] only-x86_64
+//@[x86_64] compile-flags: -Ctarget-feature=+sse3
+#![feature(portable_simd)]
+#![feature(core_intrinsics)]
+use std::intrinsics::simd as intrinsics;
+use std::simd::*;
+// Regression test for https://github.com/rust-lang/rust/issues/130028
+// This intrinsic produces much worse code if you use +0.0 instead of -0.0 because
+// +0.0 isn't as easy to algebraically reassociate, even using LLVM's reassoc attribute!
+// It would emit about an extra fadd, depending on the architecture.
+
+// CHECK-LABEL: reduce_fadd_negative_zero
+pub unsafe fn reduce_fadd_negative_zero(v: f32x4) -> f32 {
+    // x86_64: addps
+    // x86_64-NEXT: movshdup
+    // x86_64-NEXT: addss
+    // x86_64-NOT: xorps
+
+    // aarch64: faddp
+    // aarch64-NEXT: faddp
+
+    // CHECK-NOT: {{f?}}add{{p?s*}}
+    // CHECK: ret
+    intrinsics::simd_reduce_add_unordered(v)
+}
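
A standalone sketch, separate from the patch itself, of why the identity element matters here: under IEEE 754 round-to-nearest, x + (-0.0) returns x bit-for-bit for every x, including x == -0.0, whereas (-0.0) + (+0.0) yields +0.0. With +0.0 as the start value the initial add is not a no-op, so LLVM cannot fold it away even under the reassoc flag, which is where the extra fadd/xorps in the regression test came from.

    // Illustration only (assumes the default rounding mode); not part of the patch.
    fn main() {
        let x = -0.0_f32;
        // -0.0 is the additive identity: the sum is x, bit-for-bit.
        assert_eq!((x + -0.0).to_bits(), x.to_bits());
        // +0.0 is not: -0.0 + 0.0 rounds to +0.0, flipping the sign bit,
        // so a reduction seeded with +0.0 starts with a non-trivial add.
        assert_ne!((x + 0.0).to_bits(), x.to_bits());
    }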