From 32202f20cdfb1e01d88d2b61edb492daf3f33e5e Mon Sep 17 00:00:00 2001
From: bjorn3 <bjorn3@users.noreply.github.com>
Date: Fri, 22 Apr 2022 21:11:38 +0200
Subject: [PATCH] Merge commit 'f2cdd4a78d89c009342197cf5844a21f8aa813df' into
 sync_cg_clif-2022-04-22

---
 Cargo.lock                                    | 40 +++++++-------
 Cargo.toml                                    | 12 ++---
 build_sysroot/Cargo.lock                      |  9 ++--
 example/float-minmax-pass.rs                  | 53 +++++++++++++++++++
 example/mini_core.rs                          | 16 ++++--
 example/mini_core_hello_world.rs              |  4 +-
 example/std_example.rs                        |  6 ++-
 ...table-simd-Disable-unsupported-tests.patch | 36 -------------
 rust-toolchain                                |  2 +-
 scripts/filter_profile.rs                     |  2 +-
 scripts/setup_rust_fork.sh                    |  2 +-
 scripts/test_rustc_tests.sh                   | 19 +++----
 scripts/tests.sh                              |  4 ++
 src/base.rs                                   |  3 +-
 src/cast.rs                                   | 29 +++++++---
 src/config.rs                                 |  9 ----
 src/discriminant.rs                           | 12 ++++-
 src/intrinsics/mod.rs                         | 24 ++-------
 src/intrinsics/simd.rs                        | 29 +++++-----
 src/lib.rs                                    |  2 -
 src/num.rs                                    | 18 +++++++
 21 files changed, 189 insertions(+), 142 deletions(-)
 create mode 100644 example/float-minmax-pass.rs

diff --git a/Cargo.lock b/Cargo.lock
index 30e9627c48d..7b8e43b639f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -33,18 +33,18 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
 name = "cranelift-bforest"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d16922317bd7dd104d509a373887822caa0242fc1def00de66abb538db221db4"
+checksum = "ed44413e7e2fe3260d0ed73e6956ab188b69c10ee92b892e401e0f4f6808c68b"
 dependencies = [
  "cranelift-entity",
 ]
 
 [[package]]
 name = "cranelift-codegen"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b80bf40380256307b68a3dcbe1b91cac92a533e212b5b635abc3e4525781a0a"
+checksum = "0b5d83f0f26bf213f971f45589d17e5b65e4861f9ed22392b0cbb6eaa5bd329c"
 dependencies = [
  "cranelift-bforest",
  "cranelift-codegen-meta",
@@ -59,30 +59,30 @@ dependencies = [
 
 [[package]]
 name = "cranelift-codegen-meta"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "703d0ed7d3bc6c7a814ca12858175bf4e93167a3584127858c686e4b5dd6e432"
+checksum = "6800dc386177df6ecc5a32680607ed8ba1fa0d31a2a59c8c61fbf44826b8191d"
 dependencies = [
  "cranelift-codegen-shared",
 ]
 
 [[package]]
 name = "cranelift-codegen-shared"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "80f52311e1c90de12dcf8c4b9999c6ebfd1ed360373e88c357160936844511f6"
+checksum = "c961f85070985ebc8fcdb81b838a5cf842294d1e6ed4852446161c7e246fd455"
 
 [[package]]
 name = "cranelift-entity"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66bc82ef522c1f643baf7d4d40b7c52643ee4549d8960b0e6a047daacb83f897"
+checksum = "2347b2b8d1d5429213668f2a8e36c85ee3c73984a2f6a79007e365d3e575e7ed"
 
 [[package]]
 name = "cranelift-frontend"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3cc35e4251864b17515845ba47447bca88fec9ca1a4186b19fe42526e36140e8"
+checksum = "4cbcdbf7bed29e363568b778649b69dabc3d727256d5d25236096ef693757654"
 dependencies = [
  "cranelift-codegen",
  "log",
@@ -92,9 +92,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-jit"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93c66d594ad3bfe4e58b1fbd8d17877a7c6564a5f2d6f78cbbf4b0182af1927f"
+checksum = "7c769d4e0d76f59c8b2a3bf0477d89ee149bb0731b53fbb245ee081d49063095"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -110,9 +110,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-module"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf356697c40232aa09e1e3fb8a350ee894e849ccecc4eac56ff0570a4575c325"
+checksum = "0ab57d399a2401074bb0cc40b3031e420f3d66d46ec0cf21eeae53ac04bd73e2"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
@@ -120,9 +120,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-native"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b882b2251c9845d509d92aebfdb6c8bb3b3b48e207ac951f21fbd20cfe7f90b3"
+checksum = "8f4cdf93552e5ceb2e3c042829ebb4de4378492705f769eadc6a7c6c5251624c"
 dependencies = [
  "cranelift-codegen",
  "libc",
@@ -131,9 +131,9 @@ dependencies = [
 
 [[package]]
 name = "cranelift-object"
-version = "0.82.1"
+version = "0.83.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d3f1a88e654e567d2591169239ed157ab290811a729a6468f53999c01001263"
+checksum = "cf8e65f4839c26e6237fc0744911d79b0a2ac5e76b4e4eebd14db2b8d849fd31"
 dependencies = [
  "anyhow",
  "cranelift-codegen",
diff --git a/Cargo.toml b/Cargo.toml
index 70c03da3f29..74f50808a98 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -8,12 +8,12 @@ crate-type = ["dylib"]
 
 [dependencies]
 # These have to be in sync with each other
-cranelift-codegen = { version = "0.82.1", features = ["unwind", "all-arch"] }
-cranelift-frontend = "0.82.1"
-cranelift-module = "0.82.1"
-cranelift-native = "0.82.1"
-cranelift-jit = { version = "0.82.1", optional = true }
-cranelift-object = "0.82.1"
+cranelift-codegen = { version = "0.83.0", features = ["unwind", "all-arch"] }
+cranelift-frontend = "0.83.0"
+cranelift-module = "0.83.0"
+cranelift-native = "0.83.0"
+cranelift-jit = { version = "0.83.0", optional = true }
+cranelift-object = "0.83.0"
 target-lexicon = "0.12.0"
 gimli = { version = "0.26.0", default-features = false, features = ["write"]}
 object = { version = "0.27.0", default-features = false, features = ["std", "read_core", "write", "archive", "coff", "elf", "macho", "pe"] }
diff --git a/build_sysroot/Cargo.lock b/build_sysroot/Cargo.lock
index f584f54e106..51ba0dbfcc7 100644
--- a/build_sysroot/Cargo.lock
+++ b/build_sysroot/Cargo.lock
@@ -56,9 +56,9 @@ dependencies = [
 
 [[package]]
 name = "compiler_builtins"
-version = "0.1.71"
+version = "0.1.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "163437f05ca8f29d7e9128ea728dedf5eb620e445fbca273641d3a3050305f23"
+checksum = "afdbb35d279238cf77f0c9e8d90ad50d6c7bff476ab342baafa29440f0f10bff"
 dependencies = [
  "rustc-std-workspace-core",
 ]
@@ -134,9 +134,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.121"
+version = "0.2.124"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
+checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
 dependencies = [
  "rustc-std-workspace-core",
 ]
@@ -203,6 +203,7 @@ dependencies = [
 name = "proc_macro"
 version = "0.0.0"
 dependencies = [
+ "core",
  "std",
 ]
 
diff --git a/example/float-minmax-pass.rs b/example/float-minmax-pass.rs
new file mode 100644
index 00000000000..b8f901d1ba1
--- /dev/null
+++ b/example/float-minmax-pass.rs
@@ -0,0 +1,53 @@
+// Copied from https://github.com/rust-lang/rust/blob/3fe3b89cd57229343eeca753fdd8c63d9b03c65c/src/test/ui/simd/intrinsic/float-minmax-pass.rs
+// run-pass
+// ignore-emscripten
+
+// Test that the simd_f{min,max} intrinsics produce the correct results.
+
+#![feature(repr_simd, platform_intrinsics)]
+#![allow(non_camel_case_types)]
+
+#[repr(simd)]
+#[derive(Copy, Clone, PartialEq, Debug)]
+struct f32x4(pub f32, pub f32, pub f32, pub f32);
+
+extern "platform-intrinsic" {
+    fn simd_fmin<T>(x: T, y: T) -> T;
+    fn simd_fmax<T>(x: T, y: T) -> T;
+}
+
+fn main() {
+    let x = f32x4(1.0, 2.0, 3.0, 4.0);
+    let y = f32x4(2.0, 1.0, 4.0, 3.0);
+
+    #[cfg(not(any(target_arch = "mips", target_arch = "mips64")))]
+    let nan = f32::NAN;
+    // MIPS hardware treats f32::NAN as SNAN. Clear the signaling bit.
+    // See https://github.com/rust-lang/rust/issues/52746.
+    #[cfg(any(target_arch = "mips", target_arch = "mips64"))]
+    let nan = f32::from_bits(f32::NAN.to_bits() - 1);
+
+    let n = f32x4(nan, nan, nan, nan);
+
+    unsafe {
+        let min0 = simd_fmin(x, y);
+        let min1 = simd_fmin(y, x);
+        assert_eq!(min0, min1);
+        let e = f32x4(1.0, 1.0, 3.0, 3.0);
+        assert_eq!(min0, e);
+        let minn = simd_fmin(x, n);
+        assert_eq!(minn, x);
+        let minn = simd_fmin(y, n);
+        assert_eq!(minn, y);
+
+        let max0 = simd_fmax(x, y);
+        let max1 = simd_fmax(y, x);
+        assert_eq!(max0, max1);
+        let e = f32x4(2.0, 2.0, 4.0, 4.0);
+        assert_eq!(max0, e);
+        let maxn = simd_fmax(x, n);
+        assert_eq!(maxn, x);
+        let maxn = simd_fmax(y, n);
+        assert_eq!(maxn, y);
+    }
+}
diff --git a/example/mini_core.rs b/example/mini_core.rs
index 7efc8dc785a..8da705e0cb0 100644
--- a/example/mini_core.rs
+++ b/example/mini_core.rs
@@ -16,6 +16,9 @@
 #[lang = "sized"]
 pub trait Sized {}
 
+#[lang = "destruct"]
+pub trait Destruct {}
+
 #[lang = "unsize"]
 pub trait Unsize<T: ?Sized> {}
 
@@ -491,13 +494,20 @@ pub trait Deref {
     fn deref(&self) -> &Self::Target;
 }
 
+#[repr(transparent)]
+#[rustc_layout_scalar_valid_range_start(1)]
+#[rustc_nonnull_optimization_guaranteed]
+pub struct NonNull<T: ?Sized>(pub *mut T);
+
+impl<T: ?Sized, U: ?Sized> CoerceUnsized<NonNull<U>> for NonNull<T> where T: Unsize<U> {}
+impl<T: ?Sized, U: ?Sized> DispatchFromDyn<NonNull<U>> for NonNull<T> where T: Unsize<U> {}
+
 pub struct Unique<T: ?Sized> {
-    pub pointer: *const T,
+    pub pointer: NonNull<T>,
     pub _marker: PhantomData<T>,
 }
 
 impl<T: ?Sized, U: ?Sized> CoerceUnsized<Unique<U>> for Unique<T> where T: Unsize<U> {}
-
 impl<T: ?Sized, U: ?Sized> DispatchFromDyn<Unique<U>> for Unique<T> where T: Unsize<U> {}
 
 #[lang = "owned_box"]
@@ -526,7 +536,7 @@ unsafe fn allocate(size: usize, _align: usize) -> *mut u8 {
 
 #[lang = "box_free"]
 unsafe fn box_free<T: ?Sized>(ptr: Unique<T>, alloc: ()) {
-    libc::free(ptr.pointer as *mut u8);
+    libc::free(ptr.pointer.0 as *mut u8);
 }
 
 #[lang = "drop"]
diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs
index c4730581335..85ca908d0a2 100644
--- a/example/mini_core_hello_world.rs
+++ b/example/mini_core_hello_world.rs
@@ -122,7 +122,7 @@ fn call_return_u128_pair() {
 #[allow(unreachable_code)] // FIXME false positive
 fn main() {
     take_unique(Unique {
-        pointer: 0 as *const (),
+        pointer: unsafe { NonNull(1 as *mut ()) },
         _marker: PhantomData,
     });
     take_f32(0.1);
@@ -173,7 +173,7 @@ fn main() {
         assert!(intrinsics::needs_drop::<NoisyDrop>());
 
         Unique {
-            pointer: 0 as *const &str,
+            pointer: NonNull(1 as *mut &str),
             _marker: PhantomData,
         } as Unique<dyn SomeTrait>;
 
diff --git a/example/std_example.rs b/example/std_example.rs
index 5bc51a541b5..0a2bce2621d 100644
--- a/example/std_example.rs
+++ b/example/std_example.rs
@@ -1,7 +1,8 @@
-#![feature(core_intrinsics, generators, generator_trait, is_sorted)]
+#![feature(core_intrinsics, generators, generator_trait, is_sorted, bench_black_box)]
 
 #[cfg(target_arch = "x86_64")]
 use std::arch::x86_64::*;
+use std::hint::black_box;
 use std::io::Write;
 use std::ops::Generator;
 
@@ -86,6 +87,9 @@ fn main() {
     assert_eq!(houndred_f64 as i128, 100);
     assert_eq!(1u128.rotate_left(2), 4);
 
+    assert_eq!(black_box(f32::NAN) as i128, 0);
+    assert_eq!(black_box(f32::NAN) as u128, 0);
+
     // Test signed 128bit comparing
     let max = usize::MAX as i128;
     if 100i128 < 0i128 || 100i128 > max {
diff --git a/patches/0001-portable-simd-Disable-unsupported-tests.patch b/patches/0001-portable-simd-Disable-unsupported-tests.patch
index c1325908691..54e13b090ab 100644
--- a/patches/0001-portable-simd-Disable-unsupported-tests.patch
+++ b/patches/0001-portable-simd-Disable-unsupported-tests.patch
@@ -102,42 +102,6 @@ index 6a8ecd3..68fcb49 100644
          }
      }
  }
-diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
-index 31b7ee2..bd04b3c 100644
---- a/crates/core_simd/tests/ops_macros.rs
-+++ b/crates/core_simd/tests/ops_macros.rs
-@@ -567,6 +567,7 @@ macro_rules! impl_float_tests {
-                     });
-                 }
- 
-+                /*
-                 fn horizontal_max<const LANES: usize>() {
-                     test_helpers::test_1(&|x| {
-                         let vmax = Vector::<LANES>::from_array(x).horizontal_max();
-@@ -590,6 +591,7 @@ macro_rules! impl_float_tests {
-                         Ok(())
-                     });
-                 }
-+                */
-             }
- 
-             #[cfg(feature = "std")]
-@@ -604,6 +606,7 @@ macro_rules! impl_float_tests {
-                         )
-                     }
- 
-+                    /*
-                     fn mul_add<const LANES: usize>() {
-                         test_helpers::test_ternary_elementwise(
-                             &Vector::<LANES>::mul_add,
-@@ -611,6 +614,7 @@ macro_rules! impl_float_tests {
-                             &|_, _, _| true,
-                         )
-                     }
-+                    */
-                 }
-             }
-         }
 -- 
 2.26.2.7.g19db9cfb68
 
diff --git a/rust-toolchain b/rust-toolchain
index 84d90e5db02..966097c248b 100644
--- a/rust-toolchain
+++ b/rust-toolchain
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2022-03-19"
+channel = "nightly-2022-04-21"
 components = ["rust-src", "rustc-dev", "llvm-tools-preview"]
diff --git a/scripts/filter_profile.rs b/scripts/filter_profile.rs
index a0e99267c2b..f4e863e5494 100755
--- a/scripts/filter_profile.rs
+++ b/scripts/filter_profile.rs
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 #![forbid(unsafe_code)]/* This line is ignored by bash
 # This block is ignored by rustc
 pushd $(dirname "$0")/../
diff --git a/scripts/setup_rust_fork.sh b/scripts/setup_rust_fork.sh
index 85c0109c6f6..cabbaaa8922 100644
--- a/scripts/setup_rust_fork.sh
+++ b/scripts/setup_rust_fork.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -e
 
 ./y.rs build --no-unstable-features
diff --git a/scripts/test_rustc_tests.sh b/scripts/test_rustc_tests.sh
index a32e6df2208..4cf24c02235 100755
--- a/scripts/test_rustc_tests.sh
+++ b/scripts/test_rustc_tests.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 set -e
 
 cd $(dirname "$0")/../
@@ -11,7 +11,7 @@ pushd rust
 command -v rg >/dev/null 2>&1 || cargo install ripgrep
 
 rm -r src/test/ui/{extern/,unsized-locals/,lto/,linkage*} || true
-for test in $(rg --files-with-matches "asm!|lto|// needs-asm-support|// needs-unwind" src/test/{ui,incremental}); do
+for test in $(rg --files-with-matches "lto|// needs-asm-support|// needs-unwind" src/test/{ui,incremental}); do
   rm $test
 done
 
@@ -25,14 +25,8 @@ git checkout -- src/test/ui/issues/auxiliary/issue-3136-a.rs # contains //~ERROR
 # ================
 
 # requires stack unwinding
-rm src/test/ui/backtrace.rs
-rm src/test/ui/process/multi-panic.rs
-rm src/test/ui/numbers-arithmetic/issue-8460.rs
 rm src/test/incremental/change_crate_dep_kind.rs
 rm src/test/incremental/issue-80691-bad-eval-cache.rs # -Cpanic=abort causes abort instead of exit(101)
-rm src/test/ui/panic-while-printing.rs
-rm src/test/ui/test-attrs/test-panic-while-printing.rs
-rm src/test/ui/test-attrs/test-type.rs
 
 # requires compiling with -Cpanic=unwind
 rm src/test/ui/test-attrs/test-fn-signature-verification-for-explicit-return-type.rs # "Cannot run dynamic test fn out-of-process"
@@ -85,8 +79,6 @@ rm src/test/ui/abi/stack-protector.rs # requires stack protector support
 
 # giving different but possibly correct results
 # =============================================
-rm src/test/ui/numbers-arithmetic/saturating-float-casts.rs # intrinsic gives different but valid result
-rm src/test/ui/simd/intrinsic/float-minmax-pass.rs # same
 rm src/test/ui/mir/mir_misc_casts.rs # depends on deduplication of constants
 rm src/test/ui/mir/mir_raw_fat_ptr.rs # same
 rm src/test/ui/consts/issue-33537.rs # same
@@ -112,9 +104,14 @@ rm src/test/ui/mir/ssa-analysis-regression-50041.rs # produces ICE
 
 rm src/test/ui/simd/intrinsic/generic-reduction-pass.rs # simd_reduce_add_unordered doesn't accept an accumulator for integer vectors
 
+rm src/test/ui/rfc-2091-track-caller/intrinsic-wrapper.rs # wrong result from `Location::caller()`
+
 # bugs in the test suite
 # ======================
-rm src/test/ui/unsafe/union.rs # has UB caught by cg_clif. see rust-lang/rust#95075
+rm src/test/ui/backtrace.rs # TODO warning
+rm src/test/ui/empty_global_asm.rs # TODO add needs-asm-support
+rm src/test/ui/simple_global_asm.rs # TODO add needs-asm-support
+rm src/test/ui/test-attrs/test-type.rs # TODO panic message on stderr. correct stdout
 
 echo "[TEST] rustc test suite"
 RUST_TEST_NOCAPTURE=1 COMPILETEST_FORCE_STAGE0=1 ./x.py test --stage 0 src/test/{codegen-units,run-make,run-pass-valgrind,ui,incremental}
diff --git a/scripts/tests.sh b/scripts/tests.sh
index fee1012c8f1..aae626081f6 100755
--- a/scripts/tests.sh
+++ b/scripts/tests.sh
@@ -72,6 +72,10 @@ function base_sysroot_tests() {
     $MY_RUSTC example/track-caller-attribute.rs --crate-type bin -Cpanic=abort --target "$TARGET_TRIPLE"
     $RUN_WRAPPER ./target/out/track-caller-attribute
 
+    echo "[AOT] float-minmax-pass"
+    $MY_RUSTC example/float-minmax-pass.rs --crate-type bin -Cpanic=abort --target "$TARGET_TRIPLE"
+    $RUN_WRAPPER ./target/out/float-minmax-pass
+
     echo "[AOT] mod_bench"
     $MY_RUSTC example/mod_bench.rs --crate-type bin --target "$TARGET_TRIPLE"
     $RUN_WRAPPER ./target/out/mod_bench
diff --git a/src/base.rs b/src/base.rs
index 8c45993a8b7..65346cb3962 100644
--- a/src/base.rs
+++ b/src/base.rs
@@ -821,7 +821,8 @@ pub(crate) fn codegen_place<'tcx>(
                 if cplace.layout().ty.is_box() {
                     cplace = cplace
                         .place_field(fx, Field::new(0)) // Box<T> -> Unique<T>
-                        .place_field(fx, Field::new(0)) // Unique<T> -> *const T
+                        .place_field(fx, Field::new(0)) // Unique<T> -> NonNull<T>
+                        .place_field(fx, Field::new(0)) // NonNull<T> -> *mut T
                         .place_deref(fx);
                 } else {
                     cplace = cplace.place_deref(fx);
diff --git a/src/cast.rs b/src/cast.rs
index e7e6afeb865..e19070774c6 100644
--- a/src/cast.rs
+++ b/src/cast.rs
@@ -84,7 +84,7 @@ pub(crate) fn clif_int_or_float_cast(
             fx.bcx.ins().fcvt_from_uint(to_ty, from)
         }
     } else if from_ty.is_float() && to_ty.is_int() {
-        if to_ty == types::I128 {
+        let val = if to_ty == types::I128 {
             // _____sssf___
             // __fix   sfti: f32 -> i128
             // __fix   dfti: f64 -> i128
@@ -109,13 +109,9 @@ pub(crate) fn clif_int_or_float_cast(
 
             let to_rust_ty = if to_signed { fx.tcx.types.i128 } else { fx.tcx.types.u128 };
 
-            return fx
-                .easy_call(&name, &[CValue::by_val(from, fx.layout_of(from_rust_ty))], to_rust_ty)
-                .load_scalar(fx);
-        }
-
-        // float -> int-like
-        if to_ty == types::I8 || to_ty == types::I16 {
+            fx.easy_call(&name, &[CValue::by_val(from, fx.layout_of(from_rust_ty))], to_rust_ty)
+                .load_scalar(fx)
+        } else if to_ty == types::I8 || to_ty == types::I16 {
             // FIXME implement fcvt_to_*int_sat.i8/i16
             let val = if to_signed {
                 fx.bcx.ins().fcvt_to_sint_sat(types::I32, from)
@@ -146,6 +142,23 @@ pub(crate) fn clif_int_or_float_cast(
             fx.bcx.ins().fcvt_to_sint_sat(to_ty, from)
         } else {
             fx.bcx.ins().fcvt_to_uint_sat(to_ty, from)
+        };
+
+        if let Some(false) = fx.tcx.sess.opts.debugging_opts.saturating_float_casts {
+            return val;
+        }
+
+        let is_not_nan = fx.bcx.ins().fcmp(FloatCC::Equal, from, from);
+        if to_ty == types::I128 {
+            // FIXME(bytecodealliance/wasmtime#3963): select.i128 on fcmp eq miscompiles
+            let (lsb, msb) = fx.bcx.ins().isplit(val);
+            let zero = fx.bcx.ins().iconst(types::I64, 0);
+            let lsb = fx.bcx.ins().select(is_not_nan, lsb, zero);
+            let msb = fx.bcx.ins().select(is_not_nan, msb, zero);
+            fx.bcx.ins().iconcat(lsb, msb)
+        } else {
+            let zero = fx.bcx.ins().iconst(to_ty, 0);
+            fx.bcx.ins().select(is_not_nan, val, zero)
         }
     } else if from_ty.is_float() && to_ty.is_float() {
         // float -> float
diff --git a/src/config.rs b/src/config.rs
index eef3c8c8d6e..e59a0cb0a23 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -48,12 +48,6 @@ pub struct BackendConfig {
     /// Can be set using `-Cllvm-args=display_cg_time=...`.
     pub display_cg_time: bool,
 
-    /// The register allocator to use.
-    ///
-    /// Defaults to the value of `CG_CLIF_REGALLOC` or `backtracking` otherwise. Can be set using
-    /// `-Cllvm-args=regalloc=...`.
-    pub regalloc: String,
-
     /// Enable the Cranelift ir verifier for all compilation passes. If not set it will only run
     /// once before passing the clif ir to Cranelift for compilation.
     ///
@@ -80,8 +74,6 @@ impl Default for BackendConfig {
                 args.split(' ').map(|arg| arg.to_string()).collect()
             },
             display_cg_time: bool_env_var("CG_CLIF_DISPLAY_CG_TIME"),
-            regalloc: std::env::var("CG_CLIF_REGALLOC")
-                .unwrap_or_else(|_| "backtracking".to_string()),
             enable_verifier: cfg!(debug_assertions) || bool_env_var("CG_CLIF_ENABLE_VERIFIER"),
             disable_incr_cache: bool_env_var("CG_CLIF_DISABLE_INCR_CACHE"),
         }
@@ -101,7 +93,6 @@ impl BackendConfig {
                 match name {
                     "mode" => config.codegen_mode = value.parse()?,
                     "display_cg_time" => config.display_cg_time = parse_bool(name, value)?,
-                    "regalloc" => config.regalloc = value.to_string(),
                     "enable_verifier" => config.enable_verifier = parse_bool(name, value)?,
                     "disable_incr_cache" => config.disable_incr_cache = parse_bool(name, value)?,
                     _ => return Err(format!("Unknown option `{}`", name)),
diff --git a/src/discriminant.rs b/src/discriminant.rs
index 357cb4a6d24..f619bb5ed5e 100644
--- a/src/discriminant.rs
+++ b/src/discriminant.rs
@@ -128,8 +128,16 @@ pub(crate) fn codegen_get_discriminant<'tcx>(
             let relative_discr = if niche_start == 0 {
                 tag
             } else {
-                // FIXME handle niche_start > i64::MAX
-                fx.bcx.ins().iadd_imm(tag, -i64::try_from(niche_start).unwrap())
+                let niche_start = match fx.bcx.func.dfg.value_type(tag) {
+                    types::I128 => {
+                        let lsb = fx.bcx.ins().iconst(types::I64, niche_start as u64 as i64);
+                        let msb =
+                            fx.bcx.ins().iconst(types::I64, (niche_start >> 64) as u64 as i64);
+                        fx.bcx.ins().iconcat(lsb, msb)
+                    }
+                    ty => fx.bcx.ins().iconst(ty, niche_start as i64),
+                };
+                fx.bcx.ins().isub(tag, niche_start)
             };
             let relative_max = niche_variants.end().as_u32() - niche_variants.start().as_u32();
             let is_niche = {
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index 310d27c6dec..d76dfca7960 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -1019,39 +1019,23 @@ fn codegen_regular_intrinsic_call<'tcx>(
             ret.write_cvalue(fx, old);
         };
 
-        // In Rust floating point min and max don't propagate NaN. In Cranelift they do however.
-        // For this reason it is necessary to use `a.is_nan() ? b : (a >= b ? b : a)` for `minnumf*`
-        // and `a.is_nan() ? b : (a <= b ? b : a)` for `maxnumf*`. NaN checks are done by comparing
-        // a float against itself. Only in case of NaN is it not equal to itself.
         minnumf32, (v a, v b) {
-            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
-            let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
-            let temp = fx.bcx.ins().select(a_ge_b, b, a);
-            let val = fx.bcx.ins().select(a_is_nan, b, temp);
+            let val = crate::num::codegen_float_min(fx, a, b);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
             ret.write_cvalue(fx, val);
         };
         minnumf64, (v a, v b) {
-            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
-            let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
-            let temp = fx.bcx.ins().select(a_ge_b, b, a);
-            let val = fx.bcx.ins().select(a_is_nan, b, temp);
+            let val = crate::num::codegen_float_min(fx, a, b);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
             ret.write_cvalue(fx, val);
         };
         maxnumf32, (v a, v b) {
-            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
-            let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
-            let temp = fx.bcx.ins().select(a_le_b, b, a);
-            let val = fx.bcx.ins().select(a_is_nan, b, temp);
+            let val = crate::num::codegen_float_max(fx, a, b);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f32));
             ret.write_cvalue(fx, val);
         };
         maxnumf64, (v a, v b) {
-            let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
-            let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
-            let temp = fx.bcx.ins().select(a_le_b, b, a);
-            let val = fx.bcx.ins().select(a_is_nan, b, temp);
+            let val = crate::num::codegen_float_max(fx, a, b);
             let val = CValue::by_val(val, fx.layout_of(fx.tcx.types.f64));
             ret.write_cvalue(fx, val);
         };
diff --git a/src/intrinsics/simd.rs b/src/intrinsics/simd.rs
index bc21d736166..d1ca9edf2e0 100644
--- a/src/intrinsics/simd.rs
+++ b/src/intrinsics/simd.rs
@@ -322,20 +322,21 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
             }
             assert_eq!(a.layout(), b.layout());
             assert_eq!(a.layout(), c.layout());
-            let layout = a.layout();
+            assert_eq!(a.layout(), ret.layout());
 
-            let (lane_count, _lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
-            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
-            assert_eq!(lane_count, ret_lane_count);
-            let ret_lane_layout = fx.layout_of(ret_lane_ty);
+            let layout = a.layout();
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
 
             for lane in 0..lane_count {
-                let a_lane = a.value_lane(fx, lane).load_scalar(fx);
-                let b_lane = b.value_lane(fx, lane).load_scalar(fx);
-                let c_lane = c.value_lane(fx, lane).load_scalar(fx);
+                let a_lane = a.value_lane(fx, lane);
+                let b_lane = b.value_lane(fx, lane);
+                let c_lane = c.value_lane(fx, lane);
 
-                let mul_lane = fx.bcx.ins().fmul(a_lane, b_lane);
-                let res_lane = CValue::by_val(fx.bcx.ins().fadd(mul_lane, c_lane), ret_lane_layout);
+                let res_lane = match lane_ty.kind() {
+                    ty::Float(FloatTy::F32) => fx.easy_call("fmaf", &[a_lane, b_lane, c_lane], lane_ty),
+                    ty::Float(FloatTy::F64) => fx.easy_call("fma", &[a_lane, b_lane, c_lane], lane_ty),
+                    _ => unreachable!(),
+                };
 
                 ret.place_lane(fx, lane).write_cvalue(fx, res_lane);
             }
@@ -354,8 +355,8 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                     _ => unreachable!("{:?}", lane_ty),
                 }
                 match intrinsic {
-                    sym::simd_fmin => fx.bcx.ins().fmin(x_lane, y_lane),
-                    sym::simd_fmax => fx.bcx.ins().fmax(x_lane, y_lane),
+                    sym::simd_fmin => crate::num::codegen_float_min(fx, x_lane, y_lane),
+                    sym::simd_fmax => crate::num::codegen_float_max(fx, x_lane, y_lane),
                     _ => unreachable!(),
                 }
             });
@@ -495,7 +496,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 let lt = match ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedLessThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedLessThan, a, b),
-                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::LessThan, a, b),
+                    ty::Float(_) => return crate::num::codegen_float_min(fx, a, b),
                     _ => unreachable!(),
                 };
                 fx.bcx.ins().select(lt, a, b)
@@ -512,7 +513,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
                 let gt = match ty.kind() {
                     ty::Int(_) => fx.bcx.ins().icmp(IntCC::SignedGreaterThan, a, b),
                     ty::Uint(_) => fx.bcx.ins().icmp(IntCC::UnsignedGreaterThan, a, b),
-                    ty::Float(_) => fx.bcx.ins().fcmp(FloatCC::GreaterThan, a, b),
+                    ty::Float(_) => return crate::num::codegen_float_max(fx, a, b),
                     _ => unreachable!(),
                 };
                 fx.bcx.ins().select(gt, a, b)
diff --git a/src/lib.rs b/src/lib.rs
index 878b9390e13..9d2e12f9898 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -256,8 +256,6 @@ fn build_isa(sess: &Session, backend_config: &BackendConfig) -> Box<dyn isa::Tar
 
     flags_builder.set("enable_llvm_abi_extensions", "true").unwrap();
 
-    flags_builder.set("regalloc", &backend_config.regalloc).unwrap();
-
     use rustc_session::config::OptLevel;
     match sess.opts.optimize {
         OptLevel::No => {
diff --git a/src/num.rs b/src/num.rs
index 545d390e269..4ce8adb182e 100644
--- a/src/num.rs
+++ b/src/num.rs
@@ -420,3 +420,21 @@ pub(crate) fn codegen_ptr_binop<'tcx>(
         CValue::by_val(fx.bcx.ins().bint(types::I8, res), fx.layout_of(fx.tcx.types.bool))
     }
 }
+
+// Rust's float min/max return the other operand when one input is NaN, whereas Cranelift's
+// min/max propagate NaN. `codegen_float_min` therefore emits
+// `a.is_nan() ? b : (a >= b ? b : a)` and `codegen_float_max` emits
+// `a.is_nan() ? b : (a <= b ? b : a)`. NaN is detected by comparing a value against itself.
+pub(crate) fn codegen_float_min(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value {
+    let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+    let a_ge_b = fx.bcx.ins().fcmp(FloatCC::GreaterThanOrEqual, a, b);
+    let temp = fx.bcx.ins().select(a_ge_b, b, a);
+    fx.bcx.ins().select(a_is_nan, b, temp)
+}
+
+pub(crate) fn codegen_float_max(fx: &mut FunctionCx<'_, '_, '_>, a: Value, b: Value) -> Value {
+    let a_is_nan = fx.bcx.ins().fcmp(FloatCC::NotEqual, a, a);
+    let a_le_b = fx.bcx.ins().fcmp(FloatCC::LessThanOrEqual, a, b);
+    let temp = fx.bcx.ins().select(a_le_b, b, a);
+    fx.bcx.ins().select(a_is_nan, b, temp)
+}