From 56951e4f2b136fbf8b6a93fd1577779855052a62 Mon Sep 17 00:00:00 2001
From: bjorn3 <17426603+bjorn3@users.noreply.github.com>
Date: Thu, 9 Feb 2023 12:38:16 +0100
Subject: [PATCH] Merge commit '7d53619064ab7045c383644cb445052d2a3d46db' into
 sync_cg_clif-2023-02-09

---
 .github/workflows/main.yml    | 146 ++++++++++++++++++++++----
 build_sysroot/Cargo.lock      |   4 +-
 build_system/abi_cafe.rs      |   8 +-
 build_system/bench.rs         |  32 +++---
 build_system/build_backend.rs |   6 +-
 build_system/build_sysroot.rs |   3 +
 build_system/mod.rs           |   8 +-
 build_system/prepare.rs       |  18 ++--
 build_system/tests.rs         |   5 -
 build_system/utils.rs         |  18 +++-
 config.txt                    |   1 -
 rust-toolchain                |   2 +-
 scripts/test_rustc_tests.sh   |   1 +
 src/abi/comments.rs           |   4 +-
 src/allocator.rs              |  75 +++-----------
 src/base.rs                   |  91 +++++++++++------
 src/common.rs                 |  38 +++++++
 src/compiler_builtins.rs      |  38 ++++++-
 src/config.rs                 |   8 --
 src/driver/aot.rs             | 128 ++++++++++++-----------
 src/driver/jit.rs             |  46 +++++----
 src/driver/mod.rs             |  15 +--
 src/global_asm.rs             |  43 +++++++-
 src/inline_asm.rs             | 187 ++++++++++++++++++++++++++--------
 src/intrinsics/mod.rs         |  31 +++---
 src/value_and_place.rs        |  11 +-
 26 files changed, 638 insertions(+), 329 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index c0daf69e98e..9d3ed3ac5d0 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -21,6 +21,7 @@ jobs:
         cargo fmt --check
         rustfmt --check build_system/mod.rs
 
+
   build:
     runs-on: ${{ matrix.os }}
     timeout-minutes: 60
@@ -33,7 +34,7 @@ jobs:
       fail-fast: false
       matrix:
         include:
-          - os: ubuntu-20.04 # FIXME switch to ubuntu-22.04 once #1303 is fixed
+          - os: ubuntu-latest
             env:
               TARGET_TRIPLE: x86_64-unknown-linux-gnu
           - os: macos-latest
@@ -112,23 +113,6 @@ jobs:
         TARGET_TRIPLE: ${{ matrix.env.TARGET_TRIPLE }}
       run: ./y.rs test
 
-    - name: Package prebuilt cg_clif
-      run: tar cvfJ cg_clif.tar.xz dist
-
-    - name: Upload prebuilt cg_clif
-      if: matrix.os == 'windows-latest' || matrix.env.TARGET_TRIPLE != 'x86_64-pc-windows-gnu'
-      uses: actions/upload-artifact@v3
-      with:
-        name: cg_clif-${{ matrix.env.TARGET_TRIPLE }}
-        path: cg_clif.tar.xz
-
-    - name: Upload prebuilt cg_clif (cross compile)
-      if: matrix.os != 'windows-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
-      uses: actions/upload-artifact@v3
-      with:
-        name: cg_clif-${{ runner.os }}-cross-x86_64-mingw
-        path: cg_clif.tar.xz
-
 
   abi_cafe:
     runs-on: ${{ matrix.os }}
@@ -185,3 +169,129 @@ jobs:
       env:
         TARGET_TRIPLE: ${{ matrix.env.TARGET_TRIPLE }}
       run: ./y.rs abi-cafe
+
+
+  bench:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Cache cargo target dir
+      uses: actions/cache@v3
+      with:
+        path: build/cg_clif
+        key: ${{ runner.os }}-x86_64-unknown-linux-gnu-cargo-build-target-${{ hashFiles('rust-toolchain', '**/Cargo.lock') }}
+
+    - name: Cache cargo bin dir
+      uses: actions/cache@v3
+      with: # the bench job has no build matrix, so the target triple is spelled out in the key
+        path: ~/.cargo/bin
+        key: ${{ runner.os }}-x86_64-unknown-linux-gnu-cargo-bin-dir-${{ hashFiles('rust-toolchain', '**/Cargo.lock') }}
+
+    - name: Use sparse cargo registry
+      run: |
+        cat >> ~/.cargo/config.toml <<EOF
+        [unstable]
+        sparse-registry = true
+        EOF
+
+    - name: Install hyperfine
+      run: cargo install hyperfine || true
+
+    - name: Prepare dependencies
+      run: ./y.rs prepare
+
+    - name: Build
+      run: CI_OPT=1 ./y.rs build --sysroot none
+
+    - name: Benchmark
+      run: CI_OPT=1 ./y.rs bench
+
+
+  dist:
+    runs-on: ${{ matrix.os }}
+    timeout-minutes: 60
+
+    defaults:
+      run:
+        shell: bash
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # FIXME update at some point in the future once most distros use a newer glibc
+          - os: ubuntu-20.04
+            env:
+              TARGET_TRIPLE: x86_64-unknown-linux-gnu
+          - os: macos-latest
+            env:
+              TARGET_TRIPLE: x86_64-apple-darwin
+          # cross-compile from Linux to Windows using mingw
+          - os: ubuntu-latest
+            env:
+              TARGET_TRIPLE: x86_64-pc-windows-gnu
+          - os: windows-latest
+            env:
+              TARGET_TRIPLE: x86_64-pc-windows-msvc
+          - os: windows-latest
+            env:
+              TARGET_TRIPLE: x86_64-pc-windows-gnu
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Cache cargo target dir
+      uses: actions/cache@v3
+      with:
+        path: build/cg_clif
+        key: ${{ runner.os }}-${{ matrix.env.TARGET_TRIPLE }}-dist-cargo-build-target-${{ hashFiles('rust-toolchain', '**/Cargo.lock') }}
+
+    - name: Set MinGW as the default toolchain
+      if: matrix.os == 'windows-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
+      run: rustup set default-host x86_64-pc-windows-gnu
+
+    - name: Install MinGW toolchain and wine
+      if: matrix.os == 'ubuntu-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y gcc-mingw-w64-x86-64 wine-stable
+
+    - name: Use sparse cargo registry
+      run: |
+        cat >> ~/.cargo/config.toml <<EOF
+        [unstable]
+        sparse-registry = true
+        EOF
+
+    - name: Prepare dependencies
+      run: ./y.rs prepare
+
+    - name: Build backend
+      run: CI_OPT=1 ./y.rs build --sysroot none
+
+    - name: Build sysroot
+      run: CI_OPT=1 ./y.rs build
+
+    - name: Package prebuilt cg_clif
+      run: tar cvfJ cg_clif.tar.xz dist
+
+    - name: Upload prebuilt cg_clif
+      if: matrix.os == 'windows-latest' || matrix.env.TARGET_TRIPLE != 'x86_64-pc-windows-gnu'
+      uses: actions/upload-artifact@v3
+      with:
+        name: cg_clif-${{ matrix.env.TARGET_TRIPLE }}
+        path: cg_clif.tar.xz
+
+    - name: Upload prebuilt cg_clif (cross compile)
+      if: matrix.os != 'windows-latest' && matrix.env.TARGET_TRIPLE == 'x86_64-pc-windows-gnu'
+      uses: actions/upload-artifact@v3
+      with:
+        name: cg_clif-${{ runner.os }}-cross-x86_64-mingw
+        path: cg_clif.tar.xz
diff --git a/build_sysroot/Cargo.lock b/build_sysroot/Cargo.lock
index 24f15fc8521..b7e0b68a2a2 100644
--- a/build_sysroot/Cargo.lock
+++ b/build_sysroot/Cargo.lock
@@ -34,9 +34,9 @@ dependencies = [
 
 [[package]]
 name = "cc"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
+checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
 
 [[package]]
 name = "cfg-if"
diff --git a/build_system/abi_cafe.rs b/build_system/abi_cafe.rs
index dbee9be04ee..0da27f529b3 100644
--- a/build_system/abi_cafe.rs
+++ b/build_system/abi_cafe.rs
@@ -6,11 +6,10 @@ use super::prepare::GitRepo;
 use super::utils::{spawn_and_wait, CargoProject, Compiler};
 use super::SysrootKind;
 
-pub(crate) static ABI_CAFE_REPO: GitRepo =
+static ABI_CAFE_REPO: GitRepo =
     GitRepo::github("Gankra", "abi-cafe", "4c6dc8c9c687e2b3a760ff2176ce236872b37212", "abi-cafe");
 
-pub(crate) static ABI_CAFE: CargoProject =
-    CargoProject::new(&ABI_CAFE_REPO.source_dir(), "abi_cafe");
+static ABI_CAFE: CargoProject = CargoProject::new(&ABI_CAFE_REPO.source_dir(), "abi_cafe");
 
 pub(crate) fn run(
     channel: &str,
@@ -19,6 +18,9 @@ pub(crate) fn run(
     cg_clif_dylib: &Path,
     bootstrap_host_compiler: &Compiler,
 ) {
+    ABI_CAFE_REPO.fetch(dirs);
+    spawn_and_wait(ABI_CAFE.fetch("cargo", &bootstrap_host_compiler.rustc, dirs));
+
     eprintln!("Building sysroot for abi-cafe");
     build_sysroot::build_sysroot(
         dirs,
diff --git a/build_system/bench.rs b/build_system/bench.rs
index 01d44dafbdd..a9a851d0a8a 100644
--- a/build_system/bench.rs
+++ b/build_system/bench.rs
@@ -5,9 +5,9 @@ use std::path::Path;
 use super::path::{Dirs, RelPath};
 use super::prepare::GitRepo;
 use super::rustc_info::get_file_name;
-use super::utils::{hyperfine_command, is_ci, spawn_and_wait, CargoProject, Compiler};
+use super::utils::{hyperfine_command, spawn_and_wait, CargoProject, Compiler};
 
-pub(crate) static SIMPLE_RAYTRACER_REPO: GitRepo = GitRepo::github(
+static SIMPLE_RAYTRACER_REPO: GitRepo = GitRepo::github(
     "ebobby",
     "simple-raytracer",
     "804a7a21b9e673a482797aa289a18ed480e4d813",
@@ -15,10 +15,10 @@ pub(crate) static SIMPLE_RAYTRACER_REPO: GitRepo = GitRepo::github(
 );
 
 // Use a separate target dir for the initial LLVM build to reduce unnecessary recompiles
-pub(crate) static SIMPLE_RAYTRACER_LLVM: CargoProject =
+static SIMPLE_RAYTRACER_LLVM: CargoProject =
     CargoProject::new(&SIMPLE_RAYTRACER_REPO.source_dir(), "simple_raytracer_llvm");
 
-pub(crate) static SIMPLE_RAYTRACER: CargoProject =
+static SIMPLE_RAYTRACER: CargoProject =
     CargoProject::new(&SIMPLE_RAYTRACER_REPO.source_dir(), "simple_raytracer");
 
 pub(crate) fn benchmark(dirs: &Dirs, bootstrap_host_compiler: &Compiler) {
@@ -32,6 +32,15 @@ fn benchmark_simple_raytracer(dirs: &Dirs, bootstrap_host_compiler: &Compiler) {
         std::process::exit(1);
     }
 
+    if !SIMPLE_RAYTRACER_REPO.source_dir().to_path(dirs).exists() {
+        SIMPLE_RAYTRACER_REPO.fetch(dirs);
+        spawn_and_wait(SIMPLE_RAYTRACER.fetch(
+            &bootstrap_host_compiler.cargo,
+            &bootstrap_host_compiler.rustc,
+            dirs,
+        ));
+    }
+
     eprintln!("[LLVM BUILD] simple-raytracer");
     let build_cmd = SIMPLE_RAYTRACER_LLVM.build(bootstrap_host_compiler, dirs);
     spawn_and_wait(build_cmd);
@@ -45,10 +54,7 @@ fn benchmark_simple_raytracer(dirs: &Dirs, bootstrap_host_compiler: &Compiler) {
     )
     .unwrap();
 
-    let run_runs = env::var("RUN_RUNS")
-        .unwrap_or(if is_ci() { "2" } else { "10" }.to_string())
-        .parse()
-        .unwrap();
+    let bench_runs = env::var("BENCH_RUNS").unwrap_or_else(|_| "10".to_string()).parse().unwrap();
 
     eprintln!("[BENCH COMPILE] ebobby/simple-raytracer");
     let cargo_clif =
@@ -57,24 +63,24 @@ fn benchmark_simple_raytracer(dirs: &Dirs, bootstrap_host_compiler: &Compiler) {
     let target_dir = SIMPLE_RAYTRACER.target_dir(dirs);
 
     let clean_cmd = format!(
-        "cargo clean --manifest-path {manifest_path} --target-dir {target_dir}",
+        "RUSTC=rustc cargo clean --manifest-path {manifest_path} --target-dir {target_dir}",
         manifest_path = manifest_path.display(),
         target_dir = target_dir.display(),
     );
     let llvm_build_cmd = format!(
-        "cargo build --manifest-path {manifest_path} --target-dir {target_dir}",
+        "RUSTC=rustc cargo build --manifest-path {manifest_path} --target-dir {target_dir}",
         manifest_path = manifest_path.display(),
         target_dir = target_dir.display(),
     );
     let clif_build_cmd = format!(
-        "{cargo_clif} build --manifest-path {manifest_path} --target-dir {target_dir}",
+        "RUSTC=rustc {cargo_clif} build --manifest-path {manifest_path} --target-dir {target_dir}",
         cargo_clif = cargo_clif.display(),
         manifest_path = manifest_path.display(),
         target_dir = target_dir.display(),
     );
 
     let bench_compile =
-        hyperfine_command(1, run_runs, Some(&clean_cmd), &llvm_build_cmd, &clif_build_cmd);
+        hyperfine_command(1, bench_runs, Some(&clean_cmd), &llvm_build_cmd, &clif_build_cmd);
 
     spawn_and_wait(bench_compile);
 
@@ -87,7 +93,7 @@ fn benchmark_simple_raytracer(dirs: &Dirs, bootstrap_host_compiler: &Compiler) {
 
     let mut bench_run = hyperfine_command(
         0,
-        run_runs,
+        bench_runs,
         None,
         Path::new(".").join(get_file_name("raytracer_cg_llvm", "bin")).to_str().unwrap(),
         Path::new(".").join(get_file_name("raytracer_cg_clif", "bin")).to_str().unwrap(),
diff --git a/build_system/build_backend.rs b/build_system/build_backend.rs
index 514404305a3..4b740fa2db6 100644
--- a/build_system/build_backend.rs
+++ b/build_system/build_backend.rs
@@ -3,7 +3,7 @@ use std::path::PathBuf;
 
 use super::path::{Dirs, RelPath};
 use super::rustc_info::get_file_name;
-use super::utils::{is_ci, CargoProject, Compiler};
+use super::utils::{is_ci, is_ci_opt, CargoProject, Compiler};
 
 pub(crate) static CG_CLIF: CargoProject = CargoProject::new(&RelPath::SOURCE, "cg_clif");
 
@@ -26,7 +26,9 @@ pub(crate) fn build_backend(
         // Disabling incr comp reduces cache size and incr comp doesn't save as much on CI anyway
         cmd.env("CARGO_BUILD_INCREMENTAL", "false");
 
-        cmd.env("CARGO_PROFILE_RELEASE_DEBUG_ASSERTIONS", "true");
+        if !is_ci_opt() {
+            cmd.env("CARGO_PROFILE_RELEASE_DEBUG_ASSERTIONS", "true");
+        }
     }
 
     if use_unstable_features {
diff --git a/build_system/build_sysroot.rs b/build_system/build_sysroot.rs
index bd04fdbe304..2e04f2c6811 100644
--- a/build_system/build_sysroot.rs
+++ b/build_system/build_sysroot.rs
@@ -248,6 +248,9 @@ fn build_clif_sysroot_for_triple(
         build_cmd.arg("--release");
     }
     build_cmd.env("__CARGO_DEFAULT_LIB_METADATA", "cg_clif");
+    if compiler.triple.contains("apple") {
+        build_cmd.env("CARGO_PROFILE_RELEASE_SPLIT_DEBUGINFO", "packed");
+    }
     spawn_and_wait(build_cmd);
 
     for entry in fs::read_dir(build_dir.join("deps")).unwrap() {
diff --git a/build_system/mod.rs b/build_system/mod.rs
index 8dcbe8de189..8a53acdf7e3 100644
--- a/build_system/mod.rs
+++ b/build_system/mod.rs
@@ -2,7 +2,7 @@ use std::env;
 use std::path::PathBuf;
 use std::process;
 
-use self::utils::{is_ci, Compiler};
+use self::utils::{is_ci, is_ci_opt, Compiler};
 
 mod abi_cafe;
 mod bench;
@@ -53,8 +53,10 @@ pub fn main() {
         // Disabling incr comp reduces cache size and incr comp doesn't save as much on CI anyway
         env::set_var("CARGO_BUILD_INCREMENTAL", "false");
 
-        // Enable the Cranelift verifier
-        env::set_var("CG_CLIF_ENABLE_VERIFIER", "1");
+        if !is_ci_opt() {
+            // Enable the Cranelift verifier
+            env::set_var("CG_CLIF_ENABLE_VERIFIER", "1");
+        }
     }
 
     let mut args = env::args().skip(1);
diff --git a/build_system/prepare.rs b/build_system/prepare.rs
index f25a81dc234..50b1b7836de 100644
--- a/build_system/prepare.rs
+++ b/build_system/prepare.rs
@@ -11,22 +11,18 @@ use super::utils::{copy_dir_recursively, git_command, retry_spawn_and_wait, spaw
 pub(crate) fn prepare(dirs: &Dirs) {
     RelPath::DOWNLOAD.ensure_fresh(dirs);
 
-    spawn_and_wait(super::build_backend::CG_CLIF.fetch("cargo", dirs));
+    spawn_and_wait(super::build_backend::CG_CLIF.fetch("cargo", "rustc", dirs));
 
     prepare_sysroot(dirs);
-    spawn_and_wait(super::build_sysroot::STANDARD_LIBRARY.fetch("cargo", dirs));
-    spawn_and_wait(super::tests::LIBCORE_TESTS.fetch("cargo", dirs));
+    spawn_and_wait(super::build_sysroot::STANDARD_LIBRARY.fetch("cargo", "rustc", dirs));
+    spawn_and_wait(super::tests::LIBCORE_TESTS.fetch("cargo", "rustc", dirs));
 
-    super::abi_cafe::ABI_CAFE_REPO.fetch(dirs);
-    spawn_and_wait(super::abi_cafe::ABI_CAFE.fetch("cargo", dirs));
     super::tests::RAND_REPO.fetch(dirs);
-    spawn_and_wait(super::tests::RAND.fetch("cargo", dirs));
+    spawn_and_wait(super::tests::RAND.fetch("cargo", "rustc", dirs));
     super::tests::REGEX_REPO.fetch(dirs);
-    spawn_and_wait(super::tests::REGEX.fetch("cargo", dirs));
+    spawn_and_wait(super::tests::REGEX.fetch("cargo", "rustc", dirs));
     super::tests::PORTABLE_SIMD_REPO.fetch(dirs);
-    spawn_and_wait(super::tests::PORTABLE_SIMD.fetch("cargo", dirs));
-    super::bench::SIMPLE_RAYTRACER_REPO.fetch(dirs);
-    spawn_and_wait(super::bench::SIMPLE_RAYTRACER.fetch("cargo", dirs));
+    spawn_and_wait(super::tests::PORTABLE_SIMD.fetch("cargo", "rustc", dirs));
 }
 
 fn prepare_sysroot(dirs: &Dirs) {
@@ -80,7 +76,7 @@ impl GitRepo {
         }
     }
 
-    fn fetch(&self, dirs: &Dirs) {
+    pub(crate) fn fetch(&self, dirs: &Dirs) {
         match self.url {
             GitRepoUrl::Github { user, repo } => {
                 clone_repo_shallow_github(
diff --git a/build_system/tests.rs b/build_system/tests.rs
index dcfadd73756..e9486888f86 100644
--- a/build_system/tests.rs
+++ b/build_system/tests.rs
@@ -1,4 +1,3 @@
-use super::bench::SIMPLE_RAYTRACER;
 use super::build_sysroot::{self, SYSROOT_SRC};
 use super::config;
 use super::path::{Dirs, RelPath};
@@ -134,10 +133,6 @@ const EXTENDED_SYSROOT_SUITE: &[TestCase] = &[
             spawn_and_wait(build_cmd);
         }
     }),
-    TestCase::custom("test.simple-raytracer", &|runner| {
-        SIMPLE_RAYTRACER.clean(&runner.dirs);
-        spawn_and_wait(SIMPLE_RAYTRACER.build(&runner.target_compiler, &runner.dirs));
-    }),
     TestCase::custom("test.libcore", &|runner| {
         LIBCORE_TESTS.clean(&runner.dirs);
 
diff --git a/build_system/utils.rs b/build_system/utils.rs
index da2a94a0a4f..abc5bab4942 100644
--- a/build_system/utils.rs
+++ b/build_system/utils.rs
@@ -121,10 +121,18 @@ impl CargoProject {
     }
 
     #[must_use]
-    pub(crate) fn fetch(&self, cargo: impl AsRef<Path>, dirs: &Dirs) -> Command {
+    pub(crate) fn fetch(
+        &self,
+        cargo: impl AsRef<Path>,
+        rustc: impl AsRef<Path>,
+        dirs: &Dirs,
+    ) -> Command {
         let mut cmd = Command::new(cargo.as_ref());
 
-        cmd.arg("fetch").arg("--manifest-path").arg(self.manifest_path(dirs));
+        cmd.env("RUSTC", rustc.as_ref())
+            .arg("fetch")
+            .arg("--manifest-path")
+            .arg(self.manifest_path(dirs));
 
         cmd
     }
@@ -271,5 +279,9 @@ pub(crate) fn copy_dir_recursively(from: &Path, to: &Path) {
 }
 
 pub(crate) fn is_ci() -> bool {
-    env::var("CI").as_deref() == Ok("true")
+    env::var("CI").is_ok()
+}
+
+pub(crate) fn is_ci_opt() -> bool {
+    env::var("CI_OPT").is_ok()
 }
diff --git a/config.txt b/config.txt
index d49cc90791a..d6e3924a24d 100644
--- a/config.txt
+++ b/config.txt
@@ -44,7 +44,6 @@ aot.issue-72793
 
 testsuite.extended_sysroot
 test.rust-random/rand
-test.simple-raytracer
 test.libcore
 test.regex-shootout-regex-dna
 test.regex
diff --git a/rust-toolchain b/rust-toolchain
index 77345b9a17c..40fb54b9159 100644
--- a/rust-toolchain
+++ b/rust-toolchain
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "nightly-2023-01-20"
+channel = "nightly-2023-02-06"
 components = ["rust-src", "rustc-dev", "llvm-tools-preview"]
diff --git a/scripts/test_rustc_tests.sh b/scripts/test_rustc_tests.sh
index 07c9ae6ee9f..e14a129dbc2 100755
--- a/scripts/test_rustc_tests.sh
+++ b/scripts/test_rustc_tests.sh
@@ -91,6 +91,7 @@ rm tests/ui/proc-macro/proc-macro-deprecated-attr.rs # same
 rm tests/ui/proc-macro/quote-debug.rs # same
 rm tests/ui/proc-macro/no-missing-docs.rs # same
 rm tests/ui/rust-2018/proc-macro-crate-in-paths.rs # same
+rm tests/ui/proc-macro/allowed-signatures.rs # same
 
 # doesn't work due to the way the rustc test suite is invoked.
 # should work when using ./x.py test the way it is intended
diff --git a/src/abi/comments.rs b/src/abi/comments.rs
index 7f4619b5c94..abf63e33c35 100644
--- a/src/abi/comments.rs
+++ b/src/abi/comments.rs
@@ -98,12 +98,12 @@ pub(super) fn add_local_place_comments<'tcx>(
         }
         CPlaceInner::VarPair(place_local, var1, var2) => {
             assert_eq!(local, place_local);
-            ("ssa", Cow::Owned(format!(",var=({}, {})", var1.index(), var2.index())))
+            ("ssa", Cow::Owned(format!("var=({}, {})", var1.index(), var2.index())))
         }
         CPlaceInner::VarLane(_local, _var, _lane) => unreachable!(),
         CPlaceInner::Addr(ptr, meta) => {
             let meta = if let Some(meta) = meta {
-                Cow::Owned(format!(",meta={}", meta))
+                Cow::Owned(format!("meta={}", meta))
             } else {
                 Cow::Borrowed("")
             };
diff --git a/src/allocator.rs b/src/allocator.rs
index 8508227179a..1c73957ca57 100644
--- a/src/allocator.rs
+++ b/src/allocator.rs
@@ -70,37 +70,13 @@ fn codegen_inner(
             params: arg_tys.iter().cloned().map(AbiParam::new).collect(),
             returns: output.into_iter().map(AbiParam::new).collect(),
         };
-
-        let caller_name = format!("__rust_{}", method.name);
-        let callee_name = kind.fn_name(method.name);
-
-        let func_id = module.declare_function(&caller_name, Linkage::Export, &sig).unwrap();
-
-        let callee_func_id = module.declare_function(&callee_name, Linkage::Import, &sig).unwrap();
-
-        let mut ctx = Context::new();
-        ctx.func.signature = sig.clone();
-        {
-            let mut func_ctx = FunctionBuilderContext::new();
-            let mut bcx = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
-
-            let block = bcx.create_block();
-            bcx.switch_to_block(block);
-            let args = arg_tys
-                .into_iter()
-                .map(|ty| bcx.append_block_param(block, ty))
-                .collect::<Vec<Value>>();
-
-            let callee_func_ref = module.declare_func_in_func(callee_func_id, &mut bcx.func);
-            let call_inst = bcx.ins().call(callee_func_ref, &args);
-            let results = bcx.inst_results(call_inst).to_vec(); // Clone to prevent borrow error
-
-            bcx.ins().return_(&results);
-            bcx.seal_all_blocks();
-            bcx.finalize();
-        }
-        module.define_function(func_id, &mut ctx).unwrap();
-        unwind_context.add_function(func_id, &ctx, module.isa());
+        crate::common::create_wrapper_function(
+            module,
+            unwind_context,
+            sig,
+            &format!("__rust_{}", method.name),
+            &kind.fn_name(method.name),
+        );
     }
 
     let sig = Signature {
@@ -108,36 +84,13 @@ fn codegen_inner(
         params: vec![AbiParam::new(usize_ty), AbiParam::new(usize_ty)],
         returns: vec![],
     };
-
-    let callee_name = alloc_error_handler_kind.fn_name(sym::oom);
-
-    let func_id =
-        module.declare_function("__rust_alloc_error_handler", Linkage::Export, &sig).unwrap();
-
-    let callee_func_id = module.declare_function(&callee_name, Linkage::Import, &sig).unwrap();
-
-    let mut ctx = Context::new();
-    ctx.func.signature = sig;
-    {
-        let mut func_ctx = FunctionBuilderContext::new();
-        let mut bcx = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
-
-        let block = bcx.create_block();
-        bcx.switch_to_block(block);
-        let args = (&[usize_ty, usize_ty])
-            .iter()
-            .map(|&ty| bcx.append_block_param(block, ty))
-            .collect::<Vec<Value>>();
-
-        let callee_func_ref = module.declare_func_in_func(callee_func_id, &mut bcx.func);
-        bcx.ins().call(callee_func_ref, &args);
-
-        bcx.ins().trap(TrapCode::UnreachableCodeReached);
-        bcx.seal_all_blocks();
-        bcx.finalize();
-    }
-    module.define_function(func_id, &mut ctx).unwrap();
-    unwind_context.add_function(func_id, &ctx, module.isa());
+    crate::common::create_wrapper_function(
+        module,
+        unwind_context,
+        sig,
+        "__rust_alloc_error_handler",
+        &alloc_error_handler_kind.fn_name(sym::oom),
+    );
 
     let data_id = module.declare_data(OomStrategy::SYMBOL, Linkage::Export, false, false).unwrap();
     let mut data_ctx = DataContext::new();
diff --git a/src/base.rs b/src/base.rs
index dffb2ed8f4f..189d952a92f 100644
--- a/src/base.rs
+++ b/src/base.rs
@@ -21,23 +21,6 @@ pub(crate) struct CodegenedFunction {
     func_debug_cx: Option<FunctionDebugContext>,
 }
 
-#[cfg_attr(not(feature = "jit"), allow(dead_code))]
-pub(crate) fn codegen_and_compile_fn<'tcx>(
-    tcx: TyCtxt<'tcx>,
-    cx: &mut crate::CodegenCx,
-    cached_context: &mut Context,
-    module: &mut dyn Module,
-    instance: Instance<'tcx>,
-) {
-    let _inst_guard =
-        crate::PrintOnPanic(|| format!("{:?} {}", instance, tcx.symbol_name(instance).name));
-
-    let cached_func = std::mem::replace(&mut cached_context.func, Function::new());
-    let codegened_func = codegen_fn(tcx, cx, cached_func, module, instance);
-
-    compile_fn(cx, cached_context, module, codegened_func);
-}
-
 pub(crate) fn codegen_fn<'tcx>(
     tcx: TyCtxt<'tcx>,
     cx: &mut crate::CodegenCx,
@@ -47,6 +30,9 @@ pub(crate) fn codegen_fn<'tcx>(
 ) -> CodegenedFunction {
     debug_assert!(!instance.substs.needs_infer());
 
+    let symbol_name = tcx.symbol_name(instance).name.to_string();
+    let _timer = tcx.prof.generic_activity_with_arg("codegen fn", &*symbol_name);
+
     let mir = tcx.instance_mir(instance.def);
     let _mir_guard = crate::PrintOnPanic(|| {
         let mut buf = Vec::new();
@@ -58,7 +44,6 @@ pub(crate) fn codegen_fn<'tcx>(
     });
 
     // Declare function
-    let symbol_name = tcx.symbol_name(instance).name.to_string();
     let sig = get_function_sig(tcx, module.target_config().default_call_conv, instance);
     let func_id = module.declare_function(&symbol_name, Linkage::Local, &sig).unwrap();
 
@@ -112,7 +97,7 @@ pub(crate) fn codegen_fn<'tcx>(
         next_ssa_var: 0,
     };
 
-    tcx.sess.time("codegen clif ir", || codegen_fn_body(&mut fx, start_block));
+    tcx.prof.generic_activity("codegen clif ir").run(|| codegen_fn_body(&mut fx, start_block));
     fx.bcx.seal_all_blocks();
     fx.bcx.finalize();
 
@@ -146,6 +131,9 @@ pub(crate) fn compile_fn(
     module: &mut dyn Module,
     codegened_func: CodegenedFunction,
 ) {
+    let _timer =
+        cx.profiler.generic_activity_with_arg("compile function", &*codegened_func.symbol_name);
+
     let clif_comments = codegened_func.clif_comments;
 
     // Store function in context
@@ -191,9 +179,30 @@ pub(crate) fn compile_fn(
     };
 
     // Define function
-    cx.profiler.verbose_generic_activity("define function").run(|| {
+    cx.profiler.generic_activity("define function").run(|| {
         context.want_disasm = cx.should_write_ir;
         module.define_function(codegened_func.func_id, context).unwrap();
+
+        if cx.profiler.enabled() {
+            let mut recording_args = false;
+            cx.profiler
+                .generic_activity_with_arg_recorder(
+                    "define function (clif pass timings)",
+                    |recorder| {
+                        let pass_times = cranelift_codegen::timing::take_current();
+                        // Replace newlines with | as measureme doesn't allow control characters like
+                        // newlines inside strings.
+                        recorder.record_arg(format!("{}", pass_times).replace("\n", " | "));
+                        recording_args = true;
+                    },
+                )
+                .run(|| {
+                    if recording_args {
+                        // Wait a tiny bit to ensure chrome's profiler doesn't hide the event
+                        std::thread::sleep(std::time::Duration::from_nanos(2))
+                    }
+                });
+        }
     });
 
     if cx.should_write_ir {
@@ -220,7 +229,7 @@ pub(crate) fn compile_fn(
     let isa = module.isa();
     let debug_context = &mut cx.debug_context;
     let unwind_context = &mut cx.unwind_context;
-    cx.profiler.verbose_generic_activity("generate debug info").run(|| {
+    cx.profiler.generic_activity("generate debug info").run(|| {
         if let Some(debug_context) = debug_context {
             codegened_func.func_debug_cx.unwrap().finalize(
                 debug_context,
@@ -237,7 +246,7 @@ pub(crate) fn verify_func(
     writer: &crate::pretty_clif::CommentWriter,
     func: &Function,
 ) {
-    tcx.sess.time("verify clif ir", || {
+    tcx.prof.generic_activity("verify clif ir").run(|| {
         let flags = cranelift_codegen::settings::Flags::new(cranelift_codegen::settings::builder());
         match cranelift_codegen::verify_function(&func, &flags) {
             Ok(_) => {}
@@ -273,7 +282,10 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
         fx.bcx.ins().trap(TrapCode::UnreachableCodeReached);
         return;
     }
-    fx.tcx.sess.time("codegen prelude", || crate::abi::codegen_fn_prelude(fx, start_block));
+    fx.tcx
+        .prof
+        .generic_activity("codegen prelude")
+        .run(|| crate::abi::codegen_fn_prelude(fx, start_block));
 
     for (bb, bb_data) in fx.mir.basic_blocks.iter_enumerated() {
         let block = fx.get_block(bb);
@@ -434,7 +446,7 @@ fn codegen_fn_body(fx: &mut FunctionCx<'_, '_, '_>, start_block: Block) {
                 cleanup: _,
                 from_hir_call: _,
             } => {
-                fx.tcx.sess.time("codegen call", || {
+                fx.tcx.prof.generic_activity("codegen call").run(|| {
                     crate::abi::codegen_terminator_call(
                         fx,
                         mir::SourceInfo { span: *fn_span, ..source_info },
@@ -778,17 +790,30 @@ fn codegen_stmt<'tcx>(
                     let val = CValue::const_val(fx, fx.layout_of(fx.tcx.types.usize), val.into());
                     lval.write_cvalue(fx, val);
                 }
-                Rvalue::Aggregate(ref kind, ref operands) => match kind.as_ref() {
-                    AggregateKind::Array(_ty) => {
-                        for (i, operand) in operands.iter().enumerate() {
-                            let operand = codegen_operand(fx, operand);
-                            let index = fx.bcx.ins().iconst(fx.pointer_type, i as i64);
-                            let to = lval.place_index(fx, index);
-                            to.write_cvalue(fx, operand);
+                Rvalue::Aggregate(ref kind, ref operands) => {
+                    let (variant_index, variant_dest, active_field_index) = match **kind {
+                        mir::AggregateKind::Adt(_, variant_index, _, _, active_field_index) => {
+                            let variant_dest = lval.downcast_variant(fx, variant_index);
+                            (variant_index, variant_dest, active_field_index)
                         }
+                        _ => (VariantIdx::from_u32(0), lval, None),
+                    };
+                    if active_field_index.is_some() {
+                        assert_eq!(operands.len(), 1);
                     }
-                    _ => unreachable!("shouldn't exist at codegen {:?}", to_place_and_rval.1),
-                },
+                    for (i, operand) in operands.iter().enumerate() {
+                        let operand = codegen_operand(fx, operand);
+                        let field_index = active_field_index.unwrap_or(i);
+                        let to = if let mir::AggregateKind::Array(_) = **kind {
+                            let index = fx.bcx.ins().iconst(fx.pointer_type, field_index as i64);
+                            variant_dest.place_index(fx, index)
+                        } else {
+                            variant_dest.place_field(fx, mir::Field::new(field_index))
+                        };
+                        to.write_cvalue(fx, operand);
+                    }
+                    crate::discriminant::codegen_set_discriminant(fx, lval, variant_index);
+                }
             }
         }
         StatementKind::StorageLive(_)
diff --git a/src/common.rs b/src/common.rs
index f41af3a9e63..a8be0d32cc8 100644
--- a/src/common.rs
+++ b/src/common.rs
@@ -254,6 +254,44 @@ pub(crate) fn type_sign(ty: Ty<'_>) -> bool {
     }
 }
 
+pub(crate) fn create_wrapper_function(
+    module: &mut dyn Module,
+    unwind_context: &mut UnwindContext,
+    sig: Signature,
+    wrapper_name: &str,
+    callee_name: &str,
+) {
+    let wrapper_func_id = module.declare_function(wrapper_name, Linkage::Export, &sig).unwrap();
+    let callee_func_id = module.declare_function(callee_name, Linkage::Import, &sig).unwrap();
+
+    let mut ctx = Context::new();
+    ctx.func.signature = sig;
+    {
+        let mut func_ctx = FunctionBuilderContext::new();
+        let mut bcx = FunctionBuilder::new(&mut ctx.func, &mut func_ctx);
+
+        let block = bcx.create_block();
+        bcx.switch_to_block(block);
+        let func = &mut bcx.func.stencil;
+        let args = func
+            .signature
+            .params
+            .iter()
+            .map(|param| func.dfg.append_block_param(block, param.value_type))
+            .collect::<Vec<Value>>();
+
+        let callee_func_ref = module.declare_func_in_func(callee_func_id, &mut bcx.func);
+        let call_inst = bcx.ins().call(callee_func_ref, &args);
+        let results = bcx.inst_results(call_inst).to_vec(); // Clone to prevent borrow error
+
+        bcx.ins().return_(&results);
+        bcx.seal_all_blocks();
+        bcx.finalize();
+    }
+    module.define_function(wrapper_func_id, &mut ctx).unwrap();
+    unwind_context.add_function(wrapper_func_id, &ctx, module.isa());
+}
+
 pub(crate) struct FunctionCx<'m, 'clif, 'tcx: 'm> {
     pub(crate) cx: &'clif mut crate::CodegenCx,
     pub(crate) module: &'m mut dyn Module,
diff --git a/src/compiler_builtins.rs b/src/compiler_builtins.rs
index c6a247cf59e..8a53baa763a 100644
--- a/src/compiler_builtins.rs
+++ b/src/compiler_builtins.rs
@@ -1,14 +1,33 @@
+#[cfg(all(unix, feature = "jit"))]
+use std::ffi::c_int;
+#[cfg(feature = "jit")]
+use std::ffi::c_void;
+
+// FIXME replace with core::ffi::c_size_t once stabilized
+#[allow(non_camel_case_types)]
+#[cfg(feature = "jit")]
+type size_t = usize;
+
 macro_rules! builtin_functions {
-    ($register:ident; $(fn $name:ident($($arg_name:ident: $arg_ty:ty),*) -> $ret_ty:ty;)*) => {
+    (
+        $register:ident;
+        $(
+            $(#[$attr:meta])?
+            fn $name:ident($($arg_name:ident: $arg_ty:ty),*) -> $ret_ty:ty;
+        )*
+    ) => {
         #[cfg(feature = "jit")]
         #[allow(improper_ctypes)]
         extern "C" {
-            $(fn $name($($arg_name: $arg_ty),*) -> $ret_ty;)*
+            $(
+                $(#[$attr])?
+                fn $name($($arg_name: $arg_ty),*) -> $ret_ty;
+            )*
         }
 
         #[cfg(feature = "jit")]
         pub(crate) fn $register(builder: &mut cranelift_jit::JITBuilder) {
-            for (name, val) in [$((stringify!($name), $name as *const u8)),*] {
+            for (name, val) in [$($(#[$attr])? (stringify!($name), $name as *const u8)),*] {
                 builder.symbol(name, val);
             }
         }
@@ -40,4 +59,17 @@ builtin_functions! {
     fn __fixdfti(f: f64) -> i128;
     fn __fixunssfti(f: f32) -> u128;
     fn __fixunsdfti(f: f64) -> u128;
+
+    // allocator
+    // NOTE: These need to be mentioned here despite not being part of compiler_builtins because
+    // newer glibc versions resolve dlsym("malloc") to libc.so despite the override in the rustc
+    // binary to use jemalloc. Libraries opened with dlopen still get the jemalloc version, causing
+    // multiple allocators to be mixed, resulting in a crash.
+    fn calloc(nobj: size_t, size: size_t) -> *mut c_void;
+    #[cfg(unix)]
+    fn posix_memalign(memptr: *mut *mut c_void, align: size_t, size: size_t) -> c_int;
+    fn malloc(size: size_t) -> *mut c_void;
+    fn realloc(p: *mut c_void, size: size_t) -> *mut c_void;
+    fn free(p: *mut c_void) -> ();
+
 }
diff --git a/src/config.rs b/src/config.rs
index 45522fb1a4c..263401e1c4b 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -42,12 +42,6 @@ pub struct BackendConfig {
     /// Defaults to the value of `CG_CLIF_JIT_ARGS`.
     pub jit_args: Vec<String>,
 
-    /// Display the time it took to perform codegen for a crate.
-    ///
-    /// Defaults to true when the `CG_CLIF_DISPLAY_CG_TIME` env var is set to 1 or false otherwise.
-    /// Can be set using `-Cllvm-args=display_cg_time=...`.
-    pub display_cg_time: bool,
-
     /// Enable the Cranelift ir verifier for all compilation passes. If not set it will only run
     /// once before passing the clif ir to Cranelift for compilation.
     ///
@@ -73,7 +67,6 @@ impl Default for BackendConfig {
                 let args = std::env::var("CG_CLIF_JIT_ARGS").unwrap_or_else(|_| String::new());
                 args.split(' ').map(|arg| arg.to_string()).collect()
             },
-            display_cg_time: bool_env_var("CG_CLIF_DISPLAY_CG_TIME"),
             enable_verifier: cfg!(debug_assertions) || bool_env_var("CG_CLIF_ENABLE_VERIFIER"),
             disable_incr_cache: bool_env_var("CG_CLIF_DISABLE_INCR_CACHE"),
         }
@@ -92,7 +85,6 @@ impl BackendConfig {
             if let Some((name, value)) = opt.split_once('=') {
                 match name {
                     "mode" => config.codegen_mode = value.parse()?,
-                    "display_cg_time" => config.display_cg_time = parse_bool(name, value)?,
                     "enable_verifier" => config.enable_verifier = parse_bool(name, value)?,
                     "disable_incr_cache" => config.disable_incr_cache = parse_bool(name, value)?,
                     _ => return Err(format!("Unknown option `{}`", name)),
diff --git a/src/driver/aot.rs b/src/driver/aot.rs
index d4494a9e45d..58b01dfb5b0 100644
--- a/src/driver/aot.rs
+++ b/src/driver/aot.rs
@@ -272,25 +272,25 @@ fn module_codegen(
         ConcurrencyLimiterToken,
     ),
 ) -> OngoingModuleCodegen {
-    let (cgu_name, mut cx, mut module, codegened_functions) = tcx.sess.time("codegen cgu", || {
-        let cgu = tcx.codegen_unit(cgu_name);
-        let mono_items = cgu.items_in_deterministic_order(tcx);
+    let (cgu_name, mut cx, mut module, codegened_functions) =
+        tcx.prof.verbose_generic_activity_with_arg("codegen cgu", cgu_name.as_str()).run(|| {
+            let cgu = tcx.codegen_unit(cgu_name);
+            let mono_items = cgu.items_in_deterministic_order(tcx);
 
-        let mut module = make_module(tcx.sess, &backend_config, cgu_name.as_str().to_string());
+            let mut module = make_module(tcx.sess, &backend_config, cgu_name.as_str().to_string());
 
-        let mut cx = crate::CodegenCx::new(
-            tcx,
-            backend_config.clone(),
-            module.isa(),
-            tcx.sess.opts.debuginfo != DebugInfo::None,
-            cgu_name,
-        );
-        super::predefine_mono_items(tcx, &mut module, &mono_items);
-        let mut codegened_functions = vec![];
-        for (mono_item, _) in mono_items {
-            match mono_item {
-                MonoItem::Fn(inst) => {
-                    tcx.sess.time("codegen fn", || {
+            let mut cx = crate::CodegenCx::new(
+                tcx,
+                backend_config.clone(),
+                module.isa(),
+                tcx.sess.opts.debuginfo != DebugInfo::None,
+                cgu_name,
+            );
+            super::predefine_mono_items(tcx, &mut module, &mono_items);
+            let mut codegened_functions = vec![];
+            for (mono_item, _) in mono_items {
+                match mono_item {
+                    MonoItem::Fn(inst) => {
                         let codegened_function = crate::base::codegen_fn(
                             tcx,
                             &mut cx,
@@ -299,53 +299,68 @@ fn module_codegen(
                             inst,
                         );
                         codegened_functions.push(codegened_function);
-                    });
-                }
-                MonoItem::Static(def_id) => {
-                    crate::constant::codegen_static(tcx, &mut module, def_id)
-                }
-                MonoItem::GlobalAsm(item_id) => {
-                    crate::global_asm::codegen_global_asm_item(tcx, &mut cx.global_asm, item_id);
+                    }
+                    MonoItem::Static(def_id) => {
+                        crate::constant::codegen_static(tcx, &mut module, def_id)
+                    }
+                    MonoItem::GlobalAsm(item_id) => {
+                        crate::global_asm::codegen_global_asm_item(
+                            tcx,
+                            &mut cx.global_asm,
+                            item_id,
+                        );
+                    }
                 }
             }
-        }
-        crate::main_shim::maybe_create_entry_wrapper(
-            tcx,
-            &mut module,
-            &mut cx.unwind_context,
-            false,
-            cgu.is_primary(),
-        );
+            crate::main_shim::maybe_create_entry_wrapper(
+                tcx,
+                &mut module,
+                &mut cx.unwind_context,
+                false,
+                cgu.is_primary(),
+            );
 
-        let cgu_name = cgu.name().as_str().to_owned();
+            let cgu_name = cgu.name().as_str().to_owned();
 
-        (cgu_name, cx, module, codegened_functions)
-    });
-
-    OngoingModuleCodegen::Async(std::thread::spawn(move || {
-        cx.profiler.clone().verbose_generic_activity("compile functions").run(|| {
-            let mut cached_context = Context::new();
-            for codegened_func in codegened_functions {
-                crate::base::compile_fn(&mut cx, &mut cached_context, &mut module, codegened_func);
-            }
+            (cgu_name, cx, module, codegened_functions)
         });
 
-        let global_asm_object_file =
-            cx.profiler.verbose_generic_activity("compile assembly").run(|| {
+    OngoingModuleCodegen::Async(std::thread::spawn(move || {
+        cx.profiler.clone().verbose_generic_activity_with_arg("compile functions", &*cgu_name).run(
+            || {
+                let mut cached_context = Context::new();
+                for codegened_func in codegened_functions {
+                    crate::base::compile_fn(
+                        &mut cx,
+                        &mut cached_context,
+                        &mut module,
+                        codegened_func,
+                    );
+                }
+            },
+        );
+
+        let global_asm_object_file = cx
+            .profiler
+            .verbose_generic_activity_with_arg("compile assembly", &*cgu_name)
+            .run(|| {
                 crate::global_asm::compile_global_asm(&global_asm_config, &cgu_name, &cx.global_asm)
             })?;
 
-        let codegen_result = cx.profiler.verbose_generic_activity("write object file").run(|| {
-            emit_cgu(
-                &global_asm_config.output_filenames,
-                &cx.profiler,
-                cgu_name,
-                module,
-                cx.debug_context,
-                cx.unwind_context,
-                global_asm_object_file,
-            )
-        });
+        let codegen_result = cx
+            .profiler
+            .verbose_generic_activity_with_arg("write object file", &*cgu_name)
+            .run(|| {
+                emit_cgu(
+                    &global_asm_config.output_filenames,
+                    &cx.profiler,
+                    cgu_name,
+                    module,
+                    cx.debug_context,
+                    cx.unwind_context,
+                    global_asm_object_file,
+                )
+            });
         std::mem::drop(token);
         codegen_result
     }))
@@ -375,7 +390,7 @@ pub(crate) fn run_aot(
 
     let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, cgus.len());
 
-    let modules = super::time(tcx, backend_config.display_cg_time, "codegen mono items", || {
+    let modules = tcx.sess.time("codegen mono items", || {
         cgus.iter()
             .map(|cgu| {
                 let cgu_reuse = if backend_config.disable_incr_cache {
@@ -437,7 +452,6 @@ pub(crate) fn run_aot(
     };
 
     let metadata_module = if need_metadata_module {
-        let _timer = tcx.prof.generic_activity("codegen crate metadata");
         let (metadata_cgu_name, tmp_file) = tcx.sess.time("write compressed metadata", || {
             use rustc_middle::mir::mono::CodegenUnitNameBuilder;
 
diff --git a/src/driver/jit.rs b/src/driver/jit.rs
index be1b8c9ead3..8b5a2da2c59 100644
--- a/src/driver/jit.rs
+++ b/src/driver/jit.rs
@@ -121,22 +121,20 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, backend_config: BackendConfig) -> ! {
         .into_iter()
         .collect::<Vec<(_, (_, _))>>();
 
-    super::time(tcx, backend_config.display_cg_time, "codegen mono items", || {
+    tcx.sess.time("codegen mono items", || {
         super::predefine_mono_items(tcx, &mut jit_module, &mono_items);
         for (mono_item, _) in mono_items {
             match mono_item {
                 MonoItem::Fn(inst) => match backend_config.codegen_mode {
                     CodegenMode::Aot => unreachable!(),
                     CodegenMode::Jit => {
-                        tcx.sess.time("codegen fn", || {
-                            crate::base::codegen_and_compile_fn(
-                                tcx,
-                                &mut cx,
-                                &mut cached_context,
-                                &mut jit_module,
-                                inst,
-                            )
-                        });
+                        codegen_and_compile_fn(
+                            tcx,
+                            &mut cx,
+                            &mut cached_context,
+                            &mut jit_module,
+                            inst,
+                        );
                     }
                     CodegenMode::JitLazy => {
                         codegen_shim(tcx, &mut cx, &mut cached_context, &mut jit_module, inst)
@@ -219,6 +217,24 @@ pub(crate) fn run_jit(tcx: TyCtxt<'_>, backend_config: BackendConfig) -> ! {
     }
 }
 
+pub(crate) fn codegen_and_compile_fn<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    cx: &mut crate::CodegenCx,
+    cached_context: &mut Context,
+    module: &mut dyn Module,
+    instance: Instance<'tcx>,
+) {
+    tcx.prof.generic_activity("codegen and compile fn").run(|| {
+        let _inst_guard =
+            crate::PrintOnPanic(|| format!("{:?} {}", instance, tcx.symbol_name(instance).name));
+
+        let cached_func = std::mem::replace(&mut cached_context.func, Function::new());
+        let codegened_func = crate::base::codegen_fn(tcx, cx, cached_func, module, instance);
+
+        crate::base::compile_fn(cx, cached_context, module, codegened_func);
+    });
+}
+
 extern "C" fn clif_jit_fn(
     instance_ptr: *const Instance<'static>,
     trampoline_ptr: *const u8,
@@ -271,15 +287,7 @@ fn jit_fn(instance_ptr: *const Instance<'static>, trampoline_ptr: *const u8) ->
                 false,
                 Symbol::intern("dummy_cgu_name"),
             );
-            tcx.sess.time("codegen fn", || {
-                crate::base::codegen_and_compile_fn(
-                    tcx,
-                    &mut cx,
-                    &mut Context::new(),
-                    jit_module,
-                    instance,
-                )
-            });
+            codegen_and_compile_fn(tcx, &mut cx, &mut Context::new(), jit_module, instance);
 
             assert!(cx.global_asm.is_empty());
             jit_module.finalize_definitions().unwrap();
diff --git a/src/driver/mod.rs b/src/driver/mod.rs
index 6e925cea277..d09d3a52975 100644
--- a/src/driver/mod.rs
+++ b/src/driver/mod.rs
@@ -17,7 +17,7 @@ fn predefine_mono_items<'tcx>(
     module: &mut dyn Module,
     mono_items: &[(MonoItem<'tcx>, (RLinkage, Visibility))],
 ) {
-    tcx.sess.time("predefine functions", || {
+    tcx.prof.generic_activity("predefine functions").run(|| {
         let is_compiler_builtins = tcx.is_compiler_builtins(LOCAL_CRATE);
         for &(mono_item, (linkage, visibility)) in mono_items {
             match mono_item {
@@ -39,16 +39,3 @@ fn predefine_mono_items<'tcx>(
         }
     });
 }
-
-fn time<R>(tcx: TyCtxt<'_>, display: bool, name: &'static str, f: impl FnOnce() -> R) -> R {
-    if display {
-        println!("[{:<30}: {}] start", tcx.crate_name(LOCAL_CRATE), name);
-        let before = std::time::Instant::now();
-        let res = tcx.sess.time(name, f);
-        let after = std::time::Instant::now();
-        println!("[{:<30}: {}] end time: {:?}", tcx.crate_name(LOCAL_CRATE), name, after - before);
-        res
-    } else {
-        tcx.sess.time(name, f)
-    }
-}
diff --git a/src/global_asm.rs b/src/global_asm.rs
index dcbcaba30fe..46c78ce6a1e 100644
--- a/src/global_asm.rs
+++ b/src/global_asm.rs
@@ -7,7 +7,7 @@ use std::process::{Command, Stdio};
 use std::sync::Arc;
 
 use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
-use rustc_hir::ItemId;
+use rustc_hir::{InlineAsmOperand, ItemId};
 use rustc_session::config::{OutputFilenames, OutputType};
 
 use crate::prelude::*;
@@ -23,7 +23,46 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String,
         for piece in asm.template {
             match *piece {
                 InlineAsmTemplatePiece::String(ref s) => global_asm.push_str(s),
-                InlineAsmTemplatePiece::Placeholder { .. } => todo!(),
+                InlineAsmTemplatePiece::Placeholder { operand_idx, modifier: _, span: op_sp } => {
+                    match asm.operands[operand_idx].0 {
+                        InlineAsmOperand::Const { ref anon_const } => {
+                            let const_value =
+                                tcx.const_eval_poly(anon_const.def_id.to_def_id()).unwrap_or_else(
+                                    |_| span_bug!(op_sp, "asm const cannot be resolved"),
+                                );
+                            let ty = tcx.typeck_body(anon_const.body).node_type(anon_const.hir_id);
+                            let string = rustc_codegen_ssa::common::asm_const_to_str(
+                                tcx,
+                                op_sp,
+                                const_value,
+                                RevealAllLayoutCx(tcx).layout_of(ty),
+                            );
+                            global_asm.push_str(&string);
+                        }
+                        InlineAsmOperand::SymFn { anon_const } => {
+                            let ty = tcx.typeck_body(anon_const.body).node_type(anon_const.hir_id);
+                            let instance = match ty.kind() {
+                                &ty::FnDef(def_id, substs) => Instance::new(def_id, substs),
+                                _ => span_bug!(op_sp, "asm sym is not a function"),
+                            };
+                            let symbol = tcx.symbol_name(instance);
+                            // FIXME handle the case where the function was made private to the
+                            // current codegen unit
+                            global_asm.push_str(symbol.name);
+                        }
+                        InlineAsmOperand::SymStatic { path: _, def_id } => {
+                            let instance = Instance::mono(tcx, def_id).polymorphize(tcx);
+                            let symbol = tcx.symbol_name(instance);
+                            global_asm.push_str(symbol.name);
+                        }
+                        InlineAsmOperand::In { .. }
+                        | InlineAsmOperand::Out { .. }
+                        | InlineAsmOperand::InOut { .. }
+                        | InlineAsmOperand::SplitInOut { .. } => {
+                            span_bug!(op_sp, "invalid operand type for global_asm!")
+                        }
+                    }
+                }
             }
         }
         global_asm.push_str("\n.att_syntax\n\n");
diff --git a/src/inline_asm.rs b/src/inline_asm.rs
index 3fcc84d3929..6206fbf7dd5 100644
--- a/src/inline_asm.rs
+++ b/src/inline_asm.rs
@@ -9,9 +9,33 @@ use rustc_middle::mir::InlineAsmOperand;
 use rustc_span::sym;
 use rustc_target::asm::*;
 
+enum CInlineAsmOperand<'tcx> {
+    In {
+        reg: InlineAsmRegOrRegClass,
+        value: CValue<'tcx>,
+    },
+    Out {
+        reg: InlineAsmRegOrRegClass,
+        late: bool,
+        place: Option<CPlace<'tcx>>,
+    },
+    InOut {
+        reg: InlineAsmRegOrRegClass,
+        _late: bool,
+        in_value: CValue<'tcx>,
+        out_place: Option<CPlace<'tcx>>,
+    },
+    Const {
+        value: String,
+    },
+    Symbol {
+        symbol: String,
+    },
+}
+
 pub(crate) fn codegen_inline_asm<'tcx>(
     fx: &mut FunctionCx<'_, '_, 'tcx>,
-    _span: Span,
+    span: Span,
     template: &[InlineAsmTemplatePiece],
     operands: &[InlineAsmOperand<'tcx>],
     options: InlineAsmOptions,
@@ -198,6 +222,81 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         }
     }
 
+    let operands = operands
+        .into_iter()
+        .map(|operand| match *operand {
+            InlineAsmOperand::In { reg, ref value } => {
+                CInlineAsmOperand::In { reg, value: crate::base::codegen_operand(fx, value) }
+            }
+            InlineAsmOperand::Out { reg, late, ref place } => CInlineAsmOperand::Out {
+                reg,
+                late,
+                place: place.map(|place| crate::base::codegen_place(fx, place)),
+            },
+            InlineAsmOperand::InOut { reg, late, ref in_value, ref out_place } => {
+                CInlineAsmOperand::InOut {
+                    reg,
+                    _late: late,
+                    in_value: crate::base::codegen_operand(fx, in_value),
+                    out_place: out_place.map(|place| crate::base::codegen_place(fx, place)),
+                }
+            }
+            InlineAsmOperand::Const { ref value } => {
+                let (const_value, ty) = crate::constant::eval_mir_constant(fx, &*value)
+                    .unwrap_or_else(|| span_bug!(span, "asm const cannot be resolved"));
+                let value = rustc_codegen_ssa::common::asm_const_to_str(
+                    fx.tcx,
+                    span,
+                    const_value,
+                    fx.layout_of(ty),
+                );
+                CInlineAsmOperand::Const { value }
+            }
+            InlineAsmOperand::SymFn { ref value } => {
+                let literal = fx.monomorphize(value.literal);
+                if let ty::FnDef(def_id, substs) = *literal.ty().kind() {
+                    let instance = ty::Instance::resolve_for_fn_ptr(
+                        fx.tcx,
+                        ty::ParamEnv::reveal_all(),
+                        def_id,
+                        substs,
+                    )
+                    .unwrap();
+                    let symbol = fx.tcx.symbol_name(instance);
+
+                    // Pass a wrapper rather than the function itself as the function itself may not
+                    // be exported from the main codegen unit and may thus be unreachable from the
+                    // object file created by an external assembler.
+                    let inline_asm_index = fx.cx.inline_asm_index.get();
+                    fx.cx.inline_asm_index.set(inline_asm_index + 1);
+                    let wrapper_name = format!(
+                        "__inline_asm_{}_wrapper_n{}",
+                        fx.cx.cgu_name.as_str().replace('.', "__").replace('-', "_"),
+                        inline_asm_index
+                    );
+                    let sig =
+                        get_function_sig(fx.tcx, fx.target_config.default_call_conv, instance);
+                    create_wrapper_function(
+                        fx.module,
+                        &mut fx.cx.unwind_context,
+                        sig,
+                        &wrapper_name,
+                        symbol.name,
+                    );
+
+                    CInlineAsmOperand::Symbol { symbol: wrapper_name }
+                } else {
+                    span_bug!(span, "invalid type for asm sym (fn)");
+                }
+            }
+            InlineAsmOperand::SymStatic { def_id } => {
+                assert!(fx.tcx.is_static(def_id));
+                let instance = Instance::mono(fx.tcx, def_id).polymorphize(fx.tcx);
+                CInlineAsmOperand::Symbol { symbol: fx.tcx.symbol_name(instance).name.to_owned() }
+            }
+        })
+        .collect::<Vec<_>>();
+
     let mut inputs = Vec::new();
     let mut outputs = Vec::new();
 
@@ -206,7 +305,7 @@ pub(crate) fn codegen_inline_asm<'tcx>(
         arch: fx.tcx.sess.asm_arch.unwrap(),
         enclosing_def_id: fx.instance.def_id(),
         template,
-        operands,
+        operands: &operands,
         options,
         registers: Vec::new(),
         stack_slots_clobber: Vec::new(),
@@ -229,36 +328,22 @@ pub(crate) fn codegen_inline_asm<'tcx>(
     fx.cx.global_asm.push_str(&generated_asm);
 
     for (i, operand) in operands.iter().enumerate() {
-        match *operand {
-            InlineAsmOperand::In { reg: _, ref value } => {
-                inputs.push((
-                    asm_gen.stack_slots_input[i].unwrap(),
-                    crate::base::codegen_operand(fx, value).load_scalar(fx),
-                ));
+        match operand {
+            CInlineAsmOperand::In { reg: _, value } => {
+                inputs.push((asm_gen.stack_slots_input[i].unwrap(), value.load_scalar(fx)));
             }
-            InlineAsmOperand::Out { reg: _, late: _, place } => {
+            CInlineAsmOperand::Out { reg: _, late: _, place } => {
                 if let Some(place) = place {
-                    outputs.push((
-                        asm_gen.stack_slots_output[i].unwrap(),
-                        crate::base::codegen_place(fx, place),
-                    ));
+                    outputs.push((asm_gen.stack_slots_output[i].unwrap(), place.clone()));
                 }
             }
-            InlineAsmOperand::InOut { reg: _, late: _, ref in_value, out_place } => {
-                inputs.push((
-                    asm_gen.stack_slots_input[i].unwrap(),
-                    crate::base::codegen_operand(fx, in_value).load_scalar(fx),
-                ));
+            CInlineAsmOperand::InOut { reg: _, _late: _, in_value, out_place } => {
+                inputs.push((asm_gen.stack_slots_input[i].unwrap(), in_value.load_scalar(fx)));
                 if let Some(out_place) = out_place {
-                    outputs.push((
-                        asm_gen.stack_slots_output[i].unwrap(),
-                        crate::base::codegen_place(fx, out_place),
-                    ));
+                    outputs.push((asm_gen.stack_slots_output[i].unwrap(), out_place.clone()));
                 }
             }
-            InlineAsmOperand::Const { value: _ } => todo!(),
-            InlineAsmOperand::SymFn { value: _ } => todo!(),
-            InlineAsmOperand::SymStatic { def_id: _ } => todo!(),
+            CInlineAsmOperand::Const { value: _ } | CInlineAsmOperand::Symbol { symbol: _ } => {}
         }
     }
 
@@ -280,7 +365,7 @@ struct InlineAssemblyGenerator<'a, 'tcx> {
     arch: InlineAsmArch,
     enclosing_def_id: DefId,
     template: &'a [InlineAsmTemplatePiece],
-    operands: &'a [InlineAsmOperand<'tcx>],
+    operands: &'a [CInlineAsmOperand<'tcx>],
     options: InlineAsmOptions,
     registers: Vec<Option<InlineAsmReg>>,
     stack_slots_clobber: Vec<Option<Size>>,
@@ -304,18 +389,20 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         // Add explicit registers to the allocated set.
         for (i, operand) in self.operands.iter().enumerate() {
             match *operand {
-                InlineAsmOperand::In { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
+                CInlineAsmOperand::In { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
                     regs[i] = Some(reg);
                     allocated.entry(reg).or_default().0 = true;
                 }
-                InlineAsmOperand::Out {
-                    reg: InlineAsmRegOrRegClass::Reg(reg), late: true, ..
+                CInlineAsmOperand::Out {
+                    reg: InlineAsmRegOrRegClass::Reg(reg),
+                    late: true,
+                    ..
                 } => {
                     regs[i] = Some(reg);
                     allocated.entry(reg).or_default().1 = true;
                 }
-                InlineAsmOperand::Out { reg: InlineAsmRegOrRegClass::Reg(reg), .. }
-                | InlineAsmOperand::InOut { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
+                CInlineAsmOperand::Out { reg: InlineAsmRegOrRegClass::Reg(reg), .. }
+                | CInlineAsmOperand::InOut { reg: InlineAsmRegOrRegClass::Reg(reg), .. } => {
                     regs[i] = Some(reg);
                     allocated.insert(reg, (true, true));
                 }
@@ -326,12 +413,12 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         // Allocate out/inout/inlateout registers first because they are more constrained.
         for (i, operand) in self.operands.iter().enumerate() {
             match *operand {
-                InlineAsmOperand::Out {
+                CInlineAsmOperand::Out {
                     reg: InlineAsmRegOrRegClass::RegClass(class),
                     late: false,
                     ..
                 }
-                | InlineAsmOperand::InOut {
+                | CInlineAsmOperand::InOut {
                     reg: InlineAsmRegOrRegClass::RegClass(class), ..
                 } => {
                     let mut alloc_reg = None;
@@ -360,7 +447,7 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         // Allocate in/lateout.
         for (i, operand) in self.operands.iter().enumerate() {
             match *operand {
-                InlineAsmOperand::In { reg: InlineAsmRegOrRegClass::RegClass(class), .. } => {
+                CInlineAsmOperand::In { reg: InlineAsmRegOrRegClass::RegClass(class), .. } => {
                     let mut alloc_reg = None;
                     for &reg in &map[&class] {
                         let mut used = false;
@@ -380,7 +467,7 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
                     regs[i] = Some(reg);
                     allocated.entry(reg).or_default().0 = true;
                 }
-                InlineAsmOperand::Out {
+                CInlineAsmOperand::Out {
                     reg: InlineAsmRegOrRegClass::RegClass(class),
                     late: true,
                     ..
@@ -455,7 +542,7 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         // Allocate stack slots for inout
         for (i, operand) in self.operands.iter().enumerate() {
             match *operand {
-                InlineAsmOperand::InOut { reg, out_place: Some(_), .. } => {
+                CInlineAsmOperand::InOut { reg, out_place: Some(_), .. } => {
                     let slot = new_slot(reg.reg_class());
                     slots_input[i] = Some(slot);
                     slots_output[i] = Some(slot);
@@ -470,8 +557,8 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         // Allocate stack slots for input
         for (i, operand) in self.operands.iter().enumerate() {
             match *operand {
-                InlineAsmOperand::In { reg, .. }
-                | InlineAsmOperand::InOut { reg, out_place: None, .. } => {
+                CInlineAsmOperand::In { reg, .. }
+                | CInlineAsmOperand::InOut { reg, out_place: None, .. } => {
                     slots_input[i] = Some(new_slot(reg.reg_class()));
                 }
                 _ => (),
@@ -487,7 +574,7 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
         // Allocate stack slots for output
         for (i, operand) in self.operands.iter().enumerate() {
             match *operand {
-                InlineAsmOperand::Out { reg, place: Some(_), .. } => {
+                CInlineAsmOperand::Out { reg, place: Some(_), .. } => {
                     slots_output[i] = Some(new_slot(reg.reg_class()));
                 }
                 _ => (),
@@ -549,13 +636,23 @@ impl<'tcx> InlineAssemblyGenerator<'_, 'tcx> {
                     generated_asm.push_str(s);
                 }
                 InlineAsmTemplatePiece::Placeholder { operand_idx, modifier, span: _ } => {
-                    if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
-                        generated_asm.push('%');
+                    match self.operands[*operand_idx] {
+                        CInlineAsmOperand::In { .. }
+                        | CInlineAsmOperand::Out { .. }
+                        | CInlineAsmOperand::InOut { .. } => {
+                            if self.options.contains(InlineAsmOptions::ATT_SYNTAX) {
+                                generated_asm.push('%');
+                            }
+                            self.registers[*operand_idx]
+                                .unwrap()
+                                .emit(&mut generated_asm, self.arch, *modifier)
+                                .unwrap();
+                        }
+                        CInlineAsmOperand::Const { ref value } => {
+                            generated_asm.push_str(value);
+                        }
+                        CInlineAsmOperand::Symbol { ref symbol } => generated_asm.push_str(symbol),
                     }
-                    self.registers[*operand_idx]
-                        .unwrap()
-                        .emit(&mut generated_asm, self.arch, *modifier)
-                        .unwrap();
                 }
             }
         }
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index d561cf139b6..892e7c30e2f 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -218,22 +218,6 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
     let intrinsic = fx.tcx.item_name(instance.def_id());
     let substs = instance.substs;
 
-    let target = if let Some(target) = target {
-        target
-    } else {
-        // Insert non returning intrinsics here
-        match intrinsic {
-            sym::abort => {
-                fx.bcx.ins().trap(TrapCode::User(0));
-            }
-            sym::transmute => {
-                crate::base::codegen_panic(fx, "Transmuting to uninhabited type.", source_info);
-            }
-            _ => unimplemented!("unsupported intrinsic {}", intrinsic),
-        }
-        return;
-    };
-
     if intrinsic.as_str().starts_with("simd_") {
         self::simd::codegen_simd_intrinsic_call(
             fx,
@@ -241,11 +225,11 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             substs,
             args,
             destination,
-            target,
+            target.expect("target for simd intrinsic"),
             source_info.span,
         );
     } else if codegen_float_intrinsic_call(fx, intrinsic, args, destination) {
-        let ret_block = fx.get_block(target);
+        let ret_block = fx.get_block(target.expect("target for float intrinsic"));
         fx.bcx.ins().jump(ret_block, &[]);
     } else {
         codegen_regular_intrinsic_call(
@@ -255,7 +239,7 @@ pub(crate) fn codegen_intrinsic_call<'tcx>(
             substs,
             args,
             destination,
-            Some(target),
+            target,
             source_info,
         );
     }
@@ -382,6 +366,10 @@ fn codegen_regular_intrinsic_call<'tcx>(
     let usize_layout = fx.layout_of(fx.tcx.types.usize);
 
     match intrinsic {
+        sym::abort => {
+            fx.bcx.ins().trap(TrapCode::User(0));
+            return;
+        }
         sym::likely | sym::unlikely => {
             intrinsic_args!(fx, args => (a); intrinsic);
 
@@ -579,6 +567,11 @@ fn codegen_regular_intrinsic_call<'tcx>(
         sym::transmute => {
             intrinsic_args!(fx, args => (from); intrinsic);
 
+            if ret.layout().abi.is_uninhabited() {
+                crate::base::codegen_panic(fx, "Transmuting to uninhabited type.", source_info);
+                return;
+            }
+
             ret.write_cvalue_transmute(fx, from);
         }
         sym::write_bytes | sym::volatile_set_memory => {
diff --git a/src/value_and_place.rs b/src/value_and_place.rs
index fa06d6c3ba7..320eecaee00 100644
--- a/src/value_and_place.rs
+++ b/src/value_and_place.rs
@@ -588,10 +588,13 @@ impl<'tcx> CPlace<'tcx> {
                 return;
             }
             CPlaceInner::VarPair(_local, var1, var2) => {
-                let (ptr, meta) = from.force_stack(fx);
-                assert!(meta.is_none());
-                let (data1, data2) =
-                    CValue(CValueInner::ByRef(ptr, None), dst_layout).load_scalar_pair(fx);
+                let (data1, data2) = if from.layout().ty == dst_layout.ty {
+                    CValue(from.0, dst_layout).load_scalar_pair(fx)
+                } else {
+                    let (ptr, meta) = from.force_stack(fx);
+                    assert!(meta.is_none());
+                    CValue(CValueInner::ByRef(ptr, None), dst_layout).load_scalar_pair(fx)
+                };
                 let (dst_ty1, dst_ty2) = fx.clif_pair_type(self.layout().ty).unwrap();
                 transmute_value(fx, var1, data1, dst_ty1);
                 transmute_value(fx, var2, data2, dst_ty2);