diff --git a/Cargo.toml b/Cargo.toml
index f2ce714e8ff..f422d53380f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -35,9 +35,9 @@ smallvec = "1.8.1"
 
 [features]
 # Enable features not ready to be enabled when compiling as part of rustc
-unstable-features = ["jit", "inline_asm"]
+unstable-features = ["jit", "inline_asm_sym"]
 jit = ["cranelift-jit", "libloading"]
-inline_asm = []
+inline_asm_sym = []
 
 [package.metadata.rust-analyzer]
 rustc_private = true
diff --git a/scripts/setup_rust_fork.sh b/scripts/setup_rust_fork.sh
index 3e48fb006de..bbb8a010d96 100644
--- a/scripts/setup_rust_fork.sh
+++ b/scripts/setup_rust_fork.sh
@@ -4,7 +4,9 @@ set -e
 # Compiletest expects all standard library paths to start with /rustc/FAKE_PREFIX.
 # CG_CLIF_STDLIB_REMAP_PATH_PREFIX will cause cg_clif's build system to pass
 # --remap-path-prefix to handle this.
-CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build
+# CG_CLIF_FORCE_GNU_AS will force usage of `as` instead of the LLVM backend of rustc,
+# because the LLVM backend isn't compiled in here.
+CG_CLIF_FORCE_GNU_AS=1 CG_CLIF_STDLIB_REMAP_PATH_PREFIX=/rustc/FAKE_PREFIX ./y.sh build
 
 echo "[SETUP] Rust fork"
 git clone https://github.com/rust-lang/rust.git || true
diff --git a/scripts/test_bootstrap.sh b/scripts/test_bootstrap.sh
index 791d457993d..a8f6d7a2024 100755
--- a/scripts/test_bootstrap.sh
+++ b/scripts/test_bootstrap.sh
@@ -11,5 +11,7 @@ rm -r compiler/rustc_codegen_cranelift/{Cargo.*,src}
 cp ../Cargo.* compiler/rustc_codegen_cranelift/
 cp -r ../src compiler/rustc_codegen_cranelift/src
 
-./x.py build --stage 1 library/std
+# CG_CLIF_FORCE_GNU_AS will force usage of `as` instead of the LLVM backend of rustc,
+# because the LLVM backend isn't compiled in here.
+CG_CLIF_FORCE_GNU_AS=1 ./x.py build --stage 1 library/std
 popd
diff --git a/src/global_asm.rs b/src/global_asm.rs
index 6692d1b85ea..b14007f4e52 100644
--- a/src/global_asm.rs
+++ b/src/global_asm.rs
@@ -46,6 +46,13 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String,
                             global_asm.push_str(&string);
                         }
                         InlineAsmOperand::SymFn { anon_const } => {
+                            if cfg!(not(feature = "inline_asm_sym")) {
+                                tcx.sess.span_err(
+                                    item.span,
+                                    "asm! and global_asm! sym operands are not yet supported",
+                                );
+                            }
+
                             let ty = tcx.typeck_body(anon_const.body).node_type(anon_const.hir_id);
                             let instance = match ty.kind() {
                                 &ty::FnDef(def_id, args) => Instance::new(def_id, args),
@@ -57,6 +64,13 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String,
                             global_asm.push_str(symbol.name);
                         }
                         InlineAsmOperand::SymStatic { path: _, def_id } => {
+                            if cfg!(not(feature = "inline_asm_sym")) {
+                                tcx.sess.span_err(
+                                    item.span,
+                                    "asm! and global_asm! sym operands are not yet supported",
+                                );
+                            }
+
                             let instance = Instance::mono(tcx, def_id).polymorphize(tcx);
                             let symbol = tcx.symbol_name(instance);
                             global_asm.push_str(symbol.name);
@@ -81,22 +95,23 @@ pub(crate) fn codegen_global_asm_item(tcx: TyCtxt<'_>, global_asm: &mut String,
     }
 }
 
-pub(crate) fn asm_supported(tcx: TyCtxt<'_>) -> bool {
-    cfg!(feature = "inline_asm") && !tcx.sess.target.is_like_windows
-}
-
 #[derive(Debug)]
 pub(crate) struct GlobalAsmConfig {
-    asm_enabled: bool,
     assembler: PathBuf,
+    target: String,
     pub(crate) output_filenames: Arc<OutputFilenames>,
 }
 
 impl GlobalAsmConfig {
     pub(crate) fn new(tcx: TyCtxt<'_>) -> Self {
         GlobalAsmConfig {
-            asm_enabled: asm_supported(tcx),
             assembler: crate::toolchain::get_toolchain_binary(tcx.sess, "as"),
+            target: match &tcx.sess.opts.target_triple {
+                rustc_target::spec::TargetTriple::TargetTriple(triple) => triple.clone(),
+                rustc_target::spec::TargetTriple::TargetJson { path_for_rustdoc, .. } => {
+                    path_for_rustdoc.to_str().unwrap().to_owned()
+                }
+            },
             output_filenames: tcx.output_filenames(()).clone(),
         }
     }
@@ -111,21 +126,6 @@ pub(crate) fn compile_global_asm(
         return Ok(None);
     }
 
-    if !config.asm_enabled {
-        if global_asm.contains("__rust_probestack") {
-            return Ok(None);
-        }
-
-        if cfg!(not(feature = "inline_asm")) {
-            return Err(
-                "asm! and global_asm! support is disabled while compiling rustc_codegen_cranelift"
-                    .to_owned(),
-            );
-        } else {
-            return Err("asm! and global_asm! are not yet supported on Windows".to_owned());
-        }
-    }
-
     // Remove all LLVM style comments
     let mut global_asm = global_asm
         .lines()
@@ -134,20 +134,67 @@ pub(crate) fn compile_global_asm(
         .join("\n");
     global_asm.push('\n');
 
-    let output_object_file = config.output_filenames.temp_path(OutputType::Object, Some(cgu_name));
+    let global_asm_object_file = add_file_stem_postfix(
+        config.output_filenames.temp_path(OutputType::Object, Some(cgu_name)),
+        ".asm",
+    );
 
     // Assemble `global_asm`
-    let global_asm_object_file = add_file_stem_postfix(output_object_file, ".asm");
-    let mut child = Command::new(&config.assembler)
-        .arg("-o")
-        .arg(&global_asm_object_file)
-        .stdin(Stdio::piped())
-        .spawn()
-        .expect("Failed to spawn `as`.");
-    child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap();
-    let status = child.wait().expect("Failed to wait for `as`.");
-    if !status.success() {
-        return Err(format!("Failed to assemble `{}`", global_asm));
+    if option_env!("CG_CLIF_FORCE_GNU_AS").is_some() {
+        let mut child = Command::new(&config.assembler)
+            .arg("-o")
+            .arg(&global_asm_object_file)
+            .stdin(Stdio::piped())
+            .spawn()
+            .expect("Failed to spawn `as`.");
+        child.stdin.take().unwrap().write_all(global_asm.as_bytes()).unwrap();
+        let status = child.wait().expect("Failed to wait for `as`.");
+        if !status.success() {
+            return Err(format!("Failed to assemble `{}`", global_asm));
+        }
+    } else {
+        let mut child = Command::new(std::env::current_exe().unwrap())
+            .arg("--target")
+            .arg(&config.target)
+            .arg("--crate-type")
+            .arg("staticlib")
+            .arg("--emit")
+            .arg("obj")
+            .arg("-o")
+            .arg(&global_asm_object_file)
+            .arg("-")
+            .arg("-Abad_asm_style")
+            .arg("-Zcodegen-backend=llvm")
+            .stdin(Stdio::piped())
+            .spawn()
+            .expect("Failed to spawn `rustc`.");
+        let mut stdin = child.stdin.take().unwrap();
+        stdin
+            .write_all(
+                br####"
+                #![feature(decl_macro, no_core, rustc_attrs)]
+                #![allow(internal_features)]
+                #![no_core]
+                #[rustc_builtin_macro]
+                #[rustc_macro_transparency = "semitransparent"]
+                macro global_asm() { /* compiler built-in */ }
+                global_asm!(r###"
+                "####,
+            )
+            .unwrap();
+        stdin.write_all(global_asm.as_bytes()).unwrap();
+        stdin
+            .write_all(
+                br####"
+                "###);
+                "####,
+            )
+            .unwrap();
+        std::mem::drop(stdin);
+        let status = child.wait().expect("Failed to wait for `rustc`.");
+        if !status.success() {
+            return Err(format!("Failed to assemble `{}`", global_asm));
+        }
     }
 
     Ok(Some(global_asm_object_file))
diff --git a/src/inline_asm.rs b/src/inline_asm.rs
index 0517c609337..331649b2ec2 100644
--- a/src/inline_asm.rs
+++ b/src/inline_asm.rs
@@ -8,7 +8,6 @@ use rustc_span::sym;
 use rustc_target::asm::*;
 use target_lexicon::BinaryFormat;
 
-use crate::global_asm::asm_supported;
 use crate::prelude::*;
 
 enum CInlineAsmOperand<'tcx> {
@@ -45,208 +44,11 @@ pub(crate) fn codegen_inline_asm<'tcx>(
 ) {
     // FIXME add .eh_frame unwind info directives
 
-    if !asm_supported(fx.tcx) {
-        if template.is_empty() {
-            let destination_block = fx.get_block(destination.unwrap());
-            fx.bcx.ins().jump(destination_block, &[]);
-            return;
-        }
-
-        // Used by panic_abort
-        if template[0] == InlineAsmTemplatePiece::String("int $$0x29".to_string()) {
-            fx.bcx.ins().trap(TrapCode::User(1));
-            return;
-        }
-
-        // Used by stdarch
-        if template[0] == InlineAsmTemplatePiece::String("mov ".to_string())
-            && matches!(
-                template[1],
-                InlineAsmTemplatePiece::Placeholder {
-                    operand_idx: 0,
-                    modifier: Some('r'),
-                    span: _
-                }
-            )
-            && template[2] == InlineAsmTemplatePiece::String(", rbx".to_string())
-            && template[3] == InlineAsmTemplatePiece::String("\n".to_string())
-            && template[4] == InlineAsmTemplatePiece::String("cpuid".to_string())
-            && template[5] == InlineAsmTemplatePiece::String("\n".to_string())
-            && template[6] == InlineAsmTemplatePiece::String("xchg ".to_string())
-            && matches!(
-                template[7],
-                InlineAsmTemplatePiece::Placeholder {
-                    operand_idx: 0,
-                    modifier: Some('r'),
-                    span: _
-                }
-            )
-            && template[8] == InlineAsmTemplatePiece::String(", rbx".to_string())
-        {
-            assert_eq!(operands.len(), 4);
-            let (leaf, eax_place) = match operands[1] {
-                InlineAsmOperand::InOut {
-                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
-                    late: _,
-                    ref in_value,
-                    out_place: Some(out_place),
-                } => (
-                    crate::base::codegen_operand(fx, in_value).load_scalar(fx),
-                    crate::base::codegen_place(fx, out_place),
-                ),
-                _ => unreachable!(),
-            };
-            let ebx_place = match operands[0] {
-                InlineAsmOperand::Out {
-                    reg:
-                        InlineAsmRegOrRegClass::RegClass(InlineAsmRegClass::X86(
-                            X86InlineAsmRegClass::reg,
-                        )),
-                    late: _,
-                    place: Some(place),
-                } => crate::base::codegen_place(fx, place),
-                _ => unreachable!(),
-            };
-            let (sub_leaf, ecx_place) = match operands[2] {
-                InlineAsmOperand::InOut {
-                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
-                    late: _,
-                    ref in_value,
-                    out_place: Some(out_place),
-                } => (
-                    crate::base::codegen_operand(fx, in_value).load_scalar(fx),
-                    crate::base::codegen_place(fx, out_place),
-                ),
-                _ => unreachable!(),
-            };
-            let edx_place = match operands[3] {
-                InlineAsmOperand::Out {
-                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
-                    late: _,
-                    place: Some(place),
-                } => crate::base::codegen_place(fx, place),
-                _ => unreachable!(),
-            };
-
-            let (eax, ebx, ecx, edx) = crate::intrinsics::codegen_cpuid_call(fx, leaf, sub_leaf);
-
-            eax_place.write_cvalue(fx, CValue::by_val(eax, fx.layout_of(fx.tcx.types.u32)));
-            ebx_place.write_cvalue(fx, CValue::by_val(ebx, fx.layout_of(fx.tcx.types.u32)));
-            ecx_place.write_cvalue(fx, CValue::by_val(ecx, fx.layout_of(fx.tcx.types.u32)));
-            edx_place.write_cvalue(fx, CValue::by_val(edx, fx.layout_of(fx.tcx.types.u32)));
-            let destination_block = fx.get_block(destination.unwrap());
-            fx.bcx.ins().jump(destination_block, &[]);
-            return;
-        }
-
-        // Used by compiler-builtins
-        if fx.tcx.symbol_name(fx.instance).name.starts_with("___chkstk") {
-            // ___chkstk, ___chkstk_ms and __alloca are only used on Windows
-            crate::trap::trap_unimplemented(fx, "Stack probes are not supported");
-            return;
-        } else if fx.tcx.symbol_name(fx.instance).name == "__alloca" {
-            crate::trap::trap_unimplemented(fx, "Alloca is not supported");
-            return;
-        }
-
-        // Used by core::hint::spin_loop()
-        if template[0]
-            == InlineAsmTemplatePiece::String(".insn i 0x0F, 0, x0, x0, 0x010".to_string())
-            && template.len() == 1
-        {
-            let destination_block = fx.get_block(destination.unwrap());
-            fx.bcx.ins().jump(destination_block, &[]);
-            return;
-        }
-
-        // Used by measureme
-        if template[0] == InlineAsmTemplatePiece::String("xor %eax, %eax".to_string())
-            && template[1] == InlineAsmTemplatePiece::String("\n".to_string())
-            && template[2] == InlineAsmTemplatePiece::String("mov %rbx, ".to_string())
-            && matches!(
-                template[3],
-                InlineAsmTemplatePiece::Placeholder {
-                    operand_idx: 0,
-                    modifier: Some('r'),
-                    span: _
-                }
-            )
-            && template[4] == InlineAsmTemplatePiece::String("\n".to_string())
-            && template[5] == InlineAsmTemplatePiece::String("cpuid".to_string())
-            && template[6] == InlineAsmTemplatePiece::String("\n".to_string())
-            && template[7] == InlineAsmTemplatePiece::String("mov ".to_string())
-            && matches!(
-                template[8],
-                InlineAsmTemplatePiece::Placeholder {
-                    operand_idx: 0,
-                    modifier: Some('r'),
-                    span: _
-                }
-            )
-            && template[9] == InlineAsmTemplatePiece::String(", %rbx".to_string())
-        {
-            let destination_block = fx.get_block(destination.unwrap());
-            fx.bcx.ins().jump(destination_block, &[]);
-            return;
-        } else if template[0] == InlineAsmTemplatePiece::String("rdpmc".to_string()) {
-            // Return zero dummy values for all performance counters
-            match operands[0] {
-                InlineAsmOperand::In {
-                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::cx)),
-                    value: _,
-                } => {}
-                _ => unreachable!(),
-            };
-            let lo = match operands[1] {
-                InlineAsmOperand::Out {
-                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::ax)),
-                    late: true,
-                    place: Some(place),
-                } => crate::base::codegen_place(fx, place),
-                _ => unreachable!(),
-            };
-            let hi = match operands[2] {
-                InlineAsmOperand::Out {
-                    reg: InlineAsmRegOrRegClass::Reg(InlineAsmReg::X86(X86InlineAsmReg::dx)),
-                    late: true,
-                    place: Some(place),
-                } => crate::base::codegen_place(fx, place),
-                _ => unreachable!(),
-            };
-
-            let u32_layout = fx.layout_of(fx.tcx.types.u32);
-            let zero = fx.bcx.ins().iconst(types::I32, 0);
-            lo.write_cvalue(fx, CValue::by_val(zero, u32_layout));
-            hi.write_cvalue(fx, CValue::by_val(zero, u32_layout));
-
-            let destination_block = fx.get_block(destination.unwrap());
-            fx.bcx.ins().jump(destination_block, &[]);
-            return;
-        } else if template[0] == InlineAsmTemplatePiece::String("lock xadd ".to_string())
-            && matches!(
-                template[1],
-                InlineAsmTemplatePiece::Placeholder { operand_idx: 1, modifier: None, span: _ }
-            )
-            && template[2] == InlineAsmTemplatePiece::String(", (".to_string())
-            && matches!(
-                template[3],
-                InlineAsmTemplatePiece::Placeholder { operand_idx: 0, modifier: None, span: _ }
-            )
-            && template[4] == InlineAsmTemplatePiece::String(")".to_string())
-        {
-            let destination_block = fx.get_block(destination.unwrap());
-            fx.bcx.ins().jump(destination_block, &[]);
-            return;
-        }
-
-        if cfg!(not(feature = "inline_asm")) {
-            fx.tcx.sess.span_err(
-                span,
-                "asm! and global_asm! support is disabled while compiling rustc_codegen_cranelift",
-            );
-        } else {
-            fx.tcx.sess.span_err(span, "asm! and global_asm! are not yet supported on Windows");
-        }
+    // Used by panic_abort on Windows, but uses a syntax which only happens to work with
+    // asm!() by accident and breaks with the GNU assembler as well as global_asm!() for
+    // the LLVM backend. Uses first() rather than [0] since the template may be empty.
+    if template.first() == Some(&InlineAsmTemplatePiece::String("int $$0x29".to_string())) {
+        fx.bcx.ins().trap(TrapCode::User(1));
         return;
     }
 
@@ -280,6 +82,12 @@ pub(crate) fn codegen_inline_asm<'tcx>(
                 CInlineAsmOperand::Const { value }
             }
             InlineAsmOperand::SymFn { ref value } => {
+                if cfg!(not(feature = "inline_asm_sym")) {
+                    fx.tcx
+                        .sess
+                        .span_err(span, "asm! and global_asm! sym operands are not yet supported");
+                }
+
                 let const_ = fx.monomorphize(value.const_);
                 if let ty::FnDef(def_id, args) = *const_.ty().kind() {
                     let instance = ty::Instance::resolve_for_fn_ptr(
diff --git a/src/intrinsics/cpuid.rs b/src/intrinsics/cpuid.rs
deleted file mode 100644
index 5120b89c4e8..00000000000
--- a/src/intrinsics/cpuid.rs
+++ /dev/null
@@ -1,74 +0,0 @@
-//! Emulation of a subset of the cpuid x86 instruction.
-
-use crate::prelude::*;
-
-/// Emulates a subset of the cpuid x86 instruction.
-///
-/// This emulates an intel cpu with sse and sse2 support, but which doesn't support anything else.
-pub(crate) fn codegen_cpuid_call<'tcx>(
-    fx: &mut FunctionCx<'_, '_, 'tcx>,
-    leaf: Value,
-    _sub_leaf: Value,
-) -> (Value, Value, Value, Value) {
-    let leaf_0 = fx.bcx.create_block();
-    let leaf_1 = fx.bcx.create_block();
-    let leaf_7 = fx.bcx.create_block();
-    let leaf_8000_0000 = fx.bcx.create_block();
-    let leaf_8000_0001 = fx.bcx.create_block();
-    let unsupported_leaf = fx.bcx.create_block();
-
-    let dest = fx.bcx.create_block();
-    let eax = fx.bcx.append_block_param(dest, types::I32);
-    let ebx = fx.bcx.append_block_param(dest, types::I32);
-    let ecx = fx.bcx.append_block_param(dest, types::I32);
-    let edx = fx.bcx.append_block_param(dest, types::I32);
-
-    let mut switch = cranelift_frontend::Switch::new();
-    switch.set_entry(0, leaf_0);
-    switch.set_entry(1, leaf_1);
-    switch.set_entry(7, leaf_7);
-    switch.set_entry(0x8000_0000, leaf_8000_0000);
-    switch.set_entry(0x8000_0001, leaf_8000_0001);
-    switch.emit(&mut fx.bcx, leaf, unsupported_leaf);
-
-    fx.bcx.switch_to_block(leaf_0);
-    let max_basic_leaf = fx.bcx.ins().iconst(types::I32, 1);
-    let vend0 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"Genu")));
-    let vend2 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ineI")));
-    let vend1 = fx.bcx.ins().iconst(types::I32, i64::from(u32::from_le_bytes(*b"ntel")));
-    fx.bcx.ins().jump(dest, &[max_basic_leaf, vend0, vend1, vend2]);
-
-    fx.bcx.switch_to_block(leaf_1);
-    let cpu_signature = fx.bcx.ins().iconst(types::I32, 0);
-    let additional_information = fx.bcx.ins().iconst(types::I32, 0);
-    let ecx_features = fx.bcx.ins().iconst(types::I32, 0);
-    let edx_features = fx.bcx.ins().iconst(types::I32, 1 << 25 /* sse */ | 1 << 26 /* sse2 */);
-    fx.bcx.ins().jump(dest, &[cpu_signature, additional_information, ecx_features, edx_features]);
-
-    fx.bcx.switch_to_block(leaf_7);
-    // This leaf technically has subleaves, but we just return zero for all subleaves.
-    let zero = fx.bcx.ins().iconst(types::I32, 0);
-    fx.bcx.ins().jump(dest, &[zero, zero, zero, zero]);
-
-    fx.bcx.switch_to_block(leaf_8000_0000);
-    let extended_max_basic_leaf = fx.bcx.ins().iconst(types::I32, 0);
-    let zero = fx.bcx.ins().iconst(types::I32, 0);
-    fx.bcx.ins().jump(dest, &[extended_max_basic_leaf, zero, zero, zero]);
-
-    fx.bcx.switch_to_block(leaf_8000_0001);
-    let zero = fx.bcx.ins().iconst(types::I32, 0);
-    let proc_info_ecx = fx.bcx.ins().iconst(types::I32, 0);
-    let proc_info_edx = fx.bcx.ins().iconst(types::I32, 0);
-    fx.bcx.ins().jump(dest, &[zero, zero, proc_info_ecx, proc_info_edx]);
-
-    fx.bcx.switch_to_block(unsupported_leaf);
-    crate::trap::trap_unimplemented(
-        fx,
-        "__cpuid_count arch intrinsic doesn't yet support specified leaf",
-    );
-
-    fx.bcx.switch_to_block(dest);
-    fx.bcx.ins().nop();
-
-    (eax, ebx, ecx, edx)
-}
diff --git a/src/intrinsics/mod.rs b/src/intrinsics/mod.rs
index e94091e6a25..83d5d53624e 100644
--- a/src/intrinsics/mod.rs
+++ b/src/intrinsics/mod.rs
@@ -12,7 +12,6 @@ macro_rules! intrinsic_args {
     }
 }
 
-mod cpuid;
 mod llvm;
 mod llvm_aarch64;
 mod llvm_x86;
@@ -25,7 +24,6 @@ use rustc_middle::ty::print::{with_no_trimmed_paths, with_no_visible_paths};
 use rustc_middle::ty::GenericArgsRef;
 use rustc_span::symbol::{kw, sym, Symbol};
 
-pub(crate) use self::cpuid::codegen_cpuid_call;
 pub(crate) use self::llvm::codegen_llvm_intrinsic_call;
 use crate::prelude::*;