diff --git a/compiler/rustc_codegen_llvm/src/asm.rs b/compiler/rustc_codegen_llvm/src/asm.rs
index f931698c38f..1d91c3fb17d 100644
--- a/compiler/rustc_codegen_llvm/src/asm.rs
+++ b/compiler/rustc_codegen_llvm/src/asm.rs
@@ -913,8 +913,10 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
         Primitive::Int(Integer::I16, _) => cx.type_i16(),
         Primitive::Int(Integer::I32, _) => cx.type_i32(),
         Primitive::Int(Integer::I64, _) => cx.type_i64(),
+        Primitive::Float(Float::F16) => cx.type_f16(),
         Primitive::Float(Float::F32) => cx.type_f32(),
         Primitive::Float(Float::F64) => cx.type_f64(),
+        Primitive::Float(Float::F128) => cx.type_f128(),
         // FIXME(erikdesjardins): handle non-default addrspace ptr sizes
         Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
         _ => unreachable!(),
@@ -948,7 +950,9 @@ fn llvm_fixup_input<'ll, 'tcx>(
                 value
             }
         }
-        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
+        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
+            if s.primitive() != Primitive::Float(Float::F128) =>
+        {
             let elem_ty = llvm_asm_scalar_type(bx.cx, s);
             let count = 16 / layout.size.bytes();
             let vec_ty = bx.cx.type_vector(elem_ty, count);
@@ -1090,7 +1094,9 @@ fn llvm_fixup_output<'ll, 'tcx>(
                 value
             }
         }
-        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
+        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
+            if s.primitive() != Primitive::Float(Float::F128) =>
+        {
             value = bx.extract_element(value, bx.const_i32(0));
             if let Primitive::Pointer(_) = s.primitive() {
                 value = bx.inttoptr(value, layout.llvm_type(bx.cx));
@@ -1222,7 +1228,9 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
                 layout.llvm_type(cx)
             }
         }
-        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
+        (InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
+            if s.primitive() != Primitive::Float(Float::F128) =>
+        {
             let elem_ty = llvm_asm_scalar_type(cx, s);
             let count = 16 / layout.size.bytes();
             cx.type_vector(elem_ty, count)
diff --git a/compiler/rustc_target/src/asm/aarch64.rs b/compiler/rustc_target/src/asm/aarch64.rs
index 041582b7df9..daf5162e8ac 100644
--- a/compiler/rustc_target/src/asm/aarch64.rs
+++ b/compiler/rustc_target/src/asm/aarch64.rs
@@ -61,9 +61,9 @@ impl AArch64InlineAsmRegClass {
         match self {
             Self::reg => types! { _: I8, I16, I32, I64, F16, F32, F64; },
             Self::vreg | Self::vreg_low16 => types! {
-                neon: I8, I16, I32, I64, F16, F32, F64,
-                VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2), VecF64(1),
-                VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(4),VecF16(8), VecF32(4), VecF64(2);
+                neon: I8, I16, I32, I64, F16, F32, F64, F128,
+                VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2), VecF64(1),
+                VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
             },
             Self::preg => &[],
         }
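An aside that is not part of the patch: the `types!` table above is what gates `asm!` operand type checking, so adding `F128` to the `neon` row is the user-visible effect of this change; `f128` becomes a legal `in`/`out` operand for the `vreg` and `vreg_low16` register classes. A minimal sketch of what that permits on nightly (hypothetical code; `q_roundtrip` is an invented name, and the example assumes `#![feature(f128)]` and an AArch64 target with NEON):

```rust
#![feature(f128)]
use std::arch::asm;

// Illustrative only: round-trip an `f128` through a NEON/FP register.
// `mov Vd.16b, Vn.16b` copies the full 128 bits; the `:v` modifier formats
// the operand as a `v0`-`v31` register name.
#[inline(never)]
pub fn q_roundtrip(x: f128) -> f128 {
    let y: f128;
    unsafe {
        asm!(
            "mov {dst:v}.16b, {src:v}.16b",
            src = in(vreg) x,
            dst = lateout(vreg) y,
            options(nomem, nostack, preserves_flags),
        );
    }
    y
}

fn main() {
    // 1.25 is exactly representable, so the copy can be checked directly.
    assert!(q_roundtrip(1.25) == 1.25);
}
```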
diff --git a/tests/assembly/asm/aarch64-types.rs b/tests/assembly/asm/aarch64-types.rs
index 4b506f180d8..f36345670e3 100644
--- a/tests/assembly/asm/aarch64-types.rs
+++ b/tests/assembly/asm/aarch64-types.rs
@@ -5,10 +5,12 @@
 //@ [arm64ec] compile-flags: --target arm64ec-pc-windows-msvc
 //@ [arm64ec] needs-llvm-components: aarch64
 
-#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16)]
+#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16, f128)]
 #![crate_type = "rlib"]
 #![no_core]
 #![allow(asm_sub_register, non_camel_case_types)]
+// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
+#![feature(auto_traits)]
 
 #[rustc_builtin_macro]
 macro_rules! asm {
@@ -41,8 +43,6 @@ pub struct i64x1(i64);
 #[repr(simd)]
 pub struct f16x4(f16, f16, f16, f16);
 #[repr(simd)]
-pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
-#[repr(simd)]
 pub struct f32x2(f32, f32);
 #[repr(simd)]
 pub struct f64x1(f64);
@@ -55,6 +55,8 @@ pub struct i32x4(i32, i32, i32, i32);
 #[repr(simd)]
 pub struct i64x2(i64, i64);
 #[repr(simd)]
+pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
+#[repr(simd)]
 pub struct f32x4(f32, f32, f32, f32);
 #[repr(simd)]
 pub struct f64x2(f64, f64);
@@ -66,13 +68,14 @@ impl Copy for i32 {}
 impl Copy for f32 {}
 impl Copy for i64 {}
 impl Copy for f64 {}
+impl Copy for f128 {}
 impl Copy for ptr {}
 impl Copy for i8x8 {}
 impl Copy for i16x4 {}
 impl Copy for i32x2 {}
 impl Copy for i64x1 {}
-impl Copy for f32x2 {}
 impl Copy for f16x4 {}
+impl Copy for f32x2 {}
 impl Copy for f64x1 {}
 impl Copy for i8x16 {}
 impl Copy for i16x8 {}
@@ -82,6 +85,12 @@ impl Copy for f16x8 {}
 impl Copy for f32x4 {}
 impl Copy for f64x2 {}
 
+// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
+#[lang = "freeze"]
+unsafe auto trait Freeze {}
+#[lang = "unpin"]
+auto trait Unpin {}
+
 extern "C" {
     fn extern_func();
     static extern_static: u8;
@@ -118,38 +127,44 @@ pub unsafe fn issue_75761() {
 
 macro_rules! check {
     ($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
+        // FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
+        // `f16` and `f128` without LLVM erroring.
+        // LLVM issue: 
         #[no_mangle]
-        pub unsafe fn $func(x: $ty) -> $ty {
+        pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
             // Hack to avoid function merging
             extern "Rust" {
                 fn dont_merge(s: &str);
             }
             dont_merge(stringify!($func));
 
+            let x = *inp;
             let y;
             asm!(
                 concat!($mov, " {:", $modifier, "}, {:", $modifier, "}"),
                 out($class) y,
                 in($class) x
             );
-            y
+            *out = y;
         }
     };
 }
 
 macro_rules! check_reg {
     ($func:ident $ty:ident $reg:tt $mov:literal) => {
+        // FIXME(f16_f128): See FIXME in `check!`
         #[no_mangle]
-        pub unsafe fn $func(x: $ty) -> $ty {
+        pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
             // Hack to avoid function merging
             extern "Rust" {
                 fn dont_merge(s: &str);
             }
             dont_merge(stringify!($func));
 
+            let x = *inp;
             let y;
             asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
-            y
+            *out = y;
         }
     };
 }
@@ -166,18 +181,18 @@ check!(reg_i8 i8 reg "mov" "");
 // CHECK: //NO_APP
 check!(reg_i16 i16 reg "mov" "");
 
+// CHECK-LABEL: {{("#)?}}reg_f16{{"?}}
+// CHECK: //APP
+// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
+// CHECK: //NO_APP
+check!(reg_f16 f16 reg "mov" "");
+
 // CHECK-LABEL: {{("#)?}}reg_i32{{"?}}
 // CHECK: //APP
 // CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
 // CHECK: //NO_APP
 check!(reg_i32 i32 reg "mov" "");
 
-// CHECK-LABEL: reg_f16:
-// CHECK: @APP
-// CHECK: mov {{[a-z0-9]+}}, {{[a-z0-9]+}}
-// CHECK: @NO_APP
-check!(reg_f16 f16 reg "mov");
-
 // CHECK-LABEL: {{("#)?}}reg_f32{{"?}}
 // CHECK: //APP
 // CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
 // CHECK: //NO_APP
 check!(reg_f32 f32 reg "mov" "");
@@ -214,6 +229,12 @@ check!(vreg_i8 i8 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_i16 i16 vreg "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_f16{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f16 f16 vreg "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_i32{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
 check!(vreg_i32 i32 vreg "fmov" "s");
@@ -238,6 +259,12 @@ check!(vreg_i64 i64 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_f64 f64 vreg "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_f128{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f128 f128 vreg "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_ptr{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
 check!(vreg_ptr ptr vreg "fmov" "s");
@@ -268,19 +295,11 @@ check!(vreg_i32x2 i32x2 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_i64x1 i64x1 vreg "fmov" "s");
 
-// neon-LABEL: vreg_f16x4:
-// neon: @APP
-// neon: vmov.f64 d{{[0-9]+}}, d{{[0-9]+}}
-// neon: @NO_APP
-#[cfg(neon)]
-check!(vreg_f16x4 f16x4 vreg "vmov.f64");
-
-// neon-LABEL: vreg_f16x8:
-// neon: @APP
-// neon: vorr q{{[0-9]+}}, q{{[0-9]+}}, q{{[0-9]+}}
-// neon: @NO_APP
-#[cfg(neon)]
-check!(vreg_f16x8 f16x8 vreg "vmov");
+// CHECK-LABEL: {{("#)?}}vreg_f16x4{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f16x4 f16x4 vreg "fmov" "s");
 
 // CHECK-LABEL: {{("#)?}}vreg_f32x2{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
 check!(vreg_f32x2 f32x2 vreg "fmov" "s");
@@ -318,6 +337,12 @@ check!(vreg_i32x4 i32x4 vreg "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_i64x2 i64x2 vreg "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_f16x8{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_f16x8 f16x8 vreg "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_f32x4{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
 check!(vreg_f32x4 f32x4 vreg "fmov" "s");
@@ -342,6 +367,12 @@ check!(vreg_low16_i8 i8 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_i16 i16 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f16{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f16 f16 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_f32{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
 check!(vreg_low16_f32 f32 vreg_low16 "fmov" "s");
@@ -360,6 +391,12 @@ check!(vreg_low16_i64 i64 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_f64 f64 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f128{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f128 f128 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_ptr{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
@@ -390,6 +427,12 @@ check!(vreg_low16_i32x2 i32x2 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_i64x1 i64x1 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f16x4{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f16x4 f16x4 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_f32x2{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
@@ -426,6 +469,12 @@ check!(vreg_low16_i32x4 i32x4 vreg_low16 "fmov" "s");
 // CHECK: //NO_APP
 check!(vreg_low16_i64x2 i64x2 vreg_low16 "fmov" "s");
 
+// CHECK-LABEL: {{("#)?}}vreg_low16_f16x8{{"?}}
+// CHECK: //APP
+// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
+// CHECK: //NO_APP
+check!(vreg_low16_f16x8 f16x8 vreg_low16 "fmov" "s");
+
 // CHECK-LABEL: {{("#)?}}vreg_low16_f32x4{{"?}}
 // CHECK: //APP
 // CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
 // CHECK: //NO_APP
@@ -450,6 +499,12 @@ check_reg!(x0_i8 i8 "x0" "mov");
 // CHECK: //NO_APP
 check_reg!(x0_i16 i16 "x0" "mov");
 
+// CHECK-LABEL: {{("#)?}}x0_f16{{"?}}
+// CHECK: //APP
+// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
+// CHECK: //NO_APP
+check_reg!(x0_f16 f16 "x0" "mov");
+
 // CHECK-LABEL: {{("#)?}}x0_i32{{"?}}
 // CHECK: //APP
 // CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
 // CHECK: //NO_APP
@@ -492,6 +547,12 @@ check_reg!(v0_i8 i8 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_i16 i16 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f16{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f16 f16 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_i32{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
 // CHECK: //NO_APP
@@ -516,6 +577,12 @@ check_reg!(v0_i64 i64 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_f64 f64 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f128{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f128 f128 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_ptr{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
 // CHECK: //NO_APP
@@ -546,6 +613,12 @@ check_reg!(v0_i32x2 i32x2 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_i64x1 i64x1 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f16x4{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f16x4 f16x4 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_f32x2{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
 // CHECK: //NO_APP
@@ -582,6 +655,12 @@ check_reg!(v0_i32x4 i32x4 "s0" "fmov");
 // CHECK: //NO_APP
 check_reg!(v0_i64x2 i64x2 "s0" "fmov");
 
+// CHECK-LABEL: {{("#)?}}v0_f16x8{{"?}}
+// CHECK: //APP
+// CHECK: fmov s0, s0
+// CHECK: //NO_APP
+check_reg!(v0_f16x8 f16x8 "s0" "fmov");
+
 // CHECK-LABEL: {{("#)?}}v0_f32x4{{"?}}
 // CHECK: //APP
 // CHECK: fmov s0, s0
diff --git a/tests/ui/asm/aarch64/type-check-3.stderr b/tests/ui/asm/aarch64/type-check-3.stderr
index 4bd97b93867..9d84d2666b3 100644
--- a/tests/ui/asm/aarch64/type-check-3.stderr
+++ b/tests/ui/asm/aarch64/type-check-3.stderr
@@ -111,7 +111,7 @@ error: type `Simd256bit` cannot be used with this register class
 LL |             asm!("{}", in(vreg) f64x4);
    |                                 ^^^^^
    |
-   = note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, i8x8, i16x4, i32x2, i64x1, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x4, f16x8, f32x4, f64x2
+   = note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, f128, i8x8, i16x4, i32x2, i64x1, f16x4, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
 
 error: incompatible types for asm inout argument
   --> $DIR/type-check-3.rs:88:33
diff --git a/tests/ui/asm/aarch64/type-f16.rs b/tests/ui/asm/aarch64/type-f16.rs
index 763ea4684da..e62d8130c93 100644
--- a/tests/ui/asm/aarch64/type-f16.rs
+++ b/tests/ui/asm/aarch64/type-f16.rs
@@ -1,21 +1,25 @@
 //@ only-aarch64
 //@ run-pass
+//@ needs-asm-support
+
+#![feature(f16)]
 
-#![feature(f16, f128)]
 use std::arch::asm;
+
 #[inline(never)]
 pub fn f32_to_f16_asm(a: f32) -> f16 {
     let ret: f16;
     unsafe {
         asm!(
-        "fcvt {ret:h}, {a:s}",
-        a = in(vreg) a,
-        ret = lateout(vreg) ret,
-        options(nomem, nostack),
+            "fcvt {ret:h}, {a:s}",
+            a = in(vreg) a,
+            ret = lateout(vreg) ret,
+            options(nomem, nostack),
         );
     }
     ret
 }
+
 fn main() {
     assert_eq!(f32_to_f16_asm(1.0 as f32), 1.0);
 }
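A closing aside, also not part of the patch: the FIXME(f16_f128) workaround in `aarch64-types.rs` amounts to passing operands by reference so that the test functions never take or return `f16`/`f128` by value, which is what currently makes LLVM error on arm64ec. Below is a simplified, hypothetical analogue of one `check!` expansion, written against `std` rather than the test's `#![no_core]` environment, with `black_box` standing in for the extern `dont_merge` hook and `vreg_f128_demo` an invented name:

```rust
#![feature(f128)]
#![allow(asm_sub_register)] // same allowance the test file uses for `fmov` with `:s`
use std::arch::asm;
use std::hint::black_box;

// Rough stand-in for `check!(vreg_f128 f128 vreg "fmov" "s")`: load from a
// reference, move through a NEON/FP register, store through a reference.
// Like the assembly test, this only demonstrates register-class acceptance;
// `fmov s, s` copies just the low 32 bits of the 128-bit value.
#[inline(never)]
pub unsafe fn vreg_f128_demo(inp: &f128, out: &mut f128) {
    // Keep the function body opaque so the compiler cannot fold it away.
    black_box("vreg_f128_demo");

    let x = *inp;
    let y;
    asm!("fmov {:s}, {:s}", out(vreg) y, in(vreg) x);
    *out = y;
}

fn main() {
    let x: f128 = 1.0;
    let mut y: f128 = 0.0;
    unsafe { vreg_f128_demo(&x, &mut y) };
    // No assertion: only the low 32 bits survive the `fmov s, s` move.
    black_box(y);
}
```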