mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-25 16:24:46 +00:00
Rollup merge of #129536 - beetrees:f16-f128-inline-asm-aarch64, r=Amanieu
Add `f16` and `f128` inline ASM support for `aarch64` Adds `f16` and `f128` inline ASM support for `aarch64`. SIMD vector types are taken from [the ARM intrinsics list](https://developer.arm.com/architectures/instruction-sets/intrinsics/#f:@navigationhierarchiesreturnbasetype=[float]&f:@navigationhierarchieselementbitsize=[16]&f:@navigationhierarchiesarchitectures=[A64]). Based on the work of `@lengrongfu` in #127043. Relevant issue: #125398 Tracking issue: #116909 `@rustbot` label +F-f16_and_f128 try-job: aarch64-gnu try-job: aarch64-apple
This commit is contained in:
commit
8ea70e9537
@ -913,8 +913,10 @@ fn llvm_asm_scalar_type<'ll>(cx: &CodegenCx<'ll, '_>, scalar: Scalar) -> &'ll Ty
|
||||
Primitive::Int(Integer::I16, _) => cx.type_i16(),
|
||||
Primitive::Int(Integer::I32, _) => cx.type_i32(),
|
||||
Primitive::Int(Integer::I64, _) => cx.type_i64(),
|
||||
Primitive::Float(Float::F16) => cx.type_f16(),
|
||||
Primitive::Float(Float::F32) => cx.type_f32(),
|
||||
Primitive::Float(Float::F64) => cx.type_f64(),
|
||||
Primitive::Float(Float::F128) => cx.type_f128(),
|
||||
// FIXME(erikdesjardins): handle non-default addrspace ptr sizes
|
||||
Primitive::Pointer(_) => cx.type_from_integer(dl.ptr_sized_integer()),
|
||||
_ => unreachable!(),
|
||||
@ -948,7 +950,9 @@ fn llvm_fixup_input<'ll, 'tcx>(
|
||||
value
|
||||
}
|
||||
}
|
||||
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
|
||||
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
|
||||
if s.primitive() != Primitive::Float(Float::F128) =>
|
||||
{
|
||||
let elem_ty = llvm_asm_scalar_type(bx.cx, s);
|
||||
let count = 16 / layout.size.bytes();
|
||||
let vec_ty = bx.cx.type_vector(elem_ty, count);
|
||||
@ -1090,7 +1094,9 @@ fn llvm_fixup_output<'ll, 'tcx>(
|
||||
value
|
||||
}
|
||||
}
|
||||
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
|
||||
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
|
||||
if s.primitive() != Primitive::Float(Float::F128) =>
|
||||
{
|
||||
value = bx.extract_element(value, bx.const_i32(0));
|
||||
if let Primitive::Pointer(_) = s.primitive() {
|
||||
value = bx.inttoptr(value, layout.llvm_type(bx.cx));
|
||||
@ -1222,7 +1228,9 @@ fn llvm_fixup_output_type<'ll, 'tcx>(
|
||||
layout.llvm_type(cx)
|
||||
}
|
||||
}
|
||||
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s)) => {
|
||||
(InlineAsmRegClass::AArch64(AArch64InlineAsmRegClass::vreg_low16), Abi::Scalar(s))
|
||||
if s.primitive() != Primitive::Float(Float::F128) =>
|
||||
{
|
||||
let elem_ty = llvm_asm_scalar_type(cx, s);
|
||||
let count = 16 / layout.size.bytes();
|
||||
cx.type_vector(elem_ty, count)
|
||||
|
@ -59,11 +59,11 @@ impl AArch64InlineAsmRegClass {
|
||||
_arch: InlineAsmArch,
|
||||
) -> &'static [(InlineAsmType, Option<Symbol>)] {
|
||||
match self {
|
||||
Self::reg => types! { _: I8, I16, I32, I64, F32, F64; },
|
||||
Self::reg => types! { _: I8, I16, I32, I64, F16, F32, F64; },
|
||||
Self::vreg | Self::vreg_low16 => types! {
|
||||
neon: I8, I16, I32, I64, F32, F64,
|
||||
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF32(2), VecF64(1),
|
||||
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF32(4), VecF64(2);
|
||||
neon: I8, I16, I32, I64, F16, F32, F64, F128,
|
||||
VecI8(8), VecI16(4), VecI32(2), VecI64(1), VecF16(4), VecF32(2), VecF64(1),
|
||||
VecI8(16), VecI16(8), VecI32(4), VecI64(2), VecF16(8), VecF32(4), VecF64(2);
|
||||
},
|
||||
Self::preg => &[],
|
||||
}
|
||||
|
@ -5,10 +5,12 @@
|
||||
//@ [arm64ec] compile-flags: --target arm64ec-pc-windows-msvc
|
||||
//@ [arm64ec] needs-llvm-components: aarch64
|
||||
|
||||
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch)]
|
||||
#![feature(no_core, lang_items, rustc_attrs, repr_simd, asm_experimental_arch, f16, f128)]
|
||||
#![crate_type = "rlib"]
|
||||
#![no_core]
|
||||
#![allow(asm_sub_register, non_camel_case_types)]
|
||||
// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
|
||||
#![feature(auto_traits)]
|
||||
|
||||
#[rustc_builtin_macro]
|
||||
macro_rules! asm {
|
||||
@ -39,6 +41,8 @@ pub struct i32x2(i32, i32);
|
||||
#[repr(simd)]
|
||||
pub struct i64x1(i64);
|
||||
#[repr(simd)]
|
||||
pub struct f16x4(f16, f16, f16, f16);
|
||||
#[repr(simd)]
|
||||
pub struct f32x2(f32, f32);
|
||||
#[repr(simd)]
|
||||
pub struct f64x1(f64);
|
||||
@ -51,30 +55,42 @@ pub struct i32x4(i32, i32, i32, i32);
|
||||
#[repr(simd)]
|
||||
pub struct i64x2(i64, i64);
|
||||
#[repr(simd)]
|
||||
pub struct f16x8(f16, f16, f16, f16, f16, f16, f16, f16);
|
||||
#[repr(simd)]
|
||||
pub struct f32x4(f32, f32, f32, f32);
|
||||
#[repr(simd)]
|
||||
pub struct f64x2(f64, f64);
|
||||
|
||||
impl Copy for i8 {}
|
||||
impl Copy for i16 {}
|
||||
impl Copy for f16 {}
|
||||
impl Copy for i32 {}
|
||||
impl Copy for f32 {}
|
||||
impl Copy for i64 {}
|
||||
impl Copy for f64 {}
|
||||
impl Copy for f128 {}
|
||||
impl Copy for ptr {}
|
||||
impl Copy for i8x8 {}
|
||||
impl Copy for i16x4 {}
|
||||
impl Copy for i32x2 {}
|
||||
impl Copy for i64x1 {}
|
||||
impl Copy for f16x4 {}
|
||||
impl Copy for f32x2 {}
|
||||
impl Copy for f64x1 {}
|
||||
impl Copy for i8x16 {}
|
||||
impl Copy for i16x8 {}
|
||||
impl Copy for i32x4 {}
|
||||
impl Copy for i64x2 {}
|
||||
impl Copy for f16x8 {}
|
||||
impl Copy for f32x4 {}
|
||||
impl Copy for f64x2 {}
|
||||
|
||||
// FIXME(f16_f128): Only needed for FIXME in check! and check_reg!
|
||||
#[lang = "freeze"]
|
||||
unsafe auto trait Freeze {}
|
||||
#[lang = "unpin"]
|
||||
auto trait Unpin {}
|
||||
|
||||
extern "C" {
|
||||
fn extern_func();
|
||||
static extern_static: u8;
|
||||
@ -111,38 +127,44 @@ pub unsafe fn issue_75761() {
|
||||
|
||||
macro_rules! check {
|
||||
($func:ident $ty:ident $class:ident $mov:literal $modifier:literal) => {
|
||||
// FIXME(f16_f128): Change back to `$func(x: $ty) -> $ty` once arm64ec can pass and return
|
||||
// `f16` and `f128` without LLVM erroring.
|
||||
// LLVM issue: <https://github.com/llvm/llvm-project/issues/94434>
|
||||
#[no_mangle]
|
||||
pub unsafe fn $func(x: $ty) -> $ty {
|
||||
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
|
||||
// Hack to avoid function merging
|
||||
extern "Rust" {
|
||||
fn dont_merge(s: &str);
|
||||
}
|
||||
dont_merge(stringify!($func));
|
||||
|
||||
let x = *inp;
|
||||
let y;
|
||||
asm!(
|
||||
concat!($mov, " {:", $modifier, "}, {:", $modifier, "}"),
|
||||
out($class) y,
|
||||
in($class) x
|
||||
);
|
||||
y
|
||||
*out = y;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! check_reg {
|
||||
($func:ident $ty:ident $reg:tt $mov:literal) => {
|
||||
// FIXME(f16_f128): See FIXME in `check!`
|
||||
#[no_mangle]
|
||||
pub unsafe fn $func(x: $ty) -> $ty {
|
||||
pub unsafe fn $func(inp: &$ty, out: &mut $ty) {
|
||||
// Hack to avoid function merging
|
||||
extern "Rust" {
|
||||
fn dont_merge(s: &str);
|
||||
}
|
||||
dont_merge(stringify!($func));
|
||||
|
||||
let x = *inp;
|
||||
let y;
|
||||
asm!(concat!($mov, " ", $reg, ", ", $reg), lateout($reg) y, in($reg) x);
|
||||
y
|
||||
*out = y;
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -159,6 +181,12 @@ check!(reg_i8 i8 reg "mov" "");
|
||||
// CHECK: //NO_APP
|
||||
check!(reg_i16 i16 reg "mov" "");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}reg_f16{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(reg_f16 f16 reg "mov" "");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}reg_i32{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
|
||||
@ -201,6 +229,12 @@ check!(vreg_i8 i8 vreg "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_i16 i16 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_f16{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_f16 f16 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_i32{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -225,6 +259,12 @@ check!(vreg_i64 i64 vreg "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_f64 f64 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_f128{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_f128 f128 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_ptr{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -255,6 +295,12 @@ check!(vreg_i32x2 i32x2 vreg "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_i64x1 i64x1 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_f16x4{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_f16x4 f16x4 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_f32x2{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -291,6 +337,12 @@ check!(vreg_i32x4 i32x4 vreg "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_i64x2 i64x2 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_f16x8{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_f16x8 f16x8 vreg "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_f32x4{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -315,6 +367,12 @@ check!(vreg_low16_i8 i8 vreg_low16 "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_i16 i16 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f16{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_f16 f16 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f32{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -333,6 +391,12 @@ check!(vreg_low16_i64 i64 vreg_low16 "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_f64 f64 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f128{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_f128 f128 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_ptr{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -363,6 +427,12 @@ check!(vreg_low16_i32x2 i32x2 vreg_low16 "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_i64x1 i64x1 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f16x4{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_f16x4 f16x4 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f32x2{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -399,6 +469,12 @@ check!(vreg_low16_i32x4 i32x4 vreg_low16 "fmov" "s");
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_i64x2 i64x2 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f16x8{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check!(vreg_low16_f16x8 f16x8 vreg_low16 "fmov" "s");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}vreg_low16_f32x4{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s{{[0-9]+}}, s{{[0-9]+}}
|
||||
@ -423,6 +499,12 @@ check_reg!(x0_i8 i8 "x0" "mov");
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(x0_i16 i16 "x0" "mov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}x0_f16{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(x0_f16 f16 "x0" "mov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}x0_i32{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: mov x{{[0-9]+}}, x{{[0-9]+}}
|
||||
@ -465,6 +547,12 @@ check_reg!(v0_i8 i8 "s0" "fmov");
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_i16 i16 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_f16{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_f16 f16 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_i32{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
@ -489,6 +577,12 @@ check_reg!(v0_i64 i64 "s0" "fmov");
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_f64 f64 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_f128{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_f128 f128 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_ptr{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
@ -519,6 +613,12 @@ check_reg!(v0_i32x2 i32x2 "s0" "fmov");
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_i64x1 i64x1 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_f16x4{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_f16x4 f16x4 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_f32x2{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
@ -555,6 +655,12 @@ check_reg!(v0_i32x4 i32x4 "s0" "fmov");
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_i64x2 i64x2 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_f16x8{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
// CHECK: //NO_APP
|
||||
check_reg!(v0_f16x8 f16x8 "s0" "fmov");
|
||||
|
||||
// CHECK-LABEL: {{("#)?}}v0_f32x4{{"?}}
|
||||
// CHECK: //APP
|
||||
// CHECK: fmov s0, s0
|
||||
|
@ -95,7 +95,7 @@ error: type `i128` cannot be used with this register class
|
||||
LL | asm!("{}", in(reg) 0i128);
|
||||
| ^^^^^
|
||||
|
|
||||
= note: register class `reg` supports these types: i8, i16, i32, i64, f32, f64
|
||||
= note: register class `reg` supports these types: i8, i16, i32, i64, f16, f32, f64
|
||||
|
||||
error: type `float64x2_t` cannot be used with this register class
|
||||
--> $DIR/type-check-3.rs:75:28
|
||||
@ -103,7 +103,7 @@ error: type `float64x2_t` cannot be used with this register class
|
||||
LL | asm!("{}", in(reg) f64x2);
|
||||
| ^^^^^
|
||||
|
|
||||
= note: register class `reg` supports these types: i8, i16, i32, i64, f32, f64
|
||||
= note: register class `reg` supports these types: i8, i16, i32, i64, f16, f32, f64
|
||||
|
||||
error: type `Simd256bit` cannot be used with this register class
|
||||
--> $DIR/type-check-3.rs:77:29
|
||||
@ -111,7 +111,7 @@ error: type `Simd256bit` cannot be used with this register class
|
||||
LL | asm!("{}", in(vreg) f64x4);
|
||||
| ^^^^^
|
||||
|
|
||||
= note: register class `vreg` supports these types: i8, i16, i32, i64, f32, f64, i8x8, i16x4, i32x2, i64x1, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f32x4, f64x2
|
||||
= note: register class `vreg` supports these types: i8, i16, i32, i64, f16, f32, f64, f128, i8x8, i16x4, i32x2, i64x1, f16x4, f32x2, f64x1, i8x16, i16x8, i32x4, i64x2, f16x8, f32x4, f64x2
|
||||
|
||||
error: incompatible types for asm inout argument
|
||||
--> $DIR/type-check-3.rs:88:33
|
||||
|
25
tests/ui/asm/aarch64/type-f16.rs
Normal file
25
tests/ui/asm/aarch64/type-f16.rs
Normal file
@ -0,0 +1,25 @@
|
||||
//@ only-aarch64
|
||||
//@ run-pass
|
||||
//@ needs-asm-support
|
||||
|
||||
#![feature(f16)]
|
||||
|
||||
use std::arch::asm;
|
||||
|
||||
#[inline(never)]
|
||||
pub fn f32_to_f16_asm(a: f32) -> f16 {
|
||||
let ret: f16;
|
||||
unsafe {
|
||||
asm!(
|
||||
"fcvt {ret:h}, {a:s}",
|
||||
a = in(vreg) a,
|
||||
ret = lateout(vreg) ret,
|
||||
options(nomem, nostack),
|
||||
);
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
fn main() {
|
||||
assert_eq!(f32_to_f16_asm(1.0 as f32), 1.0);
|
||||
}
|
Loading…
Reference in New Issue
Block a user