Mirror of https://github.com/rust-lang/rust.git (synced 2025-05-14 02:49:40 +00:00)
Implement *fmaddsub_p*, *fmsubadd_p* and *fnmadd_p* vendor intrinsics
Parent: 705031d017
Commit: 65da671694
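
As an illustration of what these vendor intrinsics compute (a usage sketch, not part of the commit; the demo function and its input values are invented here), _mm_fmaddsub_ps subtracts c from the product in even lanes and adds it in odd lanes:

// Usage sketch (not from this commit): semantics of _mm_fmaddsub_ps.
// Hypothetical demo values; requires an x86_64 CPU with the `fma` feature.
#[cfg(target_arch = "x86_64")]
fn fmaddsub_demo() {
    use std::arch::x86_64::*;
    if is_x86_feature_detected!("fma") {
        unsafe {
            // _mm_set_ps takes arguments from the highest lane down,
            // so a holds lanes [1.0, 2.0, 3.0, 4.0].
            let a = _mm_set_ps(4.0, 3.0, 2.0, 1.0);
            let b = _mm_set_ps(1.0, 1.0, 1.0, 1.0);
            let c = _mm_set_ps(10.0, 10.0, 10.0, 10.0);
            let r = _mm_fmaddsub_ps(a, b, c);
            let mut out = [0.0f32; 4];
            _mm_storeu_ps(out.as_mut_ptr(), r);
            // Even lanes: a*b - c, odd lanes: a*b + c.
            assert_eq!(out, [-9.0, 12.0, -7.0, 14.0]);
        }
    }
}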
@@ -735,6 +735,117 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
            }
        }

        "llvm.x86.fma.vfmaddsub.ps"
        | "llvm.x86.fma.vfmaddsub.pd"
        | "llvm.x86.fma.vfmaddsub.ps.256"
        | "llvm.x86.fma.vfmaddsub.pd.256" => {
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
            intrinsic_args!(fx, args => (a, b, c); intrinsic);

            assert_eq!(a.layout(), b.layout());
            assert_eq!(a.layout(), c.layout());
            let layout = a.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert!(lane_ty.is_floating_point());
            assert!(ret_lane_ty.is_floating_point());
            assert_eq!(lane_count, ret_lane_count);
            let ret_lane_layout = fx.layout_of(ret_lane_ty);

            for idx in 0..lane_count {
                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
                let c_lane = c.value_lane(fx, idx).load_scalar(fx);

                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
                let res = if idx & 1 == 0 {
                    fx.bcx.ins().fsub(mul, c_lane)
                } else {
                    fx.bcx.ins().fadd(mul, c_lane)
                };

                let res_lane = CValue::by_val(res, ret_lane_layout);
                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
            }
        }
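
The lane pattern above matches the Intel pseudocode: even-indexed lanes compute a * b - c, odd-indexed lanes compute a * b + c. Note that the lowering uses a separate fmul followed by fadd/fsub, i.e. an unfused multiply-add. A minimal scalar reference model (a sketch, not from the commit; the helper name is invented):

// Scalar sketch of the fmaddsub lane rule used above (hypothetical helper).
fn fmaddsub_ref(a: &[f32], b: &[f32], c: &[f32]) -> Vec<f32> {
    a.iter()
        .zip(b)
        .zip(c)
        .enumerate()
        // Even lane index: a*b - c; odd lane index: a*b + c.
        .map(|(idx, ((&a, &b), &c))| if idx & 1 == 0 { a * b - c } else { a * b + c })
        .collect()
}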

        "llvm.x86.fma.vfmsubadd.ps"
        | "llvm.x86.fma.vfmsubadd.pd"
        | "llvm.x86.fma.vfmsubadd.ps.256"
        | "llvm.x86.fma.vfmsubadd.pd.256" => {
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
            intrinsic_args!(fx, args => (a, b, c); intrinsic);

            assert_eq!(a.layout(), b.layout());
            assert_eq!(a.layout(), c.layout());
            let layout = a.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert!(lane_ty.is_floating_point());
            assert!(ret_lane_ty.is_floating_point());
            assert_eq!(lane_count, ret_lane_count);
            let ret_lane_layout = fx.layout_of(ret_lane_ty);

            for idx in 0..lane_count {
                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
                let c_lane = c.value_lane(fx, idx).load_scalar(fx);

                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
                let res = if idx & 1 == 0 {
                    fx.bcx.ins().fadd(mul, c_lane)
                } else {
                    fx.bcx.ins().fsub(mul, c_lane)
                };

                let res_lane = CValue::by_val(res, ret_lane_layout);
                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
            }
        }
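
For the fmsubadd arm the condition is mirrored: even-indexed lanes add c, odd-indexed lanes subtract it. The same scalar model with the branches swapped (again a sketch with an invented name):

// Scalar sketch of the fmsubadd lane rule (hypothetical helper).
fn fmsubadd_ref(a: &[f64], b: &[f64], c: &[f64]) -> Vec<f64> {
    a.iter()
        .zip(b)
        .zip(c)
        .enumerate()
        // Even lane index: a*b + c; odd lane index: a*b - c.
        .map(|(idx, ((&a, &b), &c))| if idx & 1 == 0 { a * b + c } else { a * b - c })
        .collect()
}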

        "llvm.x86.fma.vfnmadd.ps"
        | "llvm.x86.fma.vfnmadd.pd"
        | "llvm.x86.fma.vfnmadd.ps.256"
        | "llvm.x86.fma.vfnmadd.pd.256" => {
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
            intrinsic_args!(fx, args => (a, b, c); intrinsic);

            assert_eq!(a.layout(), b.layout());
            assert_eq!(a.layout(), c.layout());
            let layout = a.layout();

            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
            assert!(lane_ty.is_floating_point());
            assert!(ret_lane_ty.is_floating_point());
            assert_eq!(lane_count, ret_lane_count);
            let ret_lane_layout = fx.layout_of(ret_lane_ty);

            for idx in 0..lane_count {
                let a_lane = a.value_lane(fx, idx).load_scalar(fx);
                let b_lane = b.value_lane(fx, idx).load_scalar(fx);
                let c_lane = c.value_lane(fx, idx).load_scalar(fx);

                let mul = fx.bcx.ins().fmul(a_lane, b_lane);
                let neg_mul = fx.bcx.ins().fneg(mul);
                let res = fx.bcx.ins().fadd(neg_mul, c_lane);

                let res_lane = CValue::by_val(res, ret_lane_layout);
                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
            }
        }
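
fnmadd has no even/odd split: every lane computes -(a * b) + c, which the arm above expresses as fneg of the product followed by fadd. A one-line scalar equivalent (sketch, invented name):

// Scalar sketch of the fnmadd lane rule: -(a * b) + c, i.e. c - a * b.
fn fnmadd_ref(a: &[f32], b: &[f32], c: &[f32]) -> Vec<f32> {
    a.iter()
        .zip(b)
        .zip(c)
        .map(|((&a, &b), &c)| -(a * b) + c)
        .collect()
}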

        "llvm.x86.sse42.pcmpestri128" => {
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
            intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);