Implement *fmaddsub_p*, *fmsubadd_p* and *fnmadd_p* vendor intrinsics

This commit is contained in:
bjorn3 2023-11-24 15:30:53 +00:00
parent 705031d017
commit 65da671694

View File

@ -735,6 +735,117 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
}
}
"llvm.x86.fma.vfmaddsub.ps"
| "llvm.x86.fma.vfmaddsub.pd"
| "llvm.x86.fma.vfmaddsub.ps.256"
| "llvm.x86.fma.vfmaddsub.pd.256" => {
    // Alternating multiply-subtract / multiply-add:
    // even-indexed lanes compute a*b - c, odd-indexed lanes compute a*b + c.
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
    intrinsic_args!(fx, args => (a, b, c); intrinsic);

    // All three operands must share a single SIMD layout.
    let layout = a.layout();
    assert_eq!(a.layout(), b.layout());
    assert_eq!(a.layout(), c.layout());

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert!(lane_ty.is_floating_point());
    assert!(ret_lane_ty.is_floating_point());
    assert_eq!(lane_count, ret_lane_count);
    let ret_lane_layout = fx.layout_of(ret_lane_ty);

    // Lower lane-by-lane as scalar ops; Cranelift has no native fmaddsub.
    for lane in 0..lane_count {
        let av = a.value_lane(fx, lane).load_scalar(fx);
        let bv = b.value_lane(fx, lane).load_scalar(fx);
        let cv = c.value_lane(fx, lane).load_scalar(fx);

        let product = fx.bcx.ins().fmul(av, bv);
        // Even lane index subtracts c, odd lane index adds c.
        let value = match lane % 2 {
            0 => fx.bcx.ins().fsub(product, cv),
            _ => fx.bcx.ins().fadd(product, cv),
        };

        let value = CValue::by_val(value, ret_lane_layout);
        ret.place_lane(fx, lane).write_cvalue(fx, value);
    }
}
"llvm.x86.fma.vfmsubadd.ps"
| "llvm.x86.fma.vfmsubadd.pd"
| "llvm.x86.fma.vfmsubadd.ps.256"
| "llvm.x86.fma.vfmsubadd.pd.256" => {
    // Alternating multiply-add / multiply-subtract (the mirror of fmaddsub):
    // even-indexed lanes compute a*b + c, odd-indexed lanes compute a*b - c.
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
    intrinsic_args!(fx, args => (a, b, c); intrinsic);

    // All three operands must share a single SIMD layout.
    let layout = a.layout();
    assert_eq!(a.layout(), b.layout());
    assert_eq!(a.layout(), c.layout());

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert!(lane_ty.is_floating_point());
    assert!(ret_lane_ty.is_floating_point());
    assert_eq!(lane_count, ret_lane_count);
    let ret_lane_layout = fx.layout_of(ret_lane_ty);

    // Lower lane-by-lane as scalar ops; Cranelift has no native fmsubadd.
    for lane in 0..lane_count {
        let av = a.value_lane(fx, lane).load_scalar(fx);
        let bv = b.value_lane(fx, lane).load_scalar(fx);
        let cv = c.value_lane(fx, lane).load_scalar(fx);

        let product = fx.bcx.ins().fmul(av, bv);
        // Even lane index adds c, odd lane index subtracts c.
        let value = match lane % 2 {
            0 => fx.bcx.ins().fadd(product, cv),
            _ => fx.bcx.ins().fsub(product, cv),
        };

        let value = CValue::by_val(value, ret_lane_layout);
        ret.place_lane(fx, lane).write_cvalue(fx, value);
    }
}
"llvm.x86.fma.vfnmadd.ps"
| "llvm.x86.fma.vfnmadd.pd"
| "llvm.x86.fma.vfnmadd.ps.256"
| "llvm.x86.fma.vfnmadd.pd.256" => {
    // Negated multiply-add: every lane computes -(a*b) + c.
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
    // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
    intrinsic_args!(fx, args => (a, b, c); intrinsic);

    // All three operands must share a single SIMD layout.
    let layout = a.layout();
    assert_eq!(a.layout(), b.layout());
    assert_eq!(a.layout(), c.layout());

    let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
    let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
    assert!(lane_ty.is_floating_point());
    assert!(ret_lane_ty.is_floating_point());
    assert_eq!(lane_count, ret_lane_count);
    let ret_lane_layout = fx.layout_of(ret_lane_ty);

    // Lower lane-by-lane: negate the product, then add c.
    for lane in 0..lane_count {
        let av = a.value_lane(fx, lane).load_scalar(fx);
        let bv = b.value_lane(fx, lane).load_scalar(fx);
        let cv = c.value_lane(fx, lane).load_scalar(fx);

        let product = fx.bcx.ins().fmul(av, bv);
        let negated = fx.bcx.ins().fneg(product);
        let value = fx.bcx.ins().fadd(negated, cv);

        let value = CValue::by_val(value, ret_lane_layout);
        ret.place_lane(fx, lane).write_cvalue(fx, value);
    }
}
"llvm.x86.sse42.pcmpestri128" => {
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
intrinsic_args!(fx, args => (a, la, b, lb, _imm8); intrinsic);