Auto merge of #3075 - eduardosm:x86-addcarry-subborrow, r=RalfJung

Move `llvm.x86.*` shims into `shims::x86` and implement `_addcarry_u32` and `_subborrow_u{32,64}`

This PR moves all `llvm.x86.*` shims into `shims::x86` and adds `llvm.x86.addcarry.32`, `llvm.x86.subborrow.32` and `llvm.x86.subborrow.64`.

Additionally, it fixes the input carry semantics of `llvm.x86.addcarry.64`: the input carry is an 8-bit value that is interpreted as 1 when it is non-zero.

https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
2025-06-04 19:29:07 +00:00 · 2023-09-25 06:10:43 +00:00 · 2023-09-25 06:10:43 +00:00 · d644603b80
commit d644603b80
parent b791f02834 4625e1e8ab
7 changed files with 2084 additions and 1925 deletions
--- a/src/tools/miri/src/shims/foreign_items.rs
+++ b/src/tools/miri/src/shims/foreign_items.rs
@ -22,7 +22,7 @@ use rustc_target::{
 };
 use super::backtrace::EvalContextExt as _;
-use crate::helpers::{convert::Truncate, target_os_is_unix};
+use crate::helpers::target_os_is_unix;
 use crate::*;
 /// Returned by `emulate_foreign_item_by_name`.
@ -981,30 +981,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                    throw_unsup_format!("unsupported `llvm.prefetch` type argument: {}", ty);
                }
            }
-            "llvm.x86.addcarry.64" if this.tcx.sess.target.arch == "x86_64" => {
+            // FIXME: Move these to an `arm` submodule.
                // Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum.
                let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
                let c_in = this.read_scalar(c_in)?.to_u8()?;
                let a = this.read_scalar(a)?.to_u64()?;
                let b = this.read_scalar(b)?.to_u64()?;
                #[allow(clippy::arithmetic_side_effects)]
                // adding two u64 and a u8 cannot wrap in a u128
                let wide_sum = u128::from(c_in) + u128::from(a) + u128::from(b);
                #[allow(clippy::arithmetic_side_effects)] // it's a u128, we can shift by 64
                let (c_out, sum) = ((wide_sum >> 64).truncate::<u8>(), wide_sum.truncate::<u64>());
                let c_out_field = this.project_field(dest, 0)?;
                this.write_scalar(Scalar::from_u8(c_out), &c_out_field)?;
                let sum_field = this.project_field(dest, 1)?;
                this.write_scalar(Scalar::from_u64(sum), &sum_field)?;
            }
            "llvm.x86.sse2.pause"
                if this.tcx.sess.target.arch == "x86" || this.tcx.sess.target.arch == "x86_64" =>
            {
                let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                this.yield_active_thread();
            }
            "llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
                let [arg] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
                let arg = this.read_scalar(arg)?.to_i32()?;
@ -1055,13 +1032,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                }
            }
-            name if name.starts_with("llvm.x86.sse.") => {
+            name if name.starts_with("llvm.x86.")
-                return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic(
+                && (this.tcx.sess.target.arch == "x86"
-                    this, link_name, abi, args, dest,
+                    || this.tcx.sess.target.arch == "x86_64") =>
-                );
+            {
-            }
+                return shims::x86::EvalContextExt::emulate_x86_intrinsic(
            name if name.starts_with("llvm.x86.sse2.") => {
                return shims::x86::sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
                    this, link_name, abi, args, dest,
                );
            }
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@ -1,11 +1,98 @@
 use rustc_middle::mir;
 use rustc_span::Symbol;
 use rustc_target::abi::Size;
 use rustc_target::spec::abi::Abi;
 use crate::*;
 use helpers::bool_to_simd_element;
 use shims::foreign_items::EmulateByNameResult;
-pub(super) mod sse;
+mod sse;
-pub(super) mod sse2;
+mod sse2;
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
 /// Extension trait that emulates `llvm.x86.*` intrinsics for the interpreter.
 ///
 /// The carry/borrow arithmetic intrinsics are handled directly in
 /// `emulate_x86_intrinsic`; names under `sse.` and `sse2.` are forwarded to
 /// the corresponding submodules.
 pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
    crate::MiriInterpCxExt<'mir, 'tcx>
 {
    /// Emulates the x86 LLVM intrinsic named by `link_name`.
    ///
    /// `link_name` must start with `llvm.x86.`; the caller is expected to have
    /// checked this already, and the method panics (via `unwrap`) otherwise.
    ///
    /// Returns `EmulateByNameResult::NeedsJumping` once an `addcarry.*` or
    /// `subborrow.*` intrinsic has been emulated and its result written to
    /// `dest`. Returns `EmulateByNameResult::NotSupported` for unknown names
    /// and for the 64-bit variants when the target architecture is not
    /// `x86_64`. For `sse.` / `sse2.` names the result of the delegated
    /// emulation function is returned unchanged.
    fn emulate_x86_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx, Provenance>],
        dest: &PlaceTy<'tcx, Provenance>,
    ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
        let this = self.eval_context_mut();
        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
        match unprefixed_name {
            // Used to implement the `_addcarry_u32` and `_addcarry_u64` functions.
            // Computes a + b with input and output carry. The input carry is an 8-bit
            // value, which is interpreted as 1 if it is non-zero. The output carry is
            // an 8-bit value that will be 0 or 1.
            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
            "addcarry.32" | "addcarry.64" => {
                // The 64-bit variant is only accepted when targeting x86_64.
                if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" {
                    return Ok(EmulateByNameResult::NotSupported);
                }
                let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
                // Normalize the carry-in: any non-zero byte counts as a carry of 1.
                let c_in = this.read_scalar(c_in)?.to_u8()? != 0;
                let a = this.read_immediate(a)?;
                let b = this.read_immediate(b)?;
                // First add the two operands, then add the carry-in using the
                // operands' own layout, tracking overflow of each step.
                let (sum, overflow1) = this.overflowing_binary_op(mir::BinOp::Add, &a, &b)?;
                let (sum, overflow2) = this.overflowing_binary_op(
                    mir::BinOp::Add,
                    &sum,
                    &ImmTy::from_uint(c_in, a.layout),
                )?;
                // The carry-out is set if either of the two additions overflowed.
                let c_out = overflow1 | overflow2;
                // `dest` is written field by field: field 0 is the carry-out byte,
                // field 1 is the sum.
                this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?;
                this.write_immediate(*sum, &this.project_field(dest, 1)?)?;
            }
            // Used to implement the `_subborrow_u32` and `_subborrow_u64` functions.
            // Computes a - b with input and output borrow. The input borrow is an 8-bit
            // value, which is interpreted as 1 if it is non-zero. The output borrow is
            // an 8-bit value that will be 0 or 1.
            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
            "subborrow.32" | "subborrow.64" => {
                // The 64-bit variant is only accepted when targeting x86_64.
                if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" {
                    return Ok(EmulateByNameResult::NotSupported);
                }
                let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
                // Normalize the borrow-in: any non-zero byte counts as a borrow of 1.
                let b_in = this.read_scalar(b_in)?.to_u8()? != 0;
                let a = this.read_immediate(a)?;
                let b = this.read_immediate(b)?;
                // First compute a - b, then subtract the borrow-in using the
                // operands' own layout, tracking overflow of each step.
                let (sub, overflow1) = this.overflowing_binary_op(mir::BinOp::Sub, &a, &b)?;
                let (sub, overflow2) = this.overflowing_binary_op(
                    mir::BinOp::Sub,
                    &sub,
                    &ImmTy::from_uint(b_in, a.layout),
                )?;
                // The borrow-out is set if either of the two subtractions overflowed.
                let b_out = overflow1 | overflow2;
                // `dest` is written field by field: field 0 is the borrow-out byte,
                // field 1 is the difference.
                this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?;
                this.write_immediate(*sub, &this.project_field(dest, 1)?)?;
            }
            // Everything under `sse.` is handled by the `sse` submodule; its result
            // is returned as-is.
            name if name.starts_with("sse.") => {
                return sse::EvalContextExt::emulate_x86_sse_intrinsic(
                    this, link_name, abi, args, dest,
                );
            }
            // Everything under `sse2.` is handled by the `sse2` submodule; its result
            // is returned as-is.
            name if name.starts_with("sse2.") => {
                return sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
                    this, link_name, abi, args, dest,
                );
            }
            _ => return Ok(EmulateByNameResult::NotSupported),
        }
        // Reached only by the arms above that emulated the intrinsic inline.
        Ok(EmulateByNameResult::NeedsJumping)
    }
 }
 /// Floating point comparison operation
 ///
--- a/src/tools/miri/src/shims/x86/sse.rs
+++ b/src/tools/miri/src/shims/x86/sse.rs
@ -10,7 +10,9 @@ use crate::*;
 use shims::foreign_items::EmulateByNameResult;
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
-pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
    crate::MiriInterpCxExt<'mir, 'tcx>
 {
    fn emulate_x86_sse_intrinsic(
        &mut self,
        link_name: Symbol,
--- a/src/tools/miri/src/shims/x86/sse2.rs
+++ b/src/tools/miri/src/shims/x86/sse2.rs
@ -13,7 +13,9 @@ use crate::*;
 use shims::foreign_items::EmulateByNameResult;
 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
-pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
    crate::MiriInterpCxExt<'mir, 'tcx>
 {
    fn emulate_x86_sse2_intrinsic(
        &mut self,
        link_name: Symbol,
@ -753,6 +755,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
            }
            // Used to implement the `_mm_pause` function.
            // The intrinsic is used to hint the processor that the code is in a spin-loop.
            "pause" => {
                let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
                this.yield_active_thread();
            }
            _ => return Ok(EmulateByNameResult::NotSupported),
        }
        Ok(EmulateByNameResult::NeedsJumping)
--- a/src/tools/miri/tests/pass/intrinsics-x86-sse.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse.rs
--- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
--- a/src/tools/miri/tests/pass/intrinsics-x86.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86.rs
@ -1,3 +1,51 @@
 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
 mod x86 {
    #[cfg(target_arch = "x86")]
    use core::arch::x86::{_addcarry_u32, _subborrow_u32};
    #[cfg(target_arch = "x86_64")]
    use core::arch::x86_64::{_addcarry_u32, _subborrow_u32};

    /// One test vector: `(carry_or_borrow_in, a, b, expected (flag_out, result))`.
    type Case = (u8, u32, u32, (u8, u32));

    /// Calls `_addcarry_u32` and returns `(carry_out, sum)`.
    fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
        let mut total = 0;
        // SAFETY: There are no safety requirements for calling `_addcarry_u32`.
        // It's just unsafe for API consistency with other intrinsics.
        let carry = unsafe { _addcarry_u32(c_in, a, b, &mut total) };
        (carry, total)
    }

    /// Calls `_subborrow_u32` and returns `(borrow_out, difference)`.
    fn sbb(b_in: u8, a: u32, b: u32) -> (u8, u32) {
        let mut diff = 0;
        // SAFETY: There are no safety requirements for calling `_subborrow_u32`.
        // It's just unsafe for API consistency with other intrinsics.
        let borrow = unsafe { _subborrow_u32(b_in, a, b, &mut diff) };
        (borrow, diff)
    }

    /// Checks the 32-bit add-with-carry and subtract-with-borrow intrinsics
    /// against a fixed list of expected results.
    pub fn main() {
        let adc_cases: &[Case] = &[
            (0, 1, 1, (0, 2)),
            (1, 1, 1, (0, 3)),
            (2, 1, 1, (0, 3)), // any non-zero carry acts as 1!
            (u8::MAX, 1, 1, (0, 3)),
            (0, u32::MAX, u32::MAX, (1, u32::MAX - 1)),
            (1, u32::MAX, u32::MAX, (1, u32::MAX)),
            (2, u32::MAX, u32::MAX, (1, u32::MAX)),
            (u8::MAX, u32::MAX, u32::MAX, (1, u32::MAX)),
        ];
        for &(c_in, a, b, expected) in adc_cases {
            assert_eq!(adc(c_in, a, b), expected);
        }

        let sbb_cases: &[Case] = &[
            (0, 1, 1, (0, 0)),
            (1, 1, 1, (1, u32::MAX)),
            (2, 1, 1, (1, u32::MAX)), // any non-zero borrow acts as 1!
            (u8::MAX, 1, 1, (1, u32::MAX)),
            (0, 2, 1, (0, 1)),
            (1, 2, 1, (0, 0)),
            (2, 2, 1, (0, 0)),
            (u8::MAX, 2, 1, (0, 0)),
            (0, 1, 2, (1, u32::MAX)),
            (1, 1, 2, (1, u32::MAX - 1)),
            (2, 1, 2, (1, u32::MAX - 1)),
            (u8::MAX, 1, 2, (1, u32::MAX - 1)),
        ];
        for &(b_in, a, b, expected) in sbb_cases {
            assert_eq!(sbb(b_in, a, b), expected);
        }
    }
 }
 #[cfg(target_arch = "x86_64")]
 mod x86_64 {
    use core::arch::x86_64 as arch;
@ -10,13 +58,42 @@ mod x86_64 {
        (c_out, sum)
    }
    fn sbb(b_in: u8, a: u64, b: u64) -> (u8, u64) {
        let mut sum = 0;
        // SAFETY: There are no safety requirements for calling `_subborrow_u64`.
        // It's just unsafe for API consistency with other intrinsics.
        let b_out = unsafe { arch::_subborrow_u64(b_in, a, b, &mut sum) };
        (b_out, sum)
    }
    pub fn main() {
        assert_eq!(adc(0, 1, 1), (0, 2));
        assert_eq!(adc(1, 1, 1), (0, 3));
-        assert_eq!(adc(3, u64::MAX, u64::MAX), (2, 1));
+        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
        assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1));
        assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX));
        assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX));
        assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX));
        assert_eq!(sbb(0, 1, 1), (0, 0));
        assert_eq!(sbb(1, 1, 1), (1, u64::MAX));
        assert_eq!(sbb(2, 1, 1), (1, u64::MAX)); // any non-zero borrow acts as 1!
        assert_eq!(sbb(u8::MAX, 1, 1), (1, u64::MAX));
        assert_eq!(sbb(0, 2, 1), (0, 1));
        assert_eq!(sbb(1, 2, 1), (0, 0));
        assert_eq!(sbb(2, 2, 1), (0, 0));
        assert_eq!(sbb(u8::MAX, 2, 1), (0, 0));
        assert_eq!(sbb(0, 1, 2), (1, u64::MAX));
        assert_eq!(sbb(1, 1, 2), (1, u64::MAX - 1));
        assert_eq!(sbb(2, 1, 2), (1, u64::MAX - 1));
        assert_eq!(sbb(u8::MAX, 1, 2), (1, u64::MAX - 1));
    }
 }
 /// Entry point: runs whichever intrinsic test suites exist for the target.
 fn main() {
    // The 32-bit carry/borrow checks apply to both x86 and x86_64.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
        x86::main();
    }
    // The 64-bit checks only exist on x86_64.
    #[cfg(target_arch = "x86_64")]
    {
        x86_64::main();
    }
 }