Auto merge of #3075 - eduardosm:x86-addcarry-subborrow, r=RalfJung

Move `llvm.x86.*` shims into `shims::x86` and implement `_addcarry_u32` and `_subborrow_u{32,64}`. This PR moves all `llvm.x86.*` shims into `shims::x86` and adds `llvm.x86.addcarry.32`, `llvm.x86.subborrow.32` and `llvm.x86.subborrow.64`. Additionally, it fixes the input carry semantics of the existing `llvm.x86.addcarry.64` shim, which previously added the raw input carry byte to the sum: the input carry is an 8-bit value that is interpreted as 1 when it is non-zero. See https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
2025-05-14 02:49:40 +00:00 · 2023-09-25 06:10:43 +00:00 · 2023-09-25 06:10:43 +00:00 · d644603b80
commit d644603b80
parent b791f02834 4625e1e8ab
7 changed files with 2084 additions and 1925 deletions
--- a/src/tools/miri/src/shims/foreign_items.rs
+++ b/src/tools/miri/src/shims/foreign_items.rs
@ -22,7 +22,7 @@ use rustc_target::{
 };

 use super::backtrace::EvalContextExt as _;
-use crate::helpers::{convert::Truncate, target_os_is_unix};
+use crate::helpers::target_os_is_unix;
 use crate::*;

 /// Returned by `emulate_foreign_item_by_name`.
@ -981,30 +981,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                    throw_unsup_format!("unsupported `llvm.prefetch` type argument: {}", ty);
                }
            }
-            "llvm.x86.addcarry.64" if this.tcx.sess.target.arch == "x86_64" => {
-                // Computes u8+u64+u64, returning tuple (u8,u64) comprising the output carry and truncated sum.
-                let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
-                let c_in = this.read_scalar(c_in)?.to_u8()?;
-                let a = this.read_scalar(a)?.to_u64()?;
-                let b = this.read_scalar(b)?.to_u64()?;
-
-                #[allow(clippy::arithmetic_side_effects)]
-                // adding two u64 and a u8 cannot wrap in a u128
-                let wide_sum = u128::from(c_in) + u128::from(a) + u128::from(b);
-                #[allow(clippy::arithmetic_side_effects)] // it's a u128, we can shift by 64
-                let (c_out, sum) = ((wide_sum >> 64).truncate::<u8>(), wide_sum.truncate::<u64>());
-
-                let c_out_field = this.project_field(dest, 0)?;
-                this.write_scalar(Scalar::from_u8(c_out), &c_out_field)?;
-                let sum_field = this.project_field(dest, 1)?;
-                this.write_scalar(Scalar::from_u64(sum), &sum_field)?;
-            }
-            "llvm.x86.sse2.pause"
-                if this.tcx.sess.target.arch == "x86" || this.tcx.sess.target.arch == "x86_64" =>
-            {
-                let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
-                this.yield_active_thread();
-            }
+            // FIXME: Move these to an `arm` submodule.
            "llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
                let [arg] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
                let arg = this.read_scalar(arg)?.to_i32()?;
@ -1055,13 +1032,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
                }
            }

-            name if name.starts_with("llvm.x86.sse.") => {
-                return shims::x86::sse::EvalContextExt::emulate_x86_sse_intrinsic(
-                    this, link_name, abi, args, dest,
-                );
-            }
-            name if name.starts_with("llvm.x86.sse2.") => {
-                return shims::x86::sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
+            name if name.starts_with("llvm.x86.")
+                && (this.tcx.sess.target.arch == "x86"
+                    || this.tcx.sess.target.arch == "x86_64") =>
+            {
+                return shims::x86::EvalContextExt::emulate_x86_intrinsic(
                    this, link_name, abi, args, dest,
                );
            }
--- a/src/tools/miri/src/shims/x86/mod.rs
+++ b/src/tools/miri/src/shims/x86/mod.rs
@ -1,11 +1,98 @@
 use rustc_middle::mir;
+use rustc_span::Symbol;
 use rustc_target::abi::Size;
+use rustc_target::spec::abi::Abi;

 use crate::*;
 use helpers::bool_to_simd_element;
+use shims::foreign_items::EmulateByNameResult;

-pub(super) mod sse;
-pub(super) mod sse2;
+mod sse;
+mod sse2;
+
+impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
+    fn emulate_x86_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx, Provenance>],
+        dest: &PlaceTy<'tcx, Provenance>,
+    ) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
+        let this = self.eval_context_mut();
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.").unwrap();
+        match unprefixed_name {
+            // Used to implement the `_addcarry_u32` and `_addcarry_u64` functions.
+            // Computes a + b with input and output carry. The input carry is an 8-bit
+            // value, which is interpreted as 1 if it is non-zero. The output carry is
+            // an 8-bit value that will be 0 or 1.
+            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/addcarry-u32-addcarry-u64.html
+            "addcarry.32" | "addcarry.64" => {
+                if unprefixed_name == "addcarry.64" && this.tcx.sess.target.arch != "x86_64" {
+                    return Ok(EmulateByNameResult::NotSupported);
+                }
+
+                let [c_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
+                let c_in = this.read_scalar(c_in)?.to_u8()? != 0;
+                let a = this.read_immediate(a)?;
+                let b = this.read_immediate(b)?;
+
+                let (sum, overflow1) = this.overflowing_binary_op(mir::BinOp::Add, &a, &b)?;
+                let (sum, overflow2) = this.overflowing_binary_op(
+                    mir::BinOp::Add,
+                    &sum,
+                    &ImmTy::from_uint(c_in, a.layout),
+                )?;
+                let c_out = overflow1 | overflow2;
+
+                this.write_scalar(Scalar::from_u8(c_out.into()), &this.project_field(dest, 0)?)?;
+                this.write_immediate(*sum, &this.project_field(dest, 1)?)?;
+            }
+            // Used to implement the `_subborrow_u32` and `_subborrow_u64` functions.
+            // Computes a - b with input and output borrow. The input borrow is an 8-bit
+            // value, which is interpreted as 1 if it is non-zero. The output borrow is
+            // an 8-bit value that will be 0 or 1.
+            // https://www.intel.com/content/www/us/en/docs/cpp-compiler/developer-guide-reference/2021-8/subborrow-u32-subborrow-u64.html
+            "subborrow.32" | "subborrow.64" => {
+                if unprefixed_name == "subborrow.64" && this.tcx.sess.target.arch != "x86_64" {
+                    return Ok(EmulateByNameResult::NotSupported);
+                }
+
+                let [b_in, a, b] = this.check_shim(abi, Abi::Unadjusted, link_name, args)?;
+                let b_in = this.read_scalar(b_in)?.to_u8()? != 0;
+                let a = this.read_immediate(a)?;
+                let b = this.read_immediate(b)?;
+
+                let (sub, overflow1) = this.overflowing_binary_op(mir::BinOp::Sub, &a, &b)?;
+                let (sub, overflow2) = this.overflowing_binary_op(
+                    mir::BinOp::Sub,
+                    &sub,
+                    &ImmTy::from_uint(b_in, a.layout),
+                )?;
+                let b_out = overflow1 | overflow2;
+
+                this.write_scalar(Scalar::from_u8(b_out.into()), &this.project_field(dest, 0)?)?;
+                this.write_immediate(*sub, &this.project_field(dest, 1)?)?;
+            }
+
+            name if name.starts_with("sse.") => {
+                return sse::EvalContextExt::emulate_x86_sse_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
+            name if name.starts_with("sse2.") => {
+                return sse2::EvalContextExt::emulate_x86_sse2_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
+            _ => return Ok(EmulateByNameResult::NotSupported),
+        }
+        Ok(EmulateByNameResult::NeedsJumping)
+    }
+}

 /// Floating point comparison operation
 ///
--- a/src/tools/miri/src/shims/x86/sse.rs
+++ b/src/tools/miri/src/shims/x86/sse.rs
@ -10,7 +10,9 @@ use crate::*;
 use shims::foreign_items::EmulateByNameResult;

 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
-pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
    fn emulate_x86_sse_intrinsic(
        &mut self,
        link_name: Symbol,
--- a/src/tools/miri/src/shims/x86/sse2.rs
+++ b/src/tools/miri/src/shims/x86/sse2.rs
@ -13,7 +13,9 @@ use crate::*;
 use shims::foreign_items::EmulateByNameResult;

 impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
-pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
+pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
+    crate::MiriInterpCxExt<'mir, 'tcx>
+{
    fn emulate_x86_sse2_intrinsic(
        &mut self,
        link_name: Symbol,
@ -753,6 +755,12 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {

                this.write_scalar(Scalar::from_u32(res.try_into().unwrap()), dest)?;
            }
+            // Used to implement the `_mm_pause` function.
+            // The intrinsic is used to hint the processor that the code is in a spin-loop.
+            "pause" => {
+                let [] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+                this.yield_active_thread();
+            }
            _ => return Ok(EmulateByNameResult::NotSupported),
        }
        Ok(EmulateByNameResult::NeedsJumping)
--- a/src/tools/miri/tests/pass/intrinsics-x86-sse.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse.rs
--- a/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86-sse2.rs
--- a/src/tools/miri/tests/pass/intrinsics-x86.rs
+++ b/src/tools/miri/tests/pass/intrinsics-x86.rs
@ -1,3 +1,51 @@
+#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+mod x86 {
+    #[cfg(target_arch = "x86")]
+    use core::arch::x86 as arch;
+    #[cfg(target_arch = "x86_64")]
+    use core::arch::x86_64 as arch;
+
+    fn adc(c_in: u8, a: u32, b: u32) -> (u8, u32) {
+        let mut sum = 0;
+        // SAFETY: There are no safety requirements for calling `_addcarry_u32`.
+        // It's just unsafe for API consistency with other intrinsics.
+        let c_out = unsafe { arch::_addcarry_u32(c_in, a, b, &mut sum) };
+        (c_out, sum)
+    }
+
+    fn sbb(b_in: u8, a: u32, b: u32) -> (u8, u32) {
+        let mut sum = 0;
+        // SAFETY: There are no safety requirements for calling `_subborrow_u32`.
+        // It's just unsafe for API consistency with other intrinsics.
+        let b_out = unsafe { arch::_subborrow_u32(b_in, a, b, &mut sum) };
+        (b_out, sum)
+    }
+
+    pub fn main() {
+        assert_eq!(adc(0, 1, 1), (0, 2));
+        assert_eq!(adc(1, 1, 1), (0, 3));
+        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
+        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
+        assert_eq!(adc(0, u32::MAX, u32::MAX), (1, u32::MAX - 1));
+        assert_eq!(adc(1, u32::MAX, u32::MAX), (1, u32::MAX));
+        assert_eq!(adc(2, u32::MAX, u32::MAX), (1, u32::MAX));
+        assert_eq!(adc(u8::MAX, u32::MAX, u32::MAX), (1, u32::MAX));
+
+        assert_eq!(sbb(0, 1, 1), (0, 0));
+        assert_eq!(sbb(1, 1, 1), (1, u32::MAX));
+        assert_eq!(sbb(2, 1, 1), (1, u32::MAX)); // any non-zero borrow acts as 1!
+        assert_eq!(sbb(u8::MAX, 1, 1), (1, u32::MAX));
+        assert_eq!(sbb(0, 2, 1), (0, 1));
+        assert_eq!(sbb(1, 2, 1), (0, 0));
+        assert_eq!(sbb(2, 2, 1), (0, 0));
+        assert_eq!(sbb(u8::MAX, 2, 1), (0, 0));
+        assert_eq!(sbb(0, 1, 2), (1, u32::MAX));
+        assert_eq!(sbb(1, 1, 2), (1, u32::MAX - 1));
+        assert_eq!(sbb(2, 1, 2), (1, u32::MAX - 1));
+        assert_eq!(sbb(u8::MAX, 1, 2), (1, u32::MAX - 1));
+    }
+}
+
 #[cfg(target_arch = "x86_64")]
 mod x86_64 {
    use core::arch::x86_64 as arch;
@ -10,13 +58,42 @@ mod x86_64 {
        (c_out, sum)
    }

+    fn sbb(b_in: u8, a: u64, b: u64) -> (u8, u64) {
+        let mut sum = 0;
+        // SAFETY: There are no safety requirements for calling `_subborrow_u64`.
+        // It's just unsafe for API consistency with other intrinsics.
+        let b_out = unsafe { arch::_subborrow_u64(b_in, a, b, &mut sum) };
+        (b_out, sum)
+    }
+
    pub fn main() {
+        assert_eq!(adc(0, 1, 1), (0, 2));
        assert_eq!(adc(1, 1, 1), (0, 3));
-        assert_eq!(adc(3, u64::MAX, u64::MAX), (2, 1));
+        assert_eq!(adc(2, 1, 1), (0, 3)); // any non-zero carry acts as 1!
+        assert_eq!(adc(u8::MAX, 1, 1), (0, 3));
+        assert_eq!(adc(0, u64::MAX, u64::MAX), (1, u64::MAX - 1));
+        assert_eq!(adc(1, u64::MAX, u64::MAX), (1, u64::MAX));
+        assert_eq!(adc(2, u64::MAX, u64::MAX), (1, u64::MAX));
+        assert_eq!(adc(u8::MAX, u64::MAX, u64::MAX), (1, u64::MAX));
+
+        assert_eq!(sbb(0, 1, 1), (0, 0));
+        assert_eq!(sbb(1, 1, 1), (1, u64::MAX));
+        assert_eq!(sbb(2, 1, 1), (1, u64::MAX)); // any non-zero borrow acts as 1!
+        assert_eq!(sbb(u8::MAX, 1, 1), (1, u64::MAX));
+        assert_eq!(sbb(0, 2, 1), (0, 1));
+        assert_eq!(sbb(1, 2, 1), (0, 0));
+        assert_eq!(sbb(2, 2, 1), (0, 0));
+        assert_eq!(sbb(u8::MAX, 2, 1), (0, 0));
+        assert_eq!(sbb(0, 1, 2), (1, u64::MAX));
+        assert_eq!(sbb(1, 1, 2), (1, u64::MAX - 1));
+        assert_eq!(sbb(2, 1, 2), (1, u64::MAX - 1));
+        assert_eq!(sbb(u8::MAX, 1, 2), (1, u64::MAX - 1));
    }
 }

 fn main() {
+    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+    x86::main();
    #[cfg(target_arch = "x86_64")]
    x86_64::main();
 }