Rollup merge of #94452 - workingjubilee:sync-simd-bitmasks, r=workingjubilee

Sync portable-simd for bitmasks &c.

In the ideal case, where everything works easily and nothing has to be rearranged, it is as simple as:
- `git subtree pull -P library/portable-simd https://github.com/rust-lang/portable-simd ${branch}`
- write the commit message
- `python x.py test --stage 1` to make sure it runs
- `git push` to your PR-to-rustc branch

If anything borks up this flow, you can fix it with sufficient git wizardry but you are usually better off going back to the source, fixing it, and starting over, before you open the PR.

r? `@calebzulawski`
commit 4001d98019 by Dylan DPC, 2022-03-01 03:41:53 +01:00 (committed by GitHub)
21 changed files with 440 additions and 125 deletions

View File

@ -0,0 +1,77 @@
#![feature(portable_simd)]
use core_simd::simd::*;

fn a(i: usize, j: usize) -> f64 {
    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
}

fn mult_av(v: &[f64], out: &mut [f64]) {
    assert!(v.len() == out.len());
    assert!(v.len() % 2 == 0);

    for (i, out) in out.iter_mut().enumerate() {
        let mut sum = f64x2::splat(0.0);

        let mut j = 0;
        while j < v.len() {
            let b = f64x2::from_slice(&v[j..]);
            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
            sum += b / a;
            j += 2
        }
        *out = sum.horizontal_sum();
    }
}

fn mult_atv(v: &[f64], out: &mut [f64]) {
    assert!(v.len() == out.len());
    assert!(v.len() % 2 == 0);

    for (i, out) in out.iter_mut().enumerate() {
        let mut sum = f64x2::splat(0.0);

        let mut j = 0;
        while j < v.len() {
            let b = f64x2::from_slice(&v[j..]);
            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
            sum += b / a;
            j += 2
        }
        *out = sum.horizontal_sum();
    }
}

fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
    mult_av(v, tmp);
    mult_atv(tmp, out);
}

pub fn spectral_norm(n: usize) -> f64 {
    assert!(n % 2 == 0, "only even lengths are accepted");

    let mut u = vec![1.0; n];
    let mut v = u.clone();
    let mut tmp = u.clone();

    for _ in 0..10 {
        mult_atav(&u, &mut v, &mut tmp);
        mult_atav(&v, &mut u, &mut tmp);
    }
    (dot(&u, &v) / dot(&v, &v)).sqrt()
}

fn dot(x: &[f64], y: &[f64]) -> f64 {
    // This is auto-vectorized:
    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
}

#[cfg(test)]
#[test]
fn test() {
    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
}

fn main() {
    // Empty main to make cargo happy
}

View File

@ -10,6 +10,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
}
@ -17,6 +19,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
}
}
@ -30,6 +34,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
}
@ -37,6 +43,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
}
@ -44,6 +52,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
}
@ -51,6 +61,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
}
}
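
For context, a minimal sketch of how these lane-wise comparisons look from the safe API; the `lanes_*` names are as they exist at this point in the crate (nightly-only behind `portable_simd`):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let a = Simd::from_array([1i32, 2, 3, 4]);
    let b = Simd::from_array([4i32, 3, 2, 1]);
    // Each lane is compared independently; the result is a Mask.
    let lt = a.lanes_lt(b);
    assert_eq!(lt.to_array(), [true, true, false, false]);
}
```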

View File

@ -2,31 +2,55 @@
//! crate.
//!
//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
//!
//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
//! this means that division by poison or undef is like division by zero, which means it inflicts...
//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
//!
//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
//!
//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
/// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
/// is documented alongside each intrinsic.
/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
extern "platform-intrinsic" {
/// add/fadd
pub(crate) fn simd_add<T>(x: T, y: T) -> T;
/// sub/fsub
pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
/// mul/fmul
pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
/// udiv/sdiv/fdiv
pub(crate) fn simd_div<T>(x: T, y: T) -> T;
/// ints and uints: {s,u}div incur UB if division by zero occurs.
/// ints: sdiv is UB for int::MIN / -1.
/// floats: fdiv is never UB, but may create NaNs or infinities.
pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
/// urem/srem/frem
pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
/// ints and uints: {s,u}rem incur UB if division by zero occurs.
/// ints: srem is UB for int::MIN / -1.
/// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
/// shl
pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
/// for (u)ints. poison if rhs >= lhs::BITS
pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
/// lshr/ashr
pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
/// ints: ashr
/// uints: lshr
/// poison if rhs >= lhs::BITS
pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
/// and
pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@ -38,12 +62,18 @@ extern "platform-intrinsic" {
pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
/// fptoui/fptosi/uitofp/sitofp
/// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
/// but the truncated value must fit in the target type or the result is poison.
/// use `simd_as` instead for a cast that performs a saturating conversion.
pub(crate) fn simd_cast<T, U>(x: T) -> U;
/// follows Rust's `T as U` semantics, including saturating float casts
/// which amounts to the same as `simd_cast` for many cases
pub(crate) fn simd_as<T, U>(x: T) -> U;
/// neg/fneg
/// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
/// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
/// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
pub(crate) fn simd_neg<T>(x: T) -> T;
/// fabs
@ -53,6 +83,7 @@ extern "platform-intrinsic" {
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
// these return Simd<int, N> with the same BITS size as the inputs
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@ -61,19 +92,31 @@ extern "platform-intrinsic" {
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
// shufflevector
// idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
/// llvm.masked.gather
/// like a loop of pointer reads
/// val: vector of values to select if a lane is masked
/// ptr: vector of pointers to read from
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
/// note, the LLVM intrinsic accepts a mask vector of <N x i1>
/// FIXME: review this if/when we fix up our mask story in general?
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
/// llvm.masked.scatter
/// like gather, but more spicy, as it writes instead of reads
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
// {s,u}add.sat
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
// {s,u}sub.sat
pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
// reductions
// llvm.vector.reduce.{add,fadd}
pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
// llvm.vector.reduce.{mul,fmul}
pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
#[allow(unused)]
pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@ -90,7 +133,10 @@ extern "platform-intrinsic" {
pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
// select
pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
// first argument is a vector of integers, -1 (all bits 1) is "true"
// logically equivalent to (yes & m) | (no & (m ^ -1)),
// but you can use it on floats.
pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
#[allow(unused)]
pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
}
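
These intrinsics are `pub(crate)` only; the `simd_select` semantics noted above surface through the safe `Mask::select`. A rough sketch, assuming the re-exports available from `core::simd` at this point:

```rust
#![feature(portable_simd)]
use core::simd::{Mask, Simd};

fn main() {
    let m = Mask::<i32, 4>::from_array([true, false, true, false]);
    let yes = Simd::from_array([1i32, 2, 3, 4]);
    let no = Simd::from_array([-1i32, -2, -3, -4]);
    // Lane-wise: take `yes` where the mask is true, `no` where it is false.
    assert_eq!(m.select(yes, no).to_array(), [1, -2, 3, -4]);
}
```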

View File

@ -1,7 +1,9 @@
#![cfg_attr(not(feature = "std"), no_std)]
#![feature(
const_fn_trait_bound,
convert_float_to_int,
decl_macro,
intra_doc_pointers,
platform_intrinsics,
repr_simd,
simd_ffi,

View File

@ -12,8 +12,10 @@
)]
mod mask_impl;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
mod to_bitmask;
pub use to_bitmask::ToBitMask;
use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::cmp::Ordering;
use core::{fmt, mem};
@ -42,6 +44,9 @@ mod sealed {
use sealed::Sealed;
/// Marker trait for types that may be used as SIMD mask elements.
///
/// # Safety
/// Type must be a signed integer.
pub unsafe trait MaskElement: SimdElement + Sealed {}
macro_rules! impl_element {
@ -149,6 +154,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
// Safety: the caller must confirm this invariant
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
}
@ -161,6 +167,7 @@ where
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_int(value: Simd<T, LANES>) -> Self {
assert!(T::valid(value), "all values must be either 0 or -1",);
// Safety: the validity has been checked
unsafe { Self::from_int_unchecked(value) }
}
@ -179,6 +186,7 @@ where
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
// Safety: the caller must confirm this invariant
unsafe { self.0.test_unchecked(lane) }
}
@ -190,6 +198,7 @@ where
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn test(&self, lane: usize) -> bool {
assert!(lane < LANES, "lane index out of range");
// Safety: the lane index has been checked
unsafe { self.test_unchecked(lane) }
}
@ -199,6 +208,7 @@ where
/// `lane` must be less than `LANES`.
#[inline]
pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
// Safety: the caller must confirm this invariant
unsafe {
self.0.set_unchecked(lane, value);
}
@ -211,27 +221,12 @@ where
#[inline]
pub fn set(&mut self, lane: usize, value: bool) {
assert!(lane < LANES, "lane index out of range");
// Safety: the lane index has been checked
unsafe {
self.set_unchecked(lane, value);
}
}
/// Convert this mask to a bitmask, with one bit set per lane.
#[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new array and does not mutate the original value"]
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
self.0.to_bitmask()
}
/// Convert a bitmask to a mask.
#[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
Self(mask_impl::Mask::from_bitmask(bitmask))
}
/// Returns true if any lane is set, or false otherwise.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]

View File

@ -1,7 +1,7 @@
#![allow(unused_imports)]
use super::MaskElement;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
use core::marker::PhantomData;
/// A mask where each lane is represented by a single bit.
@ -115,20 +115,22 @@ where
unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
}
#[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new array and does not mutate the original value"]
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
// Safety: these are the same type and we are laundering the generic
pub fn to_bitmask_integer<U>(self) -> U
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// Safety: these are the same types
unsafe { core::mem::transmute_copy(&self.0) }
}
#[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
// Safety: these are the same type and we are laundering the generic
Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
pub fn from_bitmask_integer<U>(bitmask: U) -> Self
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// Safety: these are the same types
unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
}
#[inline]
@ -137,6 +139,7 @@ where
where
U: MaskElement,
{
// Safety: bitmask layout does not depend on the element width
unsafe { core::mem::transmute_copy(&self) }
}

View File

@ -2,7 +2,7 @@
use super::MaskElement;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
#[repr(transparent)]
pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
@ -66,6 +66,23 @@ where
}
}
// Used for bitmask bit order workaround
pub(crate) trait ReverseBits {
fn reverse_bits(self) -> Self;
}
macro_rules! impl_reverse_bits {
{ $($int:ty),* } => {
$(
impl ReverseBits for $int {
fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
}
)*
}
}
impl_reverse_bits! { u8, u16, u32, u64 }
impl<T, const LANES: usize> Mask<T, LANES>
where
T: MaskElement,
@ -106,44 +123,40 @@ where
where
U: MaskElement,
{
// Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
unsafe { Mask(intrinsics::simd_cast(self.0)) }
}
#[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new array and does not mutate the original value"]
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
unsafe {
let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
intrinsics::simd_bitmask(self.0);
pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// Safety: U is required to be the appropriate bitmask type
let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
// There is a bug where LLVM appears to implement this operation with the wrong
// bit order.
// TODO fix this in a better way
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
for x in bitmask.as_mut() {
*x = x.reverse_bits();
}
}
bitmask.reverse_bits()
} else {
bitmask
}
}
#[cfg(feature = "generic_const_exprs")]
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
unsafe {
// There is a bug where LLVM appears to implement this operation with the wrong
// bit order.
// TODO fix this in a better way
if cfg!(target_endian = "big") {
for x in bitmask.as_mut() {
*x = x.reverse_bits();
}
}
pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// LLVM assumes bit order should match endianness
let bitmask = if cfg!(target_endian = "big") {
bitmask.reverse_bits()
} else {
bitmask
};
// Safety: U is required to be the appropriate bitmask type
unsafe {
Self::from_int_unchecked(intrinsics::simd_select_bitmask(
bitmask,
Self::splat(true).to_int(),
@ -155,12 +168,14 @@ where
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn any(self) -> bool {
// Safety: use `self` as an integer vector
unsafe { intrinsics::simd_reduce_any(self.to_int()) }
}
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
pub fn all(self) -> bool {
// Safety: use `self` as an integer vector
unsafe { intrinsics::simd_reduce_all(self.to_int()) }
}
}
@ -184,6 +199,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn bitand(self, rhs: Self) -> Self {
// Safety: `self` is an integer vector
unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
}
}
@ -197,6 +213,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn bitor(self, rhs: Self) -> Self {
// Safety: `self` is an integer vector
unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
}
}
@ -210,6 +227,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn bitxor(self, rhs: Self) -> Self {
// Safety: `self` is an integer vector
unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
}
}

View File

@ -0,0 +1,57 @@
use super::{mask_impl, Mask, MaskElement};
use crate::simd::{LaneCount, SupportedLaneCount};
mod sealed {
pub trait Sealed {}
}
pub use sealed::Sealed;
impl<T, const LANES: usize> Sealed for Mask<T, LANES>
where
T: MaskElement,
LaneCount<LANES>: SupportedLaneCount,
{
}
/// Converts masks to and from integer bitmasks.
///
/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
///
/// # Safety
/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
/// the mask.
pub unsafe trait ToBitMask: Sealed {
/// The integer bitmask type.
type BitMask;
/// Converts a mask to a bitmask.
fn to_bitmask(self) -> Self::BitMask;
/// Converts a bitmask to a mask.
fn from_bitmask(bitmask: Self::BitMask) -> Self;
}
macro_rules! impl_integer_intrinsic {
{ $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
$(
unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
type BitMask = $int;
fn to_bitmask(self) -> $int {
self.0.to_bitmask_integer()
}
fn from_bitmask(bitmask: $int) -> Self {
Self(mask_impl::Mask::from_bitmask_integer(bitmask))
}
}
)*
}
}
impl_integer_intrinsic! {
unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
}
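
A short usage sketch of the new trait, assuming it is reachable as `core::simd::ToBitMask` once the sync lands (lane 0 maps to the least significant bit, per the doc comment above):

```rust
#![feature(portable_simd)]
use core::simd::{Mask, ToBitMask};

fn main() {
    let mask =
        Mask::<i16, 8>::from_array([true, false, false, true, false, false, true, false]);
    // Lane 0 corresponds to the least significant bit.
    let bits: u8 = mask.to_bitmask();
    assert_eq!(bits, 0b01001001);
    assert_eq!(Mask::<i16, 8>::from_bitmask(bits), mask);
}
```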

View File

@ -22,6 +22,7 @@ macro_rules! impl_uint_arith {
/// ```
#[inline]
pub fn saturating_add(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_add(self, second) }
}
@ -41,6 +42,7 @@ macro_rules! impl_uint_arith {
/// assert_eq!(sat, Simd::splat(0));
#[inline]
pub fn saturating_sub(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_sub(self, second) }
}
})+
@ -68,6 +70,7 @@ macro_rules! impl_int_arith {
/// ```
#[inline]
pub fn saturating_add(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_add(self, second) }
}
@ -87,6 +90,7 @@ macro_rules! impl_int_arith {
/// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
#[inline]
pub fn saturating_sub(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_sub(self, second) }
}

View File

@ -57,29 +57,40 @@ macro_rules! wrap_bitshift {
};
}
// Division by zero is poison, according to LLVM.
// So is dividing the MIN value of a signed integer by -1,
// since that would return MAX + 1.
// FIXME: Rust allows <SInt>::MIN / -1,
// so we should probably figure out how to make that safe.
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// | | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0 | UB | panic!()
/// | <$int>::MIN / -1 | UB | <$int>::MIN
/// | <$int>::MIN % -1 | UB | 0
///
macro_rules! int_divrem_guard {
( $lhs:ident,
$rhs:ident,
{ const PANIC_ZERO: &'static str = $zero:literal;
const PANIC_OVERFLOW: &'static str = $overflow:literal;
$simd_call:ident
},
$int:ident ) => {
if $rhs.lanes_eq(Simd::splat(0)).any() {
panic!($zero);
} else if <$int>::MIN != 0
&& ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
// type inference can break here, so cut an SInt to size
& $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
{
panic!($overflow);
} else {
unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
// Prevent otherwise-UB overflow on the MIN / -1 case.
let rhs = if <$int>::MIN != 0 {
// This should, at worst, optimize to a few branchless logical ops
// Ideally, this entire conditional should evaporate
// Fire LLVM and implement those manually if it doesn't get the hint
($lhs.lanes_eq(Simd::splat(<$int>::MIN))
// type inference can break here, so cut an SInt to size
& $rhs.lanes_eq(Simd::splat(-1i64 as _)))
.select(Simd::splat(1), $rhs)
} else {
// Nice base case to make it easy to const-fold away the other branch.
$rhs
};
unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
}
};
}
@ -183,7 +194,6 @@ for_base_ops! {
impl Div::div {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to divide by zero";
const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
simd_div
}
}
@ -191,7 +201,6 @@ for_base_ops! {
impl Rem::rem {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
simd_rem
}
}
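
The guarded behavior in the table above is observable from safe code; a sketch of the intended results (division by zero still panics, while the `MIN / -1` and `MIN % -1` cases now return defined values instead of overflowing):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let min = Simd::<i32, 4>::splat(i32::MIN);
    let neg_one = Simd::splat(-1i32);
    // LLVM would treat these as UB; the guard substitutes a defined result.
    assert_eq!((min / neg_one).to_array(), [i32::MIN; 4]);
    assert_eq!((min % neg_one).to_array(), [0; 4]);
    // Division by zero still panics, mirroring scalar Rust:
    // let _ = min / Simd::splat(0); // would panic
}
```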

View File

@ -14,24 +14,28 @@ macro_rules! impl_integer_reductions {
/// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
#[inline]
pub fn horizontal_sum(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_add_ordered(self, 0) }
}
/// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
#[inline]
pub fn horizontal_product(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_mul_ordered(self, 1) }
}
/// Horizontal maximum. Returns the maximum lane in the vector.
#[inline]
pub fn horizontal_max(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_max(self) }
}
/// Horizontal minimum. Returns the minimum lane in the vector.
#[inline]
pub fn horizontal_min(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_min(self) }
}
}
@ -63,6 +67,7 @@ macro_rules! impl_float_reductions {
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
self.as_array().iter().sum()
} else {
// Safety: `self` is a float vector
unsafe { simd_reduce_add_ordered(self, 0.) }
}
}
@ -74,6 +79,7 @@ macro_rules! impl_float_reductions {
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
self.as_array().iter().product()
} else {
// Safety: `self` is a float vector
unsafe { simd_reduce_mul_ordered(self, 1.) }
}
}
@ -84,6 +90,7 @@ macro_rules! impl_float_reductions {
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
#[inline]
pub fn horizontal_max(self) -> $scalar {
// Safety: `self` is a float vector
unsafe { simd_reduce_max(self) }
}
@ -93,6 +100,7 @@ macro_rules! impl_float_reductions {
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
#[inline]
pub fn horizontal_min(self) -> $scalar {
// Safety: `self` is a float vector
unsafe { simd_reduce_min(self) }
}
}
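
For reference, a small sketch of the reductions these safety comments annotate; the `horizontal_*` names are as of this version of the crate:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let v = Simd::from_array([1u32, 2, 3, 4]);
    assert_eq!(v.horizontal_sum(), 10);
    assert_eq!(v.horizontal_product(), 24);
    assert_eq!(v.horizontal_max(), 4);
    assert_eq!(v.horizontal_min(), 1);
}
```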

View File

@ -1,9 +1,10 @@
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::convert::FloatToInt;
macro_rules! implement {
{
$type:ty, $int_type:ty
$type:ty
} => {
impl<const LANES: usize> Simd<$type, LANES>
where
@ -18,20 +19,22 @@ macro_rules! implement {
/// * Not be NaN
/// * Not be infinite
/// * Be representable in the return type, after truncating off its fractional part
///
/// If these requirements are infeasible or costly, consider using the safe function [cast],
/// which saturates on conversion.
///
/// [cast]: Simd::cast
#[inline]
pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
where
$type: FloatToInt<I>,
I: SimdElement,
{
unsafe { intrinsics::simd_cast(self) }
}
/// Creates a floating-point vector from an integer vector. Rounds values that are
/// not exactly representable.
#[inline]
pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
unsafe { intrinsics::simd_cast(value) }
}
}
}
}
implement! { f32, i32 }
implement! { f64, i64 }
implement! { f32 }
implement! { f64 }
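
With the generic signature, the target integer type is chosen at the call site via the turbofish; a sketch, assuming every lane satisfies the safety requirements listed above (finite and in range after truncation):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let v = Simd::from_array([1.9f32, -2.7, 0.0, 100.5]);
    // Safety: every lane is finite and fits in i32 after truncation.
    let ints = unsafe { v.to_int_unchecked::<i32>() };
    assert_eq!(ints.to_array(), [1, -2, 0, 100]);
}
```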

View File

@ -11,6 +11,7 @@ where
/// For each lane in the mask, choose the corresponding lane from `true_values` if
/// that lane mask is true, and `false_values` if that lane mask is false.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@ -31,6 +32,8 @@ where
where
U: SimdElement<Mask = T>,
{
// Safety: The mask has been cast to a vector of integers,
// and the operands to select between are vectors of the same type and length.
unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
}
@ -39,6 +42,7 @@ where
/// For each lane in the mask, choose the corresponding lane from `true_values` if
/// that lane mask is true, and `false_values` if that lane mask is false.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::Mask;

View File

@ -95,6 +95,7 @@ pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
LaneCount<INPUT_LANES>: SupportedLaneCount,
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
{
// Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
}
}
@ -119,6 +120,7 @@ pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
LaneCount<INPUT_LANES>: SupportedLaneCount,
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
{
// Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
}
}
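
For context, the `Swizzle` trait wraps this intrinsic behind a user-supplied const index; a minimal sketch, assuming the public `INDEX` associated const that `INDEX_IMPL` is derived from:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, Swizzle};

// Hypothetical reversal swizzle for 4-lane vectors.
struct Reverse;

impl Swizzle<4, 4> for Reverse {
    const INDEX: [usize; 4] = [3, 2, 1, 0];
}

fn main() {
    let v = Simd::from_array([10, 11, 12, 13]);
    // Each output lane picks the input lane named by INDEX.
    assert_eq!(Reverse::swizzle(v).to_array(), [13, 12, 11, 10]);
}
```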

View File

@ -8,12 +8,14 @@ macro_rules! impl_to_bytes {
/// Return the memory representation of this integer as a byte array in native byte
/// order.
pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
// Safety: transmuting between vectors is safe
unsafe { core::mem::transmute_copy(&self) }
}
/// Create a native endian integer value from its memory representation as a byte array
/// in native endianness.
pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
// Safety: transmuting between vectors is safe
unsafe { core::mem::transmute_copy(&bytes) }
}
}

View File

@ -12,7 +12,79 @@ pub(crate) mod ptr;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
/// A SIMD vector of `LANES` elements of type `T`.
/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
///
/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
/// the two arrays together and mapping the operator over each lane.
///
/// ```rust
/// # #![feature(array_zip, portable_simd)]
/// # use core::simd::{Simd};
/// let a0: [i32; 4] = [-2, 0, 2, 4];
/// let a1 = [10, 9, 8, 7];
/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
///
/// // `Simd<T, N>` implements `From<[T; N]>`
/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
/// // Which means arrays implement `Into<Simd<T, N>>`.
/// assert_eq!(v0 + v1, zm_add.into());
/// assert_eq!(v0 * v1, zm_mul.into());
/// ```
///
/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
/// This means there is no warning on overflows, even in "debug" builds.
/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
/// and even "debug builds" are unlikely to tolerate the loss of performance.
/// You may want to consider using explicitly checked arithmetic if such is required.
/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
///
/// [`Wrapping<T>`]: core::num::Wrapping
///
/// # Layout
/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
///
/// # ABI "Features"
/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
/// The need for this may be corrected in the future.
///
/// # Safe SIMD with Unsafe Rust
///
/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
/// [`read_unaligned`] and [`write_unaligned`]. This is because:
/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
///
/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
/// and may be just as fast as stricter alternatives.
/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
///
/// [`transmute`]: core::mem::transmute
/// [raw pointers]: pointer
/// [`read_unaligned`]: pointer::read_unaligned
/// [`write_unaligned`]: pointer::write_unaligned
/// [`read`]: pointer::read
/// [`write`]: pointer::write
/// [as_simd]: slice::as_simd
#[repr(simd)]
pub struct Simd<T, const LANES: usize>([T; LANES])
where
@ -98,6 +170,7 @@ where
#[must_use]
#[inline]
pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
// Safety: The input argument is a vector of a known SIMD type.
unsafe { intrinsics::simd_as(self) }
}
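
Since `cast` lowers to `simd_as`, it follows Rust's `as` semantics, including saturating float-to-int conversion; a brief sketch:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let floats = Simd::from_array([300.0f32, -1.0, 1e10, f32::NAN]);
    // Follows `as` casts: out-of-range values saturate and NaN becomes 0.
    let bytes = floats.cast::<u8>();
    assert_eq!(bytes.to_array(), [255, 0, 255, 0]);
}
```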
@ -171,7 +244,7 @@ where
or: Self,
) -> Self {
let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
// SAFETY: We have masked-off out-of-bounds lanes.
// Safety: We have masked-off out-of-bounds lanes.
unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
}
@ -212,7 +285,7 @@ where
let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
// Ferris forgive me, I have done pointer arithmetic here.
let ptrs = base_ptr.wrapping_add(idxs);
// SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
// Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
}
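
This unchecked gather is what the safe `gather_or` family (not shown in these hunks) builds on; a rough sketch of the safe entry point for context:

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let data = [10, 20, 30, 40, 50];
    let idxs = Simd::from_array([4, 0, 9, 2]);
    // Out-of-bounds lanes (index 9 here) fall back to the `or` value.
    let got = Simd::gather_or(&data, idxs, Simd::splat(-1));
    assert_eq!(got.to_array(), [50, 10, -1, 30]);
}
```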
@ -264,7 +337,7 @@ where
idxs: Simd<usize, LANES>,
) {
let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
// SAFETY: We have masked-off out-of-bounds lanes.
// Safety: We have masked-off out-of-bounds lanes.
unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
}
@ -303,7 +376,7 @@ where
enable: Mask<isize, LANES>,
idxs: Simd<usize, LANES>,
) {
// SAFETY: This block works with *mut T derived from &mut 'a [T],
// Safety: This block works with *mut T derived from &mut 'a [T],
// which means it is delicate in Rust's borrowing model, circa 2021:
// &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
// Even though this block is largely safe methods, it must be exactly this way
@ -483,7 +556,9 @@ mod sealed {
use sealed::Sealed;
/// Marker trait for types that may be used as SIMD vector elements.
/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
///
/// # Safety
/// This trait, when implemented, asserts the compiler can monomorphize
/// `#[repr(simd)]` structs with the marked type as an element.
/// Strictly, it is valid to impl if the vector will not be miscompiled.
/// Practically, it is user-unfriendly to impl it if the vector won't compile,

View File

@ -21,6 +21,8 @@ where
#[inline]
#[must_use]
pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
// Safety: converting pointers to usize and vice-versa is safe
// (even if using that pointer is not)
unsafe {
let x: Simd<usize, LANES> = mem::transmute_copy(&self);
mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
@ -47,6 +49,8 @@ where
#[inline]
#[must_use]
pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
// Safety: converting pointers to usize and vice-versa is safe
// (even if using that pointer is not)
unsafe {
let x: Simd<usize, LANES> = mem::transmute_copy(&self);
mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })

View File

@ -9,6 +9,8 @@ macro_rules! from_transmute {
impl core::convert::From<$from> for $to {
#[inline]
fn from(value: $from) -> $to {
// Safety: transmuting between vectors is safe, but the caller of this macro
// checks the invariants
unsafe { core::mem::transmute(value) }
}
}

View File

@ -68,16 +68,16 @@ macro_rules! test_mask_api {
assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
}
#[cfg(feature = "generic_const_exprs")]
#[test]
fn roundtrip_bitmask_conversion() {
use core_simd::ToBitMask;
let values = [
true, false, false, true, false, false, true, false,
true, true, false, false, false, false, false, true,
];
let mask = core_simd::Mask::<$type, 16>::from_array(values);
let bitmask = mask.to_bitmask();
assert_eq!(bitmask, [0b01001001, 0b10000011]);
assert_eq!(bitmask, 0b1000001101001001);
assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
}
}

View File

@ -210,15 +210,21 @@ macro_rules! impl_signed_tests {
)
}
fn div_min_may_overflow<const LANES: usize>() {
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(-1);
assert_eq!(a / b, a);
}
fn rem_min_may_overflow<const LANES: usize>() {
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(-1);
assert_eq!(a % b, Vector::<LANES>::splat(0));
}
}
test_helpers::test_lanes_panic! {
fn div_min_overflow_panics<const LANES: usize>() {
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(-1);
let _ = a / b;
}
fn div_by_all_zeros_panics<const LANES: usize>() {
let a = Vector::<LANES>::splat(42);
let b = Vector::<LANES>::splat(0);
@ -232,12 +238,6 @@ macro_rules! impl_signed_tests {
let _ = a / b;
}
fn rem_min_overflow_panic<const LANES: usize>() {
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(-1);
let _ = a % b;
}
fn rem_zero_panic<const LANES: usize>() {
let a = Vector::<LANES>::splat(42);
let b = Vector::<LANES>::splat(0);

View File

@ -53,14 +53,6 @@ macro_rules! float_rounding_test {
}
test_helpers::test_lanes! {
fn from_int<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&Vector::<LANES>::round_from_int,
&|x| x as Scalar,
&|_| true,
)
}
fn to_int_unchecked<const LANES: usize>() {
// The maximum integer that can be represented by the equivalently sized float has
// all of the mantissa digits set to 1, pushed up to the MSB.
@ -72,11 +64,11 @@ macro_rules! float_rounding_test {
runner.run(
&test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
|x| {
let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() };
let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
let result_2 = {
let mut result = [0; LANES];
let mut result: [IntScalar; LANES] = [0; LANES];
for (i, o) in x.iter().zip(result.iter_mut()) {
*o = unsafe { i.to_int_unchecked() };
*o = unsafe { i.to_int_unchecked::<IntScalar>() };
}
result
};