From 80469c66d650735a0520ae471c5fe5ebaf8971b5 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 13 Mar 2022 19:07:36 +0000
Subject: [PATCH 01/32] Move comparisons to SimdPartialOrd and SimdOrd traits

---
 crates/core_simd/src/comparisons.rs  | 120 ---------------
 crates/core_simd/src/eq.rs           |  73 +++++++++
 crates/core_simd/src/masks.rs        |   4 +-
 crates/core_simd/src/mod.rs          |   5 +-
 crates/core_simd/src/ops.rs          |  10 +-
 crates/core_simd/src/ord.rs          | 222 +++++++++++++++++++++++++++
 crates/core_simd/src/vector.rs       |  16 +-
 crates/core_simd/src/vector/float.rs |  26 ++--
 crates/core_simd/src/vector/int.rs   |   6 +-
 crates/core_simd/tests/i16_ops.rs    |  27 ----
 crates/core_simd/tests/ops_macros.rs |  45 +++---
 11 files changed, 354 insertions(+), 200 deletions(-)
 delete mode 100644 crates/core_simd/src/comparisons.rs
 create mode 100644 crates/core_simd/src/eq.rs
 create mode 100644 crates/core_simd/src/ord.rs
diff --git a/crates/core_simd/src/comparisons.rs b/crates/core_simd/src/comparisons.rs
deleted file mode 100644
index 7b0d0a6864b..00000000000
--- a/crates/core_simd/src/comparisons.rs
+++ /dev/null
@@ -1,120 +0,0 @@
-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
-
-impl<T, const LANES: usize> Simd<T, LANES>
-where
-    T: SimdElement + PartialEq,
-    LaneCount<LANES>: SupportedLaneCount,
-{
-    /// Test if each lane is equal to the corresponding lane in `other`.
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
-        // Safety: `self` is a vector, and the result of the comparison
-        // is always a valid mask.
-        unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
-    }
-
-    /// Test if each lane is not equal to the corresponding lane in `other`.
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
-        // Safety: `self` is a vector, and the result of the comparison
-        // is always a valid mask.
-        unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
-    }
-}
-
-impl<T, const LANES: usize> Simd<T, LANES>
-where
-    T: SimdElement + PartialOrd,
-    LaneCount<LANES>: SupportedLaneCount,
-{
-    /// Test if each lane is less than the corresponding lane in `other`.
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
-        // Safety: `self` is a vector, and the result of the comparison
-        // is always a valid mask.
-        unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
-    }
-
-    /// Test if each lane is greater than the corresponding lane in `other`.
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
-        // Safety: `self` is a vector, and the result of the comparison
-        // is always a valid mask.
-        unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
-    }
-
-    /// Test if each lane is less than or equal to the corresponding lane in `other`.
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
-        // Safety: `self` is a vector, and the result of the comparison
-        // is always a valid mask.
-        unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
-    }
-
-    /// Test if each lane is greater than or equal to the corresponding lane in `other`.
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
-        // Safety: `self` is a vector, and the result of the comparison
-        // is always a valid mask.
-        unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
-    }
-}
-
-macro_rules! impl_ord_methods_vector {
-    { $type:ty } => {
-        impl<const LANES: usize> Simd<$type, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            /// Returns the lane-wise minimum with `other`.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn min(self, other: Self) -> Self {
-                self.lanes_gt(other).select(other, self)
-            }
-
-            /// Returns the lane-wise maximum with `other`.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn max(self, other: Self) -> Self {
-                self.lanes_lt(other).select(other, self)
-            }
-
-            /// Restrict each lane to a certain interval.
-            ///
-            /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is
-            /// less than `min`. Otherwise returns `self`.
-            ///
-            /// # Panics
-            ///
-            /// Panics if `min > max` on any lane.
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            #[inline]
-            pub fn clamp(self, min: Self, max: Self) -> Self {
-                assert!(
-                    min.lanes_le(max).all(),
-                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
-                );
-                self.max(min).min(max)
-            }
-        }
-    }
-}
-
-impl_ord_methods_vector!(i8);
-impl_ord_methods_vector!(i16);
-impl_ord_methods_vector!(i32);
-impl_ord_methods_vector!(i64);
-impl_ord_methods_vector!(isize);
-impl_ord_methods_vector!(u8);
-impl_ord_methods_vector!(u16);
-impl_ord_methods_vector!(u32);
-impl_ord_methods_vector!(u64);
-impl_ord_methods_vector!(usize);
diff --git a/crates/core_simd/src/eq.rs b/crates/core_simd/src/eq.rs
new file mode 100644
index 00000000000..c7111f720a8
--- /dev/null
+++ b/crates/core_simd/src/eq.rs
@@ -0,0 +1,73 @@
+use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
+
+/// Parallel `PartialEq`.
+pub trait SimdPartialEq {
+    /// The mask type returned by each comparison.
+    type Mask;
+
+    /// Test if each lane is equal to the corresponding lane in `other`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn simd_eq(self, other: Self) -> Self::Mask;
+
+    /// Test if each lane is not equal to the corresponding lane in `other`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn simd_ne(self, other: Self) -> Self::Mask;
+}
+
+macro_rules! impl_number {
+    { $($number:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdPartialEq for Simd<$number, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Mask<<$number as SimdElement>::Mask, LANES>;
+
+            #[inline]
+            fn simd_eq(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
+            }
+
+            #[inline]
+            fn simd_ne(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
+            }
+        }
+        )*
+    }
+}
+
+impl_number! { f32, f64, u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
+
+macro_rules! impl_mask {
+    { $($integer:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdPartialEq for Mask<$integer, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Self;
+
+            #[inline]
+            fn simd_eq(self, other: Self) -> Self::Mask {
+                // Safety: the operands are the masks' integer vectors, and the result
+                // of the comparison is always a valid mask.
+                unsafe { Self::from_int_unchecked(intrinsics::simd_eq(self.to_int(), other.to_int())) }
+            }
+
+            #[inline]
+            fn simd_ne(self, other: Self) -> Self::Mask {
+                // Safety: the operands are the masks' integer vectors, and the result
+                // of the comparison is always a valid mask.
+                unsafe { Self::from_int_unchecked(intrinsics::simd_ne(self.to_int(), other.to_int())) }
+            }
+        }
+        )*
+    }
+}
+
+impl_mask! { i8, i16, i32, i64, isize }
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index e1cd7930450..d4e57ed90bf 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -15,7 +15,7 @@ mod mask_impl;
 mod to_bitmask;
 pub use to_bitmask::ToBitMask;
 
-use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
 use core::cmp::Ordering;
 use core::{fmt, mem};
 
@@ -56,7 +56,7 @@ macro_rules! impl_element {
             where
                 LaneCount<LANES>: SupportedLaneCount,
             {
-                (value.lanes_eq(Simd::splat(0)) | value.lanes_eq(Simd::splat(-1))).all()
+                (value.simd_eq(Simd::splat(0 as _)) | value.simd_eq(Simd::splat(-1 as _))).all()
             }
 
             fn eq(self, other: Self) -> bool { self == other }
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index 85026265956..42257f4e119 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -9,13 +9,14 @@ pub(crate) mod intrinsics;
 #[cfg(feature = "generic_const_exprs")]
 mod to_bytes;
 
-mod comparisons;
+mod eq;
 mod fmt;
 mod iter;
 mod lane_count;
 mod masks;
 mod math;
 mod ops;
+mod ord;
 mod round;
 mod select;
 mod vector;
@@ -25,8 +26,10 @@ mod vendor;
 pub mod simd {
     pub(crate) use crate::core_simd::intrinsics;
 
+    pub use crate::core_simd::eq::*;
     pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
     pub use crate::core_simd::masks::*;
+    pub use crate::core_simd::ord::*;
     pub use crate::core_simd::swizzle::*;
     pub use crate::core_simd::vector::*;
 }
diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index 1b35b3e717a..d39b4091df9 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -1,4 +1,4 @@
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
 use core::ops::{Add, Mul};
 use core::ops::{BitAnd, BitOr, BitXor};
 use core::ops::{Div, Rem, Sub};
@@ -74,7 +74,7 @@ macro_rules! int_divrem_guard {
             $simd_call:ident
         },
         $int:ident ) => {
-        if $rhs.lanes_eq(Simd::splat(0)).any() {
+        if $rhs.simd_eq(Simd::splat(0 as _)).any() {
             panic!($zero);
         } else {
             // Prevent otherwise-UB overflow on the MIN / -1 case.
@@ -82,10 +82,10 @@ macro_rules! int_divrem_guard {
                 // This should, at worst, optimize to a few branchless logical ops
                 // Ideally, this entire conditional should evaporate
                 // Fire LLVM and implement those manually if it doesn't get the hint
-                ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
+                ($lhs.simd_eq(Simd::splat(<$int>::MIN))
                 // type inference can break here, so cut an SInt to size
-                & $rhs.lanes_eq(Simd::splat(-1i64 as _)))
-                .select(Simd::splat(1), $rhs)
+                & $rhs.simd_eq(Simd::splat(-1i64 as _)))
+                .select(Simd::splat(1 as _), $rhs)
             } else {
                 // Nice base case to make it easy to const-fold away the other branch.
                 $rhs
diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs
new file mode 100644
index 00000000000..befa4594595
--- /dev/null
+++ b/crates/core_simd/src/ord.rs
@@ -0,0 +1,222 @@
+use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
+
+/// Parallel `PartialOrd`.
+pub trait SimdPartialOrd {
+    /// The mask type returned by each comparison.
+    type Mask;
+
+    /// Test if each lane is less than the corresponding lane in `other`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn simd_lt(self, other: Self) -> Self::Mask;
+
+    /// Test if each lane is less than or equal to the corresponding lane in `other`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn simd_le(self, other: Self) -> Self::Mask;
+
+    /// Test if each lane is greater than the corresponding lane in `other`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn simd_gt(self, other: Self) -> Self::Mask;
+
+    /// Test if each lane is greater than or equal to the corresponding lane in `other`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn simd_ge(self, other: Self) -> Self::Mask;
+}
+
+/// Parallel `Ord`.
+pub trait SimdOrd: SimdPartialOrd {
+    /// Returns the lane-wise maximum with `other`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_max(self, other: Self) -> Self;
+
+    /// Returns the lane-wise minimum with `other`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_min(self, other: Self) -> Self;
+
+    /// Restrict each lane to a certain interval.
+    ///
+    /// For each lane, returns `max` if `self` is greater than `max`, and `min` if `self` is
+    /// less than `min`. Otherwise returns `self`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `min > max` on any lane.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_clamp(self, min: Self, max: Self) -> Self;
+}
+
+macro_rules! impl_integer {
+    { $($integer:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdPartialOrd for Simd<$integer, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Mask<<$integer as SimdElement>::Mask, LANES>;
+
+            #[inline]
+            fn simd_lt(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
+            }
+
+            #[inline]
+            fn simd_le(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
+            }
+
+            #[inline]
+            fn simd_gt(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
+            }
+
+            #[inline]
+            fn simd_ge(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
+            }
+        }
+
+        impl<const LANES: usize> SimdOrd for Simd<$integer, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            #[inline]
+            fn simd_max(self, other: Self) -> Self {
+                self.simd_lt(other).select(other, self)
+            }
+
+            #[inline]
+            fn simd_min(self, other: Self) -> Self {
+                self.simd_gt(other).select(other, self)
+            }
+
+            #[inline]
+            fn simd_clamp(self, min: Self, max: Self) -> Self {
+                assert!(
+                    min.simd_le(max).all(),
+                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                );
+                self.simd_max(min).simd_min(max)
+            }
+        }
+        )*
+    }
+}
+
+impl_integer! { u8, u16, u32, u64, usize, i8, i16, i32, i64, isize }
+
+macro_rules! impl_float {
+    { $($float:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdPartialOrd for Simd<$float, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Mask<<$float as SimdElement>::Mask, LANES>;
+
+            #[inline]
+            fn simd_lt(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
+            }
+
+            #[inline]
+            fn simd_le(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
+            }
+
+            #[inline]
+            fn simd_gt(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
+            }
+
+            #[inline]
+            fn simd_ge(self, other: Self) -> Self::Mask {
+                // Safety: `self` is a vector, and the result of the comparison
+                // is always a valid mask.
+                unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
+            }
+        }
+        )*
+    }
+}
+
+impl_float! { f32, f64 }
+
+macro_rules! impl_mask {
+    { $($integer:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdPartialOrd for Mask<$integer, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Self;
+
+            #[inline]
+            fn simd_lt(self, other: Self) -> Self::Mask {
+                // Safety: the operands are the masks' integer vectors, and the result
+                // of the comparison is always a valid mask.
+                unsafe { Self::from_int_unchecked(intrinsics::simd_lt(self.to_int(), other.to_int())) }
+            }
+
+            #[inline]
+            fn simd_le(self, other: Self) -> Self::Mask {
+                // Safety: the operands are the masks' integer vectors, and the result
+                // of the comparison is always a valid mask.
+                unsafe { Self::from_int_unchecked(intrinsics::simd_le(self.to_int(), other.to_int())) }
+            }
+
+            #[inline]
+            fn simd_gt(self, other: Self) -> Self::Mask {
+                // Safety: the operands are the masks' integer vectors, and the result
+                // of the comparison is always a valid mask.
+                unsafe { Self::from_int_unchecked(intrinsics::simd_gt(self.to_int(), other.to_int())) }
+            }
+
+            #[inline]
+            fn simd_ge(self, other: Self) -> Self::Mask {
+                // Safety: the operands are the masks' integer vectors, and the result
+                // of the comparison is always a valid mask.
+                unsafe { Self::from_int_unchecked(intrinsics::simd_ge(self.to_int(), other.to_int())) }
+            }
+        }
+
+        impl<const LANES: usize> SimdOrd for Mask<$integer, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            #[inline]
+            fn simd_max(self, other: Self) -> Self {
+                self.simd_gt(other).select_mask(other, self)
+            }
+
+            #[inline]
+            fn simd_min(self, other: Self) -> Self {
+                self.simd_lt(other).select_mask(other, self)
+            }
+
+            #[inline]
+            fn simd_clamp(self, min: Self, max: Self) -> Self {
+                assert!(
+                    min.simd_le(max).all(),
+                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                );
+                self.simd_max(min).simd_min(max)
+            }
+        }
+        )*
+    }
+}
+
+impl_mask! { i8, i16, i32, i64, isize }
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 2405c1429b3..13e35ecfa49 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -10,7 +10,7 @@ pub use uint::*;
 pub(crate) mod ptr;
 
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
+use crate::simd::{LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount};
 
 /// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
 ///
@@ -243,7 +243,7 @@ where
         idxs: Simd<usize, LANES>,
         or: Self,
     ) -> Self {
-        let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
+        let enable: Mask<isize, LANES> = enable & idxs.simd_lt(Simd::splat(slice.len()));
         // Safety: We have masked-off out-of-bounds lanes.
         unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
     }
@@ -260,13 +260,13 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::{Simd, Mask};
+    /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask};
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]);
     /// let alt = Simd::from_array([-5, -4, -3, -2]);
     /// let enable = Mask::from_array([true, true, true, false]); // Note the final mask lane.
     /// // If this mask was used to gather, it would be unsound. Let's fix that.
-    /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len()));
+    /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len()));
     ///
     /// // We have masked the OOB lane, so it's safe to gather now.
     /// let result = unsafe { Simd::gather_select_unchecked(&vec, enable, idxs, alt) };
@@ -317,7 +317,7 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::{Simd, Mask};
+    /// # use core_simd::simd::{Simd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -333,7 +333,7 @@ where
         enable: Mask<isize, LANES>,
         idxs: Simd<usize, LANES>,
     ) {
-        let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
+        let enable: Mask<isize, LANES> = enable & idxs.simd_lt(Simd::splat(slice.len()));
         // Safety: We have masked-off out-of-bounds lanes.
         unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
     }
@@ -351,13 +351,13 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::{Simd, Mask};
+    /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
     /// let enable = Mask::from_array([true, true, true, false]); // Note the mask of the last lane.
     /// // If this mask was used to scatter, it would be unsound. Let's fix that.
-    /// let enable = enable & idxs.lanes_lt(Simd::splat(vec.len()));
+    /// let enable = enable & idxs.simd_lt(Simd::splat(vec.len()));
     ///
     /// // We have masked the OOB lane, so it's safe to scatter now.
     /// unsafe { vals.scatter_select_unchecked(&mut vec, enable, idxs); }
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index fcc7f6d8d1c..ebe4fbcb6fa 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -1,7 +1,7 @@
 #![allow(non_camel_case_types)]
 
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SimdPartialOrd, SupportedLaneCount};
 
 /// Implements inherent methods for a float vector containing multiple
 /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
@@ -74,35 +74,35 @@ macro_rules! impl_float_vector {
             #[must_use = "method returns a new mask and does not mutate the original value"]
             pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> {
                 let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1);
-                sign_bits.lanes_gt(Simd::splat(0))
+                sign_bits.simd_gt(Simd::splat(0))
             }
 
             /// Returns true for each lane if its value is `NaN`.
             #[inline]
             #[must_use = "method returns a new mask and does not mutate the original value"]
             pub fn is_nan(self) -> Mask<$mask_ty, LANES> {
-                self.lanes_ne(self)
+                self.simd_ne(self)
             }
 
             /// Returns true for each lane if its value is positive infinity or negative infinity.
             #[inline]
             #[must_use = "method returns a new mask and does not mutate the original value"]
             pub fn is_infinite(self) -> Mask<$mask_ty, LANES> {
-                self.abs().lanes_eq(Self::splat(<$type>::INFINITY))
+                self.abs().simd_eq(Self::splat(<$type>::INFINITY))
             }
 
             /// Returns true for each lane if its value is neither infinite nor `NaN`.
             #[inline]
             #[must_use = "method returns a new mask and does not mutate the original value"]
             pub fn is_finite(self) -> Mask<$mask_ty, LANES> {
-                self.abs().lanes_lt(Self::splat(<$type>::INFINITY))
+                self.abs().simd_lt(Self::splat(<$type>::INFINITY))
             }
 
             /// Returns true for each lane if its value is subnormal.
             #[inline]
             #[must_use = "method returns a new mask and does not mutate the original value"]
             pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> {
-                self.abs().lanes_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).lanes_eq(Simd::splat(0))
+                self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).simd_eq(Simd::splat(0))
             }
 
             /// Returns true for each lane if its value is neither zero, infinite,
@@ -110,7 +110,7 @@ macro_rules! impl_float_vector {
             #[inline]
             #[must_use = "method returns a new mask and does not mutate the original value"]
             pub fn is_normal(self) -> Mask<$mask_ty, LANES> {
-                !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
+                !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
             }
 
             /// Replaces each lane with a number that represents its sign.
@@ -140,7 +140,7 @@ macro_rules! impl_float_vector {
             /// If one of the values is `NAN`, then the other value is returned.
             #[inline]
             #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn min(self, other: Self) -> Self {
+            pub fn simd_min(self, other: Self) -> Self {
                 unsafe { intrinsics::simd_fmin(self, other) }
             }
 
@@ -149,7 +149,7 @@ macro_rules! impl_float_vector {
             /// If one of the values is `NAN`, then the other value is returned.
             #[inline]
             #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn max(self, other: Self) -> Self {
+            pub fn simd_max(self, other: Self) -> Self {
                 unsafe { intrinsics::simd_fmax(self, other) }
             }
 
@@ -160,14 +160,14 @@ macro_rules! impl_float_vector {
             /// than `min`.  Otherwise returns the lane in `self`.
             #[inline]
             #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn clamp(self, min: Self, max: Self) -> Self {
+            pub fn simd_clamp(self, min: Self, max: Self) -> Self {
                 assert!(
-                    min.lanes_le(max).all(),
+                    min.simd_le(max).all(),
                     "each lane in `min` must be less than or equal to the corresponding lane in `max`",
                 );
                 let mut x = self;
-                x = x.lanes_lt(min).select(min, x);
-                x = x.lanes_gt(max).select(max, x);
+                x = x.simd_lt(min).select(min, x);
+                x = x.simd_gt(max).select(max, x);
                 x
             }
         }
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 3eac02a2761..85fabdc4e00 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -1,6 +1,6 @@
 #![allow(non_camel_case_types)]
 
-use crate::simd::{LaneCount, Mask, Simd, SupportedLaneCount};
+use crate::simd::{LaneCount, Mask, Simd, SimdPartialOrd, SupportedLaneCount};
 
 /// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
 macro_rules! impl_integer_vector {
@@ -12,13 +12,13 @@ macro_rules! impl_integer_vector {
             /// Returns true for each positive lane and false if it is zero or negative.
             #[inline]
             pub fn is_positive(self) -> Mask<$type, LANES> {
-                self.lanes_gt(Self::splat(0))
+                self.simd_gt(Self::splat(0))
             }
 
             /// Returns true for each negative lane and false if it is zero or positive.
             #[inline]
             pub fn is_negative(self) -> Mask<$type, LANES> {
-                self.lanes_lt(Self::splat(0))
+                self.simd_lt(Self::splat(0))
             }
 
             /// Returns numbers representing the sign of each lane.
diff --git a/crates/core_simd/tests/i16_ops.rs b/crates/core_simd/tests/i16_ops.rs
index 171e5b472fa..f6c5d74fbbc 100644
--- a/crates/core_simd/tests/i16_ops.rs
+++ b/crates/core_simd/tests/i16_ops.rs
@@ -1,32 +1,5 @@
 #![feature(portable_simd)]
-use core_simd::i16x2;
 
 #[macro_use]
 mod ops_macros;
 impl_signed_tests! { i16 }
-
-#[test]
-fn max_is_not_lexicographic() {
-    let a = i16x2::splat(10);
-    let b = i16x2::from_array([-4, 12]);
-    assert_eq!(a.max(b), i16x2::from_array([10, 12]));
-}
-
-#[test]
-fn min_is_not_lexicographic() {
-    let a = i16x2::splat(10);
-    let b = i16x2::from_array([12, -4]);
-    assert_eq!(a.min(b), i16x2::from_array([10, -4]));
-}
-
-#[test]
-fn clamp_is_not_lexicographic() {
-    let a = i16x2::splat(10);
-    let lo = i16x2::from_array([-12, -4]);
-    let up = i16x2::from_array([-4, 12]);
-    assert_eq!(a.clamp(lo, up), i16x2::from_array([-4, 10]));
-
-    let x = i16x2::from_array([1, 10]);
-    let y = x.clamp(i16x2::splat(0), i16x2::splat(9));
-    assert_eq!(y, i16x2::from_array([1, 9]));
-}
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 7c9b17673ef..f8389c910c6 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -222,34 +222,37 @@ macro_rules! impl_signed_tests {
                     assert_eq!(a % b, Vector::<LANES>::splat(0));
                 }
 
-                fn min<const LANES: usize>() {
+                fn simd_min<const LANES: usize>() {
+                    use core_simd::simd::SimdOrd;
                     let a = Vector::<LANES>::splat(Scalar::MIN);
                     let b = Vector::<LANES>::splat(0);
-                    assert_eq!(a.min(b), a);
+                    assert_eq!(a.simd_min(b), a);
                     let a = Vector::<LANES>::splat(Scalar::MAX);
                     let b = Vector::<LANES>::splat(0);
-                    assert_eq!(a.min(b), b);
+                    assert_eq!(a.simd_min(b), b);
                 }
 
-                fn max<const LANES: usize>() {
+                fn simd_max<const LANES: usize>() {
+                    use core_simd::simd::SimdOrd;
                     let a = Vector::<LANES>::splat(Scalar::MIN);
                     let b = Vector::<LANES>::splat(0);
-                    assert_eq!(a.max(b), b);
+                    assert_eq!(a.simd_max(b), b);
                     let a = Vector::<LANES>::splat(Scalar::MAX);
                     let b = Vector::<LANES>::splat(0);
-                    assert_eq!(a.max(b), a);
+                    assert_eq!(a.simd_max(b), a);
                 }
 
-                fn clamp<const LANES: usize>() {
+                fn simd_clamp<const LANES: usize>() {
+                    use core_simd::simd::SimdOrd;
                     let min = Vector::<LANES>::splat(Scalar::MIN);
                     let max = Vector::<LANES>::splat(Scalar::MAX);
                     let zero = Vector::<LANES>::splat(0);
                     let one = Vector::<LANES>::splat(1);
                     let negone = Vector::<LANES>::splat(-1);
-                    assert_eq!(zero.clamp(min, max), zero);
-                    assert_eq!(zero.clamp(min, one), zero);
-                    assert_eq!(zero.clamp(one, max), one);
-                    assert_eq!(zero.clamp(min, negone), negone);
+                    assert_eq!(zero.simd_clamp(min, max), zero);
+                    assert_eq!(zero.simd_clamp(min, one), zero);
+                    assert_eq!(zero.simd_clamp(one, max), one);
+                    assert_eq!(zero.simd_clamp(min, negone), negone);
                 }
             }
 
@@ -458,10 +461,10 @@ macro_rules! impl_float_tests {
                     )
                 }
 
-                fn min<const LANES: usize>() {
+                fn simd_min<const LANES: usize>() {
                     // Regular conditions (both values aren't zero)
                     test_helpers::test_binary_elementwise(
-                        &Vector::<LANES>::min,
+                        &Vector::<LANES>::simd_min,
                         &Scalar::min,
                         // Reject the case where both values are zero with different signs
                         &|a, b| {
@@ -477,14 +480,14 @@ macro_rules! impl_float_tests {
                     // Special case where both values are zero
                     let p_zero = Vector::<LANES>::splat(0.);
                     let n_zero = Vector::<LANES>::splat(-0.);
-                    assert!(p_zero.min(n_zero).to_array().iter().all(|x| *x == 0.));
-                    assert!(n_zero.min(p_zero).to_array().iter().all(|x| *x == 0.));
+                    assert!(p_zero.simd_min(n_zero).to_array().iter().all(|x| *x == 0.));
+                    assert!(n_zero.simd_min(p_zero).to_array().iter().all(|x| *x == 0.));
                 }
 
-                fn max<const LANES: usize>() {
+                fn simd_max<const LANES: usize>() {
                     // Regular conditions (both values aren't zero)
                     test_helpers::test_binary_elementwise(
-                        &Vector::<LANES>::max,
+                        &Vector::<LANES>::simd_max,
                         &Scalar::max,
                         // Reject the case where both values are zero with different signs
                         &|a, b| {
@@ -500,11 +503,11 @@ macro_rules! impl_float_tests {
                     // Special case where both values are zero
                     let p_zero = Vector::<LANES>::splat(0.);
                     let n_zero = Vector::<LANES>::splat(-0.);
-                    assert!(p_zero.max(n_zero).to_array().iter().all(|x| *x == 0.));
-                    assert!(n_zero.max(p_zero).to_array().iter().all(|x| *x == 0.));
+                    assert!(p_zero.simd_max(n_zero).to_array().iter().all(|x| *x == 0.));
+                    assert!(n_zero.simd_max(p_zero).to_array().iter().all(|x| *x == 0.));
                 }
 
-                fn clamp<const LANES: usize>() {
+                fn simd_clamp<const LANES: usize>() {
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {
@@ -522,7 +525,7 @@ macro_rules! impl_float_tests {
                         for i in 0..LANES {
                             result_scalar[i] = value[i].clamp(min[i], max[i]);
                         }
-                        let result_vector = Vector::from_array(value).clamp(min.into(), max.into()).to_array();
+                        let result_vector = Vector::from_array(value).simd_clamp(min.into(), max.into()).to_array();
                         test_helpers::prop_assert_biteq!(result_scalar, result_vector);
                         Ok(())
                     })

From 2a02c4d9cbee120915535f6fc8379849e10e8373 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 13 Mar 2022 19:57:06 +0000
Subject: [PATCH 02/32] Create SimdFloat trait

---
 crates/core_simd/src/vector/float.rs | 106 ++++++++++++++++++---------
 crates/core_simd/tests/ops_macros.rs |   3 +
 2 files changed, 73 insertions(+), 36 deletions(-)

diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index ebe4fbcb6fa..f422d161178 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -134,42 +134,6 @@ macro_rules! impl_float_vector {
                 let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
                 Self::from_bits(sign_bit | magnitude)
             }
-
-            /// Returns the minimum of each lane.
-            ///
-            /// If one of the values is `NAN`, then the other value is returned.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn simd_min(self, other: Self) -> Self {
-                unsafe { intrinsics::simd_fmin(self, other) }
-            }
-
-            /// Returns the maximum of each lane.
-            ///
-            /// If one of the values is `NAN`, then the other value is returned.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn simd_max(self, other: Self) -> Self {
-                unsafe { intrinsics::simd_fmax(self, other) }
-            }
-
-            /// Restrict each lane to a certain interval unless it is NaN.
-            ///
-            /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
-            /// greater than `max`, and the corresponding lane in `min` if the lane is less
-            /// than `min`.  Otherwise returns the lane in `self`.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn simd_clamp(self, min: Self, max: Self) -> Self {
-                assert!(
-                    min.simd_le(max).all(),
-                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
-                );
-                let mut x = self;
-                x = x.simd_lt(min).select(min, x);
-                x = x.simd_gt(max).select(max, x);
-                x
-            }
         }
     };
 }
@@ -197,3 +161,73 @@ pub type f64x4 = Simd<f64, 4>;
 
 /// Vector of eight `f64` values
 pub type f64x8 = Simd<f64, 8>;
+
+mod sealed {
+    pub trait Sealed {}
+}
+use sealed::Sealed;
+
+/// SIMD operations on vectors of floating point numbers.
+pub trait SimdFloat: Sized + Sealed {
+    /// Returns the minimum of each lane.
+    ///
+    /// If one of the values is `NAN`, then the other value is returned.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_min(self, other: Self) -> Self;
+
+    /// Returns the maximum of each lane.
+    ///
+    /// If one of the values is `NAN`, then the other value is returned.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_max(self, other: Self) -> Self;
+
+    /// Restrict each lane to a certain interval unless it is NaN.
+    ///
+    /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
+    /// greater than `max`, and the corresponding lane in `min` if the lane is less
+    /// than `min`.  Otherwise returns the lane in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_clamp(self, min: Self, max: Self) -> Self;
+}
+
+macro_rules! impl_simd_float {
+    { $($float:ty),* } => {
+        $(
+        impl <const LANES: usize> Sealed for Simd<$float, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+        }
+
+        impl <const LANES: usize> SimdFloat for Simd<$float, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            #[inline]
+            #[must_use = "method returns a new vector and does not mutate the original value"]
+            fn simd_min(self, other: Self) -> Self {
+                unsafe { intrinsics::simd_fmin(self, other) }
+            }
+
+            #[inline]
+            fn simd_max(self, other: Self) -> Self {
+                unsafe { intrinsics::simd_fmax(self, other) }
+            }
+
+            #[inline]
+            fn simd_clamp(self, min: Self, max: Self) -> Self {
+                assert!(
+                    min.simd_le(max).all(),
+                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                );
+                let mut x = self;
+                x = x.simd_lt(min).select(min, x);
+                x = x.simd_gt(max).select(max, x);
+                x
+            }
+        }
+        )*
+    }
+}
+
+impl_simd_float! { f32, f64 }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index f8389c910c6..47fe49b0982 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -462,6 +462,7 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_min<const LANES: usize>() {
+                    use core_simd::simd::SimdFloat;
                     // Regular conditions (both values aren't zero)
                     test_helpers::test_binary_elementwise(
                         &Vector::<LANES>::simd_min,
@@ -485,6 +486,7 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_max<const LANES: usize>() {
+                    use core_simd::simd::SimdFloat;
                     // Regular conditions (both values aren't zero)
                     test_helpers::test_binary_elementwise(
                         &Vector::<LANES>::simd_max,
@@ -508,6 +510,7 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_clamp<const LANES: usize>() {
+                    use core_simd::simd::SimdFloat;
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {

From 60486e08ed58698c7b6c2b5cd62a9fbd9080bc2f Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Tue, 15 Mar 2022 00:17:14 +0000
Subject: [PATCH 03/32] SimdPartialOrd implies SimdPartialEq

---
 crates/core_simd/src/ord.rs | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/crates/core_simd/src/ord.rs b/crates/core_simd/src/ord.rs
index befa4594595..9a87bc2e344 100644
--- a/crates/core_simd/src/ord.rs
+++ b/crates/core_simd/src/ord.rs
@@ -1,10 +1,7 @@
-use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdElement, SupportedLaneCount};
+use crate::simd::{intrinsics, LaneCount, Mask, Simd, SimdPartialEq, SupportedLaneCount};
 
 /// Parallel `PartialOrd`.
-pub trait SimdPartialOrd {
-    /// The mask type returned by each comparison.
-    type Mask;
-
+pub trait SimdPartialOrd: SimdPartialEq {
     /// Test if each lane is less than the corresponding lane in `other`.
     #[must_use = "method returns a new mask and does not mutate the original value"]
     fn simd_lt(self, other: Self) -> Self::Mask;
@@ -51,8 +48,6 @@ macro_rules! impl_integer {
         where
             LaneCount<LANES>: SupportedLaneCount,
         {
-            type Mask = Mask<<$integer as SimdElement>::Mask, LANES>;
-
             #[inline]
             fn simd_lt(self, other: Self) -> Self::Mask {
                 // Safety: `self` is a vector, and the result of the comparison
@@ -118,8 +113,6 @@ macro_rules! impl_float {
         where
             LaneCount<LANES>: SupportedLaneCount,
         {
-            type Mask = Mask<<$float as SimdElement>::Mask, LANES>;
-
             #[inline]
             fn simd_lt(self, other: Self) -> Self::Mask {
                 // Safety: `self` is a vector, and the result of the comparison
@@ -161,8 +154,6 @@ macro_rules! impl_mask {
         where
             LaneCount<LANES>: SupportedLaneCount,
         {
-            type Mask = Self;
-
             #[inline]
             fn simd_lt(self, other: Self) -> Self::Mask {
                 // Safety: `self` is a vector, and the result of the comparison

From 50fbfa4ebab8c8754d625163f4fba8a1ca0ab676 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Wed, 16 Mar 2022 20:27:32 -0400
Subject: [PATCH 04/32] add bitmask roundtrip test for vector length below 8

---
 crates/core_simd/tests/masks.rs | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 3aec36ca7b7..1c587630a36 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -80,6 +80,18 @@ macro_rules! test_mask_api {
                 assert_eq!(bitmask, 0b1000001101001001);
                 assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
             }
+
+            #[test]
+            fn roundtrip_bitmask_conversion_short() {
+                use core_simd::ToBitMask;
+                let values = [
+                    false, false, false, true,
+                ];
+                let mask = core_simd::Mask::<$type, 4>::from_array(values);
+                let bitmask = mask.to_bitmask();
+                assert_eq!(bitmask, 0b1000);
+                assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask);
+            }
         }
     }
 }

From 60555b57f1a61962e0df48cd303fbefff2e61ec3 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Thu, 17 Mar 2022 10:42:39 -0400
Subject: [PATCH 05/32] fix big-endian bitmasks smaller than a byte

---
 crates/core_simd/src/masks/full_masks.rs | 20 ++++++++++++++++----
 crates/core_simd/tests/masks.rs          |  7 +++++++
 2 files changed, 23 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 8bbdf637de8..efa688b128f 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -68,14 +68,26 @@ where
 
 // Used for bitmask bit order workaround
 pub(crate) trait ReverseBits {
-    fn reverse_bits(self) -> Self;
+    // Reverse the least significant `n` bits of `self`.
+    // (Remaining bits must be 0.)
+    fn reverse_bits(self, n: usize) -> Self;
 }
 
 macro_rules! impl_reverse_bits {
     { $($int:ty),* } => {
         $(
         impl ReverseBits for $int {
-            fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
+            #[inline(always)]
+            fn reverse_bits(self, n: usize) -> Self {
+                let rev = <$int>::reverse_bits(self);
+                let bitsize = core::mem::size_of::<$int>() * 8;
+                if n < bitsize {
+                    // Shift things back to the right
+                    rev >> (bitsize - n)
+                } else {
+                    rev
+                }
+            }
         }
         )*
     }
@@ -137,7 +149,7 @@ where
 
         // LLVM assumes bit order should match endianness
         if cfg!(target_endian = "big") {
-            bitmask.reverse_bits()
+            bitmask.reverse_bits(LANES)
         } else {
             bitmask
         }
@@ -150,7 +162,7 @@ where
     {
         // LLVM assumes bit order should match endianness
         let bitmask = if cfg!(target_endian = "big") {
-            bitmask.reverse_bits()
+            bitmask.reverse_bits(LANES)
         } else {
             bitmask
         };
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 1c587630a36..d10c6610f50 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -84,6 +84,7 @@ macro_rules! test_mask_api {
             #[test]
             fn roundtrip_bitmask_conversion_short() {
                 use core_simd::ToBitMask;
+
                 let values = [
                     false, false, false, true,
                 ];
@@ -91,6 +92,12 @@ macro_rules! test_mask_api {
                 let bitmask = mask.to_bitmask();
                 assert_eq!(bitmask, 0b1000);
                 assert_eq!(core_simd::Mask::<$type, 4>::from_bitmask(bitmask), mask);
+
+                let values = [true, false];
+                let mask = core_simd::Mask::<$type, 2>::from_array(values);
+                let bitmask = mask.to_bitmask();
+                assert_eq!(bitmask, 0b01);
+                assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask);
             }
         }
     }

From 35e16a1e0652a0c2f351be8a23c18514bdd35f34 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Sun, 20 Mar 2022 19:17:33 -0400
Subject: [PATCH 06/32] rust-lang/portable-simd#266: reduce Miri test count in
 round.rs

---
 crates/core_simd/tests/round.rs | 2 +-
 crates/test_helpers/src/lib.rs  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/core_simd/tests/round.rs b/crates/core_simd/tests/round.rs
index 7feb0320a16..484fd5bf47d 100644
--- a/crates/core_simd/tests/round.rs
+++ b/crates/core_simd/tests/round.rs
@@ -59,7 +59,7 @@ macro_rules! float_rounding_test {
                     const MAX_REPRESENTABLE_VALUE: Scalar =
                         (ALL_MANTISSA_BITS << (core::mem::size_of::<Scalar>() * 8 - <Scalar>::MANTISSA_DIGITS as usize - 1)) as Scalar;
 
-                    let mut runner = proptest::test_runner::TestRunner::default();
+                    let mut runner = test_helpers::make_runner();
                     runner.run(
                         &test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
                         |x| {
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
index 8bf7f5ed3d2..141bee18a9a 100644
--- a/crates/test_helpers/src/lib.rs
+++ b/crates/test_helpers/src/lib.rs
@@ -78,11 +78,11 @@ impl<T: core::fmt::Debug + DefaultStrategy, const LANES: usize> DefaultStrategy
 }
 
 #[cfg(not(miri))]
-fn make_runner() -> proptest::test_runner::TestRunner {
+pub fn make_runner() -> proptest::test_runner::TestRunner {
     Default::default()
 }
 #[cfg(miri)]
-fn make_runner() -> proptest::test_runner::TestRunner {
+pub fn make_runner() -> proptest::test_runner::TestRunner {
     // Only run a few tests on Miri
     proptest::test_runner::TestRunner::new(proptest::test_runner::Config::with_cases(4))
 }

From 4e14017f3d9e0379304dd8d34772786631310104 Mon Sep 17 00:00:00 2001
From: Sean Stangl <sean.stangl@gmail.com>
Date: Sun, 3 Apr 2022 15:20:00 -0600
Subject: [PATCH 07/32] Standardize documentation for SIMD vector and mask
 types

---
 crates/core_simd/src/masks.rs        | 36 ++++++++++++-------------
 crates/core_simd/src/vector/float.rs | 14 +++++-----
 crates/core_simd/src/vector/int.rs   | 40 ++++++++++++++--------------
 crates/core_simd/src/vector/uint.rs  | 40 ++++++++++++++--------------
 4 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index e1cd7930450..a56a154b437 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -507,58 +507,58 @@ where
     }
 }
 
-/// Vector of eight 8-bit masks
+/// A 64-bit SIMD vector mask for eight elements of 8 bits.
 pub type mask8x8 = Mask<i8, 8>;
 
-/// Vector of 16 8-bit masks
+/// A 128-bit SIMD vector mask for 16 elements of 8 bits.
 pub type mask8x16 = Mask<i8, 16>;
 
-/// Vector of 32 8-bit masks
+/// A 256-bit SIMD vector mask for 32 elements of 8 bits.
 pub type mask8x32 = Mask<i8, 32>;
 
-/// Vector of 16 8-bit masks
+/// A 512-bit SIMD vector mask for 64 elements of 8 bits.
 pub type mask8x64 = Mask<i8, 64>;
 
-/// Vector of four 16-bit masks
+/// A 64-bit SIMD vector mask for four elements of 16 bits.
 pub type mask16x4 = Mask<i16, 4>;
 
-/// Vector of eight 16-bit masks
+/// A 128-bit SIMD vector mask for eight elements of 16 bits.
 pub type mask16x8 = Mask<i16, 8>;
 
-/// Vector of 16 16-bit masks
+/// A 256-bit SIMD vector mask for 16 elements of 16 bits.
 pub type mask16x16 = Mask<i16, 16>;
 
-/// Vector of 32 16-bit masks
+/// A 512-bit SIMD vector mask for 32 elements of 16 bits.
 pub type mask16x32 = Mask<i16, 32>;
 
-/// Vector of two 32-bit masks
+/// A 64-bit SIMD vector mask for two elements of 32 bits.
 pub type mask32x2 = Mask<i32, 2>;
 
-/// Vector of four 32-bit masks
+/// A 128-bit SIMD vector mask for four elements of 32 bits.
 pub type mask32x4 = Mask<i32, 4>;
 
-/// Vector of eight 32-bit masks
+/// A 256-bit SIMD vector mask for eight elements of 32 bits.
 pub type mask32x8 = Mask<i32, 8>;
 
-/// Vector of 16 32-bit masks
+/// A 512-bit SIMD vector mask for 16 elements of 32 bits.
 pub type mask32x16 = Mask<i32, 16>;
 
-/// Vector of two 64-bit masks
+/// A 128-bit SIMD vector mask for two elements of 64 bits.
 pub type mask64x2 = Mask<i64, 2>;
 
-/// Vector of four 64-bit masks
+/// A 256-bit SIMD vector mask for four elements of 64 bits.
 pub type mask64x4 = Mask<i64, 4>;
 
-/// Vector of eight 64-bit masks
+/// A 512-bit SIMD vector mask for eight elements of 64 bits.
 pub type mask64x8 = Mask<i64, 8>;
 
-/// Vector of two pointer-width masks
+/// A SIMD vector mask for two elements of pointer width.
 pub type masksizex2 = Mask<isize, 2>;
 
-/// Vector of four pointer-width masks
+/// A SIMD vector mask for four elements of pointer width.
 pub type masksizex4 = Mask<isize, 4>;
 
-/// Vector of eight pointer-width masks
+/// A SIMD vector mask for eight elements of pointer width.
 pub type masksizex8 = Mask<isize, 8>;
 
 macro_rules! impl_from {
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index fcc7f6d8d1c..f7985b64710 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -177,23 +177,23 @@ macro_rules! impl_float_vector {
 impl_float_vector! { f32, u32, i32 }
 impl_float_vector! { f64, u64, i64 }
 
-/// Vector of two `f32` values
+/// A 64-bit SIMD vector with two elements of type `f32`.
 pub type f32x2 = Simd<f32, 2>;
 
-/// Vector of four `f32` values
+/// A 128-bit SIMD vector with four elements of type `f32`.
 pub type f32x4 = Simd<f32, 4>;
 
-/// Vector of eight `f32` values
+/// A 256-bit SIMD vector with eight elements of type `f32`.
 pub type f32x8 = Simd<f32, 8>;
 
-/// Vector of 16 `f32` values
+/// A 512-bit SIMD vector with 16 elements of type `f32`.
 pub type f32x16 = Simd<f32, 16>;
 
-/// Vector of two `f64` values
+/// A 128-bit SIMD vector with two elements of type `f64`.
 pub type f64x2 = Simd<f64, 2>;
 
-/// Vector of four `f64` values
+/// A 256-bit SIMD vector with four elements of type `f64`.
 pub type f64x4 = Simd<f64, 4>;
 
-/// Vector of eight `f64` values
+/// A 512-bit SIMD vector with eight elements of type `f64`.
 pub type f64x8 = Simd<f64, 8>;
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 3eac02a2761..eec483212b3 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -42,62 +42,62 @@ impl_integer_vector! { i32 }
 impl_integer_vector! { i64 }
 impl_integer_vector! { i8 }
 
-/// Vector of two `isize` values
+/// A SIMD vector with two elements of type `isize`.
 pub type isizex2 = Simd<isize, 2>;
 
-/// Vector of four `isize` values
+/// A SIMD vector with four elements of type `isize`.
 pub type isizex4 = Simd<isize, 4>;
 
-/// Vector of eight `isize` values
+/// A SIMD vector with eight elements of type `isize`.
 pub type isizex8 = Simd<isize, 8>;
 
-/// Vector of two `i16` values
+/// A 32-bit SIMD vector with two elements of type `i16`.
 pub type i16x2 = Simd<i16, 2>;
 
-/// Vector of four `i16` values
+/// A 64-bit SIMD vector with four elements of type `i16`.
 pub type i16x4 = Simd<i16, 4>;
 
-/// Vector of eight `i16` values
+/// A 128-bit SIMD vector with eight elements of type `i16`.
 pub type i16x8 = Simd<i16, 8>;
 
-/// Vector of 16 `i16` values
+/// A 256-bit SIMD vector with 16 elements of type `i16`.
 pub type i16x16 = Simd<i16, 16>;
 
-/// Vector of 32 `i16` values
+/// A 512-bit SIMD vector with 32 elements of type `i16`.
 pub type i16x32 = Simd<i16, 32>;
 
-/// Vector of two `i32` values
+/// A 64-bit SIMD vector with two elements of type `i32`.
 pub type i32x2 = Simd<i32, 2>;
 
-/// Vector of four `i32` values
+/// A 128-bit SIMD vector with four elements of type `i32`.
 pub type i32x4 = Simd<i32, 4>;
 
-/// Vector of eight `i32` values
+/// A 256-bit SIMD vector with eight elements of type `i32`.
 pub type i32x8 = Simd<i32, 8>;
 
-/// Vector of 16 `i32` values
+/// A 512-bit SIMD vector with 16 elements of type `i32`.
 pub type i32x16 = Simd<i32, 16>;
 
-/// Vector of two `i64` values
+/// A 128-bit SIMD vector with two elements of type `i64`.
 pub type i64x2 = Simd<i64, 2>;
 
-/// Vector of four `i64` values
+/// A 256-bit SIMD vector with four elements of type `i64`.
 pub type i64x4 = Simd<i64, 4>;
 
-/// Vector of eight `i64` values
+/// A 512-bit SIMD vector with eight elements of type `i64`.
 pub type i64x8 = Simd<i64, 8>;
 
-/// Vector of four `i8` values
+/// A 32-bit SIMD vector with four elements of type `i8`.
 pub type i8x4 = Simd<i8, 4>;
 
-/// Vector of eight `i8` values
+/// A 64-bit SIMD vector with eight elements of type `i8`.
 pub type i8x8 = Simd<i8, 8>;
 
-/// Vector of 16 `i8` values
+/// A 128-bit SIMD vector with 16 elements of type `i8`.
 pub type i8x16 = Simd<i8, 16>;
 
-/// Vector of 32 `i8` values
+/// A 256-bit SIMD vector with 32 elements of type `i8`.
 pub type i8x32 = Simd<i8, 32>;
 
-/// Vector of 64 `i8` values
+/// A 512-bit SIMD vector with 64 elements of type `i8`.
 pub type i8x64 = Simd<i8, 64>;
diff --git a/crates/core_simd/src/vector/uint.rs b/crates/core_simd/src/vector/uint.rs
index ed91fc3640e..b4a69c44363 100644
--- a/crates/core_simd/src/vector/uint.rs
+++ b/crates/core_simd/src/vector/uint.rs
@@ -2,62 +2,62 @@
 
 use crate::simd::Simd;
 
-/// Vector of two `usize` values
+/// A SIMD vector with two elements of type `usize`.
 pub type usizex2 = Simd<usize, 2>;
 
-/// Vector of four `usize` values
+/// A SIMD vector with four elements of type `usize`.
 pub type usizex4 = Simd<usize, 4>;
 
-/// Vector of eight `usize` values
+/// A SIMD vector with eight elements of type `usize`.
 pub type usizex8 = Simd<usize, 8>;
 
-/// Vector of two `u16` values
+/// A 32-bit SIMD vector with two elements of type `u16`.
 pub type u16x2 = Simd<u16, 2>;
 
-/// Vector of four `u16` values
+/// A 64-bit SIMD vector with four elements of type `u16`.
 pub type u16x4 = Simd<u16, 4>;
 
-/// Vector of eight `u16` values
+/// A 128-bit SIMD vector with eight elements of type `u16`.
 pub type u16x8 = Simd<u16, 8>;
 
-/// Vector of 16 `u16` values
+/// A 256-bit SIMD vector with 16 elements of type `u16`.
 pub type u16x16 = Simd<u16, 16>;
 
-/// Vector of 32 `u16` values
+/// A 512-bit SIMD vector with 32 elements of type `u16`.
 pub type u16x32 = Simd<u16, 32>;
 
-/// Vector of two `u32` values
+/// A 64-bit SIMD vector with two elements of type `u32`.
 pub type u32x2 = Simd<u32, 2>;
 
-/// Vector of four `u32` values
+/// A 128-bit SIMD vector with four elements of type `u32`.
 pub type u32x4 = Simd<u32, 4>;
 
-/// Vector of eight `u32` values
+/// A 256-bit SIMD vector with eight elements of type `u32`.
 pub type u32x8 = Simd<u32, 8>;
 
-/// Vector of 16 `u32` values
+/// A 512-bit SIMD vector with 16 elements of type `u32`.
 pub type u32x16 = Simd<u32, 16>;
 
-/// Vector of two `u64` values
+/// A 128-bit SIMD vector with two elements of type `u64`.
 pub type u64x2 = Simd<u64, 2>;
 
-/// Vector of four `u64` values
+/// A 256-bit SIMD vector with four elements of type `u64`.
 pub type u64x4 = Simd<u64, 4>;
 
-/// Vector of eight `u64` values
+/// A 512-bit SIMD vector with eight elements of type `u64`.
 pub type u64x8 = Simd<u64, 8>;
 
-/// Vector of four `u8` values
+/// A 32-bit SIMD vector with four elements of type `u8`.
 pub type u8x4 = Simd<u8, 4>;
 
-/// Vector of eight `u8` values
+/// A 64-bit SIMD vector with eight elements of type `u8`.
 pub type u8x8 = Simd<u8, 8>;
 
-/// Vector of 16 `u8` values
+/// A 128-bit SIMD vector with 16 elements of type `u8`.
 pub type u8x16 = Simd<u8, 16>;
 
-/// Vector of 32 `u8` values
+/// A 256-bit SIMD vector with 32 elements of type `u8`.
 pub type u8x32 = Simd<u8, 32>;
 
-/// Vector of 64 `u8` values
+/// A 512-bit SIMD vector with 64 elements of type `u8`.
 pub type u8x64 = Simd<u8, 64>;

From c73f1fbdde8bd1c257f2f683a595a5c434f32a2d Mon Sep 17 00:00:00 2001
From: Sean Stangl <sean.stangl@gmail.com>
Date: Sun, 3 Apr 2022 15:59:22 -0600
Subject: [PATCH 08/32] Update crates/core_simd/src/masks.rs

Co-authored-by: Jacob Lifshay <programmerjake@gmail.com>
---
 crates/core_simd/src/masks.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index a56a154b437..ba4158b3cfc 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -507,7 +507,7 @@ where
     }
 }
 
-/// A 64-bit SIMD vector mask for eight elements of 8 bits.
+/// A SIMD vector mask of eight elements, for operating with SIMD vectors with 8 bit elements. 
 pub type mask8x8 = Mask<i8, 8>;
 
 /// A 128-bit SIMD vector mask for 16 elements of 8 bits.

From 21b070ce4372c5141cb48ebd6fc6ca45030d703c Mon Sep 17 00:00:00 2001
From: Sean Stangl <sean.stangl@gmail.com>
Date: Sun, 3 Apr 2022 16:15:27 -0600
Subject: [PATCH 09/32] Correct the Mask docs, and get them to fit in search
 results

---
 crates/core_simd/src/masks.rs | 36 +++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index ba4158b3cfc..d0021c91142 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -507,58 +507,58 @@ where
     }
 }
 
-/// A SIMD vector mask of eight elements, for operating with SIMD vectors with 8 bit elements. 
+/// A mask for SIMD vectors with eight elements of 8 bits.
 pub type mask8x8 = Mask<i8, 8>;
 
-/// A 128-bit SIMD vector mask for 16 elements of 8 bits.
+/// A mask for SIMD vectors with 16 elements of 8 bits.
 pub type mask8x16 = Mask<i8, 16>;
 
-/// A 256-bit SIMD vector mask for 32 elements of 8 bits.
+/// A mask for SIMD vectors with 32 elements of 8 bits.
 pub type mask8x32 = Mask<i8, 32>;
 
-/// A 512-bit SIMD vector mask for 64 elements of 8 bits.
+/// A mask for SIMD vectors with 64 elements of 8 bits.
 pub type mask8x64 = Mask<i8, 64>;
 
-/// A 64-bit SIMD vector mask for four elements of 16 bits.
+/// A mask for SIMD vectors with four elements of 16 bits.
 pub type mask16x4 = Mask<i16, 4>;
 
-/// A 128-bit SIMD vector mask for eight elements of 16 bits.
+/// A mask for SIMD vectors with eight elements of 16 bits.
 pub type mask16x8 = Mask<i16, 8>;
 
-/// A 256-bit SIMD vector mask for 16 elements of 16 bits.
+/// A mask for SIMD vectors with 16 elements of 16 bits.
 pub type mask16x16 = Mask<i16, 16>;
 
-/// A 512-bit SIMD vector mask for 32 elements of 16 bits.
+/// A mask for SIMD vectors with 32 elements of 16 bits.
 pub type mask16x32 = Mask<i16, 32>;
 
-/// A 64-bit SIMD vector mask for two elements of 32 bits.
+/// A mask for SIMD vectors with two elements of 32 bits.
 pub type mask32x2 = Mask<i32, 2>;
 
-/// A 128-bit SIMD vector mask for four elements of 32 bits.
+/// A mask for SIMD vectors with four elements of 32 bits.
 pub type mask32x4 = Mask<i32, 4>;
 
-/// A 256-bit SIMD vector mask for eight elements of 32 bits.
+/// A mask for SIMD vectors with eight elements of 32 bits.
 pub type mask32x8 = Mask<i32, 8>;
 
-/// A 512-bit SIMD vector mask for 16 elements of 32 bits.
+/// A mask for SIMD vectors with 16 elements of 32 bits.
 pub type mask32x16 = Mask<i32, 16>;
 
-/// A 128-bit SIMD vector mask for two elements of 64 bits.
+/// A mask for SIMD vectors with two elements of 64 bits.
 pub type mask64x2 = Mask<i64, 2>;
 
-/// A 256-bit SIMD vector mask for four elements of 64 bits.
+/// A mask for SIMD vectors with four elements of 64 bits.
 pub type mask64x4 = Mask<i64, 4>;
 
-/// A 512-bit SIMD vector mask for eight elements of 64 bits.
+/// A mask for SIMD vectors with eight elements of 64 bits.
 pub type mask64x8 = Mask<i64, 8>;
 
-/// A SIMD vector mask for two elements of pointer width.
+/// A mask for SIMD vectors with two elements of pointer width.
 pub type masksizex2 = Mask<isize, 2>;
 
-/// A SIMD vector mask for four elements of pointer width.
+/// A mask for SIMD vectors with four elements of pointer width.
 pub type masksizex4 = Mask<isize, 4>;
 
-/// A SIMD vector mask for eight elements of pointer width.
+/// A mask for SIMD vectors with eight elements of pointer width.
 pub type masksizex8 = Mask<isize, 8>;
 
 macro_rules! impl_from {

From 8cd9325e206bf362071b71a812849a3b463bcbcc Mon Sep 17 00:00:00 2001
From: Sean Stangl <sean.stangl@gmail.com>
Date: Sun, 3 Apr 2022 16:22:31 -0600
Subject: [PATCH 10/32] Add a *small* blurb to Mask that is likely
 unobjectionable

---
 crates/core_simd/src/masks.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index d0021c91142..b97be97f7e6 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -77,6 +77,8 @@ impl_element! { isize }
 
 /// A SIMD vector mask for `LANES` elements of width specified by `Element`.
 ///
+/// Masks represent boolean inclusion/exclusion on a per-lane basis.
+///
 /// The layout of this type is unspecified.
 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(mask_impl::Mask<T, LANES>)

From 7136841cbd22ba66dbd49331f276bdb16401ec11 Mon Sep 17 00:00:00 2001
From: Sean Stangl <sean.stangl@gmail.com>
Date: Mon, 11 Apr 2022 00:05:14 -0600
Subject: [PATCH 11/32] rust-lang/portable-simd#274: Use SIMD equality for
 PartialEq on SIMD vectors

---
 crates/core_simd/src/vector.rs | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 13e35ecfa49..d032f5459fd 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -429,8 +429,26 @@ where
 {
     #[inline]
     fn eq(&self, other: &Self) -> bool {
-        // TODO use SIMD equality
-        self.to_array() == other.to_array()
+        // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.
+        let mask = unsafe {
+            let tfvec: Simd<<T as SimdElement>::Mask, LANES> = intrinsics::simd_eq(*self, *other);
+            Mask::from_int_unchecked(tfvec)
+        };
+
+        // Two vectors are equal if all lanes tested true for vertical equality.
+        mask.all()
+    }
+
+    #[inline]
+    fn ne(&self, other: &Self) -> bool {
+        // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.
+        let mask = unsafe {
+            let tfvec: Simd<<T as SimdElement>::Mask, LANES> = intrinsics::simd_ne(*self, *other);
+            Mask::from_int_unchecked(tfvec)
+        };
+
+        // Two vectors are non-equal if any lane tested true for vertical non-equality.
+        mask.any()
     }
 }
 

From fcc5ca0f93e751f2e748c7654c78a541c0392575 Mon Sep 17 00:00:00 2001
From: Sean Stangl <sean.stangl@gmail.com>
Date: Mon, 11 Apr 2022 00:08:34 -0600
Subject: [PATCH 12/32] rust-lang/portable-simd#273: Documentation update for
 reduce functions, swizzle

Working through giving example documentation to every Simd function.

The major change in this patch is using doc macros to generate
type-specific examples for each function, using a visually-apparent type
constructor. This makes it feel nicer to have twelve separate
documentation entries for reduce_product(), for example.
---
 crates/core_simd/src/lane_count.rs |   8 +-
 crates/core_simd/src/reduction.rs  | 120 ++++++++++++++++++++++++++++-
 crates/core_simd/src/swizzle.rs    |  52 +++++++------
 crates/core_simd/src/vector.rs     |  48 +++++++++++-
 4 files changed, 195 insertions(+), 33 deletions(-)

diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs
index 3b316f12b3e..63723e2ec13 100644
--- a/crates/core_simd/src/lane_count.rs
+++ b/crates/core_simd/src/lane_count.rs
@@ -3,7 +3,7 @@ mod sealed {
 }
 use sealed::Sealed;
 
-/// A type representing a vector lane count.
+/// Specifies the number of lanes in a SIMD vector as a type.
 pub struct LaneCount<const LANES: usize>;
 
 impl<const LANES: usize> LaneCount<LANES> {
@@ -11,7 +11,11 @@ impl<const LANES: usize> LaneCount<LANES> {
     pub const BITMASK_LEN: usize = (LANES + 7) / 8;
 }
 
-/// Helper trait for vector lane counts.
+/// Statically guarantees that a lane count is marked as supported.
+///
+/// This trait is *sealed*: the list of implementors below is total.
+/// Users do not have the ability to mark additional `LaneCount<N>` values as supported.
+/// Only SIMD vectors with supported lane counts are constructable.
 pub trait SupportedLaneCount: Sealed {
     #[doc(hidden)]
     type BitMask: Copy + Default + AsRef<[u8]> + AsMut<[u8]>;
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 3177fd167fc..642ab319cdd 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -12,13 +12,41 @@ macro_rules! impl_integer_reductions {
             LaneCount<LANES>: SupportedLaneCount,
         {
             /// Reducing wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
+            /// assert_eq!(v.reduce_sum(), 10);
+            ///
+            /// // SIMD integer addition is always wrapping
+            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 1, 0, 0]);")]
+            #[doc = concat!("assert_eq!(v.reduce_sum(), ", stringify!($scalar), "::MIN);")]
+            /// ```
             #[inline]
             pub fn reduce_sum(self) -> $scalar {
                 // Safety: `self` is an integer vector
                 unsafe { simd_reduce_add_ordered(self, 0) }
             }
 
-            /// Reducing wrapping multiply.  Returns the product of the lanes of the vector, with wrapping multiplication.
+            /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
+            /// assert_eq!(v.reduce_product(), 24);
+            ///
+            /// // SIMD integer multiplication is always wrapping
+            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 2, 1, 1]);")]
+            #[doc = concat!("assert!(v.reduce_product() < ", stringify!($scalar), "::MAX);")]
+            /// ```
             #[inline]
             pub fn reduce_product(self) -> $scalar {
                 // Safety: `self` is an integer vector
@@ -26,6 +54,16 @@ macro_rules! impl_integer_reductions {
             }
 
             /// Reducing maximum.  Returns the maximum lane in the vector.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
+            /// assert_eq!(v.reduce_max(), 4);
+            /// ```
             #[inline]
             pub fn reduce_max(self) -> $scalar {
                 // Safety: `self` is an integer vector
@@ -33,6 +71,16 @@ macro_rules! impl_integer_reductions {
             }
 
             /// Reducing minimum.  Returns the minimum lane in the vector.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
+            /// assert_eq!(v.reduce_min(), 1);
+            /// ```
             #[inline]
             pub fn reduce_min(self) -> $scalar {
                 // Safety: `self` is an integer vector
@@ -61,6 +109,16 @@ macro_rules! impl_float_reductions {
         {
 
             /// Reducing add.  Returns the sum of the lanes of the vector.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")]
+            /// assert_eq!(v.reduce_sum(), 3.);
+            /// ```
             #[inline]
             pub fn reduce_sum(self) -> $scalar {
                 // LLVM sum is inaccurate on i586
@@ -73,6 +131,16 @@ macro_rules! impl_float_reductions {
             }
 
             /// Reducing multiply.  Returns the product of the lanes of the vector.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 4.]);")]
+            /// assert_eq!(v.reduce_product(), 12.);
+            /// ```
             #[inline]
             pub fn reduce_product(self) -> $scalar {
                 // LLVM product is inaccurate on i586
@@ -87,7 +155,30 @@ macro_rules! impl_float_reductions {
             /// Reducing maximum.  Returns the maximum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
-            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
+            /// return either.
+            ///
+            /// This function will not return `NaN` unless all lanes are `NaN`.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")]
+            /// assert_eq!(v.reduce_max(), 2.);
+            ///
+            /// // NaN values are skipped...
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")]
+            /// assert_eq!(v.reduce_max(), 1.);
+            ///
+            /// // ...unless all values are NaN
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([",
+                stringify!($scalar), "::NAN, ",
+                stringify!($scalar), "::NAN]);"
+            )]
+            /// assert!(v.reduce_max().is_nan());
+            /// ```
             #[inline]
             pub fn reduce_max(self) -> $scalar {
                 // Safety: `self` is a float vector
@@ -97,7 +188,30 @@ macro_rules! impl_float_reductions {
             /// Reducing minimum.  Returns the minimum lane in the vector.
             ///
             /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
-            /// return either.  This function will not return `NaN` unless all lanes are `NaN`.
+            /// return either.
+            ///
+            /// This function will not return `NaN` unless all lanes are `NaN`.
+            ///
+            /// # Examples
+            ///
+            /// ```
+            /// # #![feature(portable_simd)]
+            /// # use core::simd::Simd;
+            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 7.]);")]
+            /// assert_eq!(v.reduce_min(), 3.);
+            ///
+            /// // NaN values are skipped...
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")]
+            /// assert_eq!(v.reduce_min(), 1.);
+            ///
+            /// // ...unless all values are NaN
+            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([",
+                stringify!($scalar), "::NAN, ",
+                stringify!($scalar), "::NAN]);"
+            )]
+            /// assert!(v.reduce_min().is_nan());
+            /// ```
             #[inline]
             pub fn reduce_min(self) -> $scalar {
                 // Safety: `self` is a float vector
diff --git a/crates/core_simd/src/swizzle.rs b/crates/core_simd/src/swizzle.rs
index ef47c4f3a4c..22999d24950 100644
--- a/crates/core_simd/src/swizzle.rs
+++ b/crates/core_simd/src/swizzle.rs
@@ -1,44 +1,46 @@
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
 
-/// Constructs a new vector by selecting values from the lanes of the source vector or vectors to use.
+/// Constructs a new SIMD vector by copying elements from selected lanes in other vectors.
 ///
-/// When swizzling one vector, the indices of the result vector are indicated by a `const` array
-/// of `usize`, like [`Swizzle`].
-/// When swizzling two vectors, the indices are indicated by a `const` array of [`Which`], like
-/// [`Swizzle2`].
+/// When swizzling one vector, lanes are selected by a `const` array of `usize`,
+/// like [`Swizzle`].
+///
+/// When swizzling two vectors, lanes are selected by a `const` array of [`Which`],
+/// like [`Swizzle2`].
 ///
 /// # Examples
-/// ## One source vector
+///
+/// With a single SIMD vector, the const array specifies lane indices in that vector:
 /// ```
 /// # #![feature(portable_simd)]
-/// # use core::simd::{Simd, simd_swizzle};
-/// let v = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
+/// # use core::simd::{u32x2, u32x4, simd_swizzle};
+/// let v = u32x4::from_array([10, 11, 12, 13]);
 ///
 /// // Keeping the same size
-/// let r = simd_swizzle!(v, [3, 0, 1, 2]);
-/// assert_eq!(r.to_array(), [3., 0., 1., 2.]);
+/// let r: u32x4 = simd_swizzle!(v, [3, 0, 1, 2]);
+/// assert_eq!(r.to_array(), [13, 10, 11, 12]);
 ///
 /// // Changing the number of lanes
-/// let r = simd_swizzle!(v, [3, 1]);
-/// assert_eq!(r.to_array(), [3., 1.]);
+/// let r: u32x2 = simd_swizzle!(v, [3, 1]);
+/// assert_eq!(r.to_array(), [13, 11]);
 /// ```
 ///
-/// ## Two source vectors
+/// With two input SIMD vectors, the const array uses `Which` to specify the source of each index:
 /// ```
 /// # #![feature(portable_simd)]
-/// # use core::simd::{Simd, simd_swizzle, Which};
-/// use Which::*;
-/// let a = Simd::<f32, 4>::from_array([0., 1., 2., 3.]);
-/// let b = Simd::<f32, 4>::from_array([4., 5., 6., 7.]);
+/// # use core::simd::{u32x2, u32x4, simd_swizzle, Which};
+/// use Which::{First, Second};
+/// let a = u32x4::from_array([0, 1, 2, 3]);
+/// let b = u32x4::from_array([4, 5, 6, 7]);
 ///
 /// // Keeping the same size
-/// let r = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
-/// assert_eq!(r.to_array(), [0., 1., 6., 7.]);
+/// let r: u32x4 = simd_swizzle!(a, b, [First(0), First(1), Second(2), Second(3)]);
+/// assert_eq!(r.to_array(), [0, 1, 6, 7]);
 ///
 /// // Changing the number of lanes
-/// let r = simd_swizzle!(a, b, [First(0), Second(0)]);
-/// assert_eq!(r.to_array(), [0., 4.]);
+/// let r: u32x2 = simd_swizzle!(a, b, [First(0), Second(0)]);
+/// assert_eq!(r.to_array(), [0, 4]);
 /// ```
 #[allow(unused_macros)]
 pub macro simd_swizzle {
@@ -68,12 +70,14 @@ pub macro simd_swizzle {
     }
 }
 
-/// An index into one of two vectors.
+/// Specifies a lane index into one of two SIMD vectors.
+///
+/// This is an input type for [Swizzle2] and helper macros like [simd_swizzle].
 #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum Which {
-    /// Indexes the first vector.
+    /// Index of a lane in the first input SIMD vector.
     First(usize),
-    /// Indexes the second vector.
+    /// Index of a lane in the second input SIMD vector.
     Second(usize),
 }
 
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index d032f5459fd..f7989ee762a 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -99,17 +99,44 @@ where
     /// Number of lanes in this vector.
     pub const LANES: usize = LANES;
 
-    /// Get the number of lanes in this vector.
+    /// Returns the number of lanes in this SIMD vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::u32x4;
+    /// let v = u32x4::splat(0);
+    /// assert_eq!(v.lanes(), 4);
+    /// ```
     pub const fn lanes(&self) -> usize {
         LANES
     }
 
-    /// Construct a SIMD vector by setting all lanes to the given value.
+    /// Constructs a new SIMD vector with all lanes set to the given value.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::u32x4;
+    /// let v = u32x4::splat(8);
+    /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
+    /// ```
     pub const fn splat(value: T) -> Self {
         Self([value; LANES])
     }
 
     /// Returns an array reference containing the entire SIMD vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::{Simd, u64x4};
+    /// let v: u64x4 = Simd::from_array([0, 1, 2, 3]);
+    /// assert_eq!(v.as_array(), &[0, 1, 2, 3]);
+    /// ```
     pub const fn as_array(&self) -> &[T; LANES] {
         &self.0
     }
@@ -129,9 +156,21 @@ where
         self.0
     }
 
-    /// Converts a slice to a SIMD vector containing `slice[..LANES]`
+    /// Converts a slice to a SIMD vector containing `slice[..LANES]`.
+    ///
     /// # Panics
-    /// `from_slice` will panic if the slice's `len` is less than the vector's `Simd::LANES`.
+    ///
+    /// Panics if the slice's length is less than the vector's `Simd::LANES`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::{Simd, u32x4};
+    /// let source = vec![1, 2, 3, 4, 5, 6];
+    /// let v = u32x4::from_slice(&source);
+    /// assert_eq!(v.as_array(), &[1, 2, 3, 4]);
+    /// ```
     #[must_use]
     pub const fn from_slice(slice: &[T]) -> Self {
         assert!(
@@ -148,6 +187,7 @@ where
     }
 
     /// Performs lanewise conversion of a SIMD vector's elements to another SIMD-valid type.
+    ///
     /// This follows the semantics of Rust's `as` conversion for casting
     /// integers to unsigned integers (interpreting as the other type, so `-1` to `MAX`),
     /// and from floats to integers (truncating, or saturating at the limits) for each lane,

From 9718639d61b32d4efd2fac330ab1058732b3b758 Mon Sep 17 00:00:00 2001
From: Andrew Straw <strawman@astraw.com>
Date: Mon, 11 Apr 2022 21:17:44 +0200
Subject: [PATCH 13/32] rust-lang/portable-simd#276: Mention slice methods
 as_simd() and as_simd_mut()

This links to a practical suggestion for how to solve the issues brought up in this section.
---
 beginners-guide.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/beginners-guide.md b/beginners-guide.md
index 75158e5aa85..17ade06ae80 100644
--- a/beginners-guide.md
+++ b/beginners-guide.md
@@ -82,5 +82,10 @@ Fortunately, most SIMD types have a fairly predictable size. `i32x4` is bit-equi
 
 However, this is not the same as alignment. Computer architectures generally prefer aligned accesses, especially when moving data between memory and vector registers, and while some support specialized operations that can bend the rules to help with this, unaligned access is still typically slow, or even undefined behavior. In addition, different architectures can require different alignments when interacting with their native SIMD types. For this reason, any `#[repr(simd)]` type has a non-portable alignment. If it is necessary to directly interact with the alignment of these types, it should be via [`mem::align_of`].
 
+When working with slices, data correctly aligned for SIMD can be acquired using the [`as_simd`] and [`as_simd_mut`] methods of the slice primitive.
+
 [`mem::transmute`]: https://doc.rust-lang.org/core/mem/fn.transmute.html
 [`mem::align_of`]: https://doc.rust-lang.org/core/mem/fn.align_of.html
+[`as_simd`]: https://doc.rust-lang.org/nightly/std/primitive.slice.html#method.as_simd
+[`as_simd_mut`]: https://doc.rust-lang.org/nightly/std/primitive.slice.html#method.as_simd_mut
+

From 376957ad8cac70a85c08a26a147eb534d5cee380 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 11 Apr 2022 01:38:07 -0400
Subject: [PATCH 14/32] Move integer functions to traits.

---
 crates/core_simd/src/elements.rs      |   5 +
 crates/core_simd/src/elements/int.rs  | 273 ++++++++++++++++++++++++++
 crates/core_simd/src/elements/uint.rs | 128 ++++++++++++
 crates/core_simd/src/math.rs          | 156 ---------------
 crates/core_simd/src/mod.rs           |   3 +-
 crates/core_simd/src/reduction.rs     | 144 +-------------
 crates/core_simd/src/vector/int.rs    |  42 +---
 crates/core_simd/tests/ops_macros.rs  |   2 +
 8 files changed, 413 insertions(+), 340 deletions(-)
 create mode 100644 crates/core_simd/src/elements.rs
 create mode 100644 crates/core_simd/src/elements/int.rs
 create mode 100644 crates/core_simd/src/elements/uint.rs
 delete mode 100644 crates/core_simd/src/math.rs

diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/elements.rs
new file mode 100644
index 00000000000..0fb1f5b9fe9
--- /dev/null
+++ b/crates/core_simd/src/elements.rs
@@ -0,0 +1,5 @@
+mod int;
+mod uint;
+
+pub use int::*;
+pub use uint::*;
diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
new file mode 100644
index 00000000000..61135427456
--- /dev/null
+++ b/crates/core_simd/src/elements/int.rs
@@ -0,0 +1,274 @@
+use crate::simd::{
+    intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount,
+};
+
+/// Operations on SIMD vectors of signed integers.
+pub trait SimdInt: Sized {
+    /// Mask type used for manipulating this SIMD vector type.
+    type Mask;
+
+    /// Scalar type contained by this SIMD vector type.
+    type Scalar;
+
+    /// Lanewise saturating add.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// use core::i32::{MIN, MAX};
+    /// let x = Simd::from_array([MIN, 0, 1, MAX]);
+    /// let max = Simd::splat(MAX);
+    /// let unsat = x + max;
+    /// let sat = x.saturating_add(max);
+    /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2]));
+    /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX]));
+    /// ```
+    fn saturating_add(self, second: Self) -> Self;
+
+    /// Lanewise saturating subtract.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// use core::i32::{MIN, MAX};
+    /// let x = Simd::from_array([MIN, -2, -1, MAX]);
+    /// let max = Simd::splat(MAX);
+    /// let unsat = x - max;
+    /// let sat = x.saturating_sub(max);
+    /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
+    /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
+    /// ```
+    fn saturating_sub(self, second: Self) -> Self;
+
+    /// Lanewise absolute value, implemented in Rust.
+    /// Every lane becomes its absolute value.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// use core::i32::{MIN, MAX};
+    /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
+    /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
+    /// ```
+    fn abs(self) -> Self;
+
+    /// Lanewise saturating absolute value, implemented in Rust.
+    /// As abs(), except the MIN value becomes MAX instead of itself.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// use core::i32::{MIN, MAX};
+    /// let xs = Simd::from_array([MIN, -2, 0, 3]);
+    /// let unsat = xs.abs();
+    /// let sat = xs.saturating_abs();
+    /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
+    /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
+    /// ```
+    fn saturating_abs(self) -> Self;
+
+    /// Lanewise saturating negation, implemented in Rust.
+    /// As neg(), except the MIN value becomes MAX instead of itself.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// use core::i32::{MIN, MAX};
+    /// let x = Simd::from_array([MIN, -2, 3, MAX]);
+    /// let unsat = -x;
+    /// let sat = x.saturating_neg();
+    /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
+    /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
+    /// ```
+    fn saturating_neg(self) -> Self;
+
+    /// Returns true for each positive lane and false if it is zero or negative.
+    fn is_positive(self) -> Self::Mask;
+
+    /// Returns true for each negative lane and false if it is zero or positive.
+    fn is_negative(self) -> Self::Mask;
+
+    /// Returns numbers representing the sign of each lane.
+    /// * `0` if the number is zero
+    /// * `1` if the number is positive
+    /// * `-1` if the number is negative
+    fn signum(self) -> Self;
+
+    /// Returns the sum of the lanes of the vector, with wrapping addition.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::i32x4;
+    /// let v = i32x4::from_array([1, 2, 3, 4]);
+    /// assert_eq!(v.reduce_sum(), 10);
+    ///
+    /// // SIMD integer addition is always wrapping
+    /// let v = i32x4::from_array([i32::MAX, 1, 0, 0]);
+    /// assert_eq!(v.reduce_sum(), i32::MIN);
+    /// ```
+    fn reduce_sum(self) -> Self::Scalar;
+
+    /// Returns the product of the lanes of the vector, with wrapping multiplication.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::i32x4;
+    /// let v = i32x4::from_array([1, 2, 3, 4]);
+    /// assert_eq!(v.reduce_product(), 24);
+    ///
+    /// // SIMD integer multiplication is always wrapping
+    /// let v = i32x4::from_array([i32::MAX, 2, 1, 1]);
+    /// assert!(v.reduce_product() < i32::MAX);
+    /// ```
+    fn reduce_product(self) -> Self::Scalar;
+
+    /// Returns the maximum lane in the vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::i32x4;
+    /// let v = i32x4::from_array([1, 2, 3, 4]);
+    /// assert_eq!(v.reduce_max(), 4);
+    /// ```
+    fn reduce_max(self) -> Self::Scalar;
+
+    /// Returns the minimum lane in the vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::i32x4;
+    /// let v = i32x4::from_array([1, 2, 3, 4]);
+    /// assert_eq!(v.reduce_min(), 1);
+    /// ```
+    fn reduce_min(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "and" across the lanes of the vector.
+    fn reduce_and(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "or" across the lanes of the vector.
+    fn reduce_or(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+    fn reduce_xor(self) -> Self::Scalar;
+}
+
+macro_rules! impl_trait {
+    { $($ty:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdInt for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Mask<<$ty as SimdElement>::Mask, LANES>;
+            type Scalar = $ty;
+
+            #[inline]
+            fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` is a vector
+                unsafe { intrinsics::simd_saturating_add(self, second) }
+            }
+
+            #[inline]
+            fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` is a vector
+                unsafe { intrinsics::simd_saturating_sub(self, second) }
+            }
+
+            #[inline]
+            fn abs(self) -> Self {
+                const SHR: $ty = <$ty>::BITS as $ty - 1;
+                let m = self >> Simd::splat(SHR);
+                (self^m) - m
+            }
+
+            #[inline]
+            fn saturating_abs(self) -> Self {
+                // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
+                const SHR: $ty = <$ty>::BITS as $ty - 1;
+                let m = self >> Simd::splat(SHR);
+                (self^m).saturating_sub(m)
+            }
+
+            #[inline]
+            fn saturating_neg(self) -> Self {
+                Self::splat(0).saturating_sub(self)
+            }
+
+            #[inline]
+            fn is_positive(self) -> Self::Mask {
+                self.simd_gt(Self::splat(0))
+            }
+
+            #[inline]
+            fn is_negative(self) -> Self::Mask {
+                self.simd_lt(Self::splat(0))
+            }
+
+            #[inline]
+            fn signum(self) -> Self {
+                self.is_positive().select(
+                    Self::splat(1),
+                    self.is_negative().select(Self::splat(-1), Self::splat(0))
+                )
+            }
+
+            #[inline]
+            fn reduce_sum(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_add_ordered(self, 0) }
+            }
+
+            #[inline]
+            fn reduce_product(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) }
+            }
+
+            #[inline]
+            fn reduce_max(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_max(self) }
+            }
+
+            #[inline]
+            fn reduce_min(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_min(self) }
+            }
+
+            #[inline]
+            fn reduce_and(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_and(self) }
+            }
+
+            #[inline]
+            fn reduce_or(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_or(self) }
+            }
+
+            #[inline]
+            fn reduce_xor(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_xor(self) }
+            }
+        }
+        )*
+    }
+}
+
+impl_trait! { i8, i16, i32, i64, isize }
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
new file mode 100644
index 00000000000..da3213535a3
--- /dev/null
+++ b/crates/core_simd/src/elements/uint.rs
@@ -0,0 +1,128 @@
+use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount};
+
+/// Operations on SIMD vectors of unsigned integers.
+pub trait SimdUint: Sized {
+    /// Scalar type contained by this SIMD vector type.
+    type Scalar;
+
+    /// Lanewise saturating add.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// let x = Simd::from_array([2, 1, 0, u32::MAX]);
+    /// let max = Simd::splat(u32::MAX);
+    /// // Plain `+` wraps around on overflow; `saturating_add` clamps at `u32::MAX`.
+    /// let unsat = x + max;
+    /// let sat = x.saturating_add(max);
+    /// assert_eq!(unsat, Simd::from_array([1, 0, u32::MAX, u32::MAX - 1]));
+    /// assert_eq!(sat, max);
+    /// ```
+    fn saturating_add(self, second: Self) -> Self;
+
+    /// Lanewise saturating subtract.
+    ///
+    /// # Examples
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::Simd;
+    /// let x = Simd::from_array([2, 1, 0, u32::MAX]);
+    /// let max = Simd::splat(u32::MAX);
+    /// let unsat = x - max;
+    /// let sat = x.saturating_sub(max);
+    /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
+    /// assert_eq!(sat, Simd::splat(0));
+    /// ```
+    fn saturating_sub(self, second: Self) -> Self;
+
+    /// Returns the sum of the lanes of the vector, with wrapping addition.
+    fn reduce_sum(self) -> Self::Scalar;
+
+    /// Returns the product of the lanes of the vector, with wrapping multiplication.
+    fn reduce_product(self) -> Self::Scalar;
+
+    /// Returns the maximum lane in the vector.
+    fn reduce_max(self) -> Self::Scalar;
+
+    /// Returns the minimum lane in the vector.
+    fn reduce_min(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "and" across the lanes of the vector.
+    fn reduce_and(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "or" across the lanes of the vector.
+    fn reduce_or(self) -> Self::Scalar;
+
+    /// Returns the cumulative bitwise "xor" across the lanes of the vector.
+    fn reduce_xor(self) -> Self::Scalar;
+}
+
+macro_rules! impl_trait {
+    { $($ty:ty),* } => {
+        $(
+        impl<const LANES: usize> SimdUint for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Scalar = $ty;
+
+            #[inline]
+            fn saturating_add(self, second: Self) -> Self {
+                // Safety: `self` and `second` are integer vectors of the same type
+                unsafe { intrinsics::simd_saturating_add(self, second) }
+            }
+
+            #[inline]
+            fn saturating_sub(self, second: Self) -> Self {
+                // Safety: `self` and `second` are integer vectors of the same type
+                unsafe { intrinsics::simd_saturating_sub(self, second) }
+            }
+
+            #[inline]
+            fn reduce_sum(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_add_ordered(self, 0) }
+            }
+
+            #[inline]
+            fn reduce_product(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_mul_ordered(self, 1) }
+            }
+
+            #[inline]
+            fn reduce_max(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_max(self) }
+            }
+
+            #[inline]
+            fn reduce_min(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_min(self) }
+            }
+
+            #[inline]
+            fn reduce_and(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_and(self) }
+            }
+
+            #[inline]
+            fn reduce_or(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_or(self) }
+            }
+
+            #[inline]
+            fn reduce_xor(self) -> Self::Scalar {
+                // Safety: `self` is an integer vector
+                unsafe { intrinsics::simd_reduce_xor(self) }
+            }
+        }
+        )*
+    }
+}
+
+impl_trait! { u8, u16, u32, u64, usize }
diff --git a/crates/core_simd/src/math.rs b/crates/core_simd/src/math.rs
deleted file mode 100644
index 606021e983e..00000000000
--- a/crates/core_simd/src/math.rs
+++ /dev/null
@@ -1,156 +0,0 @@
-use crate::simd::intrinsics::{simd_saturating_add, simd_saturating_sub};
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
-
-macro_rules! impl_uint_arith {
-    ($($ty:ty),+) => {
-        $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {
-
-            /// Lanewise saturating add.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
-            /// let x = Simd::from_array([2, 1, 0, MAX]);
-            /// let max = Simd::splat(MAX);
-            /// let unsat = x + max;
-            /// let sat = x.saturating_add(max);
-            /// assert_eq!(unsat, Simd::from_array([1, 0, MAX, MAX - 1]));
-            /// assert_eq!(sat, max);
-            /// ```
-            #[inline]
-            pub fn saturating_add(self, second: Self) -> Self {
-                // Safety: `self` is a vector
-                unsafe { simd_saturating_add(self, second) }
-            }
-
-            /// Lanewise saturating subtract.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::MAX;")]
-            /// let x = Simd::from_array([2, 1, 0, MAX]);
-            /// let max = Simd::splat(MAX);
-            /// let unsat = x - max;
-            /// let sat = x.saturating_sub(max);
-            /// assert_eq!(unsat, Simd::from_array([3, 2, 1, 0]));
-            /// assert_eq!(sat, Simd::splat(0));
-            #[inline]
-            pub fn saturating_sub(self, second: Self) -> Self {
-                // Safety: `self` is a vector
-                unsafe { simd_saturating_sub(self, second) }
-            }
-        })+
-    }
-}
-
-macro_rules! impl_int_arith {
-    ($($ty:ty),+) => {
-        $( impl<const LANES: usize> Simd<$ty, LANES> where LaneCount<LANES>: SupportedLaneCount {
-
-            /// Lanewise saturating add.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
-            /// let x = Simd::from_array([MIN, 0, 1, MAX]);
-            /// let max = Simd::splat(MAX);
-            /// let unsat = x + max;
-            /// let sat = x.saturating_add(max);
-            /// assert_eq!(unsat, Simd::from_array([-1, MAX, MIN, -2]));
-            /// assert_eq!(sat, Simd::from_array([-1, MAX, MAX, MAX]));
-            /// ```
-            #[inline]
-            pub fn saturating_add(self, second: Self) -> Self {
-                // Safety: `self` is a vector
-                unsafe { simd_saturating_add(self, second) }
-            }
-
-            /// Lanewise saturating subtract.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
-            /// let x = Simd::from_array([MIN, -2, -1, MAX]);
-            /// let max = Simd::splat(MAX);
-            /// let unsat = x - max;
-            /// let sat = x.saturating_sub(max);
-            /// assert_eq!(unsat, Simd::from_array([1, MAX, MIN, 0]));
-            /// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
-            #[inline]
-            pub fn saturating_sub(self, second: Self) -> Self {
-                // Safety: `self` is a vector
-                unsafe { simd_saturating_sub(self, second) }
-            }
-
-            /// Lanewise absolute value, implemented in Rust.
-            /// Every lane becomes its absolute value.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
-            /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
-            /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
-            /// ```
-            #[inline]
-            pub fn abs(self) -> Self {
-                const SHR: $ty = <$ty>::BITS as $ty - 1;
-                let m = self >> Simd::splat(SHR);
-                (self^m) - m
-            }
-
-            /// Lanewise saturating absolute value, implemented in Rust.
-            /// As abs(), except the MIN value becomes MAX instead of itself.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
-            /// let xs = Simd::from_array([MIN, -2, 0, 3]);
-            /// let unsat = xs.abs();
-            /// let sat = xs.saturating_abs();
-            /// assert_eq!(unsat, Simd::from_array([MIN, 2, 0, 3]));
-            /// assert_eq!(sat, Simd::from_array([MAX, 2, 0, 3]));
-            /// ```
-            #[inline]
-            pub fn saturating_abs(self) -> Self {
-                // arith shift for -1 or 0 mask based on sign bit, giving 2s complement
-                const SHR: $ty = <$ty>::BITS as $ty - 1;
-                let m = self >> Simd::splat(SHR);
-                (self^m).saturating_sub(m)
-            }
-
-            /// Lanewise saturating negation, implemented in Rust.
-            /// As neg(), except the MIN value becomes MAX instead of itself.
-            ///
-            /// # Examples
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::", stringify!($ty), "::{MIN, MAX};")]
-            /// let x = Simd::from_array([MIN, -2, 3, MAX]);
-            /// let unsat = -x;
-            /// let sat = x.saturating_neg();
-            /// assert_eq!(unsat, Simd::from_array([MIN, 2, -3, MIN + 1]));
-            /// assert_eq!(sat, Simd::from_array([MAX, 2, -3, MIN + 1]));
-            /// ```
-            #[inline]
-            pub fn saturating_neg(self) -> Self {
-                Self::splat(0).saturating_sub(self)
-            }
-        })+
-    }
-}
-
-impl_uint_arith! { u8, u16, u32, u64, usize }
-impl_int_arith! { i8, i16, i32, i64, isize }
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index 42257f4e119..2d4fe2b7fde 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -9,12 +9,12 @@ pub(crate) mod intrinsics;
 #[cfg(feature = "generic_const_exprs")]
 mod to_bytes;
 
+mod elements;
 mod eq;
 mod fmt;
 mod iter;
 mod lane_count;
 mod masks;
-mod math;
 mod ops;
 mod ord;
 mod round;
@@ -26,6 +26,7 @@ mod vendor;
 pub mod simd {
     pub(crate) use crate::core_simd::intrinsics;
 
+    pub use crate::core_simd::elements::*;
     pub use crate::core_simd::eq::*;
     pub use crate::core_simd::lane_count::{LaneCount, SupportedLaneCount};
     pub use crate::core_simd::masks::*;
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
index 642ab319cdd..9d8639feeee 100644
--- a/crates/core_simd/src/reduction.rs
+++ b/crates/core_simd/src/reduction.rs
@@ -1,105 +1,7 @@
 use crate::simd::intrinsics::{
-    simd_reduce_add_ordered, simd_reduce_and, simd_reduce_max, simd_reduce_min,
-    simd_reduce_mul_ordered, simd_reduce_or, simd_reduce_xor,
+    simd_reduce_add_ordered, simd_reduce_max, simd_reduce_min, simd_reduce_mul_ordered,
 };
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
-use core::ops::{BitAnd, BitOr, BitXor};
-
-macro_rules! impl_integer_reductions {
-    { $scalar:ty } => {
-        impl<const LANES: usize> Simd<$scalar, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            /// Reducing wrapping add.  Returns the sum of the lanes of the vector, with wrapping addition.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
-            /// assert_eq!(v.reduce_sum(), 10);
-            ///
-            /// // SIMD integer addition is always wrapping
-            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 1, 0, 0]);")]
-            #[doc = concat!("assert_eq!(v.reduce_sum(), ", stringify!($scalar), "::MIN);")]
-            /// ```
-            #[inline]
-            pub fn reduce_sum(self) -> $scalar {
-                // Safety: `self` is an integer vector
-                unsafe { simd_reduce_add_ordered(self, 0) }
-            }
-
-            /// Reducing wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
-            /// assert_eq!(v.reduce_product(), 24);
-            ///
-            /// // SIMD integer multiplication is always wrapping
-            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([", stringify!($scalar) ,"::MAX, 2, 1, 1]);")]
-            #[doc = concat!("assert!(v.reduce_product() < ", stringify!($scalar), "::MAX);")]
-            /// ```
-            #[inline]
-            pub fn reduce_product(self) -> $scalar {
-                // Safety: `self` is an integer vector
-                unsafe { simd_reduce_mul_ordered(self, 1) }
-            }
-
-            /// Reducing maximum.  Returns the maximum lane in the vector.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
-            /// assert_eq!(v.reduce_max(), 4);
-            /// ```
-            #[inline]
-            pub fn reduce_max(self) -> $scalar {
-                // Safety: `self` is an integer vector
-                unsafe { simd_reduce_max(self) }
-            }
-
-            /// Reducing minimum.  Returns the minimum lane in the vector.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x4;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x4::from_array([1, 2, 3, 4]);")]
-            /// assert_eq!(v.reduce_min(), 1);
-            /// ```
-            #[inline]
-            pub fn reduce_min(self) -> $scalar {
-                // Safety: `self` is an integer vector
-                unsafe { simd_reduce_min(self) }
-            }
-        }
-    }
-}
-
-impl_integer_reductions! { i8 }
-impl_integer_reductions! { i16 }
-impl_integer_reductions! { i32 }
-impl_integer_reductions! { i64 }
-impl_integer_reductions! { isize }
-impl_integer_reductions! { u8 }
-impl_integer_reductions! { u16 }
-impl_integer_reductions! { u32 }
-impl_integer_reductions! { u64 }
-impl_integer_reductions! { usize }
+use crate::simd::{LaneCount, Simd, SupportedLaneCount};
 
 macro_rules! impl_float_reductions {
     { $scalar:ty } => {
@@ -223,45 +125,3 @@ macro_rules! impl_float_reductions {
 
 impl_float_reductions! { f32 }
 impl_float_reductions! { f64 }
-
-impl<T, const LANES: usize> Simd<T, LANES>
-where
-    Self: BitAnd<Self, Output = Self>,
-    T: SimdElement + BitAnd<T, Output = T>,
-    LaneCount<LANES>: SupportedLaneCount,
-{
-    /// Reducing bitwise "and".  Returns the cumulative bitwise "and" across the lanes of
-    /// the vector.
-    #[inline]
-    pub fn reduce_and(self) -> T {
-        unsafe { simd_reduce_and(self) }
-    }
-}
-
-impl<T, const LANES: usize> Simd<T, LANES>
-where
-    Self: BitOr<Self, Output = Self>,
-    T: SimdElement + BitOr<T, Output = T>,
-    LaneCount<LANES>: SupportedLaneCount,
-{
-    /// Reducing bitwise "or".  Returns the cumulative bitwise "or" across the lanes of
-    /// the vector.
-    #[inline]
-    pub fn reduce_or(self) -> T {
-        unsafe { simd_reduce_or(self) }
-    }
-}
-
-impl<T, const LANES: usize> Simd<T, LANES>
-where
-    Self: BitXor<Self, Output = Self>,
-    T: SimdElement + BitXor<T, Output = T>,
-    LaneCount<LANES>: SupportedLaneCount,
-{
-    /// Reducing bitwise "xor".  Returns the cumulative bitwise "xor" across the lanes of
-    /// the vector.
-    #[inline]
-    pub fn reduce_xor(self) -> T {
-        unsafe { simd_reduce_xor(self) }
-    }
-}
diff --git a/crates/core_simd/src/vector/int.rs b/crates/core_simd/src/vector/int.rs
index 384f01d822a..20e56c7dc64 100644
--- a/crates/core_simd/src/vector/int.rs
+++ b/crates/core_simd/src/vector/int.rs
@@ -1,46 +1,6 @@
 #![allow(non_camel_case_types)]
 
-use crate::simd::{LaneCount, Mask, Simd, SimdPartialOrd, SupportedLaneCount};
-
-/// Implements additional integer traits (Eq, Ord, Hash) on the specified vector `$name`, holding multiple `$lanes` of `$type`.
-macro_rules! impl_integer_vector {
-    { $type:ty } => {
-        impl<const LANES: usize> Simd<$type, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            /// Returns true for each positive lane and false if it is zero or negative.
-            #[inline]
-            pub fn is_positive(self) -> Mask<$type, LANES> {
-                self.simd_gt(Self::splat(0))
-            }
-
-            /// Returns true for each negative lane and false if it is zero or positive.
-            #[inline]
-            pub fn is_negative(self) -> Mask<$type, LANES> {
-                self.simd_lt(Self::splat(0))
-            }
-
-            /// Returns numbers representing the sign of each lane.
-            /// * `0` if the number is zero
-            /// * `1` if the number is positive
-            /// * `-1` if the number is negative
-            #[inline]
-            pub fn signum(self) -> Self {
-                self.is_positive().select(
-                    Self::splat(1),
-                    self.is_negative().select(Self::splat(-1), Self::splat(0))
-                )
-            }
-        }
-    }
-}
-
-impl_integer_vector! { isize }
-impl_integer_vector! { i16 }
-impl_integer_vector! { i32 }
-impl_integer_vector! { i64 }
-impl_integer_vector! { i8 }
+use crate::simd::Simd;
 
 /// A SIMD vector with two elements of type `isize`.
 pub type isizex2 = Simd<isize, 2>;
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 47fe49b0982..48c512be7d0 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -172,6 +172,7 @@ macro_rules! impl_common_integer_tests {
 macro_rules! impl_signed_tests {
     { $scalar:tt } => {
         mod $scalar {
+            use core_simd::simd::SimdInt;
             type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
             type Scalar = $scalar;
 
@@ -312,6 +313,7 @@ macro_rules! impl_signed_tests {
 macro_rules! impl_unsigned_tests {
     { $scalar:tt } => {
         mod $scalar {
+            use core_simd::simd::SimdUint;
             type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
             type Scalar = $scalar;
 

From 04be48ff97757a803e934ab8d2e90631b59557f8 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 15 Apr 2022 01:44:18 -0400
Subject: [PATCH 15/32] Add float trait, and seal traits.

---
 crates/core_simd/src/elements.rs       |   6 +
 crates/core_simd/src/elements/float.rs | 344 +++++++++++++++++++++++++
 crates/core_simd/src/elements/int.rs   |   9 +-
 crates/core_simd/src/elements/uint.rs  |   9 +-
 crates/core_simd/src/mod.rs            |   3 -
 crates/core_simd/src/reduction.rs      | 127 ---------
 crates/core_simd/src/vector/float.rs   | 211 +--------------
 crates/core_simd/tests/ops_macros.rs   |   4 +-
 8 files changed, 368 insertions(+), 345 deletions(-)
 create mode 100644 crates/core_simd/src/elements/float.rs
 delete mode 100644 crates/core_simd/src/reduction.rs

diff --git a/crates/core_simd/src/elements.rs b/crates/core_simd/src/elements.rs
index 0fb1f5b9fe9..701eb66b248 100644
--- a/crates/core_simd/src/elements.rs
+++ b/crates/core_simd/src/elements.rs
@@ -1,5 +1,11 @@
+mod float;
 mod int;
 mod uint;
 
+mod sealed {
+    pub trait Sealed {}
+}
+
+pub use float::*;
 pub use int::*;
 pub use uint::*;
diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
new file mode 100644
index 00000000000..fafbd2a4d21
--- /dev/null
+++ b/crates/core_simd/src/elements/float.rs
@@ -0,0 +1,344 @@
+use super::sealed::Sealed;
+use crate::simd::{
+    intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialEq, SimdPartialOrd,
+    SupportedLaneCount,
+};
+
+/// Operations on SIMD vectors of floats.
+pub trait SimdFloat: Sized + Sealed {
+    /// Mask type used for manipulating this SIMD vector type.
+    type Mask;
+
+    /// Scalar type contained by this SIMD vector type.
+    type Scalar;
+
+    /// Bit representation of this SIMD vector type.
+    type Bits;
+
+    /// Raw transmutation to an unsigned integer vector type with the
+    /// same size and number of lanes.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn to_bits(self) -> Self::Bits;
+
+    /// Raw transmutation from an unsigned integer vector type with the
+    /// same size and number of lanes.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn from_bits(bits: Self::Bits) -> Self;
+
+    /// Produces a vector where every lane has the absolute value of the
+    /// equivalently-indexed lane in `self`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn abs(self) -> Self;
+
+    /// Takes the reciprocal (inverse) of each lane, `1/x`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn recip(self) -> Self;
+
+    /// Converts each lane from radians to degrees.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn to_degrees(self) -> Self;
+
+    /// Converts each lane from degrees to radians.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn to_radians(self) -> Self;
+
+    /// Returns true for each lane if it has a positive sign, including
+    /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_sign_positive(self) -> Self::Mask;
+
+    /// Returns true for each lane if it has a negative sign, including
+    /// `-0.0`, `NaN`s with negative sign bit and negative infinity.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_sign_negative(self) -> Self::Mask;
+
+    /// Returns true for each lane if its value is `NaN`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_nan(self) -> Self::Mask;
+
+    /// Returns true for each lane if its value is positive infinity or negative infinity.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_infinite(self) -> Self::Mask;
+
+    /// Returns true for each lane if its value is neither infinite nor `NaN`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_finite(self) -> Self::Mask;
+
+    /// Returns true for each lane if its value is subnormal.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_subnormal(self) -> Self::Mask;
+
+    /// Returns true for each lane if its value is neither zero, infinite,
+    /// subnormal, nor `NaN`.
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    fn is_normal(self) -> Self::Mask;
+
+    /// Replaces each lane with a number that represents its sign.
+    ///
+    /// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
+    /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
+    /// * `NAN` if the number is `NAN`
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn signum(self) -> Self;
+
+    /// Returns each lane with the magnitude of `self` and the sign of `sign`.
+    ///
+    /// For any lane of `self` that is `NAN`, a `NAN` with the sign of `sign` is returned.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn copysign(self, sign: Self) -> Self;
+
+    /// Returns the minimum of each lane.
+    ///
+    /// If one of the values is `NAN`, then the other value is returned.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_min(self, other: Self) -> Self;
+
+    /// Returns the maximum of each lane.
+    ///
+    /// If one of the values is `NAN`, then the other value is returned.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_max(self, other: Self) -> Self;
+
+    /// Restrict each lane to a certain interval unless it is NaN.
+    ///
+    /// Lanes of `self` less than `min` take the corresponding lane of `min`, lanes greater
+    /// than `max` take the corresponding lane of `max`, and all others are unchanged.
+    /// Panics if any lane of `min` is not less than or equal to its lane of `max`.
+    #[must_use = "method returns a new vector and does not mutate the original value"]
+    fn simd_clamp(self, min: Self, max: Self) -> Self;
+
+    /// Returns the sum of the lanes of the vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::f32x2;
+    /// let v = f32x2::from_array([1., 2.]);
+    /// assert_eq!(v.reduce_sum(), 3.);
+    /// ```
+    fn reduce_sum(self) -> Self::Scalar;
+
+    /// Returns the product of the lanes of the vector.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::f32x2;
+    /// let v = f32x2::from_array([3., 4.]);
+    /// assert_eq!(v.reduce_product(), 12.);
+    /// ```
+    fn reduce_product(self) -> Self::Scalar;
+
+    /// Returns the maximum lane in the vector.
+    ///
+    /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+    /// return either.
+    ///
+    /// This function will not return `NaN` unless all lanes are `NaN`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::f32x2;
+    /// let v = f32x2::from_array([1., 2.]);
+    /// assert_eq!(v.reduce_max(), 2.);
+    ///
+    /// // NaN values are skipped...
+    /// let v = f32x2::from_array([1., f32::NAN]);
+    /// assert_eq!(v.reduce_max(), 1.);
+    ///
+    /// // ...unless all values are NaN
+    /// let v = f32x2::from_array([f32::NAN, f32::NAN]);
+    /// assert!(v.reduce_max().is_nan());
+    /// ```
+    fn reduce_max(self) -> Self::Scalar;
+
+    /// Returns the minimum lane in the vector.
+    ///
+    /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
+    /// return either.
+    ///
+    /// This function will not return `NaN` unless all lanes are `NaN`.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// # #![feature(portable_simd)]
+    /// # use core::simd::f32x2;
+    /// let v = f32x2::from_array([3., 7.]);
+    /// assert_eq!(v.reduce_min(), 3.);
+    ///
+    /// // NaN values are skipped...
+    /// let v = f32x2::from_array([1., f32::NAN]);
+    /// assert_eq!(v.reduce_min(), 1.);
+    ///
+    /// // ...unless all values are NaN
+    /// let v = f32x2::from_array([f32::NAN, f32::NAN]);
+    /// assert!(v.reduce_min().is_nan());
+    /// ```
+    fn reduce_min(self) -> Self::Scalar;
+}
+
+macro_rules! impl_trait {
+    { $($ty:ty { bits: $bits_ty:ty, mask: $mask_ty:ty }),* } => {
+        $(
+        impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+        }
+
+        impl<const LANES: usize> SimdFloat for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+            type Mask = Mask<<$mask_ty as SimdElement>::Mask, LANES>;
+            type Scalar = $ty;
+            type Bits = Simd<$bits_ty, LANES>;
+
+            #[inline]
+            fn to_bits(self) -> Simd<$bits_ty, LANES> {
+                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
+                unsafe { core::mem::transmute_copy(&self) } // Safety: sizes asserted equal above
+            }
+
+            #[inline]
+            fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
+                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
+                unsafe { core::mem::transmute_copy(&bits) } // Safety: sizes asserted equal above
+            }
+
+            #[inline]
+            fn abs(self) -> Self {
+                unsafe { intrinsics::simd_fabs(self) } // Safety: `self` is a float vector
+            }
+
+            #[inline]
+            fn recip(self) -> Self {
+                Self::splat(1.0) / self
+            }
+
+            #[inline]
+            fn to_degrees(self) -> Self {
+                // to_degrees uses a special constant for better precision, so extract that constant
+                self * Self::splat(Self::Scalar::to_degrees(1.))
+            }
+
+            #[inline]
+            fn to_radians(self) -> Self {
+                self * Self::splat(Self::Scalar::to_radians(1.))
+            }
+
+            #[inline]
+            fn is_sign_positive(self) -> Self::Mask {
+                !self.is_sign_negative()
+            }
+
+            #[inline]
+            fn is_sign_negative(self) -> Self::Mask {
+                let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1);
+                sign_bits.simd_gt(Simd::splat(0))
+            }
+
+            #[inline]
+            fn is_nan(self) -> Self::Mask {
+                self.simd_ne(self)
+            }
+
+            #[inline]
+            fn is_infinite(self) -> Self::Mask {
+                self.abs().simd_eq(Self::splat(Self::Scalar::INFINITY))
+            }
+
+            #[inline]
+            fn is_finite(self) -> Self::Mask {
+                self.abs().simd_lt(Self::splat(Self::Scalar::INFINITY))
+            }
+
+            #[inline]
+            fn is_subnormal(self) -> Self::Mask {
+                self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(Self::Scalar::INFINITY).to_bits()).simd_eq(Simd::splat(0))
+            }
+
+            #[inline]
+            #[must_use = "method returns a new mask and does not mutate the original value"]
+            fn is_normal(self) -> Self::Mask {
+                !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
+            }
+
+            #[inline]
+            fn signum(self) -> Self {
+                self.is_nan().select(Self::splat(Self::Scalar::NAN), Self::splat(1.0).copysign(self))
+            }
+
+            #[inline]
+            fn copysign(self, sign: Self) -> Self {
+                let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits();
+                let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
+                Self::from_bits(sign_bit | magnitude)
+            }
+
+            #[inline]
+            fn simd_min(self, other: Self) -> Self {
+                unsafe { intrinsics::simd_fmin(self, other) } // Safety: both are float vectors
+            }
+
+            #[inline]
+            fn simd_max(self, other: Self) -> Self {
+                unsafe { intrinsics::simd_fmax(self, other) } // Safety: both are float vectors
+            }
+
+            #[inline]
+            fn simd_clamp(self, min: Self, max: Self) -> Self {
+                assert!(
+                    min.simd_le(max).all(),
+                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
+                );
+                // Raise every lane that lies below `min` up to the matching lane of `min`.
+                let raised = self.simd_lt(min).select(min, self);
+                // Then lower every lane that still exceeds `max` down to the lane of `max`.
+                raised.simd_gt(max).select(max, raised)
+            }
+
+            #[inline]
+            fn reduce_sum(self) -> Self::Scalar {
+                // LLVM sum is inaccurate on i586 (x86 without SSE2); add lanes sequentially there
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+                    self.as_array().iter().sum()
+                } else {
+                    // Safety: `self` is a float vector
+                    unsafe { intrinsics::simd_reduce_add_ordered(self, 0.) }
+                }
+            }
+
+            #[inline]
+            fn reduce_product(self) -> Self::Scalar {
+                // LLVM product is inaccurate on i586 (x86 without SSE2); multiply lanes sequentially
+                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
+                    self.as_array().iter().product()
+                } else {
+                    // Safety: `self` is a float vector
+                    unsafe { intrinsics::simd_reduce_mul_ordered(self, 1.) }
+                }
+            }
+
+            #[inline]
+            fn reduce_max(self) -> Self::Scalar {
+                // Safety: `self` is a float vector
+                unsafe { intrinsics::simd_reduce_max(self) }
+            }
+
+            #[inline]
+            fn reduce_min(self) -> Self::Scalar {
+                // Safety: `self` is a float vector
+                unsafe { intrinsics::simd_reduce_min(self) }
+            }
+        }
+        )*
+    }
+}
+
+impl_trait! { f32 { bits: u32, mask: i32 }, f64 { bits: u64, mask: i64 } }
diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
index 61135427456..c3139b4ba3e 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/elements/int.rs
@@ -1,9 +1,10 @@
+use super::sealed::Sealed;
 use crate::simd::{
     intrinsics, LaneCount, Mask, Simd, SimdElement, SimdPartialOrd, SupportedLaneCount,
 };
 
 /// Operations on SIMD vectors of signed integers.
-pub trait SimdInt: Sized {
+pub trait SimdInt: Sized + Sealed {
     /// Mask type used for manipulating this SIMD vector type.
     type Mask;
 
@@ -167,6 +168,12 @@ pub trait SimdInt: Sized {
 macro_rules! impl_trait {
     { $($ty:ty),* } => {
         $(
+        impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+        }
+
         impl<const LANES: usize> SimdInt for Simd<$ty, LANES>
         where
             LaneCount<LANES>: SupportedLaneCount,
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
index da3213535a3..cba1a9b0ce0 100644
--- a/crates/core_simd/src/elements/uint.rs
+++ b/crates/core_simd/src/elements/uint.rs
@@ -1,7 +1,8 @@
+use super::sealed::Sealed;
 use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount};
 
 /// Operations on SIMD vectors of unsigned integers.
-pub trait SimdUint: Sized {
+pub trait SimdUint: Sized + Sealed {
     /// Scalar type contained by this SIMD vector type.
     type Scalar;
 
@@ -61,6 +62,12 @@ pub trait SimdUint: Sized {
 macro_rules! impl_trait {
     { $($ty:ty),* } => {
         $(
+        impl<const LANES: usize> Sealed for Simd<$ty, LANES>
+        where
+            LaneCount<LANES>: SupportedLaneCount,
+        {
+        }
+
         impl<const LANES: usize> SimdUint for Simd<$ty, LANES>
         where
             LaneCount<LANES>: SupportedLaneCount,
diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index 2d4fe2b7fde..590b2e4a153 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -1,6 +1,3 @@
-#[macro_use]
-mod reduction;
-
 #[macro_use]
 mod swizzle;
 
diff --git a/crates/core_simd/src/reduction.rs b/crates/core_simd/src/reduction.rs
deleted file mode 100644
index 9d8639feeee..00000000000
--- a/crates/core_simd/src/reduction.rs
+++ /dev/null
@@ -1,127 +0,0 @@
-use crate::simd::intrinsics::{
-    simd_reduce_add_ordered, simd_reduce_max, simd_reduce_min, simd_reduce_mul_ordered,
-};
-use crate::simd::{LaneCount, Simd, SupportedLaneCount};
-
-macro_rules! impl_float_reductions {
-    { $scalar:ty } => {
-        impl<const LANES: usize> Simd<$scalar, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-
-            /// Reducing add.  Returns the sum of the lanes of the vector.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")]
-            /// assert_eq!(v.reduce_sum(), 3.);
-            /// ```
-            #[inline]
-            pub fn reduce_sum(self) -> $scalar {
-                // LLVM sum is inaccurate on i586
-                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_array().iter().sum()
-                } else {
-                    // Safety: `self` is a float vector
-                    unsafe { simd_reduce_add_ordered(self, 0.) }
-                }
-            }
-
-            /// Reducing multiply.  Returns the product of the lanes of the vector.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 4.]);")]
-            /// assert_eq!(v.reduce_product(), 12.);
-            /// ```
-            #[inline]
-            pub fn reduce_product(self) -> $scalar {
-                // LLVM product is inaccurate on i586
-                if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
-                    self.as_array().iter().product()
-                } else {
-                    // Safety: `self` is a float vector
-                    unsafe { simd_reduce_mul_ordered(self, 1.) }
-                }
-            }
-
-            /// Reducing maximum.  Returns the maximum lane in the vector.
-            ///
-            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
-            /// return either.
-            ///
-            /// This function will not return `NaN` unless all lanes are `NaN`.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., 2.]);")]
-            /// assert_eq!(v.reduce_max(), 2.);
-            ///
-            /// // NaN values are skipped...
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")]
-            /// assert_eq!(v.reduce_max(), 1.);
-            ///
-            /// // ...unless all values are NaN
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([",
-                stringify!($scalar), "::NAN, ",
-                stringify!($scalar), "::NAN]);"
-            )]
-            /// assert!(v.reduce_max().is_nan());
-            /// ```
-            #[inline]
-            pub fn reduce_max(self) -> $scalar {
-                // Safety: `self` is a float vector
-                unsafe { simd_reduce_max(self) }
-            }
-
-            /// Reducing minimum.  Returns the minimum lane in the vector.
-            ///
-            /// Returns values based on equality, so a vector containing both `0.` and `-0.` may
-            /// return either.
-            ///
-            /// This function will not return `NaN` unless all lanes are `NaN`.
-            ///
-            /// # Examples
-            ///
-            /// ```
-            /// # #![feature(portable_simd)]
-            /// # use core::simd::Simd;
-            #[doc = concat!("# use core::simd::", stringify!($scalar), "x2;")]
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([3., 7.]);")]
-            /// assert_eq!(v.reduce_min(), 3.);
-            ///
-            /// // NaN values are skipped...
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([1., ", stringify!($scalar), "::NAN]);")]
-            /// assert_eq!(v.reduce_min(), 1.);
-            ///
-            /// // ...unless all values are NaN
-            #[doc = concat!("let v = ", stringify!($scalar), "x2::from_array([",
-                stringify!($scalar), "::NAN, ",
-                stringify!($scalar), "::NAN]);"
-            )]
-            /// assert!(v.reduce_min().is_nan());
-            /// ```
-            #[inline]
-            pub fn reduce_min(self) -> $scalar {
-                // Safety: `self` is a float vector
-                unsafe { simd_reduce_min(self) }
-            }
-        }
-    }
-}
-
-impl_float_reductions! { f32 }
-impl_float_reductions! { f64 }
diff --git a/crates/core_simd/src/vector/float.rs b/crates/core_simd/src/vector/float.rs
index 13b1d3995a1..f836c99b1e2 100644
--- a/crates/core_simd/src/vector/float.rs
+++ b/crates/core_simd/src/vector/float.rs
@@ -1,145 +1,6 @@
 #![allow(non_camel_case_types)]
 
-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Mask, Simd, SimdPartialEq, SimdPartialOrd, SupportedLaneCount};
-
-/// Implements inherent methods for a float vector containing multiple
-/// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
-/// representation.
-macro_rules! impl_float_vector {
-    { $type:ty, $bits_ty:ty, $mask_ty:ty } => {
-        impl<const LANES: usize> Simd<$type, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            /// Raw transmutation to an unsigned integer vector type with the
-            /// same size and number of lanes.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn to_bits(self) -> Simd<$bits_ty, LANES> {
-                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Simd<$bits_ty, LANES>>());
-                unsafe { core::mem::transmute_copy(&self) }
-            }
-
-            /// Raw transmutation from an unsigned integer vector type with the
-            /// same size and number of lanes.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
-                assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Simd<$bits_ty, LANES>>());
-                unsafe { core::mem::transmute_copy(&bits) }
-            }
-
-            /// Produces a vector where every lane has the absolute value of the
-            /// equivalently-indexed lane in `self`.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn abs(self) -> Self {
-                unsafe { intrinsics::simd_fabs(self) }
-            }
-
-            /// Takes the reciprocal (inverse) of each lane, `1/x`.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn recip(self) -> Self {
-                Self::splat(1.0) / self
-            }
-
-            /// Converts each lane from radians to degrees.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn to_degrees(self) -> Self {
-                // to_degrees uses a special constant for better precision, so extract that constant
-                self * Self::splat(<$type>::to_degrees(1.))
-            }
-
-            /// Converts each lane from degrees to radians.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn to_radians(self) -> Self {
-                self * Self::splat(<$type>::to_radians(1.))
-            }
-
-            /// Returns true for each lane if it has a positive sign, including
-            /// `+0.0`, `NaN`s with positive sign bit and positive infinity.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_sign_positive(self) -> Mask<$mask_ty, LANES> {
-                !self.is_sign_negative()
-            }
-
-            /// Returns true for each lane if it has a negative sign, including
-            /// `-0.0`, `NaN`s with negative sign bit and negative infinity.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_sign_negative(self) -> Mask<$mask_ty, LANES> {
-                let sign_bits = self.to_bits() & Simd::splat((!0 >> 1) + 1);
-                sign_bits.simd_gt(Simd::splat(0))
-            }
-
-            /// Returns true for each lane if its value is `NaN`.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_nan(self) -> Mask<$mask_ty, LANES> {
-                self.simd_ne(self)
-            }
-
-            /// Returns true for each lane if its value is positive infinity or negative infinity.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_infinite(self) -> Mask<$mask_ty, LANES> {
-                self.abs().simd_eq(Self::splat(<$type>::INFINITY))
-            }
-
-            /// Returns true for each lane if its value is neither infinite nor `NaN`.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_finite(self) -> Mask<$mask_ty, LANES> {
-                self.abs().simd_lt(Self::splat(<$type>::INFINITY))
-            }
-
-            /// Returns true for each lane if its value is subnormal.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_subnormal(self) -> Mask<$mask_ty, LANES> {
-                self.abs().simd_ne(Self::splat(0.0)) & (self.to_bits() & Self::splat(<$type>::INFINITY).to_bits()).simd_eq(Simd::splat(0))
-            }
-
-            /// Returns true for each lane if its value is neither zero, infinite,
-            /// subnormal, nor `NaN`.
-            #[inline]
-            #[must_use = "method returns a new mask and does not mutate the original value"]
-            pub fn is_normal(self) -> Mask<$mask_ty, LANES> {
-                !(self.abs().simd_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
-            }
-
-            /// Replaces each lane with a number that represents its sign.
-            ///
-            /// * `1.0` if the number is positive, `+0.0`, or `INFINITY`
-            /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY`
-            /// * `NAN` if the number is `NAN`
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn signum(self) -> Self {
-                self.is_nan().select(Self::splat(<$type>::NAN), Self::splat(1.0).copysign(self))
-            }
-
-            /// Returns each lane with the magnitude of `self` and the sign of `sign`.
-            ///
-            /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned.
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            pub fn copysign(self, sign: Self) -> Self {
-                let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits();
-                let magnitude = self.to_bits() & !Self::splat(-0.).to_bits();
-                Self::from_bits(sign_bit | magnitude)
-            }
-        }
-    };
-}
-
-impl_float_vector! { f32, u32, i32 }
-impl_float_vector! { f64, u64, i64 }
+use crate::simd::Simd;
 
 /// A 64-bit SIMD vector with two elements of type `f32`.
 pub type f32x2 = Simd<f32, 2>;
@@ -161,73 +22,3 @@ pub type f64x4 = Simd<f64, 4>;
 
 /// A 512-bit SIMD vector with eight elements of type `f64`.
 pub type f64x8 = Simd<f64, 8>;
-
-mod sealed {
-    pub trait Sealed {}
-}
-use sealed::Sealed;
-
-/// SIMD operations on vectors of floating point numbers.
-pub trait SimdFloat: Sized + Sealed {
-    /// Returns the minimum of each lane.
-    ///
-    /// If one of the values is `NAN`, then the other value is returned.
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn simd_min(self, other: Self) -> Self;
-
-    /// Returns the maximum of each lane.
-    ///
-    /// If one of the values is `NAN`, then the other value is returned.
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn simd_max(self, other: Self) -> Self;
-
-    /// Restrict each lane to a certain interval unless it is NaN.
-    ///
-    /// For each lane in `self`, returns the corresponding lane in `max` if the lane is
-    /// greater than `max`, and the corresponding lane in `min` if the lane is less
-    /// than `min`.  Otherwise returns the lane in `self`.
-    #[must_use = "method returns a new vector and does not mutate the original value"]
-    fn simd_clamp(self, min: Self, max: Self) -> Self;
-}
-
-macro_rules! impl_simd_float {
-    { $($float:ty),* } => {
-        $(
-        impl <const LANES: usize> Sealed for Simd<$float, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-        }
-
-        impl <const LANES: usize> SimdFloat for Simd<$float, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            #[inline]
-            #[must_use = "method returns a new vector and does not mutate the original value"]
-            fn simd_min(self, other: Self) -> Self {
-                unsafe { intrinsics::simd_fmin(self, other) }
-            }
-
-            #[inline]
-            fn simd_max(self, other: Self) -> Self {
-                unsafe { intrinsics::simd_fmax(self, other) }
-            }
-
-            #[inline]
-            fn simd_clamp(self, min: Self, max: Self) -> Self {
-                assert!(
-                    min.simd_le(max).all(),
-                    "each lane in `min` must be less than or equal to the corresponding lane in `max`",
-                );
-                let mut x = self;
-                x = x.simd_lt(min).select(min, x);
-                x = x.simd_gt(max).select(max, x);
-                x
-            }
-        }
-        )*
-    }
-}
-
-impl_simd_float! { f32, f64 }
diff --git a/crates/core_simd/tests/ops_macros.rs b/crates/core_simd/tests/ops_macros.rs
index 48c512be7d0..f759394d075 100644
--- a/crates/core_simd/tests/ops_macros.rs
+++ b/crates/core_simd/tests/ops_macros.rs
@@ -348,6 +348,7 @@ macro_rules! impl_unsigned_tests {
 macro_rules! impl_float_tests {
     { $scalar:tt, $int_scalar:tt } => {
         mod $scalar {
+            use core_simd::SimdFloat;
             type Vector<const LANES: usize> = core_simd::Simd<Scalar, LANES>;
             type Scalar = $scalar;
 
@@ -464,7 +465,6 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_min<const LANES: usize>() {
-                    use core_simd::simd::SimdFloat;
                     // Regular conditions (both values aren't zero)
                     test_helpers::test_binary_elementwise(
                         &Vector::<LANES>::simd_min,
@@ -488,7 +488,6 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_max<const LANES: usize>() {
-                    use core_simd::simd::SimdFloat;
                     // Regular conditions (both values aren't zero)
                     test_helpers::test_binary_elementwise(
                         &Vector::<LANES>::simd_max,
@@ -512,7 +511,6 @@ macro_rules! impl_float_tests {
                 }
 
                 fn simd_clamp<const LANES: usize>() {
-                    use core_simd::simd::SimdFloat;
                     test_helpers::test_3(&|value: [Scalar; LANES], mut min: [Scalar; LANES], mut max: [Scalar; LANES]| {
                         for (min, max) in min.iter_mut().zip(max.iter_mut()) {
                             if max < min {

From 528bc8593ad756239a6ded0443f10af657488559 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Fri, 15 Apr 2022 13:47:43 -0400
Subject: [PATCH 16/32] Improve copysign documentation

---
 crates/core_simd/src/elements/float.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
index fafbd2a4d21..456dd780dac 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/elements/float.rs
@@ -83,7 +83,7 @@ pub trait SimdFloat: Sized + Sealed {
 
     /// Returns each lane with the magnitude of `self` and the sign of `sign`.
     ///
-    /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned.
+    /// For any lane containing a `NAN`, a `NAN` with the sign of `sign` is returned.
     #[must_use = "method returns a new vector and does not mutate the original value"]
     fn copysign(self, sign: Self) -> Self;
 

From 62d3b2e39c3610046afd927843942cfdddab6753 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 16 Apr 2022 16:17:43 -0400
Subject: [PATCH 17/32] Add Copy bound to SIMD traits

---
 crates/core_simd/src/elements/float.rs | 2 +-
 crates/core_simd/src/elements/int.rs   | 2 +-
 crates/core_simd/src/elements/uint.rs  | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
index 456dd780dac..5a628f2121e 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/elements/float.rs
@@ -5,7 +5,7 @@ use crate::simd::{
 };
 
 /// Operations on SIMD vectors of floats.
-pub trait SimdFloat: Sized + Sealed {
+pub trait SimdFloat: Copy + Sealed {
     /// Mask type used for manipulating this SIMD vector type.
     type Mask;
 
diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
index c3139b4ba3e..787a0741146 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/elements/int.rs
@@ -4,7 +4,7 @@ use crate::simd::{
 };
 
 /// Operations on SIMD vectors of signed integers.
-pub trait SimdInt: Sized + Sealed {
+pub trait SimdInt: Copy + Sealed {
     /// Mask type used for manipulating this SIMD vector type.
     type Mask;
 
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
index cba1a9b0ce0..f9d43a1d19b 100644
--- a/crates/core_simd/src/elements/uint.rs
+++ b/crates/core_simd/src/elements/uint.rs
@@ -2,7 +2,7 @@ use super::sealed::Sealed;
 use crate::simd::{intrinsics, LaneCount, Simd, SupportedLaneCount};
 
 /// Operations on SIMD vectors of unsigned integers.
-pub trait SimdUint: Sized + Sealed {
+pub trait SimdUint: Copy + Sealed {
     /// Scalar type contained by this SIMD vector type.
     type Scalar;
 

From af53b5de24712a29f952b657d6e3107ba32f6e67 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Mon, 25 Apr 2022 16:32:56 -0400
Subject: [PATCH 18/32] rust-lang/portable-simd#279: Silence clippy false
 alarms

---
 crates/core_simd/src/ops.rs    | 1 +
 crates/core_simd/src/vector.rs | 1 +
 2 files changed, 2 insertions(+)

diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index d39b4091df9..8dbae346b5f 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -48,6 +48,7 @@ macro_rules! unsafe_base {
 // cg_clif defaults to this, and scalar MIR shifts also default to wrapping
 macro_rules! wrap_bitshift {
     ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
+        #[allow(clippy::suspicious_arithmetic_impl)]
         unsafe {
             $crate::simd::intrinsics::$simd_call(
                 $lhs,
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index f7989ee762a..9eb51f957d1 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -479,6 +479,7 @@ where
         mask.all()
     }
 
+    #[allow(clippy::partialeq_ne_impl)]
     #[inline]
     fn ne(&self, other: &Self) -> bool {
         // Safety: All SIMD vectors are SimdPartialEq, and the comparison produces a valid mask.

From 98cd636d581962c29ca336ca89dce083dbed926f Mon Sep 17 00:00:00 2001
From: Jubilee Young <workingjubilee@gmail.com>
Date: Wed, 2 Mar 2022 20:09:01 -0800
Subject: [PATCH 19/32] Add Mask::cast

---
 crates/core_simd/src/masks.rs | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index e8962b86b11..8f2f3f6aee0 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -181,6 +181,13 @@ where
         self.0.to_int()
     }
 
+    /// Converts the mask to a mask of any other lane element type, keeping the lane count.
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    pub fn cast<U: MaskElement>(self) -> Mask<U, LANES> {
+        Mask(self.0.convert())
+    }
+
     /// Tests the value of the specified lane.
     ///
     /// # Safety

From aa11959f19789f7db6d3a37bc52f5d7718cc9224 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 21 May 2022 15:13:20 -0400
Subject: [PATCH 20/32] Add mask cast tests

---
 crates/core_simd/tests/masks.rs | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index d10c6610f50..3a0493d4ee6 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -99,6 +99,29 @@ macro_rules! test_mask_api {
                 assert_eq!(bitmask, 0b01);
                 assert_eq!(core_simd::Mask::<$type, 2>::from_bitmask(bitmask), mask);
             }
+
+            #[test]
+            fn cast() {
+                fn cast_impl<T: core_simd::MaskElement>()
+                where
+                    core_simd::Mask<$type, 8>: Into<core_simd::Mask<T, 8>>,
+                {
+                    let values = [true, false, false, true, false, false, true, false];
+                    let mask = core_simd::Mask::<$type, 8>::from_array(values);
+
+                    let cast_mask = mask.cast::<T>();
+                    assert_eq!(values, cast_mask.to_array());
+
+                    let into_mask: core_simd::Mask<T, 8> = mask.into();
+                    assert_eq!(values, into_mask.to_array());
+                }
+
+                cast_impl::<i8>();
+                cast_impl::<i16>();
+                cast_impl::<i32>();
+                cast_impl::<i64>();
+                cast_impl::<isize>();
+            }
         }
     }
 }

From c9f4e0ef98a4c8c54919d25eafd83e9bcaf2e4df Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 21 May 2022 16:49:03 -0400
Subject: [PATCH 21/32] Use Mask::cast in From impl

---
 crates/core_simd/src/masks.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 8f2f3f6aee0..dcec336cfaf 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -578,7 +578,7 @@ macro_rules! impl_from {
             LaneCount<LANES>: SupportedLaneCount,
         {
             fn from(value: Mask<$from, LANES>) -> Self {
-                Self(value.0.convert())
+                value.cast()
             }
         }
         )*

From b7fea94c96769e939ecdc98f368bb6daa330aa0b Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 13 Jan 2022 21:20:17 -0500
Subject: [PATCH 22/32] Generically implement ToBitMaskArray

---
 crates/core_simd/src/masks.rs            |  5 +-
 crates/core_simd/src/masks/bitmask.rs    | 20 ++++++-
 crates/core_simd/src/masks/full_masks.rs | 68 +++++++++++++++++++++++-
 crates/core_simd/src/masks/to_bitmask.rs | 38 +++++++++++++
 crates/core_simd/tests/masks.rs          | 13 +++++
 5 files changed, 141 insertions(+), 3 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index dcec336cfaf..e65548a3287 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -13,7 +13,10 @@
 mod mask_impl;
 
 mod to_bitmask;
-pub use to_bitmask::ToBitMask;
+pub use to_bitmask::{ToBitMask, ToBitMaskArray};
+
+#[cfg(feature = "generic_const_exprs")]
+pub use to_bitmask::bitmask_len;
 
 use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
 use core::cmp::Ordering;
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index ec4dd357ee9..2e2c0a45c51 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray};
 use core::marker::PhantomData;
 
 /// A mask where each lane is represented by a single bit.
@@ -115,6 +115,24 @@ where
         unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
     }
 
+    #[inline]
+    #[must_use = "method returns a new array and does not mutate the original value"]
+    pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] {
+        assert!(core::mem::size_of::<Self>() == N);
+
+        // Safety: converting an integer to an array of bytes of the same size is safe
+        unsafe { core::mem::transmute_copy(&self.0) }
+    }
+
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self {
+        assert!(core::mem::size_of::<Self>() == N);
+
+        // Safety: converting an array of bytes to an integer of the same size is safe
+        Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
+    }
+
     #[inline]
     pub fn to_bitmask_integer<U>(self) -> U
     where
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index efa688b128f..b1c3b2b88ad 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -2,7 +2,7 @@
 
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray};
 
 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
@@ -139,6 +139,72 @@ where
         unsafe { Mask(intrinsics::simd_cast(self.0)) }
     }
 
+    #[inline]
+    #[must_use = "method returns a new array and does not mutate the original value"]
+    pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
+    where
+        super::Mask<T, LANES>: ToBitMaskArray,
+        [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
+    {
+        assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+
+        // Safety: N is the correct bitmask size
+        //
+        // The transmute below allows this function to be marked safe, since it will prevent
+        // monomorphization errors in the case of an incorrect size.
+        unsafe {
+            // Compute the bitmask
+            let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+                intrinsics::simd_bitmask(self.0);
+
+            // Transmute to the return type, previously asserted to be the same size
+            let mut bitmask: [u8; N] = core::mem::transmute_copy(&bitmask);
+
+            // LLVM assumes bit order should match endianness
+            if cfg!(target_endian = "big") {
+                for x in bitmask.as_mut() {
+                    *x = x.reverse_bits();
+                }
+            };
+
+            bitmask
+        }
+    }
+
+    #[inline]
+    #[must_use = "method returns a new mask and does not mutate the original value"]
+    pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
+    where
+        super::Mask<T, LANES>: ToBitMaskArray,
+        [(); <super::Mask<T, LANES> as ToBitMaskArray>::BYTES]: Sized,
+    {
+        assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
+
+        // Safety: N is the correct bitmask size
+        //
+        // The transmute below allows this function to be marked safe, since it will prevent
+        // monomorphization errors in the case of an incorrect size.
+        unsafe {
+            // LLVM assumes bit order should match endianness
+            if cfg!(target_endian = "big") {
+                for x in bitmask.as_mut() {
+                    *x = x.reverse_bits();
+                }
+            }
+
+            // Transmute to the bitmask type, previously asserted to be the same size
+            let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
+                core::mem::transmute_copy(&bitmask);
+
+            // Compute the regular mask
+            Self::from_int_unchecked(intrinsics::simd_select_bitmask(
+                bitmask,
+                Self::splat(true).to_int(),
+                Self::splat(false).to_int(),
+            ))
+        }
+    }
+
     #[inline]
     pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
     where
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index c263f6a4eec..ee229fc7a44 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -31,6 +31,24 @@ pub unsafe trait ToBitMask: Sealed {
     fn from_bitmask(bitmask: Self::BitMask) -> Self;
 }
 
+/// Converts masks to and from byte array bitmasks.
+///
+/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
+///
+/// # Safety
+/// This trait is `unsafe` and sealed, since the `BYTES` value must match the number of lanes in
+/// the mask.
+pub unsafe trait ToBitMaskArray: Sealed {
+    /// The length of the bitmask array.
+    const BYTES: usize;
+
+    /// Converts a mask to a bitmask.
+    fn to_bitmask_array(self) -> [u8; Self::BYTES];
+
+    /// Converts a bitmask to a mask.
+    fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self;
+}
+
 macro_rules! impl_integer_intrinsic {
     { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
         $(
@@ -58,3 +76,23 @@ impl_integer_intrinsic! {
     unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
     unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
 }
+
+/// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes.
+pub const fn bitmask_len(lanes: usize) -> usize {
+    (lanes + 7) / 8
+}
+
+unsafe impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
+where
+    LaneCount<LANES>: SupportedLaneCount,
+{
+    const BYTES: usize = bitmask_len(LANES);
+
+    fn to_bitmask_array(self) -> [u8; Self::BYTES] {
+        self.0.to_bitmask_array()
+    }
+
+    fn from_bitmask_array(bitmask: [u8; Self::BYTES]) -> Self {
+        Mask(mask_impl::Mask::from_bitmask_array(bitmask))
+    }
+}
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 3a0493d4ee6..6150124b8ca 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -122,6 +122,19 @@ macro_rules! test_mask_api {
                 cast_impl::<i64>();
                 cast_impl::<isize>();
             }
+
+            #[test]
+            fn roundtrip_bitmask_array_conversion() {
+                use core_simd::ToBitMaskArray;
+                let values = [
+                    true, false, false, true, false, false, true, false,
+                    true, true, false, false, false, false, false, true,
+                ];
+                let mask = core_simd::Mask::<$type, 16>::from_array(values);
+                let bitmask = mask.to_bitmask_array();
+                assert_eq!(bitmask, [0b01001001, 0b10000011]);
+                assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask_array(bitmask), mask);
+            }
         }
     }
 }

From 1cee9304b3678523ad3e234d9d72fcd1d73fff99 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 21 May 2022 20:08:38 -0400
Subject: [PATCH 23/32] Fix generic_const_exprs feature

---
 crates/core_simd/src/masks.rs            | 4 ++--
 crates/core_simd/src/masks/bitmask.rs    | 4 +++-
 crates/core_simd/src/masks/full_masks.rs | 7 ++++++-
 crates/core_simd/src/masks/to_bitmask.rs | 3 +++
 crates/core_simd/tests/masks.rs          | 1 +
 5 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index e65548a3287..11d7288eccb 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -13,10 +13,10 @@
 mod mask_impl;
 
 mod to_bitmask;
-pub use to_bitmask::{ToBitMask, ToBitMaskArray};
+pub use to_bitmask::ToBitMask;
 
 #[cfg(feature = "generic_const_exprs")]
-pub use to_bitmask::bitmask_len;
+pub use to_bitmask::{bitmask_len, ToBitMaskArray};
 
 use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SimdPartialEq, SupportedLaneCount};
 use core::cmp::Ordering;
diff --git a/crates/core_simd/src/masks/bitmask.rs b/crates/core_simd/src/masks/bitmask.rs
index 2e2c0a45c51..365ecc0a325 100644
--- a/crates/core_simd/src/masks/bitmask.rs
+++ b/crates/core_simd/src/masks/bitmask.rs
@@ -1,7 +1,7 @@
 #![allow(unused_imports)]
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
 use core::marker::PhantomData;
 
 /// A mask where each lane is represented by a single bit.
@@ -115,6 +115,7 @@ where
         unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
     }
 
+    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new array and does not mutate the original value"]
     pub fn to_bitmask_array<const N: usize>(self) -> [u8; N] {
@@ -124,6 +125,7 @@ where
         unsafe { core::mem::transmute_copy(&self.0) }
     }
 
+    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_bitmask_array<const N: usize>(bitmask: [u8; N]) -> Self {
diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index b1c3b2b88ad..7ed844de625 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -2,7 +2,10 @@
 
 use super::MaskElement;
 use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask, ToBitMaskArray};
+use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
+
+#[cfg(feature = "generic_const_exprs")]
+use crate::simd::ToBitMaskArray;
 
 #[repr(transparent)]
 pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
@@ -139,6 +142,7 @@ where
         unsafe { Mask(intrinsics::simd_cast(self.0)) }
     }
 
+    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new array and does not mutate the original value"]
     pub fn to_bitmask_array<const N: usize>(self) -> [u8; N]
@@ -171,6 +175,7 @@ where
         }
     }
 
+    #[cfg(feature = "generic_const_exprs")]
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_bitmask_array<const N: usize>(mut bitmask: [u8; N]) -> Self
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index ee229fc7a44..954f88ea511 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -38,6 +38,7 @@ pub unsafe trait ToBitMask: Sealed {
 /// # Safety
 /// This trait is `unsafe` and sealed, since the `BYTES` value must match the number of lanes in
 /// the mask.
+#[cfg(feature = "generic_const_exprs")]
 pub unsafe trait ToBitMaskArray: Sealed {
     /// The length of the bitmask array.
     const BYTES: usize;
@@ -78,10 +79,12 @@ impl_integer_intrinsic! {
 }
 
 /// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes.
+#[cfg(feature = "generic_const_exprs")]
 pub const fn bitmask_len(lanes: usize) -> usize {
     (lanes + 7) / 8
 }
 
+#[cfg(feature = "generic_const_exprs")]
 unsafe impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
 where
     LaneCount<LANES>: SupportedLaneCount,
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
index 6150124b8ca..673d0db93fe 100644
--- a/crates/core_simd/tests/masks.rs
+++ b/crates/core_simd/tests/masks.rs
@@ -123,6 +123,7 @@ macro_rules! test_mask_api {
                 cast_impl::<isize>();
             }
 
+            #[cfg(feature = "generic_const_exprs")]
             #[test]
             fn roundtrip_bitmask_array_conversion() {
                 use core_simd::ToBitMaskArray;

From bca8dec404c18d9f4ef9fa1ec5f19766910d0c84 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sun, 22 May 2022 01:20:28 -0400
Subject: [PATCH 24/32] Remove incorrect comment

---
 crates/core_simd/src/masks/full_masks.rs | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/crates/core_simd/src/masks/full_masks.rs b/crates/core_simd/src/masks/full_masks.rs
index 7ed844de625..adf0fcbeae2 100644
--- a/crates/core_simd/src/masks/full_masks.rs
+++ b/crates/core_simd/src/masks/full_masks.rs
@@ -153,9 +153,6 @@ where
         assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
 
         // Safety: N is the correct bitmask size
-        //
-        // The transmute below allows this function to be marked safe, since it will prevent
-        // monomorphization errors in the case of an incorrect size.
         unsafe {
             // Compute the bitmask
             let bitmask: [u8; <super::Mask<T, LANES> as ToBitMaskArray>::BYTES] =
@@ -186,9 +183,6 @@ where
         assert_eq!(<super::Mask<T, LANES> as ToBitMaskArray>::BYTES, N);
 
         // Safety: N is the correct bitmask size
-        //
-        // The transmute below allows this function to be marked safe, since it will prevent
-        // monomorphization errors in the case of an incorrect size.
         unsafe {
             // LLVM assumes bit order should match endianness
             if cfg!(target_endian = "big") {

From 05c92c73c181015ed512255abef8377292d5cbf7 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Sat, 21 May 2022 14:55:52 -0400
Subject: [PATCH 25/32] Document remaining internal unsafety, and deny
 undocumented unsafety

---
 crates/core_simd/src/elements/float.rs   |  5 ++++
 crates/core_simd/src/lib.rs              |  2 +-
 crates/core_simd/src/masks.rs            |  1 +
 crates/core_simd/src/masks/to_bitmask.rs | 32 +++++++++---------------
 crates/core_simd/src/ops.rs              |  3 +++
 crates/core_simd/src/ops/unary.rs        |  1 +
 crates/core_simd/src/round.rs            |  2 ++
 crates/core_simd/src/vector.rs           | 26 ++++++++++++++++++-
 8 files changed, 50 insertions(+), 22 deletions(-)

diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
index 5a628f2121e..67e4454e5e1 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/elements/float.rs
@@ -202,17 +202,20 @@ macro_rules! impl_trait {
             #[inline]
             fn to_bits(self) -> Simd<$bits_ty, LANES> {
                 assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
+                // Safety: transmuting between vector types is safe
                 unsafe { core::mem::transmute_copy(&self) }
             }
 
             #[inline]
             fn from_bits(bits: Simd<$bits_ty, LANES>) -> Self {
                 assert_eq!(core::mem::size_of::<Self>(), core::mem::size_of::<Self::Bits>());
+                // Safety: transmuting between vector types is safe
                 unsafe { core::mem::transmute_copy(&bits) }
             }
 
             #[inline]
             fn abs(self) -> Self {
+                // Safety: `self` is a float vector
                 unsafe { intrinsics::simd_fabs(self) }
             }
 
@@ -283,11 +286,13 @@ macro_rules! impl_trait {
 
             #[inline]
             fn simd_min(self, other: Self) -> Self {
+                // Safety: `self` and `other` are float vectors
                 unsafe { intrinsics::simd_fmin(self, other) }
             }
 
             #[inline]
             fn simd_max(self, other: Self) -> Self {
+                // Safety: `self` and `other` are floating point vectors
                 unsafe { intrinsics::simd_fmax(self, other) }
             }
 
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
index 2632073622e..715f258f617 100644
--- a/crates/core_simd/src/lib.rs
+++ b/crates/core_simd/src/lib.rs
@@ -12,7 +12,7 @@
 #![cfg_attr(feature = "generic_const_exprs", feature(generic_const_exprs))]
 #![cfg_attr(feature = "generic_const_exprs", allow(incomplete_features))]
 #![warn(missing_docs)]
-#![deny(unsafe_op_in_unsafe_fn)]
+#![deny(unsafe_op_in_unsafe_fn, clippy::undocumented_unsafe_blocks)]
 #![unstable(feature = "portable_simd", issue = "86656")]
 //! Portable SIMD module.
 
diff --git a/crates/core_simd/src/masks.rs b/crates/core_simd/src/masks.rs
index 11d7288eccb..c36c336d8a2 100644
--- a/crates/core_simd/src/masks.rs
+++ b/crates/core_simd/src/masks.rs
@@ -68,6 +68,7 @@ macro_rules! impl_element {
             const FALSE: Self = 0;
         }
 
+        // Safety: this is a valid mask element type
         unsafe impl MaskElement for $ty {}
     }
 }
diff --git a/crates/core_simd/src/masks/to_bitmask.rs b/crates/core_simd/src/masks/to_bitmask.rs
index 954f88ea511..65d3ce9be65 100644
--- a/crates/core_simd/src/masks/to_bitmask.rs
+++ b/crates/core_simd/src/masks/to_bitmask.rs
@@ -16,11 +16,7 @@ where
 /// Converts masks to and from integer bitmasks.
 ///
 /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
-///
-/// # Safety
-/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
-/// the mask.
-pub unsafe trait ToBitMask: Sealed {
+pub trait ToBitMask: Sealed {
     /// The integer bitmask type.
     type BitMask;
 
@@ -34,12 +30,8 @@ pub unsafe trait ToBitMask: Sealed {
 /// Converts masks to and from byte array bitmasks.
 ///
 /// Each bit of the bitmask corresponds to a mask lane, starting with the LSB of the first byte.
-///
-/// # Safety
-/// This trait is `unsafe` and sealed, since the `BYTES` value must match the number of lanes in
-/// the mask.
 #[cfg(feature = "generic_const_exprs")]
-pub unsafe trait ToBitMaskArray: Sealed {
+pub trait ToBitMaskArray: Sealed {
     /// The length of the bitmask array.
     const BYTES: usize;
 
@@ -51,9 +43,9 @@ pub unsafe trait ToBitMaskArray: Sealed {
 }
 
 macro_rules! impl_integer_intrinsic {
-    { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
+    { $(impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
         $(
-        unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
+        impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
             type BitMask = $int;
 
             fn to_bitmask(self) -> $int {
@@ -69,13 +61,13 @@ macro_rules! impl_integer_intrinsic {
 }
 
 impl_integer_intrinsic! {
-    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 1>
-    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 2>
-    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 4>
-    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
-    unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
-    unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
-    unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
+    impl ToBitMask<BitMask=u8> for Mask<_, 1>
+    impl ToBitMask<BitMask=u8> for Mask<_, 2>
+    impl ToBitMask<BitMask=u8> for Mask<_, 4>
+    impl ToBitMask<BitMask=u8> for Mask<_, 8>
+    impl ToBitMask<BitMask=u16> for Mask<_, 16>
+    impl ToBitMask<BitMask=u32> for Mask<_, 32>
+    impl ToBitMask<BitMask=u64> for Mask<_, 64>
 }
 
 /// Returns the minimum numnber of bytes in a bitmask with `lanes` lanes.
@@ -85,7 +77,7 @@ pub const fn bitmask_len(lanes: usize) -> usize {
 }
 
 #[cfg(feature = "generic_const_exprs")]
-unsafe impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
+impl<T: MaskElement, const LANES: usize> ToBitMaskArray for Mask<T, LANES>
 where
     LaneCount<LANES>: SupportedLaneCount,
 {
diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
index 8dbae346b5f..5a077a469d8 100644
--- a/crates/core_simd/src/ops.rs
+++ b/crates/core_simd/src/ops.rs
@@ -33,6 +33,7 @@ where
 
 macro_rules! unsafe_base {
     ($lhs:ident, $rhs:ident, {$simd_call:ident}, $($_:tt)*) => {
+        // Safety: $lhs and $rhs are vectors
         unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
     };
 }
@@ -49,6 +50,7 @@ macro_rules! unsafe_base {
 macro_rules! wrap_bitshift {
     ($lhs:ident, $rhs:ident, {$simd_call:ident}, $int:ident) => {
         #[allow(clippy::suspicious_arithmetic_impl)]
+        // Safety: $lhs and the bitand result are vectors
         unsafe {
             $crate::simd::intrinsics::$simd_call(
                 $lhs,
@@ -91,6 +93,7 @@ macro_rules! int_divrem_guard {
                 // Nice base case to make it easy to const-fold away the other branch.
                 $rhs
             };
+            // Safety: $lhs and rhs are vectors
             unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
         }
     };
diff --git a/crates/core_simd/src/ops/unary.rs b/crates/core_simd/src/ops/unary.rs
index 4ebea560fc6..4ad02215034 100644
--- a/crates/core_simd/src/ops/unary.rs
+++ b/crates/core_simd/src/ops/unary.rs
@@ -14,6 +14,7 @@ macro_rules! neg {
             #[inline]
             #[must_use = "operator returns a new vector without mutating the input"]
             fn neg(self) -> Self::Output {
+                // Safety: `self` is a signed vector
                 unsafe { intrinsics::simd_neg(self) }
             }
         })*
diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs
index 556bc2cc1fe..e111f3e0494 100644
--- a/crates/core_simd/src/round.rs
+++ b/crates/core_simd/src/round.rs
@@ -30,6 +30,8 @@ macro_rules! implement {
                 $type: FloatToInt<I>,
                 I: SimdElement,
             {
+                // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to
+                // an integer.
                 unsafe { intrinsics::simd_cast(self) }
             }
         }
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 9eb51f957d1..fac7dca51f4 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -213,7 +213,7 @@ where
     #[inline]
     #[cfg(not(bootstrap))]
     pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
-        // Safety: The input argument is a vector of a known SIMD type.
+        // Safety: The input argument is a vector of a valid SIMD element type.
         unsafe { intrinsics::simd_as(self) }
     }
 
@@ -624,61 +624,85 @@ pub unsafe trait SimdElement: Sealed + Copy {
 }
 
 impl Sealed for u8 {}
+
+// Safety: u8 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for u8 {
     type Mask = i8;
 }
 
 impl Sealed for u16 {}
+
+// Safety: u16 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for u16 {
     type Mask = i16;
 }
 
 impl Sealed for u32 {}
+
+// Safety: u32 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for u32 {
     type Mask = i32;
 }
 
 impl Sealed for u64 {}
+
+// Safety: u64 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for u64 {
     type Mask = i64;
 }
 
 impl Sealed for usize {}
+
+// Safety: usize is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for usize {
     type Mask = isize;
 }
 
 impl Sealed for i8 {}
+
+// Safety: i8 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for i8 {
     type Mask = i8;
 }
 
 impl Sealed for i16 {}
+
+// Safety: i16 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for i16 {
     type Mask = i16;
 }
 
 impl Sealed for i32 {}
+
+// Safety: i32 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for i32 {
     type Mask = i32;
 }
 
 impl Sealed for i64 {}
+
+// Safety: i64 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for i64 {
     type Mask = i64;
 }
 
 impl Sealed for isize {}
+
+// Safety: isize is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for isize {
     type Mask = isize;
 }
 
 impl Sealed for f32 {}
+
+// Safety: f32 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for f32 {
     type Mask = i32;
 }
 
 impl Sealed for f64 {}
+
+// Safety: f64 is a valid SIMD element type, and is supported by this API
 unsafe impl SimdElement for f64 {
     type Mask = i64;
 }

From 5562b02ff059fd519d59c1fb7873bdd386efc22e Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Thu, 2 Jun 2022 10:19:20 -0400
Subject: [PATCH 26/32] Deduplicate to_int_unchecked

---
 crates/core_simd/src/mod.rs    |  1 -
 crates/core_simd/src/round.rs  | 42 ----------------------------------
 crates/core_simd/src/vector.rs | 25 ++++++++++++++++++++
 3 files changed, 25 insertions(+), 43 deletions(-)
 delete mode 100644 crates/core_simd/src/round.rs

diff --git a/crates/core_simd/src/mod.rs b/crates/core_simd/src/mod.rs
index 590b2e4a153..b472aa3abe2 100644
--- a/crates/core_simd/src/mod.rs
+++ b/crates/core_simd/src/mod.rs
@@ -14,7 +14,6 @@ mod lane_count;
 mod masks;
 mod ops;
 mod ord;
-mod round;
 mod select;
 mod vector;
 mod vendor;
diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs
deleted file mode 100644
index e111f3e0494..00000000000
--- a/crates/core_simd/src/round.rs
+++ /dev/null
@@ -1,42 +0,0 @@
-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
-use core::convert::FloatToInt;
-
-macro_rules! implement {
-    {
-        $type:ty
-    } => {
-        impl<const LANES: usize> Simd<$type, LANES>
-        where
-            LaneCount<LANES>: SupportedLaneCount,
-        {
-            /// Rounds toward zero and converts to the same-width integer type, assuming that
-            /// the value is finite and fits in that type.
-            ///
-            /// # Safety
-            /// The value must:
-            ///
-            /// * Not be NaN
-            /// * Not be infinite
-            /// * Be representable in the return type, after truncating off its fractional part
-            ///
-            /// If these requirements are infeasible or costly, consider using the safe function [cast],
-            /// which saturates on conversion.
-            ///
-            /// [cast]: Simd::cast
-            #[inline]
-            pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
-            where
-                $type: FloatToInt<I>,
-                I: SimdElement,
-            {
-                // Safety: `self` is a vector, and `FloatToInt` ensures the type can be casted to
-                // an integer.
-                unsafe { intrinsics::simd_cast(self) }
-            }
-        }
-    }
-}
-
-implement! { f32 }
-implement! { f64 }
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index fac7dca51f4..7433a695da9 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -217,6 +217,31 @@ where
         unsafe { intrinsics::simd_as(self) }
     }
 
+    /// Rounds toward zero and converts to the same-width integer type, assuming that
+    /// the value is finite and fits in that type.
+    ///
+    /// # Safety
+    /// The value must:
+    ///
+    /// * Not be NaN
+    /// * Not be infinite
+    /// * Be representable in the return type, after truncating off its fractional part
+    ///
+    /// If these requirements are infeasible or costly, consider using the safe function [cast],
+    /// which saturates on conversion.
+    ///
+    /// [cast]: Simd::cast
+    #[inline]
+    pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
+    where
+        T: core::convert::FloatToInt<I>,
+        I: SimdElement,
+    {
+        // Safety: `self` is a vector, and `FloatToInt` ensures the type can be cast to
+        // an integer.
+        unsafe { intrinsics::simd_cast(self) }
+    }
+
     /// Reads from potentially discontiguous indices in `slice` to construct a SIMD vector.
     /// If an index is out-of-bounds, the lane is instead selected from the `or` vector.
     ///

From c9636158d91dda241eca5a729fcb7e2a45d7a950 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay <programmerjake@gmail.com>
Date: Mon, 6 Jun 2022 12:16:17 -0700
Subject: [PATCH 27/32] Change `Simd::splat` to not generate a loop

This fixes poor codegen in some circumstances for `u16x8::splat` on x86_64
https://rust-lang.zulipchat.com/#narrow/stream/257879-project-portable-simd/topic/Very.20bad.20.60u16x8.3A.3Asplat.60.20codegen.20on.20x86_64
---
 crates/core_simd/src/vector.rs | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index fac7dca51f4..8379135826d 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -9,8 +9,9 @@ pub use uint::*;
 // Vectors of pointers are not for public use at the current time.
 pub(crate) mod ptr;
 
-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount};
+use crate::simd::{
+    intrinsics, LaneCount, Mask, MaskElement, SimdPartialOrd, SupportedLaneCount, Swizzle,
+};
 
 /// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
 ///
@@ -123,8 +124,12 @@ where
     /// let v = u32x4::splat(8);
     /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
     /// ```
-    pub const fn splat(value: T) -> Self {
-        Self([value; LANES])
+    pub fn splat(value: T) -> Self {
+        struct Splat;
+        impl<const LANES: usize> Swizzle<1, LANES> for Splat {
+            const INDEX: [usize; LANES] = [0; LANES];
+        }
+        Splat::swizzle(Simd::<T, 1>::from([value]))
     }
 
     /// Returns an array reference containing the entire SIMD vector.

From f7412ad7b918578864f4d9a0fc24279f7fbebc31 Mon Sep 17 00:00:00 2001
From: Jacob Lifshay <programmerjake@gmail.com>
Date: Mon, 6 Jun 2022 12:43:29 -0700
Subject: [PATCH 28/32] add workaround comment in `Simd::splat`

---
 crates/core_simd/src/vector.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 8379135826d..19bf45385db 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -125,6 +125,8 @@ where
     /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
     /// ```
     pub fn splat(value: T) -> Self {
+        // This is a workaround for `[value; LANES]` generating a loop:
+        // https://github.com/rust-lang/rust/issues/97804
         struct Splat;
         impl<const LANES: usize> Swizzle<1, LANES> for Splat {
             const INDEX: [usize; LANES] = [0; LANES];

From ed8092e96bb5ad10f7242589f2c263746adafa35 Mon Sep 17 00:00:00 2001
From: Caleb Zulawski <caleb.zulawski@gmail.com>
Date: Tue, 21 Jun 2022 20:52:43 -0400
Subject: [PATCH 29/32] Clarify comment

---
 crates/core_simd/src/vector.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index c6f588672b4..761151ab8b2 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -125,7 +125,7 @@ where
     /// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
     /// ```
     pub fn splat(value: T) -> Self {
-        // This is a workaround for `[value; LANES]` generating a loop:
+        // This is preferred over `[value; LANES]`, since it's explicitly a splat:
         // https://github.com/rust-lang/rust/issues/97804
         struct Splat;
         impl<const LANES: usize> Swizzle<1, LANES> for Splat {

From 64bef2910be17ca75ced3f0a99b4584f69114c74 Mon Sep 17 00:00:00 2001
From: Ralf Jung <post@ralfj.de>
Date: Tue, 12 Apr 2022 11:01:22 -0400
Subject: [PATCH 30/32] portable-simd: use simd_arith_offset to avoid ptr-int
 transmutation

---
 crates/core_simd/src/intrinsics.rs |  4 ++++
 crates/core_simd/src/vector/ptr.rs | 11 +++++++++++
 2 files changed, 15 insertions(+)

diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index ee7408b62de..a1de8474fb2 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -61,6 +61,10 @@ extern "platform-intrinsic" {
     /// xor
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
 
+    /// getelementptr (without inbounds)
+    #[cfg(not(bootstrap))]
+    pub(crate) fn simd_arith_offset<T, U>(ptrs: T, offsets: U) -> T;
+
     /// fptoui/fptosi/uitofp/sitofp
     /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
     /// but the truncated value must fit in the target type or the result is poison.
diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs
index 417d255c28d..68a9c67f795 100644
--- a/crates/core_simd/src/vector/ptr.rs
+++ b/crates/core_simd/src/vector/ptr.rs
@@ -1,5 +1,8 @@
 //! Private implementation details of public gather/scatter APIs.
+#[cfg(not(bootstrap))]
+use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SupportedLaneCount};
+#[cfg(bootstrap)]
 use core::mem;
 
 /// A vector of *const T.
@@ -21,12 +24,16 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        #[cfg(bootstrap)]
         // Safety: converting pointers to usize and vice-versa is safe
         // (even if using that pointer is not)
         unsafe {
             let x: Simd<usize, LANES> = mem::transmute_copy(&self);
             mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
         }
+        #[cfg(not(bootstrap))]
+        // Safety: this intrinsic doesn't have a precondition
+        unsafe { intrinsics::simd_arith_offset(self, addend) }
     }
 }
 
@@ -49,11 +56,15 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
+        #[cfg(bootstrap)]
         // Safety: converting pointers to usize and vice-versa is safe
         // (even if using that pointer is not)
         unsafe {
             let x: Simd<usize, LANES> = mem::transmute_copy(&self);
             mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
         }
+        #[cfg(not(bootstrap))]
+        // Safety: this intrinsic doesn't have a precondition
+        unsafe { intrinsics::simd_arith_offset(self, addend) }
     }
 }

From bbf31f9c78ad35ad1ef8fa91146e31cb447662b9 Mon Sep 17 00:00:00 2001
From: Mark Rousskov <mark.simulacrum@gmail.com>
Date: Fri, 20 May 2022 08:54:10 -0400
Subject: [PATCH 31/32] Finish bumping stage0

It looks like the last time had left some remaining cfg's -- which made me think
that the stage0 bump was actually successful. This brings us to a released 1.62
beta though.
---
 crates/core_simd/src/intrinsics.rs |  1 -
 crates/core_simd/src/vector/ptr.rs | 19 -------------------
 2 files changed, 20 deletions(-)

diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
index a1de8474fb2..6047890a093 100644
--- a/crates/core_simd/src/intrinsics.rs
+++ b/crates/core_simd/src/intrinsics.rs
@@ -62,7 +62,6 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
 
     /// getelementptr (without inbounds)
-    #[cfg(not(bootstrap))]
     pub(crate) fn simd_arith_offset<T, U>(ptrs: T, offsets: U) -> T;
 
     /// fptoui/fptosi/uitofp/sitofp
diff --git a/crates/core_simd/src/vector/ptr.rs b/crates/core_simd/src/vector/ptr.rs
index 68a9c67f795..fa756344db9 100644
--- a/crates/core_simd/src/vector/ptr.rs
+++ b/crates/core_simd/src/vector/ptr.rs
@@ -1,9 +1,6 @@
 //! Private implementation details of public gather/scatter APIs.
-#[cfg(not(bootstrap))]
 use crate::simd::intrinsics;
 use crate::simd::{LaneCount, Simd, SupportedLaneCount};
-#[cfg(bootstrap)]
-use core::mem;
 
 /// A vector of *const T.
 #[derive(Debug, Copy, Clone)]
@@ -24,14 +21,6 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
-        #[cfg(bootstrap)]
-        // Safety: converting pointers to usize and vice-versa is safe
-        // (even if using that pointer is not)
-        unsafe {
-            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
-            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
-        }
-        #[cfg(not(bootstrap))]
         // Safety: this intrinsic doesn't have a precondition
         unsafe { intrinsics::simd_arith_offset(self, addend) }
     }
@@ -56,14 +45,6 @@ where
     #[inline]
     #[must_use]
     pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
-        #[cfg(bootstrap)]
-        // Safety: converting pointers to usize and vice-versa is safe
-        // (even if using that pointer is not)
-        unsafe {
-            let x: Simd<usize, LANES> = mem::transmute_copy(&self);
-            mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
-        }
-        #[cfg(not(bootstrap))]
         // Safety: this intrinsic doesn't have a precondition
         unsafe { intrinsics::simd_arith_offset(self, addend) }
     }

From 2e081db92aa3ee0a4563bc28ce01bdad5b1b2efd Mon Sep 17 00:00:00 2001
From: The Atelier <workingjubilee@gmail.com>
Date: Wed, 20 Jul 2022 17:23:46 -0700
Subject: [PATCH 32/32] Fix doctest imports using as_crate feature

Within core, `use self::` does not work to import these items.
And because core is not core_simd, neither does the existing `use`.
So, use this quirky hack instead, switching the import on a feature.
---
 crates/core_simd/Cargo.toml            |  3 ++-
 crates/core_simd/src/elements/float.rs | 16 +++++++++---
 crates/core_simd/src/elements/int.rs   | 36 +++++++++++++++++++-------
 crates/core_simd/src/elements/uint.rs  |  8 ++++--
 crates/core_simd/src/vector.rs         | 14 +++++++---
 5 files changed, 57 insertions(+), 20 deletions(-)

diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml
index 8877c6df66e..8a29cf15696 100644
--- a/crates/core_simd/Cargo.toml
+++ b/crates/core_simd/Cargo.toml
@@ -9,7 +9,8 @@ categories = ["hardware-support", "no-std"]
 license = "MIT OR Apache-2.0"
 
 [features]
-default = []
+default = ["as_crate"]
+as_crate = []
 std = []
 generic_const_exprs = []
 
diff --git a/crates/core_simd/src/elements/float.rs b/crates/core_simd/src/elements/float.rs
index 67e4454e5e1..d6022327055 100644
--- a/crates/core_simd/src/elements/float.rs
+++ b/crates/core_simd/src/elements/float.rs
@@ -113,7 +113,9 @@ pub trait SimdFloat: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::f32x2;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{f32x2, SimdFloat};
     /// let v = f32x2::from_array([1., 2.]);
     /// assert_eq!(v.reduce_sum(), 3.);
     /// ```
@@ -125,7 +127,9 @@ pub trait SimdFloat: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::f32x2;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{f32x2, SimdFloat};
     /// let v = f32x2::from_array([3., 4.]);
     /// assert_eq!(v.reduce_product(), 12.);
     /// ```
@@ -142,7 +146,9 @@ pub trait SimdFloat: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::f32x2;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{f32x2, SimdFloat};
     /// let v = f32x2::from_array([1., 2.]);
     /// assert_eq!(v.reduce_max(), 2.);
     ///
@@ -167,7 +173,9 @@ pub trait SimdFloat: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::f32x2;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{f32x2, SimdFloat};
     /// let v = f32x2::from_array([3., 7.]);
     /// assert_eq!(v.reduce_min(), 3.);
     ///
diff --git a/crates/core_simd/src/elements/int.rs b/crates/core_simd/src/elements/int.rs
index 787a0741146..9b8c37ed466 100644
--- a/crates/core_simd/src/elements/int.rs
+++ b/crates/core_simd/src/elements/int.rs
@@ -16,7 +16,9 @@ pub trait SimdInt: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
     /// use core::i32::{MIN, MAX};
     /// let x = Simd::from_array([MIN, 0, 1, MAX]);
     /// let max = Simd::splat(MAX);
@@ -32,7 +34,9 @@ pub trait SimdInt: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
     /// use core::i32::{MIN, MAX};
     /// let x = Simd::from_array([MIN, -2, -1, MAX]);
     /// let max = Simd::splat(MAX);
@@ -48,7 +52,9 @@ pub trait SimdInt: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
     /// use core::i32::{MIN, MAX};
     /// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
     /// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
@@ -61,7 +67,9 @@ pub trait SimdInt: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
     /// use core::i32::{MIN, MAX};
     /// let xs = Simd::from_array([MIN, -2, 0, 3]);
     /// let unsat = xs.abs();
@@ -77,7 +85,9 @@ pub trait SimdInt: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdInt};
     /// use core::i32::{MIN, MAX};
     /// let x = Simd::from_array([MIN, -2, 3, MAX]);
     /// let unsat = -x;
@@ -105,7 +115,9 @@ pub trait SimdInt: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::i32x4;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{i32x4, SimdInt};
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_sum(), 10);
     ///
@@ -121,7 +133,9 @@ pub trait SimdInt: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::i32x4;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{i32x4, SimdInt};
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_product(), 24);
     ///
@@ -137,7 +151,9 @@ pub trait SimdInt: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::i32x4;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{i32x4, SimdInt};
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_max(), 4);
     /// ```
@@ -149,7 +165,9 @@ pub trait SimdInt: Copy + Sealed {
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::i32x4;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{i32x4, SimdInt};
     /// let v = i32x4::from_array([1, 2, 3, 4]);
     /// assert_eq!(v.reduce_min(), 1);
     /// ```
diff --git a/crates/core_simd/src/elements/uint.rs b/crates/core_simd/src/elements/uint.rs
index f9d43a1d19b..21e7e76eb3d 100644
--- a/crates/core_simd/src/elements/uint.rs
+++ b/crates/core_simd/src/elements/uint.rs
@@ -11,7 +11,9 @@ pub trait SimdUint: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdUint};
     /// use core::u32::MAX;
     /// let x = Simd::from_array([2, 1, 0, MAX]);
     /// let max = Simd::splat(MAX);
@@ -27,7 +29,9 @@ pub trait SimdUint: Copy + Sealed {
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::Simd;
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdUint};
     /// use core::u32::MAX;
     /// let x = Simd::from_array([2, 1, 0, MAX]);
     /// let max = Simd::splat(MAX);
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
index 761151ab8b2..8661be938d5 100644
--- a/crates/core_simd/src/vector.rs
+++ b/crates/core_simd/src/vector.rs
@@ -173,7 +173,7 @@ where
     ///
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core::simd::{Simd, u32x4};
+    /// # use core::simd::u32x4;
     /// let source = vec![1, 2, 3, 4, 5, 6];
     /// let v = u32x4::from_slice(&source);
     /// assert_eq!(v.as_array(), &[1, 2, 3, 4]);
@@ -332,7 +332,9 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask};
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdPartialOrd, Mask};
     /// let vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 5]);
     /// let alt = Simd::from_array([-5, -4, -3, -2]);
@@ -389,7 +391,9 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core_simd::simd::{Simd, Mask};
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);
@@ -423,7 +427,9 @@ where
     /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
-    /// # use core_simd::simd::{Simd, SimdPartialOrd, Mask};
+    /// # #[cfg(feature = "as_crate")] use core_simd::simd;
+    /// # #[cfg(not(feature = "as_crate"))] use core::simd;
+    /// # use simd::{Simd, SimdPartialOrd, Mask};
     /// let mut vec: Vec<i32> = vec![10, 11, 12, 13, 14, 15, 16, 17, 18];
     /// let idxs = Simd::from_array([9, 3, 0, 0]);
     /// let vals = Simd::from_array([-27, 82, -41, 124]);