Merge commit '3383cfbd3572465febc7a8f816a46304373de46a' into sync-from-portable-simd-2025-01-18

Caleb Zulawski 2025-01-18 15:37:14 -05:00
commit 1ff6c555bb
31 changed files with 865 additions and 388 deletions

View File

@ -9,6 +9,7 @@ on:
env:
CARGO_NET_RETRY: 10
RUSTUP_MAX_RETRIES: 10
PROPTEST_CASES: 64
jobs:
rustfmt:
@ -16,12 +17,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- name: Setup Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
rustup component add rustfmt
- uses: actions/checkout@v4
- name: Run rustfmt
run: cargo fmt --all -- --check
@ -37,7 +33,9 @@ jobs:
- i686-unknown-linux-gnu
- i586-unknown-linux-gnu
- aarch64-unknown-linux-gnu
- arm64ec-pc-windows-msvc
- armv7-unknown-linux-gnueabihf
- loongarch64-unknown-linux-gnu
# non-nightly since https://github.com/rust-lang/rust/pull/113274
# - mips-unknown-linux-gnu
# - mips64-unknown-linux-gnuabi64
@ -49,13 +47,9 @@ jobs:
- wasm32-unknown-unknown
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
rustup target add ${{ matrix.target }}
rustup component add clippy
run: rustup target add ${{ matrix.target }}
- name: Run Clippy
run: cargo clippy --all-targets --target ${{ matrix.target }}
@ -65,26 +59,19 @@ jobs:
strategy:
fail-fast: false
matrix:
target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu, x86_64-apple-darwin]
target: [x86_64-pc-windows-msvc, i686-pc-windows-msvc, i586-pc-windows-msvc, x86_64-unknown-linux-gnu]
# `default` means we use the default target config for the target,
# `native` means we run with `-Ctarget-cpu=native`, and anything else is
# an arg to `-Ctarget-feature`
target_feature: [default, native, +sse3, +ssse3, +sse4.1, +sse4.2, +avx, +avx2]
exclude:
# The macos runners seem to only reliably support up to `avx`.
- { target: x86_64-apple-darwin, target_feature: +avx2 }
# These features are statically known to be present for all 64 bit
# macs, and thus are covered by the `default` test
- { target: x86_64-apple-darwin, target_feature: +sse3 }
- { target: x86_64-apple-darwin, target_feature: +ssse3 }
# -Ctarget-cpu=native sounds like bad-news if target != host
- { target: i686-pc-windows-msvc, target_feature: native }
- { target: i586-pc-windows-msvc, target_feature: native }
include:
# Populate the `matrix.os` field
- { target: x86_64-apple-darwin, os: macos-latest }
- { target: x86_64-unknown-linux-gnu, os: ubuntu-latest }
- { target: x86_64-pc-windows-msvc, os: windows-latest }
- { target: i686-pc-windows-msvc, os: windows-latest }
@ -98,12 +85,9 @@ jobs:
# avx512vl, but occasionally doesn't. Maybe one day we can enable it.
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
rustup target add ${{ matrix.target }}
run: rustup target add ${{ matrix.target }}
- name: Configure RUSTFLAGS
shell: bash
@ -145,6 +129,35 @@ jobs:
run: cargo doc --verbose --target=${{ matrix.target }}
env:
RUSTDOCFLAGS: -Dwarnings
macos-tests:
name: ${{ matrix.target }}
runs-on: macos-latest
strategy:
fail-fast: false
matrix:
target:
- aarch64-apple-darwin
- x86_64-apple-darwin
steps:
- uses: actions/checkout@v4
- name: Setup Rust
run: rustup target add ${{ matrix.target }}
- name: Configure RUSTFLAGS
shell: bash
run: echo "RUSTFLAGS=-Dwarnings" >> $GITHUB_ENV
- name: Test (debug)
run: cargo test --verbose --target=${{ matrix.target }}
- name: Test (release)
run: cargo test --verbose --target=${{ matrix.target }} --release
- name: Generate docs
run: cargo doc --verbose --target=${{ matrix.target }}
env:
RUSTDOCFLAGS: -Dwarnings
wasm-tests:
name: "wasm (firefox, ${{ matrix.name }})"
@ -155,11 +168,7 @@ jobs:
- { name: default, RUSTFLAGS: "" }
- { name: simd128, RUSTFLAGS: "-C target-feature=+simd128" }
steps:
- uses: actions/checkout@v2
- name: Setup Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
- uses: actions/checkout@v4
- name: Install wasm-pack
run: curl https://rustwasm.github.io/wasm-pack/installer/init.sh -sSf | sh
- name: Test (debug)
@ -174,6 +183,8 @@ jobs:
cross-tests:
name: "${{ matrix.target_feature }} on ${{ matrix.target }} (via cross)"
runs-on: ubuntu-latest
env:
PROPTEST_CASES: 16
strategy:
fail-fast: false
@ -185,6 +196,7 @@ jobs:
- powerpc-unknown-linux-gnu
- powerpc64le-unknown-linux-gnu # includes altivec by default
- riscv64gc-unknown-linux-gnu
- loongarch64-unknown-linux-gnu
# MIPS uses a nonstandard binary representation for NaNs which makes it worth testing
# non-nightly since https://github.com/rust-lang/rust/pull/113274
# - mips-unknown-linux-gnu
@ -201,24 +213,14 @@ jobs:
# - { target: riscv64gc-unknown-linux-gnu, target_feature: "+v,+zvl128b" }
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
rustup target add ${{ matrix.target }}
rustup component add rust-src
run: rustup target add ${{ matrix.target }}
- name: Install Cross
# Equivalent to `cargo install cross`, but downloading a prebuilt
# binary. Ideally we wouldn't hardcode a version, but the version number
# being part of the tarball means we can't just use the download/latest
# URL :(
# Install the latest git version for newer targets.
run: |
CROSS_URL=https://github.com/cross-rs/cross/releases/download/v0.2.5/cross-x86_64-unknown-linux-gnu.tar.gz
mkdir -p "$HOME/.bin"
curl -sfSL --retry-delay 10 --retry 5 "${CROSS_URL}" | tar zxf - -C "$HOME/.bin"
echo "$HOME/.bin" >> $GITHUB_PATH
cargo install cross --git https://github.com/cross-rs/cross --rev 4090beca3cfffa44371a5bba524de3a578aa46c3
- name: Configure Emulated CPUs
run: |
@ -242,34 +244,11 @@ jobs:
- name: Test (release)
run: cross test --verbose --target=${{ matrix.target }} --release
features:
name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
miri:
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
simd:
- ""
- "avx512"
features:
- ""
- "--features std"
- "--features all_lane_counts"
- "--all-features"
env:
PROPTEST_CASES: 16
steps:
- uses: actions/checkout@v2
- name: Setup Rust
run: |
rustup update nightly --no-self-update
rustup default nightly
- name: Detect AVX512
run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
- name: Check build
if: ${{ matrix.simd == '' }}
run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
- name: Check AVX
if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
run: |
echo "Found AVX features: $CPU_FEATURE"
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}
- uses: actions/checkout@v4
- name: Test (Miri)
run: cargo miri test

View File

@ -12,7 +12,7 @@ jobs:
steps:
- name: Checkout Repository
uses: actions/checkout@v1
uses: actions/checkout@v4
- name: Setup Rust
run: |

View File

@ -1 +1,2 @@
/target
git-subtree.sh

View File

@ -5,3 +5,9 @@ members = [
"crates/std_float",
"crates/test_helpers",
]
[profile.test.package."*"]
opt-level = 2
[profile.test.package.test_helpers]
opt-level = 2

View File

@ -0,0 +1,2 @@
[build.env]
passthrough = ["PROPTEST_CASES"]

View File

@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"]
license = "MIT OR Apache-2.0"
[features]
default = ["as_crate"]
default = ["as_crate", "std"]
as_crate = []
std = []
all_lane_counts = []
[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen = "0.2"

View File

@ -33,10 +33,8 @@ macro_rules! supported_lane_count {
};
}
supported_lane_count!(1, 2, 4, 8, 16, 32, 64);
#[cfg(feature = "all_lane_counts")]
supported_lane_count!(
3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
);

View File

@ -1,7 +1,6 @@
#![no_std]
#![feature(
const_refs_to_cell,
const_mut_refs,
const_eval_select,
convert_float_to_int,
core_intrinsics,
decl_macro,
@ -26,6 +25,7 @@
all(target_arch = "arm", target_feature = "v7"),
feature(stdarch_arm_neon_intrinsics)
)]
#![cfg_attr(target_arch = "loongarch64", feature(stdarch_loongarch))]
#![cfg_attr(
any(target_arch = "powerpc", target_arch = "powerpc64"),
feature(stdarch_powerpc)

View File

@ -308,48 +308,6 @@ where
Self(mask_impl::Mask::from_bitmask_integer(bitmask))
}
/// Creates a bitmask vector from a mask.
///
/// Each bit is set if the corresponding element in the mask is `true`.
/// The remaining bits are unset.
///
/// The bits are packed into the first N bits of the vector:
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::mask32x8;
/// let mask = mask32x8::from_array([true, false, true, false, false, false, true, false]);
/// assert_eq!(mask.to_bitmask_vector()[0], 0b01000101);
/// ```
#[inline]
#[must_use = "method returns a new integer and does not mutate the original value"]
pub fn to_bitmask_vector(self) -> Simd<u8, N> {
self.0.to_bitmask_vector()
}
/// Creates a mask from a bitmask vector.
///
/// For each bit, if it is set, the corresponding element in the mask is set to `true`.
///
/// The bits are packed into the first N bits of the vector:
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::{mask32x8, u8x8};
/// let bitmask = u8x8::from_array([0b01000101, 0, 0, 0, 0, 0, 0, 0]);
/// assert_eq!(
/// mask32x8::from_bitmask_vector(bitmask),
/// mask32x8::from_array([true, false, true, false, false, false, true, false]),
/// );
/// ```
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
Self(mask_impl::Mask::from_bitmask_vector(bitmask))
}
/// Finds the index of the first set element.
///
/// ```

View File

@ -122,23 +122,6 @@ where
unsafe { Self(core::intrinsics::simd::simd_bitmask(value), PhantomData) }
}
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
pub fn to_bitmask_vector(self) -> Simd<u8, N> {
let mut bitmask = Simd::splat(0);
bitmask.as_mut_array()[..self.0.as_ref().len()].copy_from_slice(self.0.as_ref());
bitmask
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
let len = bytes.as_ref().len();
bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
Self(bytes, PhantomData)
}
#[inline]
pub fn to_bitmask_integer(self) -> u64 {
let mut bitmask = [0u8; 8];

View File

@ -140,62 +140,6 @@ where
unsafe { Mask(core::intrinsics::simd::simd_cast(self.0)) }
}
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
pub fn to_bitmask_vector(self) -> Simd<u8, N> {
let mut bitmask = Simd::splat(0);
// Safety: Bytes is the right size array
unsafe {
// Compute the bitmask
let mut bytes: <LaneCount<N> as SupportedLaneCount>::BitMask =
core::intrinsics::simd::simd_bitmask(self.0);
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
for x in bytes.as_mut() {
*x = x.reverse_bits()
}
if N % 8 > 0 {
bytes.as_mut()[N / 8] >>= 8 - N % 8;
}
}
bitmask.as_mut_array()[..bytes.as_ref().len()].copy_from_slice(bytes.as_ref());
}
bitmask
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask_vector(bitmask: Simd<u8, N>) -> Self {
let mut bytes = <LaneCount<N> as SupportedLaneCount>::BitMask::default();
// Safety: Bytes is the right size array
unsafe {
let len = bytes.as_ref().len();
bytes.as_mut().copy_from_slice(&bitmask.as_array()[..len]);
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
for x in bytes.as_mut() {
*x = x.reverse_bits();
}
if N % 8 > 0 {
bytes.as_mut()[N / 8] >>= 8 - N % 8;
}
}
// Compute the regular mask
Self::from_int_unchecked(core::intrinsics::simd::simd_select_bitmask(
bytes,
Self::splat(true).to_int(),
Self::splat(false).to_int(),
))
}
}
#[inline]
unsafe fn to_bitmask_impl<U: ReverseBits, const M: usize>(self) -> U
where
@ -283,7 +227,7 @@ where
}
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn all(self) -> bool {
// Safety: use `self` as an integer vector
unsafe { core::intrinsics::simd::simd_reduce_all(self.to_int()) }

View File

@ -77,7 +77,7 @@ macro_rules! int_divrem_guard {
( $lhs:ident,
$rhs:ident,
{ const PANIC_ZERO: &'static str = $zero:literal;
$simd_call:ident
$simd_call:ident, $op:tt
},
$int:ident ) => {
if $rhs.simd_eq(Simd::splat(0 as _)).any() {
@ -96,8 +96,23 @@ macro_rules! int_divrem_guard {
// Nice base case to make it easy to const-fold away the other branch.
$rhs
};
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
// aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
// these operations aren't vectorized on aarch64 anyway
#[cfg(target_arch = "aarch64")]
{
let mut out = Simd::splat(0 as _);
for i in 0..Self::LEN {
out[i] = $lhs[i] $op rhs[i];
}
out
}
#[cfg(not(target_arch = "aarch64"))]
{
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
}
}
};
}
@ -205,14 +220,14 @@ for_base_ops! {
impl Div::div {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to divide by zero";
simd_div
simd_div, /
}
}
impl Rem::rem {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
simd_rem
simd_rem, %
}
}
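The guard above panics if any divisor lane is zero and, on aarch64, falls back to a per-lane scalar loop (the `$op` token); on other targets it forwards to the vector intrinsic. A minimal usage sketch, not part of the patch, assuming the nightly `core::simd` API:

#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let lhs = Simd::from_array([10i32, 21, -30, 40]);
    let rhs = Simd::from_array([2i32, 4, 5, 8]);
    // Elementwise division and remainder; a zero in any lane of `rhs`
    // would hit the guard and panic with "attempt to divide by zero".
    assert_eq!(lhs / rhs, Simd::from_array([5, 5, -6, 5]));
    assert_eq!(lhs % rhs, Simd::from_array([0, 1, 0, 0]));
}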

View File

@ -12,7 +12,7 @@ pub trait SimdPartialEq {
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_eq(self, other: Self) -> Self::Mask;
/// Test if each element is equal to the corresponding element in `other`.
/// Test if each element is not equal to the corresponding element in `other`.
#[must_use = "method returns a new mask and does not mutate the original value"]
fn simd_ne(self, other: Self) -> Self::Mask;
}

View File

@ -255,6 +255,7 @@ macro_rules! impl_trait {
type Bits = Simd<$bits_ty, N>;
type Cast<T: SimdElement> = Simd<T, N>;
#[cfg(not(target_arch = "aarch64"))]
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
{
@ -262,6 +263,33 @@ macro_rules! impl_trait {
unsafe { core::intrinsics::simd::simd_as(self) }
}
// https://github.com/llvm/llvm-project/issues/94694
#[cfg(target_arch = "aarch64")]
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
{
const { assert!(N <= 64) };
if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 {
// Safety: supported types are guaranteed by SimdCast
unsafe { core::intrinsics::simd::simd_as(self) }
} else if N < 4 {
let x = self.resize::<4>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 8 {
let x = self.resize::<8>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 16 {
let x = self.resize::<16>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 32 {
let x = self.resize::<32>(Default::default()).cast();
x.resize::<N>(x[0])
} else {
let x = self.resize::<64>(Default::default()).cast();
x.resize::<N>(x[0])
}
}
#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
@ -391,7 +419,7 @@ macro_rules! impl_trait {
self.as_array().iter().sum()
} else {
// Safety: `self` is a float vector
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, 0.) }
unsafe { core::intrinsics::simd::simd_reduce_add_ordered(self, -0.) }
}
}
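The aarch64-specific `cast` above works around an LLVM lowering issue by widening unsupported lane counts to the next power of two and resizing back, and the `reduce_sum` change seeds the ordered reduction with `-0.0`, the additive identity for IEEE floats (seeding with `+0.0` would turn an all-`-0.0` sum into `+0.0`). A hedged sketch of the lanewise `cast` semantics, assuming the nightly `core::simd` prelude:

#![feature(portable_simd)]
use core::simd::prelude::*;

fn main() {
    let x = Simd::<f32, 4>::from_array([1.5, -2.7, 3.0, 1e10]);
    // `cast` converts each lane with `as` semantics, so float -> int
    // conversions saturate instead of wrapping or trapping.
    let y = x.cast::<i32>();
    assert_eq!(y.to_array(), [1, -2, 3, i32::MAX]);
}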

View File

@ -1,6 +1,6 @@
use super::sealed::Sealed;
use crate::simd::{
cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
cmp::SimdOrd, cmp::SimdPartialOrd, num::SimdUint, LaneCount, Mask, Simd, SimdCast, SimdElement,
SupportedLaneCount,
};
@ -70,11 +70,27 @@ pub trait SimdInt: Copy + Sealed {
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let xs = Simd::from_array([MIN, MIN +1, -5, 0]);
/// let xs = Simd::from_array([MIN, MIN + 1, -5, 0]);
/// assert_eq!(xs.abs(), Simd::from_array([MIN, MAX, 5, 0]));
/// ```
fn abs(self) -> Self;
/// Lanewise absolute difference.
/// Every element becomes the absolute difference of `self` and `second`.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::prelude::*;
/// use core::i32::{MIN, MAX};
/// let a = Simd::from_array([MIN, MAX, 100, -100]);
/// let b = Simd::from_array([MAX, MIN, -80, -120]);
/// assert_eq!(a.abs_diff(b), Simd::from_array([u32::MAX, u32::MAX, 180, 20]));
/// ```
fn abs_diff(self, second: Self) -> Self::Unsigned;
/// Lanewise saturating absolute value, implemented in Rust.
/// As abs(), except the MIN value becomes MAX instead of itself.
///
@ -203,6 +219,12 @@ pub trait SimdInt: Copy + Sealed {
/// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
fn reverse_bits(self) -> Self;
/// Returns the number of ones in the binary representation of each element.
fn count_ones(self) -> Self::Unsigned;
/// Returns the number of zeros in the binary representation of each element.
fn count_zeros(self) -> Self::Unsigned;
/// Returns the number of leading zeros in the binary representation of each element.
fn leading_zeros(self) -> Self::Unsigned;
@ -259,6 +281,13 @@ macro_rules! impl_trait {
(self^m) - m
}
#[inline]
fn abs_diff(self, second: Self) -> Self::Unsigned {
let max = self.simd_max(second);
let min = self.simd_min(second);
(max - min).cast()
}
#[inline]
fn saturating_abs(self) -> Self {
// arith shift for -1 or 0 mask based on sign bit, giving 2s complement
@ -344,6 +373,16 @@ macro_rules! impl_trait {
unsafe { core::intrinsics::simd::simd_bitreverse(self) }
}
#[inline]
fn count_ones(self) -> Self::Unsigned {
self.cast::<$unsigned>().count_ones()
}
#[inline]
fn count_zeros(self) -> Self::Unsigned {
self.cast::<$unsigned>().count_zeros()
}
#[inline]
fn leading_zeros(self) -> Self::Unsigned {
self.cast::<$unsigned>().leading_zeros()
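The signed bit-count operations delegate to the unsigned implementations through a lanewise cast, so the results come back as the corresponding unsigned vector type. An illustrative sketch, not from the patch, assuming the methods added above and the nightly prelude:

#![feature(portable_simd)]
use core::simd::prelude::*;

fn main() {
    let x = Simd::<i32, 4>::from_array([0, -1, 8, i32::MIN]);
    // Per-lane popcounts and leading-zero counts, returned as `Simd<u32, 4>`.
    assert_eq!(x.count_ones().to_array(), [0, 32, 1, 1]);
    assert_eq!(x.leading_zeros().to_array(), [32, 0, 28, 0]);
}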

View File

@ -1,5 +1,5 @@
use super::sealed::Sealed;
use crate::simd::{LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
use crate::simd::{cmp::SimdOrd, LaneCount, Simd, SimdCast, SimdElement, SupportedLaneCount};
/// Operations on SIMD vectors of unsigned integers.
pub trait SimdUint: Copy + Sealed {
@ -57,6 +57,22 @@ pub trait SimdUint: Copy + Sealed {
/// assert_eq!(sat, Simd::splat(0));
fn saturating_sub(self, second: Self) -> Self;
/// Lanewise absolute difference.
/// Every element becomes the absolute difference of `self` and `second`.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::prelude::*;
/// use core::u32::MAX;
/// let a = Simd::from_array([0, MAX, 100, 20]);
/// let b = Simd::from_array([MAX, 0, 80, 200]);
/// assert_eq!(a.abs_diff(b), Simd::from_array([MAX, MAX, 20, 180]));
/// ```
fn abs_diff(self, second: Self) -> Self;
/// Returns the sum of the elements of the vector, with wrapping addition.
fn reduce_sum(self) -> Self::Scalar;
@ -85,6 +101,12 @@ pub trait SimdUint: Copy + Sealed {
/// The least significant bit becomes the most significant bit, second least-significant bit becomes second most-significant bit, etc.
fn reverse_bits(self) -> Self;
/// Returns the number of ones in the binary representation of each element.
fn count_ones(self) -> Self;
/// Returns the number of zeros in the binary representation of each element.
fn count_zeros(self) -> Self;
/// Returns the number of leading zeros in the binary representation of each element.
fn leading_zeros(self) -> Self;
@ -138,6 +160,13 @@ macro_rules! impl_trait {
unsafe { core::intrinsics::simd::simd_saturating_sub(self, second) }
}
#[inline]
fn abs_diff(self, second: Self) -> Self {
let max = self.simd_max(second);
let min = self.simd_min(second);
max - min
}
#[inline]
fn reduce_sum(self) -> Self::Scalar {
// Safety: `self` is an integer vector
@ -192,6 +221,17 @@ macro_rules! impl_trait {
unsafe { core::intrinsics::simd::simd_bitreverse(self) }
}
#[inline]
fn count_ones(self) -> Self {
// Safety: `self` is an integer vector
unsafe { core::intrinsics::simd::simd_ctpop(self) }
}
#[inline]
fn count_zeros(self) -> Self {
(!self).count_ones()
}
#[inline]
fn leading_zeros(self) -> Self {
// Safety: `self` is an integer vector

View File

@ -42,6 +42,19 @@ pub trait SimdConstPtr: Copy + Sealed {
/// Equivalent to calling [`pointer::addr`] on each element.
fn addr(self) -> Self::Usize;
/// Converts an address to a pointer without giving it any provenance.
///
/// Without provenance, this pointer is not associated with any actual allocation. Such a
/// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
/// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
/// are little more than a usize address in disguise.
///
/// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
/// previously exposed provenance.
///
/// Equivalent to calling [`core::ptr::without_provenance`] on each element.
fn without_provenance(addr: Self::Usize) -> Self;
/// Creates a new pointer with the given address.
///
/// This performs the same operation as a cast, but copies the *address-space* and
@ -118,6 +131,14 @@ where
unsafe { core::mem::transmute_copy(&self) }
}
#[inline]
fn without_provenance(addr: Self::Usize) -> Self {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
// SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
// provenance).
unsafe { core::mem::transmute_copy(&addr) }
}
#[inline]
fn with_addr(self, addr: Self::Usize) -> Self {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
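A hedged usage sketch of the new constructor (names as declared in the trait above; assumes the nightly prelude): each address lane becomes a pointer with that address and no provenance, valid only for zero-sized accesses.

#![feature(portable_simd)]
use core::simd::prelude::*;

fn main() {
    let addrs = Simd::<usize, 4>::from_array([0x1000, 0x2000, 0x3000, 0x4000]);
    // Lanewise equivalent of `core::ptr::without_provenance`.
    let ptrs = Simd::<*const u8, 4>::without_provenance(addrs);
    assert_eq!(ptrs.addr(), addrs);
}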

View File

@ -39,6 +39,19 @@ pub trait SimdMutPtr: Copy + Sealed {
/// Equivalent to calling [`pointer::addr`] on each element.
fn addr(self) -> Self::Usize;
/// Converts an address to a pointer without giving it any provenance.
///
/// Without provenance, this pointer is not associated with any actual allocation. Such a
/// no-provenance pointer may be used for zero-sized memory accesses (if suitably aligned), but
/// non-zero-sized memory accesses with a no-provenance pointer are UB. No-provenance pointers
/// are little more than a usize address in disguise.
///
/// This is different from [`Self::with_exposed_provenance`], which creates a pointer that picks up a
/// previously exposed provenance.
///
/// Equivalent to calling [`core::ptr::without_provenance`] on each element.
fn without_provenance(addr: Self::Usize) -> Self;
/// Creates a new pointer with the given address.
///
/// This performs the same operation as a cast, but copies the *address-space* and
@ -115,6 +128,14 @@ where
unsafe { core::mem::transmute_copy(&self) }
}
#[inline]
fn without_provenance(addr: Self::Usize) -> Self {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.
// SAFETY: Integer-to-pointer transmutes are valid (if you are okay with not getting any
// provenance).
unsafe { core::mem::transmute_copy(&addr) }
}
#[inline]
fn with_addr(self, addr: Self::Usize) -> Self {
// FIXME(strict_provenance_magic): I am magic and should be a compiler intrinsic.

View File

@ -155,8 +155,7 @@ pub trait Swizzle<const N: usize> {
/// Creates a new mask from the elements of `mask`.
///
/// Element `i` of the output is `concat[Self::INDEX[i]]`, where `concat` is the concatenation of
/// `first` and `second`.
/// Element `i` of the output is `mask[Self::INDEX[i]]`.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original inputs"]
fn swizzle_mask<T, const M: usize>(mask: Mask<T, M>) -> Mask<T, N>
@ -260,6 +259,50 @@ where
Rotate::<OFFSET>::swizzle(self)
}
/// Shifts the vector elements to the left by `OFFSET`, filling in with
/// `padding` from the right.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn shift_elements_left<const OFFSET: usize>(self, padding: T) -> Self {
struct Shift<const OFFSET: usize>;
impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> {
const INDEX: [usize; N] = const {
let mut index = [N; N];
let mut i = 0;
while i + OFFSET < N {
index[i] = i + OFFSET;
i += 1;
}
index
};
}
Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding))
}
/// Shifts the vector elements to the right by `OFFSET`, filling in with
/// `padding` from the left.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn shift_elements_right<const OFFSET: usize>(self, padding: T) -> Self {
struct Shift<const OFFSET: usize>;
impl<const OFFSET: usize, const N: usize> Swizzle<N> for Shift<OFFSET> {
const INDEX: [usize; N] = const {
let mut index = [N; N];
let mut i = OFFSET;
while i < N {
index[i] = i - OFFSET;
i += 1;
}
index
};
}
Shift::<OFFSET>::concat_swizzle(self, Simd::splat(padding))
}
/// Interleave two vectors.
///
/// The resulting vectors contain elements taken alternatively from `self` and `other`, first
@ -320,7 +363,9 @@ where
///
/// ```
/// # #![feature(portable_simd)]
/// # use core::simd::Simd;
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::Simd;
/// let a = Simd::from_array([0, 4, 1, 5]);
/// let b = Simd::from_array([2, 6, 3, 7]);
/// let (x, y) = a.deinterleave(b);
@ -391,4 +436,210 @@ where
}
Resize::<N>::concat_swizzle(self, Simd::splat(value))
}
/// Extract a vector from another vector.
///
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::u32x4;
/// let x = u32x4::from_array([0, 1, 2, 3]);
/// assert_eq!(x.extract::<1, 2>().to_array(), [1, 2]);
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn extract<const START: usize, const LEN: usize>(self) -> Simd<T, LEN>
where
LaneCount<LEN>: SupportedLaneCount,
{
struct Extract<const N: usize, const START: usize>;
impl<const N: usize, const START: usize, const LEN: usize> Swizzle<LEN> for Extract<N, START> {
const INDEX: [usize; LEN] = const {
assert!(START + LEN <= N, "index out of bounds");
let mut index = [0; LEN];
let mut i = 0;
while i < LEN {
index[i] = START + i;
i += 1;
}
index
};
}
Extract::<N, START>::swizzle(self)
}
}
impl<T, const N: usize> Mask<T, N>
where
T: MaskElement,
LaneCount<N>: SupportedLaneCount,
{
/// Reverse the order of the elements in the mask.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn reverse(self) -> Self {
// Safety: swizzles are safe for masks
unsafe { Self::from_int_unchecked(self.to_int().reverse()) }
}
/// Rotates the mask such that the first `OFFSET` elements of the mask move to the end
/// while the last `self.len() - OFFSET` elements move to the front. After calling `rotate_elements_left`,
/// the element previously at index `OFFSET` will become the first element in the mask.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn rotate_elements_left<const OFFSET: usize>(self) -> Self {
// Safety: swizzles are safe for masks
unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_left::<OFFSET>()) }
}
/// Rotates the mask such that the first `self.len() - OFFSET` elements of the mask move to
/// the end while the last `OFFSET` elements move to the front. After calling `rotate_elements_right`,
/// the element previously at index `self.len() - OFFSET` will become the first element in the mask.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn rotate_elements_right<const OFFSET: usize>(self) -> Self {
// Safety: swizzles are safe for masks
unsafe { Self::from_int_unchecked(self.to_int().rotate_elements_right::<OFFSET>()) }
}
/// Shifts the mask elements to the left by `OFFSET`, filling in with
/// `padding` from the right.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original inputs"]
pub fn shift_elements_left<const OFFSET: usize>(self, padding: bool) -> Self {
// Safety: swizzles are safe for masks
unsafe {
Self::from_int_unchecked(self.to_int().shift_elements_left::<OFFSET>(if padding {
T::TRUE
} else {
T::FALSE
}))
}
}
/// Shifts the mask elements to the right by `OFFSET`, filling in with
/// `padding` from the left.
#[inline]
#[must_use = "method returns a new mask and does not mutate the original inputs"]
pub fn shift_elements_right<const OFFSET: usize>(self, padding: bool) -> Self {
// Safety: swizzles are safe for masks
unsafe {
Self::from_int_unchecked(self.to_int().shift_elements_right::<OFFSET>(if padding {
T::TRUE
} else {
T::FALSE
}))
}
}
/// Interleave two masks.
///
/// The resulting masks contain elements taken alternatively from `self` and `other`, first
/// filling the first result, and then the second.
///
/// The reverse of this operation is [`Mask::deinterleave`].
///
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::mask32x4;
/// let a = mask32x4::from_array([false, true, false, true]);
/// let b = mask32x4::from_array([false, false, true, true]);
/// let (x, y) = a.interleave(b);
/// assert_eq!(x.to_array(), [false, false, true, false]);
/// assert_eq!(y.to_array(), [false, true, true, true]);
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn interleave(self, other: Self) -> (Self, Self) {
let (lo, hi) = self.to_int().interleave(other.to_int());
// Safety: swizzles are safe for masks
unsafe { (Self::from_int_unchecked(lo), Self::from_int_unchecked(hi)) }
}
/// Deinterleave two masks.
///
/// The first result takes every other element of `self` and then `other`, starting with
/// the first element.
///
/// The second result takes every other element of `self` and then `other`, starting with
/// the second element.
///
/// The reverse of this operation is [`Mask::interleave`].
///
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::mask32x4;
/// let a = mask32x4::from_array([false, true, false, true]);
/// let b = mask32x4::from_array([false, false, true, true]);
/// let (x, y) = a.deinterleave(b);
/// assert_eq!(x.to_array(), [false, false, false, true]);
/// assert_eq!(y.to_array(), [true, true, false, true]);
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn deinterleave(self, other: Self) -> (Self, Self) {
let (even, odd) = self.to_int().deinterleave(other.to_int());
// Safety: swizzles are safe for masks
unsafe {
(
Self::from_int_unchecked(even),
Self::from_int_unchecked(odd),
)
}
}
/// Resize a mask.
///
/// If `M` > `N`, extends the length of a mask, setting the new elements to `value`.
/// If `M` < `N`, truncates the mask to the first `M` elements.
///
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::mask32x4;
/// let x = mask32x4::from_array([false, true, true, false]);
/// assert_eq!(x.resize::<8>(true).to_array(), [false, true, true, false, true, true, true, true]);
/// assert_eq!(x.resize::<2>(true).to_array(), [false, true]);
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn resize<const M: usize>(self, value: bool) -> Mask<T, M>
where
LaneCount<M>: SupportedLaneCount,
{
// Safety: swizzles are safe for masks
unsafe {
Mask::<T, M>::from_int_unchecked(self.to_int().resize::<M>(if value {
T::TRUE
} else {
T::FALSE
}))
}
}
/// Extract a mask from another mask.
///
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "as_crate")] use core_simd::simd;
/// # #[cfg(not(feature = "as_crate"))] use core::simd;
/// # use simd::mask32x4;
/// let x = mask32x4::from_array([false, true, true, false]);
/// assert_eq!(x.extract::<1, 2>().to_array(), [true, true]);
/// ```
#[inline]
#[must_use = "method returns a new vector and does not mutate the original inputs"]
pub fn extract<const START: usize, const LEN: usize>(self) -> Mask<T, LEN>
where
LaneCount<LEN>: SupportedLaneCount,
{
// Safety: swizzles are safe for masks
unsafe { Mask::<T, LEN>::from_int_unchecked(self.to_int().extract::<START, LEN>()) }
}
}
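The mask shift helpers above reuse the integer-vector swizzles via `to_int`/`from_int_unchecked`, translating the `bool` padding into `T::TRUE`/`T::FALSE`. A brief illustrative sketch, not part of the patch, using the nightly `mask32x4` alias:

#![feature(portable_simd)]
use core::simd::mask32x4;

fn main() {
    let m = mask32x4::from_array([true, false, true, true]);
    // Shift lanes left by one, filling the vacated lane with `false`.
    assert_eq!(m.shift_elements_left::<1>(false).to_array(), [false, true, true, false]);
    // Shift lanes right by two, filling the first two lanes with `true`.
    assert_eq!(m.shift_elements_right::<2>(true).to_array(), [true, true, true, false]);
}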

View File

@ -59,15 +59,40 @@ where
target_endian = "little"
))]
16 => transize(vqtbl1q_u8, self, idxs),
#[cfg(all(
target_arch = "arm",
target_feature = "v7",
target_feature = "neon",
target_endian = "little"
))]
16 => transize(armv7_neon_swizzle_u8x16, self, idxs),
#[cfg(all(target_feature = "avx2", not(target_feature = "avx512vbmi")))]
32 => transize(avx2_pshufb, self, idxs),
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
32 => transize(x86::_mm256_permutexvar_epi8, zeroing_idxs(idxs), self),
// Notable absence: avx512bw shuffle
// If avx512bw is available, odds of avx512vbmi are good
// FIXME: initial AVX512VBMI variant didn't actually pass muster
// #[cfg(target_feature = "avx512vbmi")]
// 64 => transize(x86::_mm512_permutexvar_epi8, self, idxs),
32 => {
// Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
let swizzler = |bytes, idxs| {
let mask = x86::_mm256_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
idxs,
Simd::<u8, 32>::splat(N as u8).into(),
);
x86::_mm256_maskz_permutexvar_epi8(mask, idxs, bytes)
};
transize(swizzler, self, idxs)
}
// Notable absence: avx512bw pshufb shuffle
#[cfg(all(target_feature = "avx512vl", target_feature = "avx512vbmi"))]
64 => {
// Unlike vpshufb, vpermb doesn't zero out values in the result based on the index high bit
let swizzler = |bytes, idxs| {
let mask = x86::_mm512_cmp_epu8_mask::<{ x86::_MM_CMPINT_LT }>(
idxs,
Simd::<u8, 64>::splat(N as u8).into(),
);
x86::_mm512_maskz_permutexvar_epi8(mask, idxs, bytes)
};
transize(swizzler, self, idxs)
}
_ => {
let mut array = [0; N];
for (i, k) in idxs.to_array().into_iter().enumerate() {
@ -82,6 +107,28 @@ where
}
}
/// armv7 neon supports swizzling `u8x16` by swizzling two u8x8 blocks
/// with a u8x8x2 lookup table.
///
/// # Safety
/// This requires armv7 neon to work
#[cfg(all(
target_arch = "arm",
target_feature = "v7",
target_feature = "neon",
target_endian = "little"
))]
unsafe fn armv7_neon_swizzle_u8x16(bytes: Simd<u8, 16>, idxs: Simd<u8, 16>) -> Simd<u8, 16> {
use core::arch::arm::{uint8x8x2_t, vcombine_u8, vget_high_u8, vget_low_u8, vtbl2_u8};
// SAFETY: Caller promised arm neon support
unsafe {
let bytes = uint8x8x2_t(vget_low_u8(bytes.into()), vget_high_u8(bytes.into()));
let lo = vtbl2_u8(bytes, vget_low_u8(idxs.into()));
let hi = vtbl2_u8(bytes, vget_high_u8(idxs.into()));
vcombine_u8(lo, hi).into()
}
}
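Both new paths preserve `swizzle_dyn`'s behavior that an out-of-range index yields zero: `vtbl2_u8` does this natively, while the AVX-512 branches add the `_mm*_cmp_epu8_mask` comparison above because `vpermb`, unlike `vpshufb`, does not zero lanes based on the index high bit. A usage sketch of that semantic, assuming nightly `core::simd`:

#![feature(portable_simd)]
use core::simd::u8x16;

fn main() {
    let table = u8x16::from_array(*b"abcdefghijklmnop");
    let idxs = u8x16::from_array([3, 1, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
    let out = table.swizzle_dyn(idxs);
    // In-range indices pick the corresponding byte; 255 is out of range and yields 0.
    assert_eq!(out[0], b'd');
    assert_eq!(out[1], b'b');
    assert_eq!(out[2], 0);
}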
/// "vpshufb like it was meant to be" on AVX2
///
/// # Safety

View File

@ -99,7 +99,7 @@ use crate::simd::{
// directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
// causes rustc to emit illegal LLVM IR in some cases.
#[repr(simd)]
#[repr(simd, packed)]
pub struct Simd<T, const N: usize>([T; N])
where
LaneCount<N>: SupportedLaneCount,
@ -144,14 +144,32 @@ where
/// assert_eq!(v.as_array(), &[8, 8, 8, 8]);
/// ```
#[inline]
pub fn splat(value: T) -> Self {
// This is preferred over `[value; N]`, since it's explicitly a splat:
// https://github.com/rust-lang/rust/issues/97804
struct Splat;
impl<const N: usize> Swizzle<N> for Splat {
const INDEX: [usize; N] = [0; N];
#[rustc_const_unstable(feature = "portable_simd", issue = "86656")]
pub const fn splat(value: T) -> Self {
const fn splat_const<T, const N: usize>(value: T) -> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
Simd::from_array([value; N])
}
Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
fn splat_rt<T, const N: usize>(value: T) -> Simd<T, N>
where
T: SimdElement,
LaneCount<N>: SupportedLaneCount,
{
// This is preferred over `[value; N]`, since it's explicitly a splat:
// https://github.com/rust-lang/rust/issues/97804
struct Splat;
impl<const N: usize> Swizzle<N> for Splat {
const INDEX: [usize; N] = [0; N];
}
Splat::swizzle::<T, 1>(Simd::<T, 1>::from([value]))
}
core::intrinsics::const_eval_select((value,), splat_const, splat_rt)
}
/// Returns an array reference containing the entire SIMD vector.
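With `const_eval_select`, `splat` takes a plain `[value; N]` path in const evaluation and keeps the single-lane-swizzle codegen at runtime, which also lets callers build vector constants. A hedged sketch of what that enables, assuming a nightly toolchain where `feature(portable_simd)` also unlocks the const gate:

#![feature(portable_simd)]
use core::simd::u32x8;

// `splat` is now a `const fn`, so a vector constant can be written directly.
const ONES: u32x8 = u32x8::splat(1);

fn main() {
    assert_eq!(ONES.to_array(), [1; 8]);
}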
@ -425,6 +443,9 @@ where
///
/// When the element is disabled, that memory location is not accessed and the corresponding
/// value from `or` is passed through.
///
/// # Safety
/// Enabled loads must not exceed the length of `slice`.
#[must_use]
#[inline]
pub unsafe fn load_select_unchecked(
@ -442,6 +463,9 @@ where
///
/// When the element is disabled, that memory location is not accessed and the corresponding
/// value from `or` is passed through.
///
/// # Safety
/// Enabled `ptr` elements must be safe to read as if by `std::ptr::read`.
#[must_use]
#[inline]
pub unsafe fn load_select_ptr(
@ -924,6 +948,7 @@ where
}
}
/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
impl<T, const N: usize> PartialOrd for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
@ -943,6 +968,7 @@ where
{
}
/// Lexicographic order. For the SIMD elementwise minimum and maximum, use simd_min and simd_max instead.
impl<T, const N: usize> Ord for Simd<T, N>
where
LaneCount<N>: SupportedLaneCount,
@ -1195,6 +1221,7 @@ fn lane_indices<const N: usize>() -> Simd<usize, N>
where
LaneCount<N>: SupportedLaneCount,
{
#![allow(clippy::needless_range_loop)]
let mut index = [0; N];
for i in 0..N {
index[i] = i;

View File

@ -29,3 +29,6 @@ mod arm;
#[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))]
mod powerpc;
#[cfg(target_arch = "loongarch64")]
mod loongarch64;

View File

@ -0,0 +1,31 @@
use crate::simd::*;
use core::arch::loongarch64::*;
from_transmute! { unsafe u8x16 => v16u8 }
from_transmute! { unsafe u8x32 => v32u8 }
from_transmute! { unsafe i8x16 => v16i8 }
from_transmute! { unsafe i8x32 => v32i8 }
from_transmute! { unsafe u16x8 => v8u16 }
from_transmute! { unsafe u16x16 => v16u16 }
from_transmute! { unsafe i16x8 => v8i16 }
from_transmute! { unsafe i16x16 => v16i16 }
from_transmute! { unsafe u32x4 => v4u32 }
from_transmute! { unsafe u32x8 => v8u32 }
from_transmute! { unsafe i32x4 => v4i32 }
from_transmute! { unsafe i32x8 => v8i32 }
from_transmute! { unsafe f32x4 => v4f32 }
from_transmute! { unsafe f32x8 => v8f32 }
from_transmute! { unsafe u64x2 => v2u64 }
from_transmute! { unsafe u64x4 => v4u64 }
from_transmute! { unsafe i64x2 => v2i64 }
from_transmute! { unsafe i64x4 => v4i64 }
from_transmute! { unsafe f64x2 => v2f64 }
from_transmute! { unsafe f64x4 => v4f64 }
from_transmute! { unsafe usizex2 => v2u64 }
from_transmute! { unsafe usizex4 => v4u64 }
from_transmute! { unsafe isizex2 => v2i64 }
from_transmute! { unsafe isizex4 => v4i64 }

View File

@ -0,0 +1,35 @@
#![feature(portable_simd)]
macro_rules! layout_tests {
{ $($mod:ident, $ty:ty,)* } => {
$(
mod $mod {
test_helpers::test_lanes! {
fn no_padding<const LANES: usize>() {
assert_eq!(
core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(),
core::mem::size_of::<[$ty; LANES]>(),
);
}
}
}
)*
}
}
layout_tests! {
i8, i8,
i16, i16,
i32, i32,
i64, i64,
isize, isize,
u8, u8,
u16, u16,
u32, u32,
u64, u64,
usize, usize,
f32, f32,
f64, f64,
mut_ptr, *mut (),
const_ptr, *const (),
}

View File

@ -99,7 +99,6 @@ macro_rules! test_mask_api {
assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
}
#[cfg(feature = "all_lane_counts")]
#[test]
fn roundtrip_bitmask_conversion_odd() {
let values = [
@ -134,48 +133,6 @@ macro_rules! test_mask_api {
cast_impl::<i64>();
cast_impl::<isize>();
}
#[test]
fn roundtrip_bitmask_vector_conversion() {
use core_simd::simd::ToBytes;
let values = [
true, false, false, true, false, false, true, false,
true, true, false, false, false, false, false, true,
];
let mask = Mask::<$type, 16>::from_array(values);
let bitmask = mask.to_bitmask_vector();
assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b01001001, 0b10000011]);
assert_eq!(Mask::<$type, 16>::from_bitmask_vector(bitmask), mask);
}
// rust-lang/portable-simd#379
#[test]
fn roundtrip_bitmask_vector_conversion_small() {
use core_simd::simd::ToBytes;
let values = [
true, false, true, true
];
let mask = Mask::<$type, 4>::from_array(values);
let bitmask = mask.to_bitmask_vector();
assert_eq!(bitmask.resize::<1>(0).to_ne_bytes()[0], 0b00001101);
assert_eq!(Mask::<$type, 4>::from_bitmask_vector(bitmask), mask);
}
/* FIXME doesn't work with non-powers-of-two, yet
// rust-lang/portable-simd#379
#[cfg(feature = "all_lane_counts")]
#[test]
fn roundtrip_bitmask_vector_conversion_odd() {
use core_simd::simd::ToBytes;
let values = [
true, false, true, false, true, true, false, false, false, true, true,
];
let mask = Mask::<$type, 11>::from_array(values);
let bitmask = mask.to_bitmask_vector();
assert_eq!(bitmask.resize::<2>(0).to_ne_bytes()[..2], [0b00110101, 0b00000110]);
assert_eq!(Mask::<$type, 11>::from_bitmask_vector(bitmask), mask);
}
*/
}
}
}

View File

@ -216,6 +216,22 @@ macro_rules! impl_common_integer_tests {
)
}
fn count_ones<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&$vector::<LANES>::count_ones,
&|x| x.count_ones() as _,
&|_| true,
)
}
fn count_zeros<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&$vector::<LANES>::count_zeros,
&|x| x.count_zeros() as _,
&|_| true,
)
}
fn leading_zeros<const LANES: usize>() {
test_helpers::test_unary_elementwise(
&$vector::<LANES>::leading_zeros,
@ -307,6 +323,14 @@ macro_rules! impl_signed_tests {
assert_eq!(a % b, Vector::<LANES>::splat(0));
}
fn abs_diff<const LANES: usize>() {
test_helpers::test_binary_elementwise(
&Vector::<LANES>::abs_diff,
&Scalar::abs_diff,
&|_, _| true,
)
}
fn simd_min<const LANES: usize>() {
use core_simd::simd::cmp::SimdOrd;
let a = Vector::<LANES>::splat(Scalar::MIN);
@ -419,6 +443,14 @@ macro_rules! impl_unsigned_tests {
&|_| true,
);
}
fn abs_diff<const LANES: usize>() {
test_helpers::test_binary_elementwise(
&Vector::<LANES>::abs_diff,
&Scalar::abs_diff,
&|_, _| true,
)
}
}
impl_binary_op_test!(Scalar, Add::add, AddAssign::add_assign, Scalar::wrapping_add);
@ -495,6 +527,9 @@ macro_rules! impl_float_tests {
}
fn is_normal<const LANES: usize>() {
// Arm v7 Neon violates float opsem re: subnormals, see
// https://github.com/rust-lang/portable-simd/issues/439
#[cfg(not(target_arch = "arm"))]
test_helpers::test_unary_mask_elementwise(
&Vector::<LANES>::is_normal,
&Scalar::is_normal,
@ -503,6 +538,9 @@ macro_rules! impl_float_tests {
}
fn is_subnormal<const LANES: usize>() {
// Arm v7 Neon violates float opsem re: subnormals, see
// https://github.com/rust-lang/portable-simd/issues/439
#[cfg(not(target_arch = "arm"))]
test_helpers::test_unary_mask_elementwise(
&Vector::<LANES>::is_subnormal,
&Scalar::is_subnormal,

View File

@ -48,6 +48,24 @@ fn rotate() {
assert_eq!(a.rotate_elements_right::<5>().to_array(), [4, 1, 2, 3]);
}
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn shift() {
let a = Simd::from_array([1, 2, 3, 4]);
assert_eq!(a.shift_elements_left::<0>(0).to_array(), [1, 2, 3, 4]);
assert_eq!(a.shift_elements_left::<1>(0).to_array(), [2, 3, 4, 0]);
assert_eq!(a.shift_elements_left::<2>(9).to_array(), [3, 4, 9, 9]);
assert_eq!(a.shift_elements_left::<3>(8).to_array(), [4, 8, 8, 8]);
assert_eq!(a.shift_elements_left::<4>(7).to_array(), [7, 7, 7, 7]);
assert_eq!(a.shift_elements_left::<5>(6).to_array(), [6, 6, 6, 6]);
assert_eq!(a.shift_elements_right::<0>(0).to_array(), [1, 2, 3, 4]);
assert_eq!(a.shift_elements_right::<1>(0).to_array(), [0, 1, 2, 3]);
assert_eq!(a.shift_elements_right::<2>(-1).to_array(), [-1, -1, 1, 2]);
assert_eq!(a.shift_elements_right::<3>(-2).to_array(), [-2, -2, -2, 1]);
assert_eq!(a.shift_elements_right::<4>(-3).to_array(), [-3, -3, -3, -3]);
assert_eq!(a.shift_elements_right::<5>(-4).to_array(), [-4, -4, -4, -4]);
}
#[test]
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
fn interleave() {

View File

@ -6,6 +6,3 @@ publish = false
[dependencies]
proptest = { version = "0.10", default-features = false, features = ["alloc"] }
[features]
all_lane_counts = []

View File

@ -539,32 +539,22 @@ macro_rules! test_lanes {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_1 1;
lanes_2 2;
lanes_4 4;
);
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_8 8;
lanes_16 16;
lanes_32 32;
lanes_64 64;
);
#[cfg(feature = "all_lane_counts")]
$crate::test_lanes_helper!(
// test some odd and even non-power-of-2 lengths on miri
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
// Cover an odd and an even non-power-of-2 length in Miri.
// (Even non-power-of-2 vectors have alignment between element
// and vector size, so we want to cover that case as well.)
lanes_3 3;
lanes_5 5;
lanes_6 6;
);
#[cfg(feature = "all_lane_counts")]
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_4 4;
lanes_5 5;
lanes_7 7;
lanes_8 8;
lanes_9 9;
lanes_10 10;
lanes_11 11;
@ -572,52 +562,55 @@ macro_rules! test_lanes {
lanes_13 13;
lanes_14 14;
lanes_15 15;
lanes_16 16;
lanes_17 17;
lanes_18 18;
lanes_19 19;
lanes_20 20;
lanes_21 21;
lanes_22 22;
lanes_23 23;
//lanes_18 18;
//lanes_19 19;
//lanes_20 20;
//lanes_21 21;
//lanes_22 22;
//lanes_23 23;
lanes_24 24;
lanes_25 25;
lanes_26 26;
lanes_27 27;
lanes_28 28;
lanes_29 29;
lanes_30 30;
lanes_31 31;
lanes_33 33;
lanes_34 34;
lanes_35 35;
lanes_36 36;
lanes_37 37;
lanes_38 38;
lanes_39 39;
lanes_40 40;
lanes_41 41;
lanes_42 42;
lanes_43 43;
lanes_44 44;
lanes_45 45;
lanes_46 46;
//lanes_25 25;
//lanes_26 26;
//lanes_27 27;
//lanes_28 28;
//lanes_29 29;
//lanes_30 30;
//lanes_31 31;
lanes_32 32;
//lanes_33 33;
//lanes_34 34;
//lanes_35 35;
//lanes_36 36;
//lanes_37 37;
//lanes_38 38;
//lanes_39 39;
//lanes_40 40;
//lanes_41 41;
//lanes_42 42;
//lanes_43 43;
//lanes_44 44;
//lanes_45 45;
//lanes_46 46;
lanes_47 47;
lanes_48 48;
lanes_49 49;
lanes_50 50;
lanes_51 51;
lanes_52 52;
lanes_53 53;
lanes_54 54;
lanes_55 55;
//lanes_48 48;
//lanes_49 49;
//lanes_50 50;
//lanes_51 51;
//lanes_52 52;
//lanes_53 53;
//lanes_54 54;
//lanes_55 55;
lanes_56 56;
lanes_57 57;
lanes_58 58;
lanes_59 59;
lanes_60 60;
lanes_61 61;
lanes_62 62;
//lanes_58 58;
//lanes_59 59;
//lanes_60 60;
//lanes_61 61;
//lanes_62 62;
lanes_63 63;
lanes_64 64;
);
}
)*
@ -639,36 +632,24 @@ macro_rules! test_lanes_panic {
core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
$body
// test some odd and even non-power-of-2 lengths on miri
$crate::test_lanes_helper!(
#[should_panic];
lanes_1 1;
lanes_2 2;
lanes_4 4;
);
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[should_panic];
lanes_8 8;
lanes_16 16;
lanes_32 32;
lanes_64 64;
);
#[cfg(feature = "all_lane_counts")]
$crate::test_lanes_helper!(
// test some odd and even non-power-of-2 lengths on miri
#[should_panic];
lanes_3 3;
lanes_5 5;
lanes_6 6;
);
#[cfg(feature = "all_lane_counts")]
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[should_panic];
lanes_4 4;
lanes_5 5;
lanes_7 7;
lanes_8 8;
lanes_9 9;
lanes_10 10;
lanes_11 11;
@ -676,52 +657,55 @@ macro_rules! test_lanes_panic {
lanes_13 13;
lanes_14 14;
lanes_15 15;
lanes_16 16;
lanes_17 17;
lanes_18 18;
lanes_19 19;
lanes_20 20;
lanes_21 21;
lanes_22 22;
lanes_23 23;
//lanes_18 18;
//lanes_19 19;
//lanes_20 20;
//lanes_21 21;
//lanes_22 22;
//lanes_23 23;
lanes_24 24;
lanes_25 25;
lanes_26 26;
lanes_27 27;
lanes_28 28;
lanes_29 29;
lanes_30 30;
lanes_31 31;
lanes_33 33;
lanes_34 34;
lanes_35 35;
lanes_36 36;
lanes_37 37;
lanes_38 38;
lanes_39 39;
lanes_40 40;
lanes_41 41;
lanes_42 42;
lanes_43 43;
lanes_44 44;
lanes_45 45;
lanes_46 46;
//lanes_25 25;
//lanes_26 26;
//lanes_27 27;
//lanes_28 28;
//lanes_29 29;
//lanes_30 30;
//lanes_31 31;
lanes_32 32;
//lanes_33 33;
//lanes_34 34;
//lanes_35 35;
//lanes_36 36;
//lanes_37 37;
//lanes_38 38;
//lanes_39 39;
//lanes_40 40;
//lanes_41 41;
//lanes_42 42;
//lanes_43 43;
//lanes_44 44;
//lanes_45 45;
//lanes_46 46;
lanes_47 47;
lanes_48 48;
lanes_49 49;
lanes_50 50;
lanes_51 51;
lanes_52 52;
lanes_53 53;
lanes_54 54;
lanes_55 55;
//lanes_48 48;
//lanes_49 49;
//lanes_50 50;
//lanes_51 51;
//lanes_52 52;
//lanes_53 53;
//lanes_54 54;
//lanes_55 55;
lanes_56 56;
lanes_57 57;
lanes_58 58;
lanes_59 59;
lanes_60 60;
lanes_61 61;
lanes_62 62;
//lanes_58 58;
//lanes_59 59;
//lanes_60 60;
//lanes_61 61;
//lanes_62 62;
lanes_63 63;
lanes_64 64;
);
}
)*

View File

@ -0,0 +1,3 @@
[toolchain]
channel = "nightly-2025-01-16"
components = ["rustfmt", "clippy", "miri", "rust-src"]

View File

@ -0,0 +1,52 @@
#!/bin/bash
set -eou pipefail
git fetch origin
pushd $2
git fetch origin
popd
if [ "$(git rev-parse --show-prefix)" != "" ]; then
echo "Run this script from the git root" >&2
exit 1
fi
if [ "$(git rev-parse HEAD)" != "$(git rev-parse origin/master)" ]; then
echo "$(pwd) is not at origin/master" >&2
exit 1
fi
if [ ! -f library/portable-simd/git-subtree.sh ]; then
curl -sS https://raw.githubusercontent.com/bjorn3/git/tqc-subtree-portable/contrib/subtree/git-subtree.sh -o library/portable-simd/git-subtree.sh
chmod +x library/portable-simd/git-subtree.sh
fi
today=$(date +%Y-%m-%d)
case $1 in
"push")
upstream=rust-upstream-$today
merge=sync-from-rust-$today
pushd $2
git checkout master
git pull
popd
library/portable-simd/git-subtree.sh push -P library/portable-simd $2 $upstream
pushd $2
git checkout -B $merge origin/master
git merge $upstream
popd
echo "Branch \`$merge\` created in \`$2\`. You may need to resolve merge conflicts."
;;
"pull")
branch=sync-from-portable-simd-$today
git checkout -B $branch
echo "Creating branch \`$branch\`... You may need to resolve merge conflicts."
library/portable-simd/git-subtree.sh pull -P library/portable-simd $2 origin/master
;;
esac