Jubilee Young 2022-02-28 10:17:40 -08:00
commit 4de99e187c
21 changed files with 440 additions and 125 deletions

View File

@ -0,0 +1,77 @@
#![feature(portable_simd)]
use core_simd::simd::*;

fn a(i: usize, j: usize) -> f64 {
    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
}

fn mult_av(v: &[f64], out: &mut [f64]) {
    assert!(v.len() == out.len());
    assert!(v.len() % 2 == 0);

    for (i, out) in out.iter_mut().enumerate() {
        let mut sum = f64x2::splat(0.0);

        let mut j = 0;
        while j < v.len() {
            let b = f64x2::from_slice(&v[j..]);
            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
            sum += b / a;
            j += 2
        }
        *out = sum.horizontal_sum();
    }
}

fn mult_atv(v: &[f64], out: &mut [f64]) {
    assert!(v.len() == out.len());
    assert!(v.len() % 2 == 0);

    for (i, out) in out.iter_mut().enumerate() {
        let mut sum = f64x2::splat(0.0);

        let mut j = 0;
        while j < v.len() {
            let b = f64x2::from_slice(&v[j..]);
            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
            sum += b / a;
            j += 2
        }
        *out = sum.horizontal_sum();
    }
}

fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
    mult_av(v, tmp);
    mult_atv(tmp, out);
}

pub fn spectral_norm(n: usize) -> f64 {
    assert!(n % 2 == 0, "only even lengths are accepted");

    let mut u = vec![1.0; n];
    let mut v = u.clone();
    let mut tmp = u.clone();

    for _ in 0..10 {
        mult_atav(&u, &mut v, &mut tmp);
        mult_atav(&v, &mut u, &mut tmp);
    }
    (dot(&u, &v) / dot(&v, &v)).sqrt()
}

fn dot(x: &[f64], y: &[f64]) -> f64 {
    // This is auto-vectorized:
    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
}

#[cfg(test)]
#[test]
fn test() {
    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
}

fn main() {
    // Empty main to make cargo happy
}

View File

@ -10,6 +10,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
}
@ -17,6 +19,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
}
}
@ -30,6 +34,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
}
@ -37,6 +43,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
}
@ -44,6 +52,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
}
@ -51,6 +61,8 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
// Safety: `self` is a vector, and the result of the comparison
// is always a valid mask.
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
}
}
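The comparison methods above all return a `Mask` rather than a vector of bools. A minimal sketch of how that composes with lane-wise selection, assuming a nightly toolchain with `portable_simd` (the function name is illustrative):

```rust
#![feature(portable_simd)]
use core::simd::{f32x4, mask32x4};

// Clamp negative lanes to zero: compare, then select per lane.
fn clamp_negatives(v: f32x4) -> f32x4 {
    // true in each lane where v < 0.0
    let is_negative: mask32x4 = v.lanes_lt(f32x4::splat(0.0));
    is_negative.select(f32x4::splat(0.0), v)
}

fn main() {
    let v = f32x4::from_array([-1.0, 2.0, -3.0, 4.0]);
    assert_eq!(clamp_negatives(v).to_array(), [0.0, 2.0, 0.0, 4.0]);
}
```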

View File

@ -2,31 +2,55 @@
//! crate.
//!
//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
//!
//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
//! - poison: "undefined behavior as a value". Specifically, it is like uninit memory (such as padding bytes). It is "safe" to create poison, BUT
//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. Unlike NaN, which has defined comparisons,
//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). So it cannot be used as a branch condition, either.
//! - undef: "a value that is every value". Functionally like poison, as far as Rust is concerned. Poison may become undef. Note:
//!   this means that division by poison or undef is like division by zero, which means it inflicts...
//! - "UB": poison and undef cover most of what people call "UB". "UB" means an operation that immediately invalidates the program:
//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
//!
//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
//!
//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
extern "platform-intrinsic" {
/// add/fadd
pub(crate) fn simd_add<T>(x: T, y: T) -> T;
/// sub/fsub
pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
/// mul/fmul
pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
/// udiv/sdiv/fdiv
/// ints and uints: {s,u}div incur UB if division by zero occurs.
/// ints: sdiv is UB for int::MIN / -1.
/// floats: fdiv is never UB, but may create NaNs or infinities.
pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
/// urem/srem/frem
/// ints and uints: {s,u}rem incur UB if division by zero occurs.
/// ints: srem is UB for int::MIN / -1.
/// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
/// shl
/// for (u)ints. poison if rhs >= lhs::BITS
pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
/// lshr/ashr
/// ints: ashr
/// uints: lshr
/// poison if rhs >= lhs::BITS
pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
/// and
pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@ -38,6 +62,9 @@ extern "platform-intrinsic" {
pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
/// fptoui/fptosi/uitofp/sitofp
/// casting floats to integers is truncating, so it is safe to convert values such as 1.5,
/// but the truncated value must fit in the target type or the result is poison.
/// use `simd_as` instead for a cast that performs a saturating conversion.
pub(crate) fn simd_cast<T, U>(x: T) -> U;
/// follows Rust's `T as U` semantics, including saturating float casts
/// which amounts to the same as `simd_cast` for many cases
@ -45,6 +72,9 @@ extern "platform-intrinsic" {
pub(crate) fn simd_as<T, U>(x: T) -> U;
/// neg/fneg
/// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
/// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
/// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
pub(crate) fn simd_neg<T>(x: T) -> T;
/// fabs
@ -54,6 +84,7 @@ extern "platform-intrinsic" {
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
// these return Simd<int, N> with integer elements of the same bit width as the inputs
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@ -62,19 +93,31 @@ extern "platform-intrinsic" {
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
// shufflevector
// idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
/// llvm.masked.gather
/// like a loop of pointer reads
/// val: vector of values to select if a lane is masked
/// ptr: vector of pointers to read from
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
/// note, the LLVM intrinsic accepts a mask vector of <N x i1>
/// FIXME: review this if/when we fix up our mask story in general?
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
/// llvm.masked.scatter
/// like gather, but more spicy, as it writes instead of reads
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
// {s,u}add.sat
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
// {s,u}sub.sat
pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
// reductions
// llvm.vector.reduce.{add,fadd}
pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
// llvm.vector.reduce.{mul,fmul}
pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
#[allow(unused)]
pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@ -91,7 +134,10 @@ extern "platform-intrinsic" {
pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
// select
// first argument is a vector of integers, -1 (all bits 1) is "true"
// logically equivalent to (yes & m) | (no & (m ^ -1)),
// but you can use it on floats.
pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
#[allow(unused)]
pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
}
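As a sanity check on the `simd_select` comment above, the safe `Mask::select` wrapper can be compared against the bitwise identity directly. A small sketch (nightly, `portable_simd`):

```rust
#![feature(portable_simd)]
use core::simd::{i32x4, mask32x4};

fn main() {
    let m = mask32x4::from_array([true, false, true, false]);
    let yes = i32x4::splat(1);
    let no = i32x4::splat(-1);

    // The mask as an integer vector: -1 where true, 0 where false.
    let m_int = m.to_int();
    // (yes & m) | (no & !m), the identity from the comment above.
    let by_hand = (yes & m_int) | (no & !m_int);

    assert_eq!(m.select(yes, no), by_hand);
}
```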

View File

@ -1,7 +1,9 @@
#![cfg_attr(not(feature = "std"), no_std)]
#![feature(
const_fn_trait_bound,
convert_float_to_int,
decl_macro,
intra_doc_pointers,
platform_intrinsics,
repr_simd,
simd_ffi,

View File

@ -12,8 +12,10 @@
)]
mod mask_impl;
mod to_bitmask;
pub use to_bitmask::ToBitMask;
use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::cmp::Ordering;
use core::{fmt, mem};
@ -42,6 +44,9 @@ mod sealed {
use sealed::Sealed;
/// Marker trait for types that may be used as SIMD mask elements.
///
/// # Safety
/// Type must be a signed integer.
pub unsafe trait MaskElement: SimdElement + Sealed {}
macro_rules! impl_element {
@ -149,6 +154,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
// Safety: the caller must confirm this invariant
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
}
@ -161,6 +167,7 @@ where
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_int(value: Simd<T, LANES>) -> Self {
assert!(T::valid(value), "all values must be either 0 or -1",);
// Safety: the validity has been checked
unsafe { Self::from_int_unchecked(value) }
}
@ -179,6 +186,7 @@ where
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
// Safety: the caller must confirm this invariant
unsafe { self.0.test_unchecked(lane) }
}
@ -190,6 +198,7 @@ where
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn test(&self, lane: usize) -> bool {
assert!(lane < LANES, "lane index out of range");
// Safety: the lane index has been checked
unsafe { self.test_unchecked(lane) }
}
@ -199,6 +208,7 @@ where
/// `lane` must be less than `LANES`.
#[inline]
pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
// Safety: the caller must confirm this invariant
unsafe {
self.0.set_unchecked(lane, value);
}
@ -211,27 +221,12 @@ where
#[inline]
pub fn set(&mut self, lane: usize, value: bool) {
assert!(lane < LANES, "lane index out of range");
// Safety: the lane index has been checked
unsafe {
self.set_unchecked(lane, value);
}
}
/// Returns true if any lane is set, or false otherwise.
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]

View File

@ -1,7 +1,7 @@
#![allow(unused_imports)]
use super::MaskElement;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
use core::marker::PhantomData;
/// A mask where each lane is represented by a single bit.
@ -115,20 +115,22 @@ where
unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
}
#[inline]
#[must_use = "method returns a new array and does not mutate the original value"]
pub fn to_bitmask_integer<U>(self) -> U
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// Safety: these are the same types
unsafe { core::mem::transmute_copy(&self.0) }
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub fn from_bitmask_integer<U>(bitmask: U) -> Self
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// Safety: these are the same types
unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
}
#[inline]
@ -137,6 +139,7 @@ where
where
U: MaskElement,
{
// Safety: bitmask layout does not depend on the element width
unsafe { core::mem::transmute_copy(&self) }
}

View File

@ -2,7 +2,7 @@
use super::MaskElement;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
#[repr(transparent)]
pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
@ -66,6 +66,23 @@ where
}
}
// Used for bitmask bit order workaround
pub(crate) trait ReverseBits {
fn reverse_bits(self) -> Self;
}
macro_rules! impl_reverse_bits {
{ $($int:ty),* } => {
$(
impl ReverseBits for $int {
fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
}
)*
}
}
impl_reverse_bits! { u8, u16, u32, u64 }
impl<T, const LANES: usize> Mask<T, LANES>
where
T: MaskElement,
@ -106,44 +123,40 @@ where
where
U: MaskElement,
{
// Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
unsafe { Mask(intrinsics::simd_cast(self.0)) }
}
#[inline]
#[must_use = "method returns a new array and does not mutate the original value"]
pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// Safety: U is required to be the appropriate bitmask type
let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
// LLVM assumes bit order should match endianness
if cfg!(target_endian = "big") {
bitmask.reverse_bits()
} else {
bitmask
}
}
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
where
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
{
// LLVM assumes bit order should match endianness
let bitmask = if cfg!(target_endian = "big") {
bitmask.reverse_bits()
} else {
bitmask
};
// Safety: U is required to be the appropriate bitmask type
unsafe {
Self::from_int_unchecked(intrinsics::simd_select_bitmask(
bitmask,
Self::splat(true).to_int(),
@ -155,12 +168,14 @@ where
#[inline]
#[must_use = "method returns a new bool and does not mutate the original value"]
pub fn any(self) -> bool {
// Safety: use `self` as an integer vector
unsafe { intrinsics::simd_reduce_any(self.to_int()) }
}
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
pub fn all(self) -> bool {
// Safety: use `self` as an integer vector
unsafe { intrinsics::simd_reduce_all(self.to_int()) }
}
}
@ -184,6 +199,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn bitand(self, rhs: Self) -> Self {
// Safety: `self` is an integer vector
unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
}
}
@ -197,6 +213,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn bitor(self, rhs: Self) -> Self {
// Safety: `self` is an integer vector
unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
}
}
@ -210,6 +227,7 @@ where
#[inline]
#[must_use = "method returns a new mask and does not mutate the original value"]
fn bitxor(self, rhs: Self) -> Self {
// Safety: `self` is an integer vector
unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
}
}

View File

@ -0,0 +1,57 @@
use super::{mask_impl, Mask, MaskElement};
use crate::simd::{LaneCount, SupportedLaneCount};

mod sealed {
    pub trait Sealed {}
}
pub use sealed::Sealed;

impl<T, const LANES: usize> Sealed for Mask<T, LANES>
where
    T: MaskElement,
    LaneCount<LANES>: SupportedLaneCount,
{
}

/// Converts masks to and from integer bitmasks.
///
/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
///
/// # Safety
/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
/// the mask.
pub unsafe trait ToBitMask: Sealed {
    /// The integer bitmask type.
    type BitMask;

    /// Converts a mask to a bitmask.
    fn to_bitmask(self) -> Self::BitMask;

    /// Converts a bitmask to a mask.
    fn from_bitmask(bitmask: Self::BitMask) -> Self;
}

macro_rules! impl_integer_intrinsic {
    { $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
        $(
            unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
                type BitMask = $int;

                fn to_bitmask(self) -> $int {
                    self.0.to_bitmask_integer()
                }

                fn from_bitmask(bitmask: $int) -> Self {
                    Self(mask_impl::Mask::from_bitmask_integer(bitmask))
                }
            }
        )*
    }
}

impl_integer_intrinsic! {
    unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
    unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
    unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
    unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
}
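A short round-trip sketch of the trait above (nightly, `portable_simd`); lane 0 maps to the least significant bit:

```rust
#![feature(portable_simd)]
use core::simd::{mask32x8, ToBitMask};

fn main() {
    let mask = mask32x8::from_array([true, false, false, true, false, false, true, false]);
    let bits: u8 = mask.to_bitmask();
    assert_eq!(bits, 0b01001001); // lanes 0, 3, and 6 are set
    assert_eq!(mask32x8::from_bitmask(bits), mask);
}
```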

View File

@ -22,6 +22,7 @@ macro_rules! impl_uint_arith {
/// ```
#[inline]
pub fn saturating_add(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_add(self, second) }
}
@ -41,6 +42,7 @@ macro_rules! impl_uint_arith {
/// assert_eq!(sat, Simd::splat(0));
#[inline]
pub fn saturating_sub(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_sub(self, second) }
}
})+
@ -68,6 +70,7 @@ macro_rules! impl_int_arith {
/// ```
#[inline]
pub fn saturating_add(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_add(self, second) }
}
@ -87,6 +90,7 @@ macro_rules! impl_int_arith {
/// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
#[inline]
pub fn saturating_sub(self, second: Self) -> Self {
// Safety: `self` is a vector
unsafe { simd_saturating_sub(self, second) }
}

View File

@ -57,29 +57,40 @@ macro_rules! wrap_bitshift {
};
}
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
/// thus guaranteeing a Rust value returns instead.
///
/// | | LLVM | Rust
/// | :--------------: | :--- | :----------
/// | N {/,%} 0 | UB | panic!()
/// | <$int>::MIN / -1 | UB | <$int>::MIN
/// | <$int>::MIN % -1 | UB | 0
///
macro_rules! int_divrem_guard {
( $lhs:ident,
$rhs:ident,
{ const PANIC_ZERO: &'static str = $zero:literal;
$simd_call:ident
},
$int:ident ) => {
if $rhs.lanes_eq(Simd::splat(0)).any() {
panic!($zero);
} else {
// Prevent otherwise-UB overflow on the MIN / -1 case.
let rhs = if <$int>::MIN != 0 {
// This should, at worst, optimize to a few branchless logical ops
// Ideally, this entire conditional should evaporate
// Fire LLVM and implement those manually if it doesn't get the hint
($lhs.lanes_eq(Simd::splat(<$int>::MIN))
// type inference can break here, so cut an SInt to size
& $rhs.lanes_eq(Simd::splat(-1i64 as _)))
.select(Simd::splat(1), $rhs)
} else {
// Nice base case to make it easy to const-fold away the other branch.
$rhs
};
unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
}
};
}
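The table above can be checked directly; a quick sketch of the guarded behavior (nightly, `portable_simd`):

```rust
#![feature(portable_simd)]
use core::simd::i32x4;

fn main() {
    let min = i32x4::splat(i32::MIN);
    let neg_one = i32x4::splat(-1);

    // Raw sdiv/srem would be UB here; the guard substitutes defined results.
    assert_eq!(min / neg_one, min);
    assert_eq!(min % neg_one, i32x4::splat(0));

    // Division by zero still panics, matching scalar Rust:
    // let _ = min / i32x4::splat(0); // "attempt to divide by zero"
}
```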
@ -183,7 +194,6 @@ for_base_ops! {
impl Div::div {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to divide by zero";
const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
simd_div
}
}
@ -191,7 +201,6 @@ for_base_ops! {
impl Rem::rem {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
simd_rem
}
}

View File

@ -14,24 +14,28 @@ macro_rules! impl_integer_reductions {
/// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
#[inline]
pub fn horizontal_sum(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_add_ordered(self, 0) }
}
/// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
#[inline]
pub fn horizontal_product(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_mul_ordered(self, 1) }
}
/// Horizontal maximum. Returns the maximum lane in the vector.
#[inline]
pub fn horizontal_max(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_max(self) }
}
/// Horizontal minimum. Returns the minimum lane in the vector.
#[inline]
pub fn horizontal_min(self) -> $scalar {
// Safety: `self` is an integer vector
unsafe { simd_reduce_min(self) }
}
}
@ -63,6 +67,7 @@ macro_rules! impl_float_reductions {
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
self.as_array().iter().sum()
} else {
// Safety: `self` is a float vector
unsafe { simd_reduce_add_ordered(self, 0.) }
}
}
@ -74,6 +79,7 @@ macro_rules! impl_float_reductions {
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
self.as_array().iter().product()
} else {
// Safety: `self` is a float vector
unsafe { simd_reduce_mul_ordered(self, 1.) }
}
}
@ -84,6 +90,7 @@ macro_rules! impl_float_reductions {
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
#[inline]
pub fn horizontal_max(self) -> $scalar {
// Safety: `self` is a float vector
unsafe { simd_reduce_max(self) }
}
@ -93,6 +100,7 @@ macro_rules! impl_float_reductions {
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
#[inline]
pub fn horizontal_min(self) -> $scalar {
// Safety: `self` is a float vector
unsafe { simd_reduce_min(self) }
}
}
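A brief sketch of the reduction semantics documented above (nightly, `portable_simd`): sums propagate NaN, while max/min only return NaN if every lane is NaN:

```rust
#![feature(portable_simd)]
use core::simd::f32x4;

fn main() {
    let v = f32x4::from_array([1.0, f32::NAN, 4.0, 2.0]);
    assert!(v.horizontal_sum().is_nan()); // fadd propagates NaN
    assert_eq!(v.horizontal_max(), 4.0);  // NaN lanes are skipped
    assert_eq!(v.horizontal_min(), 1.0);
}
```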

View File

@ -1,9 +1,10 @@
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
use core::convert::FloatToInt;
macro_rules! implement {
{
$type:ty
} => {
impl<const LANES: usize> Simd<$type, LANES>
where
@ -18,20 +19,22 @@ macro_rules! implement {
/// * Not be NaN
/// * Not be infinite
/// * Be representable in the return type, after truncating off its fractional part
///
/// If these requirements are infeasible or costly, consider using the safe function [cast],
/// which saturates on conversion.
///
/// [cast]: Simd::cast
#[inline]
pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
where
$type: FloatToInt<I>,
I: SimdElement,
{
unsafe { intrinsics::simd_cast(self) }
}
}
}
}
implement! { f32 }
implement! { f64 }
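A hedged sketch contrasting the two float-to-int paths (nightly, `portable_simd`): `cast` is safe and saturating, `to_int_unchecked` truncates and requires every lane to be finite and in range:

```rust
#![feature(portable_simd)]
use core::simd::f32x4;

fn main() {
    // Safe, saturating: out-of-range lanes clamp, NaN becomes 0.
    let v = f32x4::from_array([1.5, -2.7, 1e10, f32::NAN]);
    assert_eq!(v.cast::<i32>().to_array(), [1, -2, i32::MAX, 0]);

    // Unsafe truncation: sound only because every lane fits in i32.
    let small = f32x4::from_array([1.5, -2.7, 3.9, -4.1]);
    let ints = unsafe { small.to_int_unchecked::<i32>() };
    assert_eq!(ints.to_array(), [1, -2, 3, -4]);
}
```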

View File

@ -11,6 +11,7 @@ where
/// For each lane in the mask, choose the corresponding lane from `true_values` if
/// that lane mask is true, and `false_values` if that lane mask is false.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@ -31,6 +32,8 @@ where
where
U: SimdElement<Mask = T>,
{
// Safety: The mask has been cast to a vector of integers,
// and the operands to select between are vectors of the same type and length.
unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
}
@ -39,6 +42,7 @@ where
/// For each lane in the mask, choose the corresponding lane from `true_values` if
/// that lane mask is true, and `false_values` if that lane mask is false.
///
/// # Examples
/// ```
/// # #![feature(portable_simd)]
/// # #[cfg(feature = "std")] use core_simd::Mask;

View File

@ -95,6 +95,7 @@ pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
LaneCount<INPUT_LANES>: SupportedLaneCount,
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
{
// Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
}
}
@ -119,6 +120,7 @@ pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
LaneCount<INPUT_LANES>: SupportedLaneCount,
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
{
// Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
}
}
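A minimal sketch of the safe API built on `simd_shuffle` (nightly, `portable_simd`); `Reverse` is an illustrative name:

```rust
#![feature(portable_simd)]
use core::simd::{Simd, Swizzle};

// A compile-time lane permutation: INDEX picks a source lane per output lane.
struct Reverse;
impl Swizzle<4, 4> for Reverse {
    const INDEX: [usize; 4] = [3, 2, 1, 0];
}

fn main() {
    let v = Simd::from_array([1, 2, 3, 4]);
    assert_eq!(Reverse::swizzle(v).to_array(), [4, 3, 2, 1]);
}
```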

View File

@ -8,12 +8,14 @@ macro_rules! impl_to_bytes {
/// Return the memory representation of this integer as a byte array in native byte
/// order.
pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
// Safety: transmuting between vectors is safe
unsafe { core::mem::transmute_copy(&self) }
}
/// Create a native endian integer value from its memory representation as a byte array
/// in native endianness.
pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
// Safety: transmuting between vectors is safe
unsafe { core::mem::transmute_copy(&bytes) }
}
}
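A round-trip sketch of these helpers, assuming the `core_simd` crate is built with its `generic_const_exprs` feature (the byte-conversion methods are gated on it):

```rust
#![feature(portable_simd)]
use core_simd::simd::Simd;

fn main() {
    let x = Simd::<u32, 4>::from_array([1, 2, 3, 4]);
    // 4 lanes of u32 flatten to 16 bytes in native byte order.
    let bytes: Simd<u8, 16> = x.to_ne_bytes();
    assert_eq!(Simd::<u32, 4>::from_ne_bytes(bytes), x);
}
```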

View File

@ -12,7 +12,79 @@ pub(crate) mod ptr;
use crate::simd::intrinsics;
use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
///
/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
/// the two arrays together and mapping the operator over each lane.
///
/// ```rust
/// # #![feature(array_zip, portable_simd)]
/// # use core::simd::{Simd};
/// let a0: [i32; 4] = [-2, 0, 2, 4];
/// let a1 = [10, 9, 8, 7];
/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
///
/// // `Simd<T, N>` implements `From<[T; N]>
/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
/// // Which means arrays implement `Into<Simd<T, N>>`.
/// assert_eq!(v0 + v1, zm_add.into());
/// assert_eq!(v0 * v1, zm_mul.into());
/// ```
///
/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` were [`Wrapping<T>`].
/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
/// This means there is no warning on overflows, even in "debug" builds.
/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
/// and even "debug builds" are unlikely to tolerate the loss of performance.
/// You may want to consider using explicitly checked arithmetic if such is required.
/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
///
/// [`Wrapping<T>`]: core::num::Wrapping
///
/// # Layout
/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and the conversion will typically optimize to zero cost,
/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
///
/// # ABI "Features"
/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
/// The need for this may be corrected in the future.
///
/// # Safe SIMD with Unsafe Rust
///
/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
/// [`read_unaligned`] and [`write_unaligned`]. This is because:
/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
///
/// By imposing fewer obligations, unaligned functions are less likely to make the program unsound,
/// and may be just as fast as stricter alternatives.
/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
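///
/// ## Example: an unaligned read
/// A hedged sketch of the advice above; the setup is illustrative, not a prescribed pattern.
/// ```
/// # #![feature(portable_simd)]
/// # use core::simd::Simd;
/// let data = [1.0f32, 2.0, 3.0, 4.0, 5.0];
/// // `data` is only aligned to `f32`, so use `read_unaligned` rather than `read`.
/// let v = unsafe { data.as_ptr().cast::<Simd<f32, 4>>().read_unaligned() };
/// assert_eq!(v.to_array(), [1.0, 2.0, 3.0, 4.0]);
/// ```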
///
/// [`transmute`]: core::mem::transmute
/// [raw pointers]: pointer
/// [`read_unaligned`]: pointer::read_unaligned
/// [`write_unaligned`]: pointer::write_unaligned
/// [`read`]: pointer::read
/// [`write`]: pointer::write
/// [as_simd]: slice::as_simd
#[repr(simd)]
pub struct Simd<T, const LANES: usize>([T; LANES])
where
@ -102,6 +174,7 @@ where
#[inline]
#[cfg(not(bootstrap))]
pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
// Safety: The input argument is a vector of a known SIMD type.
unsafe { intrinsics::simd_as(self) }
}
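For the integer case, a one-line sketch of `cast` following scalar `as` semantics (nightly, `portable_simd`):

```rust
#![feature(portable_simd)]
use core::simd::Simd;

fn main() {
    let x = Simd::<i32, 4>::from_array([300, -1, 0, 255]);
    // Lane-wise `i32 as u8`: truncate to the low 8 bits.
    assert_eq!(x.cast::<u8>().to_array(), [44, 255, 0, 255]);
}
```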
@ -175,7 +248,7 @@ where
or: Self,
) -> Self {
let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
// Safety: We have masked-off out-of-bounds lanes.
unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
}
@ -216,7 +289,7 @@ where
let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
// Ferris forgive me, I have done pointer arithmetic here.
let ptrs = base_ptr.wrapping_add(idxs);
// Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
}
@ -268,7 +341,7 @@ where
idxs: Simd<usize, LANES>,
) {
let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
// Safety: We have masked-off out-of-bounds lanes.
unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
}
@ -307,7 +380,7 @@ where
enable: Mask<isize, LANES>,
idxs: Simd<usize, LANES>,
) {
// Safety: This block works with *mut T derived from &mut 'a [T],
// which means it is delicate in Rust's borrowing model, circa 2021:
// &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
// Even though this block is largely safe methods, it must be exactly this way
@ -487,7 +560,9 @@ mod sealed {
use sealed::Sealed;
/// Marker trait for types that may be used as SIMD vector elements.
///
/// # Safety
/// This trait, when implemented, asserts the compiler can monomorphize
/// `#[repr(simd)]` structs with the marked type as an element.
/// Strictly, it is valid to impl if the vector will not be miscompiled.
/// Practically, it is user-unfriendly to impl it if the vector won't compile,

View File

@ -21,6 +21,8 @@ where
#[inline]
#[must_use]
pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
// Safety: converting pointers to usize and vice-versa is safe
// (even if using that pointer is not)
unsafe {
let x: Simd<usize, LANES> = mem::transmute_copy(&self);
mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
@ -47,6 +49,8 @@ where
#[inline]
#[must_use]
pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
// Safety: converting pointers to usize and vice-versa is safe
// (even if using that pointer is not)
unsafe {
let x: Simd<usize, LANES> = mem::transmute_copy(&self);
mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })

View File

@ -9,6 +9,8 @@ macro_rules! from_transmute {
impl core::convert::From<$from> for $to {
#[inline]
fn from(value: $from) -> $to {
// Safety: transmuting between vectors is safe, but the caller of this macro
// checks the invariants
unsafe { core::mem::transmute(value) }
}
}

View File

@ -68,16 +68,16 @@ macro_rules! test_mask_api {
assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
}
#[test]
fn roundtrip_bitmask_conversion() {
use core_simd::ToBitMask;
let values = [
true, false, false, true, false, false, true, false,
true, true, false, false, false, false, false, true,
];
let mask = core_simd::Mask::<$type, 16>::from_array(values);
let bitmask = mask.to_bitmask();
assert_eq!(bitmask, 0b1000001101001001);
assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
}
}

View File

@ -210,15 +210,21 @@ macro_rules! impl_signed_tests {
)
}
fn div_min_may_overflow<const LANES: usize>() {
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(-1);
assert_eq!(a / b, a);
}
fn rem_min_may_overflow<const LANES: usize>() {
let a = Vector::<LANES>::splat(Scalar::MIN);
let b = Vector::<LANES>::splat(-1);
assert_eq!(a % b, Vector::<LANES>::splat(0));
}
}
test_helpers::test_lanes_panic! {
fn div_by_all_zeros_panics<const LANES: usize>() {
let a = Vector::<LANES>::splat(42);
let b = Vector::<LANES>::splat(0);
@ -232,12 +238,6 @@ macro_rules! impl_signed_tests {
let _ = a / b;
}
fn rem_zero_panic<const LANES: usize>() {
let a = Vector::<LANES>::splat(42);
let b = Vector::<LANES>::splat(0);

View File

@ -53,14 +53,6 @@ macro_rules! float_rounding_test {
}
test_helpers::test_lanes! {
fn to_int_unchecked<const LANES: usize>() {
// The maximum integer that can be represented by the equivalently sized float has
// all of the mantissa digits set to 1, pushed up to the MSB.
@ -72,11 +64,11 @@ macro_rules! float_rounding_test {
runner.run(
&test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
|x| {
let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
let result_2 = {
let mut result: [IntScalar; LANES] = [0; LANES];
for (i, o) in x.iter().zip(result.iter_mut()) {
*o = unsafe { i.to_int_unchecked::<IntScalar>() };
}
result
};