mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-22 06:44:35 +00:00
This commit is contained in:
commit
4de99e187c
@ -0,0 +1,77 @@
|
||||
#![feature(portable_simd)]
|
||||
|
||||
use core_simd::simd::*;
|
||||
|
||||
fn a(i: usize, j: usize) -> f64 {
|
||||
((i + j) * (i + j + 1) / 2 + i + 1) as f64
|
||||
}
|
||||
|
||||
fn mult_av(v: &[f64], out: &mut [f64]) {
|
||||
assert!(v.len() == out.len());
|
||||
assert!(v.len() % 2 == 0);
|
||||
|
||||
for (i, out) in out.iter_mut().enumerate() {
|
||||
let mut sum = f64x2::splat(0.0);
|
||||
|
||||
let mut j = 0;
|
||||
while j < v.len() {
|
||||
let b = f64x2::from_slice(&v[j..]);
|
||||
let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
|
||||
sum += b / a;
|
||||
j += 2
|
||||
}
|
||||
*out = sum.horizontal_sum();
|
||||
}
|
||||
}
|
||||
|
||||
fn mult_atv(v: &[f64], out: &mut [f64]) {
|
||||
assert!(v.len() == out.len());
|
||||
assert!(v.len() % 2 == 0);
|
||||
|
||||
for (i, out) in out.iter_mut().enumerate() {
|
||||
let mut sum = f64x2::splat(0.0);
|
||||
|
||||
let mut j = 0;
|
||||
while j < v.len() {
|
||||
let b = f64x2::from_slice(&v[j..]);
|
||||
let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
|
||||
sum += b / a;
|
||||
j += 2
|
||||
}
|
||||
*out = sum.horizontal_sum();
|
||||
}
|
||||
}
|
||||
|
||||
fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
|
||||
mult_av(v, tmp);
|
||||
mult_atv(tmp, out);
|
||||
}
|
||||
|
||||
pub fn spectral_norm(n: usize) -> f64 {
|
||||
assert!(n % 2 == 0, "only even lengths are accepted");
|
||||
|
||||
let mut u = vec![1.0; n];
|
||||
let mut v = u.clone();
|
||||
let mut tmp = u.clone();
|
||||
|
||||
for _ in 0..10 {
|
||||
mult_atav(&u, &mut v, &mut tmp);
|
||||
mult_atav(&v, &mut u, &mut tmp);
|
||||
}
|
||||
(dot(&u, &v) / dot(&v, &v)).sqrt()
|
||||
}
|
||||
|
||||
fn dot(x: &[f64], y: &[f64]) -> f64 {
|
||||
// This is auto-vectorized:
|
||||
x.iter().zip(y).map(|(&x, &y)| x * y).sum()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
#[test]
|
||||
fn test() {
|
||||
assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
|
||||
}
|
||||
|
||||
fn main() {
|
||||
// Empty main to make cargo happy
|
||||
}
|
@ -10,6 +10,8 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
|
||||
}
|
||||
|
||||
@ -17,6 +19,8 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
|
||||
}
|
||||
}
|
||||
@ -30,6 +34,8 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
|
||||
}
|
||||
|
||||
@ -37,6 +43,8 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
|
||||
}
|
||||
|
||||
@ -44,6 +52,8 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
|
||||
}
|
||||
|
||||
@ -51,6 +61,8 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
|
||||
// Safety: `self` is a vector, and the result of the comparison
|
||||
// is always a valid mask.
|
||||
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
|
||||
}
|
||||
}
|
||||
|
@ -2,31 +2,55 @@
|
||||
//! crate.
|
||||
//!
|
||||
//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
|
||||
//!
|
||||
//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
|
||||
//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
|
||||
//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
|
||||
//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
|
||||
//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
|
||||
//! this means that division by poison or undef is like division by zero, which means it inflicts...
|
||||
//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
|
||||
//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
|
||||
//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
|
||||
//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
|
||||
//!
|
||||
//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
|
||||
//!
|
||||
//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
|
||||
|
||||
/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
|
||||
/// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
|
||||
/// is documented alongside each intrinsic.
|
||||
/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
|
||||
/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
|
||||
extern "platform-intrinsic" {
|
||||
/// add/fadd
|
||||
pub(crate) fn simd_add<T>(x: T, y: T) -> T;
|
||||
|
||||
/// sub/fsub
|
||||
pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
|
||||
pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// mul/fmul
|
||||
pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
|
||||
|
||||
/// udiv/sdiv/fdiv
|
||||
pub(crate) fn simd_div<T>(x: T, y: T) -> T;
|
||||
/// ints and uints: {s,u}div incur UB if division by zero occurs.
|
||||
/// ints: sdiv is UB for int::MIN / -1.
|
||||
/// floats: fdiv is never UB, but may create NaNs or infinities.
|
||||
pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// urem/srem/frem
|
||||
pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
|
||||
/// ints and uints: {s,u}rem incur UB if division by zero occurs.
|
||||
/// ints: srem is UB for int::MIN / -1.
|
||||
/// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
|
||||
pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// shl
|
||||
pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
|
||||
/// for (u)ints. poison if rhs >= lhs::BITS
|
||||
pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// lshr/ashr
|
||||
pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
|
||||
/// ints: ashr
|
||||
/// uints: lshr
|
||||
/// poison if rhs >= lhs::BITS
|
||||
pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
/// and
|
||||
pub(crate) fn simd_and<T>(x: T, y: T) -> T;
|
||||
@ -38,6 +62,9 @@ extern "platform-intrinsic" {
|
||||
pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
|
||||
|
||||
/// fptoui/fptosi/uitofp/sitofp
|
||||
/// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
|
||||
/// but the truncated value must fit in the target type or the result is poison.
|
||||
/// use `simd_as` instead for a cast that performs a saturating conversion.
|
||||
pub(crate) fn simd_cast<T, U>(x: T) -> U;
|
||||
/// follows Rust's `T as U` semantics, including saturating float casts
|
||||
/// which amounts to the same as `simd_cast` for many cases
|
||||
@ -45,6 +72,9 @@ extern "platform-intrinsic" {
|
||||
pub(crate) fn simd_as<T, U>(x: T) -> U;
|
||||
|
||||
/// neg/fneg
|
||||
/// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
|
||||
/// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
|
||||
/// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
|
||||
pub(crate) fn simd_neg<T>(x: T) -> T;
|
||||
|
||||
/// fabs
|
||||
@ -54,6 +84,7 @@ extern "platform-intrinsic" {
|
||||
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
|
||||
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
|
||||
|
||||
// these return Simd<int, N> with the same BITS size as the inputs
|
||||
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
|
||||
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
|
||||
@ -62,19 +93,31 @@ extern "platform-intrinsic" {
|
||||
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
|
||||
|
||||
// shufflevector
|
||||
// idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
|
||||
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
|
||||
|
||||
/// llvm.masked.gather
|
||||
/// like a loop of pointer reads
|
||||
/// val: vector of values to select if a lane is masked
|
||||
/// ptr: vector of pointers to read from
|
||||
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
|
||||
/// note, the LLVM intrinsic accepts a mask vector of <N x i1>
|
||||
/// FIXME: review this if/when we fix up our mask story in general?
|
||||
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
|
||||
/// llvm.masked.scatter
|
||||
/// like gather, but more spicy, as it writes instead of reads
|
||||
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
|
||||
|
||||
// {s,u}add.sat
|
||||
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
|
||||
|
||||
// {s,u}sub.sat
|
||||
pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
|
||||
pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
|
||||
|
||||
// reductions
|
||||
// llvm.vector.reduce.{add,fadd}
|
||||
pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
|
||||
// llvm.vector.reduce.{mul,fmul}
|
||||
pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
|
||||
@ -91,7 +134,10 @@ extern "platform-intrinsic" {
|
||||
pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
|
||||
|
||||
// select
|
||||
pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
|
||||
// first argument is a vector of integers, -1 (all bits 1) is "true"
|
||||
// logically equivalent to (yes & m) | (no & (m^-1),
|
||||
// but you can use it on floats.
|
||||
pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
|
||||
#[allow(unused)]
|
||||
pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
|
||||
pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
|
||||
}
|
||||
|
@ -1,7 +1,9 @@
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
#![feature(
|
||||
const_fn_trait_bound,
|
||||
convert_float_to_int,
|
||||
decl_macro,
|
||||
intra_doc_pointers,
|
||||
platform_intrinsics,
|
||||
repr_simd,
|
||||
simd_ffi,
|
||||
|
@ -12,8 +12,10 @@
|
||||
)]
|
||||
mod mask_impl;
|
||||
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
mod to_bitmask;
|
||||
pub use to_bitmask::ToBitMask;
|
||||
|
||||
use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
use core::cmp::Ordering;
|
||||
use core::{fmt, mem};
|
||||
|
||||
@ -42,6 +44,9 @@ mod sealed {
|
||||
use sealed::Sealed;
|
||||
|
||||
/// Marker trait for types that may be used as SIMD mask elements.
|
||||
///
|
||||
/// # Safety
|
||||
/// Type must be a signed integer.
|
||||
pub unsafe trait MaskElement: SimdElement + Sealed {}
|
||||
|
||||
macro_rules! impl_element {
|
||||
@ -149,6 +154,7 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
|
||||
// Safety: the caller must confirm this invariant
|
||||
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
|
||||
}
|
||||
|
||||
@ -161,6 +167,7 @@ where
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_int(value: Simd<T, LANES>) -> Self {
|
||||
assert!(T::valid(value), "all values must be either 0 or -1",);
|
||||
// Safety: the validity has been checked
|
||||
unsafe { Self::from_int_unchecked(value) }
|
||||
}
|
||||
|
||||
@ -179,6 +186,7 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new bool and does not mutate the original value"]
|
||||
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
|
||||
// Safety: the caller must confirm this invariant
|
||||
unsafe { self.0.test_unchecked(lane) }
|
||||
}
|
||||
|
||||
@ -190,6 +198,7 @@ where
|
||||
#[must_use = "method returns a new bool and does not mutate the original value"]
|
||||
pub fn test(&self, lane: usize) -> bool {
|
||||
assert!(lane < LANES, "lane index out of range");
|
||||
// Safety: the lane index has been checked
|
||||
unsafe { self.test_unchecked(lane) }
|
||||
}
|
||||
|
||||
@ -199,6 +208,7 @@ where
|
||||
/// `lane` must be less than `LANES`.
|
||||
#[inline]
|
||||
pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
|
||||
// Safety: the caller must confirm this invariant
|
||||
unsafe {
|
||||
self.0.set_unchecked(lane, value);
|
||||
}
|
||||
@ -211,27 +221,12 @@ where
|
||||
#[inline]
|
||||
pub fn set(&mut self, lane: usize, value: bool) {
|
||||
assert!(lane < LANES, "lane index out of range");
|
||||
// Safety: the lane index has been checked
|
||||
unsafe {
|
||||
self.set_unchecked(lane, value);
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert this mask to a bitmask, with one bit set per lane.
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new array and does not mutate the original value"]
|
||||
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
|
||||
self.0.to_bitmask()
|
||||
}
|
||||
|
||||
/// Convert a bitmask to a mask.
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
|
||||
Self(mask_impl::Mask::from_bitmask(bitmask))
|
||||
}
|
||||
|
||||
/// Returns true if any lane is set, or false otherwise.
|
||||
#[inline]
|
||||
#[must_use = "method returns a new bool and does not mutate the original value"]
|
||||
|
@ -1,7 +1,7 @@
|
||||
#![allow(unused_imports)]
|
||||
use super::MaskElement;
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
|
||||
use core::marker::PhantomData;
|
||||
|
||||
/// A mask where each lane is represented by a single bit.
|
||||
@ -115,20 +115,22 @@ where
|
||||
unsafe { Self(intrinsics::simd_bitmask(value), PhantomData) }
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new array and does not mutate the original value"]
|
||||
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
|
||||
// Safety: these are the same type and we are laundering the generic
|
||||
pub fn to_bitmask_integer<U>(self) -> U
|
||||
where
|
||||
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
|
||||
{
|
||||
// Safety: these are the same types
|
||||
unsafe { core::mem::transmute_copy(&self.0) }
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
|
||||
// Safety: these are the same type and we are laundering the generic
|
||||
Self(unsafe { core::mem::transmute_copy(&bitmask) }, PhantomData)
|
||||
pub fn from_bitmask_integer<U>(bitmask: U) -> Self
|
||||
where
|
||||
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
|
||||
{
|
||||
// Safety: these are the same types
|
||||
unsafe { Self(core::mem::transmute_copy(&bitmask), PhantomData) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -137,6 +139,7 @@ where
|
||||
where
|
||||
U: MaskElement,
|
||||
{
|
||||
// Safety: bitmask layout does not depend on the element width
|
||||
unsafe { core::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
use super::MaskElement;
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount, ToBitMask};
|
||||
|
||||
#[repr(transparent)]
|
||||
pub struct Mask<T, const LANES: usize>(Simd<T, LANES>)
|
||||
@ -66,6 +66,23 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
// Used for bitmask bit order workaround
|
||||
pub(crate) trait ReverseBits {
|
||||
fn reverse_bits(self) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_reverse_bits {
|
||||
{ $($int:ty),* } => {
|
||||
$(
|
||||
impl ReverseBits for $int {
|
||||
fn reverse_bits(self) -> Self { <$int>::reverse_bits(self) }
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_reverse_bits! { u8, u16, u32, u64 }
|
||||
|
||||
impl<T, const LANES: usize> Mask<T, LANES>
|
||||
where
|
||||
T: MaskElement,
|
||||
@ -106,44 +123,40 @@ where
|
||||
where
|
||||
U: MaskElement,
|
||||
{
|
||||
// Safety: masks are simply integer vectors of 0 and -1, and we can cast the element type.
|
||||
unsafe { Mask(intrinsics::simd_cast(self.0)) }
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new array and does not mutate the original value"]
|
||||
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
|
||||
unsafe {
|
||||
let mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN] =
|
||||
intrinsics::simd_bitmask(self.0);
|
||||
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
// TODO fix this in a better way
|
||||
if cfg!(target_endian = "big") {
|
||||
for x in bitmask.as_mut() {
|
||||
*x = x.reverse_bits();
|
||||
}
|
||||
}
|
||||
pub(crate) fn to_bitmask_integer<U: ReverseBits>(self) -> U
|
||||
where
|
||||
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
|
||||
{
|
||||
// Safety: U is required to be the appropriate bitmask type
|
||||
let bitmask: U = unsafe { intrinsics::simd_bitmask(self.0) };
|
||||
|
||||
// LLVM assumes bit order should match endianness
|
||||
if cfg!(target_endian = "big") {
|
||||
bitmask.reverse_bits()
|
||||
} else {
|
||||
bitmask
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
pub fn from_bitmask(mut bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
|
||||
unsafe {
|
||||
// There is a bug where LLVM appears to implement this operation with the wrong
|
||||
// bit order.
|
||||
// TODO fix this in a better way
|
||||
if cfg!(target_endian = "big") {
|
||||
for x in bitmask.as_mut() {
|
||||
*x = x.reverse_bits();
|
||||
}
|
||||
}
|
||||
pub(crate) fn from_bitmask_integer<U: ReverseBits>(bitmask: U) -> Self
|
||||
where
|
||||
super::Mask<T, LANES>: ToBitMask<BitMask = U>,
|
||||
{
|
||||
// LLVM assumes bit order should match endianness
|
||||
let bitmask = if cfg!(target_endian = "big") {
|
||||
bitmask.reverse_bits()
|
||||
} else {
|
||||
bitmask
|
||||
};
|
||||
|
||||
// Safety: U is required to be the appropriate bitmask type
|
||||
unsafe {
|
||||
Self::from_int_unchecked(intrinsics::simd_select_bitmask(
|
||||
bitmask,
|
||||
Self::splat(true).to_int(),
|
||||
@ -155,12 +168,14 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new bool and does not mutate the original value"]
|
||||
pub fn any(self) -> bool {
|
||||
// Safety: use `self` as an integer vector
|
||||
unsafe { intrinsics::simd_reduce_any(self.to_int()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
#[must_use = "method returns a new vector and does not mutate the original value"]
|
||||
pub fn all(self) -> bool {
|
||||
// Safety: use `self` as an integer vector
|
||||
unsafe { intrinsics::simd_reduce_all(self.to_int()) }
|
||||
}
|
||||
}
|
||||
@ -184,6 +199,7 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn bitand(self, rhs: Self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { Self(intrinsics::simd_and(self.0, rhs.0)) }
|
||||
}
|
||||
}
|
||||
@ -197,6 +213,7 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn bitor(self, rhs: Self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { Self(intrinsics::simd_or(self.0, rhs.0)) }
|
||||
}
|
||||
}
|
||||
@ -210,6 +227,7 @@ where
|
||||
#[inline]
|
||||
#[must_use = "method returns a new mask and does not mutate the original value"]
|
||||
fn bitxor(self, rhs: Self) -> Self {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { Self(intrinsics::simd_xor(self.0, rhs.0)) }
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,57 @@
|
||||
use super::{mask_impl, Mask, MaskElement};
|
||||
use crate::simd::{LaneCount, SupportedLaneCount};
|
||||
|
||||
mod sealed {
|
||||
pub trait Sealed {}
|
||||
}
|
||||
pub use sealed::Sealed;
|
||||
|
||||
impl<T, const LANES: usize> Sealed for Mask<T, LANES>
|
||||
where
|
||||
T: MaskElement,
|
||||
LaneCount<LANES>: SupportedLaneCount,
|
||||
{
|
||||
}
|
||||
|
||||
/// Converts masks to and from integer bitmasks.
|
||||
///
|
||||
/// Each bit of the bitmask corresponds to a mask lane, starting with the LSB.
|
||||
///
|
||||
/// # Safety
|
||||
/// This trait is `unsafe` and sealed, since the `BitMask` type must match the number of lanes in
|
||||
/// the mask.
|
||||
pub unsafe trait ToBitMask: Sealed {
|
||||
/// The integer bitmask type.
|
||||
type BitMask;
|
||||
|
||||
/// Converts a mask to a bitmask.
|
||||
fn to_bitmask(self) -> Self::BitMask;
|
||||
|
||||
/// Converts a bitmask to a mask.
|
||||
fn from_bitmask(bitmask: Self::BitMask) -> Self;
|
||||
}
|
||||
|
||||
macro_rules! impl_integer_intrinsic {
|
||||
{ $(unsafe impl ToBitMask<BitMask=$int:ty> for Mask<_, $lanes:literal>)* } => {
|
||||
$(
|
||||
unsafe impl<T: MaskElement> ToBitMask for Mask<T, $lanes> {
|
||||
type BitMask = $int;
|
||||
|
||||
fn to_bitmask(self) -> $int {
|
||||
self.0.to_bitmask_integer()
|
||||
}
|
||||
|
||||
fn from_bitmask(bitmask: $int) -> Self {
|
||||
Self(mask_impl::Mask::from_bitmask_integer(bitmask))
|
||||
}
|
||||
}
|
||||
)*
|
||||
}
|
||||
}
|
||||
|
||||
impl_integer_intrinsic! {
|
||||
unsafe impl ToBitMask<BitMask=u8> for Mask<_, 8>
|
||||
unsafe impl ToBitMask<BitMask=u16> for Mask<_, 16>
|
||||
unsafe impl ToBitMask<BitMask=u32> for Mask<_, 32>
|
||||
unsafe impl ToBitMask<BitMask=u64> for Mask<_, 64>
|
||||
}
|
@ -22,6 +22,7 @@ macro_rules! impl_uint_arith {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn saturating_add(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { simd_saturating_add(self, second) }
|
||||
}
|
||||
|
||||
@ -41,6 +42,7 @@ macro_rules! impl_uint_arith {
|
||||
/// assert_eq!(sat, Simd::splat(0));
|
||||
#[inline]
|
||||
pub fn saturating_sub(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { simd_saturating_sub(self, second) }
|
||||
}
|
||||
})+
|
||||
@ -68,6 +70,7 @@ macro_rules! impl_int_arith {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn saturating_add(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { simd_saturating_add(self, second) }
|
||||
}
|
||||
|
||||
@ -87,6 +90,7 @@ macro_rules! impl_int_arith {
|
||||
/// assert_eq!(sat, Simd::from_array([MIN, MIN, MIN, 0]));
|
||||
#[inline]
|
||||
pub fn saturating_sub(self, second: Self) -> Self {
|
||||
// Safety: `self` is a vector
|
||||
unsafe { simd_saturating_sub(self, second) }
|
||||
}
|
||||
|
||||
|
@ -57,29 +57,40 @@ macro_rules! wrap_bitshift {
|
||||
};
|
||||
}
|
||||
|
||||
// Division by zero is poison, according to LLVM.
|
||||
// So is dividing the MIN value of a signed integer by -1,
|
||||
// since that would return MAX + 1.
|
||||
// FIXME: Rust allows <SInt>::MIN / -1,
|
||||
// so we should probably figure out how to make that safe.
|
||||
/// SAFETY: This macro must only be used to impl Div or Rem and given the matching intrinsic.
|
||||
/// It guards against LLVM's UB conditions for integer div or rem using masks and selects,
|
||||
/// thus guaranteeing a Rust value returns instead.
|
||||
///
|
||||
/// | | LLVM | Rust
|
||||
/// | :--------------: | :--- | :----------
|
||||
/// | N {/,%} 0 | UB | panic!()
|
||||
/// | <$int>::MIN / -1 | UB | <$int>::MIN
|
||||
/// | <$int>::MIN % -1 | UB | 0
|
||||
///
|
||||
macro_rules! int_divrem_guard {
|
||||
( $lhs:ident,
|
||||
$rhs:ident,
|
||||
{ const PANIC_ZERO: &'static str = $zero:literal;
|
||||
const PANIC_OVERFLOW: &'static str = $overflow:literal;
|
||||
$simd_call:ident
|
||||
},
|
||||
$int:ident ) => {
|
||||
if $rhs.lanes_eq(Simd::splat(0)).any() {
|
||||
panic!($zero);
|
||||
} else if <$int>::MIN != 0
|
||||
&& ($lhs.lanes_eq(Simd::splat(<$int>::MIN))
|
||||
// type inference can break here, so cut an SInt to size
|
||||
& $rhs.lanes_eq(Simd::splat(-1i64 as _))).any()
|
||||
{
|
||||
panic!($overflow);
|
||||
} else {
|
||||
unsafe { $crate::simd::intrinsics::$simd_call($lhs, $rhs) }
|
||||
// Prevent otherwise-UB overflow on the MIN / -1 case.
|
||||
let rhs = if <$int>::MIN != 0 {
|
||||
// This should, at worst, optimize to a few branchless logical ops
|
||||
// Ideally, this entire conditional should evaporate
|
||||
// Fire LLVM and implement those manually if it doesn't get the hint
|
||||
($lhs.lanes_eq(Simd::splat(<$int>::MIN))
|
||||
// type inference can break here, so cut an SInt to size
|
||||
& $rhs.lanes_eq(Simd::splat(-1i64 as _)))
|
||||
.select(Simd::splat(1), $rhs)
|
||||
} else {
|
||||
// Nice base case to make it easy to const-fold away the other branch.
|
||||
$rhs
|
||||
};
|
||||
unsafe { $crate::simd::intrinsics::$simd_call($lhs, rhs) }
|
||||
}
|
||||
};
|
||||
}
|
||||
@ -183,7 +194,6 @@ for_base_ops! {
|
||||
impl Div::div {
|
||||
int_divrem_guard {
|
||||
const PANIC_ZERO: &'static str = "attempt to divide by zero";
|
||||
const PANIC_OVERFLOW: &'static str = "attempt to divide with overflow";
|
||||
simd_div
|
||||
}
|
||||
}
|
||||
@ -191,7 +201,6 @@ for_base_ops! {
|
||||
impl Rem::rem {
|
||||
int_divrem_guard {
|
||||
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
|
||||
const PANIC_OVERFLOW: &'static str = "attempt to calculate the remainder with overflow";
|
||||
simd_rem
|
||||
}
|
||||
}
|
||||
|
@ -14,24 +14,28 @@ macro_rules! impl_integer_reductions {
|
||||
/// Horizontal wrapping add. Returns the sum of the lanes of the vector, with wrapping addition.
|
||||
#[inline]
|
||||
pub fn horizontal_sum(self) -> $scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { simd_reduce_add_ordered(self, 0) }
|
||||
}
|
||||
|
||||
/// Horizontal wrapping multiply. Returns the product of the lanes of the vector, with wrapping multiplication.
|
||||
#[inline]
|
||||
pub fn horizontal_product(self) -> $scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { simd_reduce_mul_ordered(self, 1) }
|
||||
}
|
||||
|
||||
/// Horizontal maximum. Returns the maximum lane in the vector.
|
||||
#[inline]
|
||||
pub fn horizontal_max(self) -> $scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { simd_reduce_max(self) }
|
||||
}
|
||||
|
||||
/// Horizontal minimum. Returns the minimum lane in the vector.
|
||||
#[inline]
|
||||
pub fn horizontal_min(self) -> $scalar {
|
||||
// Safety: `self` is an integer vector
|
||||
unsafe { simd_reduce_min(self) }
|
||||
}
|
||||
}
|
||||
@ -63,6 +67,7 @@ macro_rules! impl_float_reductions {
|
||||
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
|
||||
self.as_array().iter().sum()
|
||||
} else {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { simd_reduce_add_ordered(self, 0.) }
|
||||
}
|
||||
}
|
||||
@ -74,6 +79,7 @@ macro_rules! impl_float_reductions {
|
||||
if cfg!(all(target_arch = "x86", not(target_feature = "sse2"))) {
|
||||
self.as_array().iter().product()
|
||||
} else {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { simd_reduce_mul_ordered(self, 1.) }
|
||||
}
|
||||
}
|
||||
@ -84,6 +90,7 @@ macro_rules! impl_float_reductions {
|
||||
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
|
||||
#[inline]
|
||||
pub fn horizontal_max(self) -> $scalar {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { simd_reduce_max(self) }
|
||||
}
|
||||
|
||||
@ -93,6 +100,7 @@ macro_rules! impl_float_reductions {
|
||||
/// return either. This function will not return `NaN` unless all lanes are `NaN`.
|
||||
#[inline]
|
||||
pub fn horizontal_min(self) -> $scalar {
|
||||
// Safety: `self` is a float vector
|
||||
unsafe { simd_reduce_min(self) }
|
||||
}
|
||||
}
|
||||
|
@ -1,9 +1,10 @@
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Simd, SupportedLaneCount};
|
||||
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
|
||||
use core::convert::FloatToInt;
|
||||
|
||||
macro_rules! implement {
|
||||
{
|
||||
$type:ty, $int_type:ty
|
||||
$type:ty
|
||||
} => {
|
||||
impl<const LANES: usize> Simd<$type, LANES>
|
||||
where
|
||||
@ -18,20 +19,22 @@ macro_rules! implement {
|
||||
/// * Not be NaN
|
||||
/// * Not be infinite
|
||||
/// * Be representable in the return type, after truncating off its fractional part
|
||||
///
|
||||
/// If these requirements are infeasible or costly, consider using the safe function [cast],
|
||||
/// which saturates on conversion.
|
||||
///
|
||||
/// [cast]: Simd::cast
|
||||
#[inline]
|
||||
pub unsafe fn to_int_unchecked(self) -> Simd<$int_type, LANES> {
|
||||
pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
|
||||
where
|
||||
$type: FloatToInt<I>,
|
||||
I: SimdElement,
|
||||
{
|
||||
unsafe { intrinsics::simd_cast(self) }
|
||||
}
|
||||
|
||||
/// Creates a floating-point vector from an integer vector. Rounds values that are
|
||||
/// not exactly representable.
|
||||
#[inline]
|
||||
pub fn round_from_int(value: Simd<$int_type, LANES>) -> Self {
|
||||
unsafe { intrinsics::simd_cast(value) }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
implement! { f32, i32 }
|
||||
implement! { f64, i64 }
|
||||
implement! { f32 }
|
||||
implement! { f64 }
|
||||
|
@ -11,6 +11,7 @@ where
|
||||
/// For each lane in the mask, choose the corresponding lane from `true_values` if
|
||||
/// that lane mask is true, and `false_values` if that lane mask is false.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
|
||||
@ -31,6 +32,8 @@ where
|
||||
where
|
||||
U: SimdElement<Mask = T>,
|
||||
{
|
||||
// Safety: The mask has been cast to a vector of integers,
|
||||
// and the operands to select between are vectors of the same type and length.
|
||||
unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
|
||||
}
|
||||
|
||||
@ -39,6 +42,7 @@ where
|
||||
/// For each lane in the mask, choose the corresponding lane from `true_values` if
|
||||
/// that lane mask is true, and `false_values` if that lane mask is false.
|
||||
///
|
||||
/// # Examples
|
||||
/// ```
|
||||
/// # #![feature(portable_simd)]
|
||||
/// # #[cfg(feature = "std")] use core_simd::Mask;
|
||||
|
@ -95,6 +95,7 @@ pub trait Swizzle<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
|
||||
LaneCount<INPUT_LANES>: SupportedLaneCount,
|
||||
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
|
||||
{
|
||||
// Safety: `vector` is a vector, and `INDEX_IMPL` is a const array of u32.
|
||||
unsafe { intrinsics::simd_shuffle(vector, vector, Self::INDEX_IMPL) }
|
||||
}
|
||||
}
|
||||
@ -119,6 +120,7 @@ pub trait Swizzle2<const INPUT_LANES: usize, const OUTPUT_LANES: usize> {
|
||||
LaneCount<INPUT_LANES>: SupportedLaneCount,
|
||||
LaneCount<OUTPUT_LANES>: SupportedLaneCount,
|
||||
{
|
||||
// Safety: `first` and `second` are vectors, and `INDEX_IMPL` is a const array of u32.
|
||||
unsafe { intrinsics::simd_shuffle(first, second, Self::INDEX_IMPL) }
|
||||
}
|
||||
}
|
||||
|
@ -8,12 +8,14 @@ macro_rules! impl_to_bytes {
|
||||
/// Return the memory representation of this integer as a byte array in native byte
|
||||
/// order.
|
||||
pub fn to_ne_bytes(self) -> crate::simd::Simd<u8, {{ $size * LANES }}> {
|
||||
// Safety: transmuting between vectors is safe
|
||||
unsafe { core::mem::transmute_copy(&self) }
|
||||
}
|
||||
|
||||
/// Create a native endian integer value from its memory representation as a byte array
|
||||
/// in native endianness.
|
||||
pub fn from_ne_bytes(bytes: crate::simd::Simd<u8, {{ $size * LANES }}>) -> Self {
|
||||
// Safety: transmuting between vectors is safe
|
||||
unsafe { core::mem::transmute_copy(&bytes) }
|
||||
}
|
||||
}
|
||||
|
@ -12,7 +12,79 @@ pub(crate) mod ptr;
|
||||
use crate::simd::intrinsics;
|
||||
use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
|
||||
|
||||
/// A SIMD vector of `LANES` elements of type `T`.
|
||||
/// A SIMD vector of `LANES` elements of type `T`. `Simd<T, N>` has the same shape as [`[T; N]`](array), but operates like `T`.
|
||||
///
|
||||
/// Two vectors of the same type and length will, by convention, support the operators (+, *, etc.) that `T` does.
|
||||
/// These take the lanes at each index on the left-hand side and right-hand side, perform the operation,
|
||||
/// and return the result in the same lane in a vector of equal size. For a given operator, this is equivalent to zipping
|
||||
/// the two arrays together and mapping the operator over each lane.
|
||||
///
|
||||
/// ```rust
|
||||
/// # #![feature(array_zip, portable_simd)]
|
||||
/// # use core::simd::{Simd};
|
||||
/// let a0: [i32; 4] = [-2, 0, 2, 4];
|
||||
/// let a1 = [10, 9, 8, 7];
|
||||
/// let zm_add = a0.zip(a1).map(|(lhs, rhs)| lhs + rhs);
|
||||
/// let zm_mul = a0.zip(a1).map(|(lhs, rhs)| lhs * rhs);
|
||||
///
|
||||
/// // `Simd<T, N>` implements `From<[T; N]>
|
||||
/// let (v0, v1) = (Simd::from(a0), Simd::from(a1));
|
||||
/// // Which means arrays implement `Into<Simd<T, N>>`.
|
||||
/// assert_eq!(v0 + v1, zm_add.into());
|
||||
/// assert_eq!(v0 * v1, zm_mul.into());
|
||||
/// ```
|
||||
///
|
||||
/// `Simd` with integers has the quirk that these operations are also inherently wrapping, as if `T` was [`Wrapping<T>`].
|
||||
/// Thus, `Simd` does not implement `wrapping_add`, because that is the default behavior.
|
||||
/// This means there is no warning on overflows, even in "debug" builds.
|
||||
/// For most applications where `Simd` is appropriate, it is "not a bug" to wrap,
|
||||
/// and even "debug builds" are unlikely to tolerate the loss of performance.
|
||||
/// You may want to consider using explicitly checked arithmetic if such is required.
|
||||
/// Division by zero still causes a panic, so you may want to consider using floating point numbers if that is unacceptable.
|
||||
///
|
||||
/// [`Wrapping<T>`]: core::num::Wrapping
|
||||
///
|
||||
/// # Layout
|
||||
/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
|
||||
/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
|
||||
/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
|
||||
/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
|
||||
///
|
||||
/// # ABI "Features"
|
||||
/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
|
||||
/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
|
||||
/// The need for this may be corrected in the future.
|
||||
///
|
||||
/// # Safe SIMD with Unsafe Rust
|
||||
///
|
||||
/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
|
||||
/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
|
||||
/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
|
||||
/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
|
||||
/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
|
||||
/// [`read_unaligned`] and [`write_unaligned`]. This is because:
|
||||
/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
|
||||
/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
|
||||
/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
|
||||
/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
|
||||
/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
|
||||
///
|
||||
/// By imposing less obligations, unaligned functions are less likely to make the program unsound,
|
||||
/// and may be just as fast as stricter alternatives.
|
||||
/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
|
||||
/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
|
||||
/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
|
||||
/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
|
||||
/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
|
||||
/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
|
||||
///
|
||||
/// [`transmute`]: core::mem::transmute
|
||||
/// [raw pointers]: pointer
|
||||
/// [`read_unaligned`]: pointer::read_unaligned
|
||||
/// [`write_unaligned`]: pointer::write_unaligned
|
||||
/// [`read`]: pointer::read
|
||||
/// [`write`]: pointer::write
|
||||
/// [as_simd]: slice::as_simd
|
||||
#[repr(simd)]
|
||||
pub struct Simd<T, const LANES: usize>([T; LANES])
|
||||
where
|
||||
@ -102,6 +174,7 @@ where
|
||||
#[inline]
|
||||
#[cfg(not(bootstrap))]
|
||||
pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
|
||||
// Safety: The input argument is a vector of a known SIMD type.
|
||||
unsafe { intrinsics::simd_as(self) }
|
||||
}
|
||||
|
||||
@ -175,7 +248,7 @@ where
|
||||
or: Self,
|
||||
) -> Self {
|
||||
let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
|
||||
// SAFETY: We have masked-off out-of-bounds lanes.
|
||||
// Safety: We have masked-off out-of-bounds lanes.
|
||||
unsafe { Self::gather_select_unchecked(slice, enable, idxs, or) }
|
||||
}
|
||||
|
||||
@ -216,7 +289,7 @@ where
|
||||
let base_ptr = crate::simd::ptr::SimdConstPtr::splat(slice.as_ptr());
|
||||
// Ferris forgive me, I have done pointer arithmetic here.
|
||||
let ptrs = base_ptr.wrapping_add(idxs);
|
||||
// SAFETY: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
|
||||
// Safety: The ptrs have been bounds-masked to prevent memory-unsafe reads insha'allah
|
||||
unsafe { intrinsics::simd_gather(or, ptrs, enable.to_int()) }
|
||||
}
|
||||
|
||||
@ -268,7 +341,7 @@ where
|
||||
idxs: Simd<usize, LANES>,
|
||||
) {
|
||||
let enable: Mask<isize, LANES> = enable & idxs.lanes_lt(Simd::splat(slice.len()));
|
||||
// SAFETY: We have masked-off out-of-bounds lanes.
|
||||
// Safety: We have masked-off out-of-bounds lanes.
|
||||
unsafe { self.scatter_select_unchecked(slice, enable, idxs) }
|
||||
}
|
||||
|
||||
@ -307,7 +380,7 @@ where
|
||||
enable: Mask<isize, LANES>,
|
||||
idxs: Simd<usize, LANES>,
|
||||
) {
|
||||
// SAFETY: This block works with *mut T derived from &mut 'a [T],
|
||||
// Safety: This block works with *mut T derived from &mut 'a [T],
|
||||
// which means it is delicate in Rust's borrowing model, circa 2021:
|
||||
// &mut 'a [T] asserts uniqueness, so deriving &'a [T] invalidates live *mut Ts!
|
||||
// Even though this block is largely safe methods, it must be exactly this way
|
||||
@ -487,7 +560,9 @@ mod sealed {
|
||||
use sealed::Sealed;
|
||||
|
||||
/// Marker trait for types that may be used as SIMD vector elements.
|
||||
/// SAFETY: This trait, when implemented, asserts the compiler can monomorphize
|
||||
///
|
||||
/// # Safety
|
||||
/// This trait, when implemented, asserts the compiler can monomorphize
|
||||
/// `#[repr(simd)]` structs with the marked type as an element.
|
||||
/// Strictly, it is valid to impl if the vector will not be miscompiled.
|
||||
/// Practically, it is user-unfriendly to impl it if the vector won't compile,
|
||||
|
@ -21,6 +21,8 @@ where
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
|
||||
// Safety: converting pointers to usize and vice-versa is safe
|
||||
// (even if using that pointer is not)
|
||||
unsafe {
|
||||
let x: Simd<usize, LANES> = mem::transmute_copy(&self);
|
||||
mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
|
||||
@ -47,6 +49,8 @@ where
|
||||
#[inline]
|
||||
#[must_use]
|
||||
pub fn wrapping_add(self, addend: Simd<usize, LANES>) -> Self {
|
||||
// Safety: converting pointers to usize and vice-versa is safe
|
||||
// (even if using that pointer is not)
|
||||
unsafe {
|
||||
let x: Simd<usize, LANES> = mem::transmute_copy(&self);
|
||||
mem::transmute_copy(&{ x + (addend * Simd::splat(mem::size_of::<T>())) })
|
||||
|
@ -9,6 +9,8 @@ macro_rules! from_transmute {
|
||||
impl core::convert::From<$from> for $to {
|
||||
#[inline]
|
||||
fn from(value: $from) -> $to {
|
||||
// Safety: transmuting between vectors is safe, but the caller of this macro
|
||||
// checks the invariants
|
||||
unsafe { core::mem::transmute(value) }
|
||||
}
|
||||
}
|
||||
|
@ -68,16 +68,16 @@ macro_rules! test_mask_api {
|
||||
assert_eq!(core_simd::Mask::<$type, 8>::from_int(int), mask);
|
||||
}
|
||||
|
||||
#[cfg(feature = "generic_const_exprs")]
|
||||
#[test]
|
||||
fn roundtrip_bitmask_conversion() {
|
||||
use core_simd::ToBitMask;
|
||||
let values = [
|
||||
true, false, false, true, false, false, true, false,
|
||||
true, true, false, false, false, false, false, true,
|
||||
];
|
||||
let mask = core_simd::Mask::<$type, 16>::from_array(values);
|
||||
let bitmask = mask.to_bitmask();
|
||||
assert_eq!(bitmask, [0b01001001, 0b10000011]);
|
||||
assert_eq!(bitmask, 0b1000001101001001);
|
||||
assert_eq!(core_simd::Mask::<$type, 16>::from_bitmask(bitmask), mask);
|
||||
}
|
||||
}
|
||||
|
@ -210,15 +210,21 @@ macro_rules! impl_signed_tests {
|
||||
)
|
||||
}
|
||||
|
||||
fn div_min_may_overflow<const LANES: usize>() {
|
||||
let a = Vector::<LANES>::splat(Scalar::MIN);
|
||||
let b = Vector::<LANES>::splat(-1);
|
||||
assert_eq!(a / b, a);
|
||||
}
|
||||
|
||||
fn rem_min_may_overflow<const LANES: usize>() {
|
||||
let a = Vector::<LANES>::splat(Scalar::MIN);
|
||||
let b = Vector::<LANES>::splat(-1);
|
||||
assert_eq!(a % b, Vector::<LANES>::splat(0));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
test_helpers::test_lanes_panic! {
|
||||
fn div_min_overflow_panics<const LANES: usize>() {
|
||||
let a = Vector::<LANES>::splat(Scalar::MIN);
|
||||
let b = Vector::<LANES>::splat(-1);
|
||||
let _ = a / b;
|
||||
}
|
||||
|
||||
fn div_by_all_zeros_panics<const LANES: usize>() {
|
||||
let a = Vector::<LANES>::splat(42);
|
||||
let b = Vector::<LANES>::splat(0);
|
||||
@ -232,12 +238,6 @@ macro_rules! impl_signed_tests {
|
||||
let _ = a / b;
|
||||
}
|
||||
|
||||
fn rem_min_overflow_panic<const LANES: usize>() {
|
||||
let a = Vector::<LANES>::splat(Scalar::MIN);
|
||||
let b = Vector::<LANES>::splat(-1);
|
||||
let _ = a % b;
|
||||
}
|
||||
|
||||
fn rem_zero_panic<const LANES: usize>() {
|
||||
let a = Vector::<LANES>::splat(42);
|
||||
let b = Vector::<LANES>::splat(0);
|
||||
|
@ -53,14 +53,6 @@ macro_rules! float_rounding_test {
|
||||
}
|
||||
|
||||
test_helpers::test_lanes! {
|
||||
fn from_int<const LANES: usize>() {
|
||||
test_helpers::test_unary_elementwise(
|
||||
&Vector::<LANES>::round_from_int,
|
||||
&|x| x as Scalar,
|
||||
&|_| true,
|
||||
)
|
||||
}
|
||||
|
||||
fn to_int_unchecked<const LANES: usize>() {
|
||||
// The maximum integer that can be represented by the equivalently sized float has
|
||||
// all of the mantissa digits set to 1, pushed up to the MSB.
|
||||
@ -72,11 +64,11 @@ macro_rules! float_rounding_test {
|
||||
runner.run(
|
||||
&test_helpers::array::UniformArrayStrategy::new(-MAX_REPRESENTABLE_VALUE..MAX_REPRESENTABLE_VALUE),
|
||||
|x| {
|
||||
let result_1 = unsafe { Vector::from_array(x).to_int_unchecked().to_array() };
|
||||
let result_1 = unsafe { Vector::from_array(x).to_int_unchecked::<IntScalar>().to_array() };
|
||||
let result_2 = {
|
||||
let mut result = [0; LANES];
|
||||
let mut result: [IntScalar; LANES] = [0; LANES];
|
||||
for (i, o) in x.iter().zip(result.iter_mut()) {
|
||||
*o = unsafe { i.to_int_unchecked() };
|
||||
*o = unsafe { i.to_int_unchecked::<IntScalar>() };
|
||||
}
|
||||
result
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user