mirror of
https://github.com/rust-lang/rust.git
synced 2025-06-08 13:18:32 +00:00
208 lines
7.6 KiB
Rust
208 lines
7.6 KiB
Rust
#![cfg_attr(
|
|
feature = "as_crate",
|
|
feature(core_intrinsics),
|
|
feature(portable_simd),
|
|
allow(internal_features)
|
|
)]
|
|
#[cfg(not(feature = "as_crate"))]
|
|
use core::simd;
|
|
#[cfg(feature = "as_crate")]
|
|
use core_simd::simd;
|
|
|
|
use core::intrinsics::simd as intrinsics;
|
|
|
|
use simd::{LaneCount, Simd, SupportedLaneCount};
|
|
|
|
// When built as a stand-alone crate (`as_crate`), this private module supplies
// the `Sealed` marker trait; it is re-exported under the name `sealed` so that
// the `crate::sealed::Sealed` path resolves in this configuration.
#[cfg(feature = "as_crate")]
mod experimental {
    /// Marker supertrait of `StdFloat`.
    ///
    /// The trait is public but lives in a private module, so only this crate
    /// can name it when writing an `impl`.
    pub trait Sealed {}
}
|
|
|
|
#[cfg(feature = "as_crate")]
|
|
use experimental as sealed;
|
|
|
|
use crate::sealed::Sealed;
|
|
|
|
/// This trait provides a possibly-temporary implementation of float functions
|
|
/// that may, in the absence of hardware support, canonicalize to calling an
|
|
/// operating system's `math.h` dynamically-loaded library (also known as a
|
|
/// shared object). As these conditionally require runtime support, they
|
|
/// should only appear in binaries built assuming OS support: `std`.
|
|
///
|
|
/// However, there is no reason SIMD types, in general, need OS support,
|
|
/// as for many architectures an embedded binary may simply configure that
|
|
/// support itself. This means these types must be visible in `core`
|
|
/// but have these functions available in `std`.
|
|
///
|
|
/// [`f32`] and [`f64`] achieve a similar trick by using "lang items", but
|
|
/// due to compiler limitations, it is harder to implement this approach for
|
|
/// abstract data types like [`Simd`]. From that need, this trait is born.
|
|
///
|
|
/// It is possible this trait will be replaced in some manner in the future,
|
|
/// when either the compiler or its supporting runtime functions are improved.
|
|
/// For now this trait is available to permit experimentation with SIMD float
|
|
/// operations that may lack hardware support, such as `mul_add`.
|
|
pub trait StdFloat: Sealed + Sized {
|
|
/// Elementwise fused multiply-add. Computes `(self * a) + b` with only one rounding error,
|
|
/// yielding a more accurate result than an unfused multiply-add.
|
|
///
|
|
/// Using `mul_add` *may* be more performant than an unfused multiply-add if the target
|
|
/// architecture has a dedicated `fma` CPU instruction. However, this is not always
|
|
/// true, and will be heavily dependent on designing algorithms with specific target
|
|
/// hardware in mind.
|
|
#[inline]
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn mul_add(self, a: Self, b: Self) -> Self {
|
|
unsafe { intrinsics::simd_fma(self, a, b) }
|
|
}
|
|
|
|
/// Produces a vector where every element has the square root value
|
|
/// of the equivalently-indexed element in `self`
|
|
#[inline]
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn sqrt(self) -> Self {
|
|
unsafe { intrinsics::simd_fsqrt(self) }
|
|
}
|
|
|
|
/// Produces a vector where every element has the sine of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn sin(self) -> Self;
|
|
|
|
/// Produces a vector where every element has the cosine of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn cos(self) -> Self;
|
|
|
|
/// Produces a vector where every element has the exponential (base e) of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn exp(self) -> Self;
|
|
|
|
/// Produces a vector where every element has the exponential (base 2) of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn exp2(self) -> Self;
|
|
|
|
/// Produces a vector where every element has the natural logarithm of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn ln(self) -> Self;
|
|
|
|
/// Produces a vector where every element has the logarithm with respect to an arbitrary
|
|
/// in the equivalently-indexed elements in `self` and `base`.
|
|
#[inline]
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn log(self, base: Self) -> Self {
|
|
unsafe { intrinsics::simd_div(self.ln(), base.ln()) }
|
|
}
|
|
|
|
/// Produces a vector where every element has the base-2 logarithm of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn log2(self) -> Self;
|
|
|
|
/// Produces a vector where every element has the base-10 logarithm of the value
|
|
/// in the equivalently-indexed element in `self`.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn log10(self) -> Self;
|
|
|
|
/// Returns the smallest integer greater than or equal to each element.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
#[inline]
|
|
fn ceil(self) -> Self {
|
|
unsafe { intrinsics::simd_ceil(self) }
|
|
}
|
|
|
|
/// Returns the largest integer value less than or equal to each element.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
#[inline]
|
|
fn floor(self) -> Self {
|
|
unsafe { intrinsics::simd_floor(self) }
|
|
}
|
|
|
|
/// Rounds to the nearest integer value. Ties round toward zero.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
#[inline]
|
|
fn round(self) -> Self {
|
|
unsafe { intrinsics::simd_round(self) }
|
|
}
|
|
|
|
/// Returns the floating point's integer value, with its fractional part removed.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
#[inline]
|
|
fn trunc(self) -> Self {
|
|
unsafe { intrinsics::simd_trunc(self) }
|
|
}
|
|
|
|
/// Returns the floating point's fractional value, with its integer part removed.
|
|
#[must_use = "method returns a new vector and does not mutate the original value"]
|
|
fn fract(self) -> Self;
|
|
}
|
|
|
|
impl<const N: usize> Sealed for Simd<f32, N> where LaneCount<N>: SupportedLaneCount {}
|
|
impl<const N: usize> Sealed for Simd<f64, N> where LaneCount<N>: SupportedLaneCount {}
|
|
|
|
// Generates the `StdFloat` implementations for `Simd<f32, N>` and `Simd<f64, N>`.
//
// Input is a comma-terminated list of `method: intrinsic` pairs. Each pair
// becomes a trait method named `method` that forwards to
// `intrinsics::<intrinsic>`. `fract` is not part of the list; it is written
// out by hand in both impls as `self - self.trunc()`.
macro_rules! impl_float {
    {
        $($fn:ident: $intrinsic:ident,)*
    } => {
        impl<const N: usize> StdFloat for Simd<f32, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                self - self.trunc()
            }

            $(
            #[inline]
            fn $fn(self) -> Self {
                // SAFETY: `self` is a `Simd<f32, N>`, i.e. a float vector.
                unsafe { intrinsics::$intrinsic(self) }
            }
            )*
        }

        impl<const N: usize> StdFloat for Simd<f64, N>
        where
            LaneCount<N>: SupportedLaneCount,
        {
            #[inline]
            fn fract(self) -> Self {
                self - self.trunc()
            }

            $(
            #[inline]
            fn $fn(self) -> Self {
                // On aarch64 the vector intrinsic is avoided and the scalar
                // `f64` method is applied to each lane instead; see
                // https://github.com/llvm/llvm-project/issues/83729
                #[cfg(target_arch = "aarch64")]
                {
                    let mut lanes = self.to_array();
                    for lane in lanes.iter_mut() {
                        *lane = (*lane).$fn();
                    }
                    Self::from_array(lanes)
                }

                #[cfg(not(target_arch = "aarch64"))]
                {
                    // SAFETY: `self` is a `Simd<f64, N>`, i.e. a float vector.
                    unsafe { intrinsics::$intrinsic(self) }
                }
            }
            )*
        }
    }
}
|
|
|
|
// Each entry pairs a `StdFloat` method name with the `intrinsics` function
// that the generated method forwards to.
impl_float! {
    sin: simd_fsin,
    cos: simd_fcos,
    exp: simd_fexp,
    exp2: simd_fexp2,
    ln: simd_flog,
    log2: simd_flog2,
    log10: simd_flog10,
}
|