Auto merge of #129410 - RalfJung:miri-sync, r=RalfJung

Miri subtree update

r? `@ghost`
This commit is contained in:
bors 2024-08-22 20:25:09 +00:00
commit eff09483c6
15 changed files with 658 additions and 86 deletions

View File

@ -20,7 +20,7 @@ doctest = false # and no doc tests
[dependencies]
getrandom = { version = "0.2", features = ["std"] }
rand = "0.8"
smallvec = "1.7"
smallvec = { version = "1.7", features = ["drain_filter"] }
aes = { version = "0.8.3", features = ["hazmat"] }
measureme = "11"
ctrlc = "3.2.5"

View File

@ -1 +1 @@
f24a6ba06f4190d8ec4f22d1baa800e64b1900cb
fdf61d499c8a8421ecf98e7924bb87caf43a9938

View File

@ -10,7 +10,7 @@
//! and the relative position of the access;
//! - idempotency properties asserted in `perms.rs` (for optimizations)
use std::fmt;
use std::{fmt, mem};
use smallvec::SmallVec;
@ -699,8 +699,7 @@ impl<'tcx> Tree {
/// Integration with the BorTag garbage collector
impl Tree {
pub fn remove_unreachable_tags(&mut self, live_tags: &FxHashSet<BorTag>) {
let root_is_needed = self.keep_only_needed(self.root, live_tags); // root can't be removed
assert!(root_is_needed);
self.remove_useless_children(self.root, live_tags);
// Right after the GC runs is a good moment to check if we can
// merge some adjacent ranges that were made equal by the removal of some
// tags (this does not necessarily mean that they have identical internal representations,
@ -708,9 +707,16 @@ impl Tree {
self.rperms.merge_adjacent_thorough();
}
/// Checks if a node is useless and should be GC'ed.
/// A node is useless if it has no children and also the tag is no longer live.
fn is_useless(&self, idx: UniIndex, live: &FxHashSet<BorTag>) -> bool {
let node = self.nodes.get(idx).unwrap();
node.children.is_empty() && !live.contains(&node.tag)
}
/// Traverses the entire tree looking for useless tags.
/// Returns true iff the tag it was called on is still live or has live children,
/// and removes from the tree all tags that have no live children.
/// Removes from the tree all useless child nodes of root.
/// It will not delete the root itself.
///
/// NOTE: This leaves in the middle of the tree tags that are unreachable but have
/// reachable children. There is a potential for compacting the tree by reassigning
@ -721,42 +727,60 @@ impl Tree {
/// `child: Reserved`. This tree can exist. If we blindly delete `parent` and reassign
/// `child` to be a direct child of `root` then Writes to `child` are now permitted
/// whereas they were not when `parent` was still there.
fn keep_only_needed(&mut self, idx: UniIndex, live: &FxHashSet<BorTag>) -> bool {
let node = self.nodes.get(idx).unwrap();
// FIXME: this function does a lot of cloning, a 2-pass approach is possibly
// more efficient. It could consist of
// 1. traverse the Tree, collect all useless tags in a Vec
// 2. traverse the Vec, remove all tags previously selected
// Bench it.
let children: SmallVec<_> = node
.children
.clone()
.into_iter()
.filter(|child| self.keep_only_needed(*child, live))
.collect();
let no_children = children.is_empty();
let node = self.nodes.get_mut(idx).unwrap();
node.children = children;
if !live.contains(&node.tag) && no_children {
// All of the children and this node are unreachable, delete this tag
// from the tree (the children have already been deleted by recursive
// calls).
// Due to the API of UniMap we must absolutely call
// `UniValMap::remove` for the key of this tag on *all* maps that used it
// (which are `self.nodes` and every range of `self.rperms`)
// before we can safely apply `UniValMap::forget` to truly remove
// the tag from the mapping.
let tag = node.tag;
self.nodes.remove(idx);
for (_perms_range, perms) in self.rperms.iter_mut_all() {
perms.remove(idx);
fn remove_useless_children(&mut self, root: UniIndex, live: &FxHashSet<BorTag>) {
// To avoid stack overflows, we roll our own stack.
// Each element in the stack consists of the current tag, and the number of the
// next child to be processed.
// The other functions are written using the `TreeVisitorStack`, but that does not work here
// since we need to 1) do a post-traversal and 2) remove nodes from the tree.
// Since we do a post-traversal (by deleting nodes only after handling all children),
// we also need to be a bit smarter than "pop node, push all children."
let mut stack = vec![(root, 0)];
while let Some((tag, nth_child)) = stack.last_mut() {
let node = self.nodes.get(*tag).unwrap();
if *nth_child < node.children.len() {
// Visit the child by pushing it to the stack.
// Also increase `nth_child` so that when we come back to the `tag` node, we
// look at the next child.
let next_child = node.children[*nth_child];
*nth_child += 1;
stack.push((next_child, 0));
continue;
} else {
// We have processed all children of `node`, so now it is time to process `node` itself.
// First, get the current children of `node`. To appease the borrow checker,
// we have to temporarily move the list out of the node, and then put the
// list of remaining children back in.
let mut children_of_node =
mem::take(&mut self.nodes.get_mut(*tag).unwrap().children);
// Remove all useless children, and save them for later.
// The closure needs `&self` and the loop below needs `&mut self`, so we need to `collect`
// in to a temporary list.
let to_remove: Vec<_> =
children_of_node.drain_filter(|x| self.is_useless(*x, live)).collect();
// Put back the now-filtered vector.
self.nodes.get_mut(*tag).unwrap().children = children_of_node;
// Now, all that is left is unregistering the children saved in `to_remove`.
for idx in to_remove {
// Note: In the rest of this comment, "this node" refers to `idx`.
// This node has no more children (if there were any, they have already been removed).
// It is also unreachable as determined by the GC, so we can remove it everywhere.
// Due to the API of UniMap we must make sure to call
// `UniValMap::remove` for the key of this node on *all* maps that used it
// (which are `self.nodes` and every range of `self.rperms`)
// before we can safely apply `UniKeyMap::remove` to truly remove
// this tag from the `tag_mapping`.
let node = self.nodes.remove(idx).unwrap();
for (_perms_range, perms) in self.rperms.iter_mut_all() {
perms.remove(idx);
}
self.tag_mapping.remove(&node.tag);
}
// We are done, the parent can continue.
stack.pop();
continue;
}
self.tag_mapping.remove(&tag);
// The tag has been deleted, inform the caller
false
} else {
// The tag is still live or has live children, it must be kept
true
}
}
}

View File

@ -12,7 +12,7 @@
#![allow(dead_code)]
use std::hash::Hash;
use std::{hash::Hash, mem};
use rustc_data_structures::fx::FxHashMap;
@ -187,13 +187,16 @@ impl<V> UniValMap<V> {
self.data.get_mut(idx.idx as usize).and_then(Option::as_mut)
}
/// Delete any value associated with this index. Ok even if the index
/// has no associated value.
pub fn remove(&mut self, idx: UniIndex) {
/// Delete any value associated with this index.
/// Returns None if the value was not present, otherwise
/// returns the previously stored value.
pub fn remove(&mut self, idx: UniIndex) -> Option<V> {
if idx.idx as usize >= self.data.len() {
return;
return None;
}
self.data[idx.idx as usize] = None;
let mut res = None;
mem::swap(&mut res, &mut self.data[idx.idx as usize]);
res
}
}

View File

@ -1204,14 +1204,14 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
)?;
}
"freebsd" => {
this.write_int(ino, &this.project_field_named(&entry_place, "d_fileno")?)?;
// `d_off` only exists on FreeBSD 12+, but we support v11 as well.
// `libc` uses a build script to determine which version of the API to use,
// and cross-builds always end up using v11.
// To support both v11 and v12+, we dynamically check whether the field exists.
if this.projectable_has_field(&entry_place, "d_off") {
this.write_int(0, &this.project_field_named(&entry_place, "d_off")?)?;
}
#[rustfmt::skip]
this.write_int_fields_named(
&[
("d_fileno", ino.into()),
("d_off", 0),
],
&entry_place,
)?;
}
_ => unreachable!(),
}

View File

@ -15,6 +15,7 @@ mod aesni;
mod avx;
mod avx2;
mod bmi;
mod sha;
mod sse;
mod sse2;
mod sse3;
@ -105,6 +106,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sha") => {
return sha::EvalContextExt::emulate_x86_sha_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sse.") => {
return sse::EvalContextExt::emulate_x86_sse_intrinsic(
this, link_name, abi, args, dest,

View File

@ -0,0 +1,221 @@
//! Implements sha256 SIMD instructions of x86 targets
//!
//! The functions that actually compute SHA256 were copied from [RustCrypto's sha256 module].
//!
//! [RustCrypto's sha256 module]: https://github.com/RustCrypto/hashes/blob/6be8466247e936c415d8aafb848697f39894a386/sha2/src/sha256/soft.rs
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;
use crate::*;
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn emulate_x86_sha_intrinsic(
&mut self,
link_name: Symbol,
abi: Abi,
args: &[OpTy<'tcx>],
dest: &MPlaceTy<'tcx>,
) -> InterpResult<'tcx, EmulateItemResult> {
let this = self.eval_context_mut();
this.expect_target_feature_for_intrinsic(link_name, "sha")?;
// Prefix should have already been checked.
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sha").unwrap();
fn read<'c>(this: &mut MiriInterpCx<'c>, reg: &MPlaceTy<'c>) -> InterpResult<'c, [u32; 4]> {
let mut res = [0; 4];
// We reverse the order because x86 is little endian but the copied implementation uses
// big endian.
for (i, dst) in res.iter_mut().rev().enumerate() {
let projected = &this.project_index(reg, i.try_into().unwrap())?;
*dst = this.read_scalar(projected)?.to_u32()?
}
Ok(res)
}
fn write<'c>(
this: &mut MiriInterpCx<'c>,
dest: &MPlaceTy<'c>,
val: [u32; 4],
) -> InterpResult<'c, ()> {
// We reverse the order because x86 is little endian but the copied implementation uses
// big endian.
for (i, part) in val.into_iter().rev().enumerate() {
let projected = &this.project_index(dest, i.try_into().unwrap())?;
this.write_scalar(Scalar::from_u32(part), projected)?;
}
Ok(())
}
match unprefixed_name {
// Used to implement the _mm_sha256rnds2_epu32 function.
"256rnds2" => {
let [a, b, k] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let (a_reg, a_len) = this.operand_to_simd(a)?;
let (b_reg, b_len) = this.operand_to_simd(b)?;
let (k_reg, k_len) = this.operand_to_simd(k)?;
let (dest, dest_len) = this.mplace_to_simd(dest)?;
assert_eq!(a_len, 4);
assert_eq!(b_len, 4);
assert_eq!(k_len, 4);
assert_eq!(dest_len, 4);
let a = read(this, &a_reg)?;
let b = read(this, &b_reg)?;
let k = read(this, &k_reg)?;
let result = sha256_digest_round_x2(a, b, k);
write(this, &dest, result)?;
}
// Used to implement the _mm_sha256msg1_epu32 function.
"256msg1" => {
let [a, b] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let (a_reg, a_len) = this.operand_to_simd(a)?;
let (b_reg, b_len) = this.operand_to_simd(b)?;
let (dest, dest_len) = this.mplace_to_simd(dest)?;
assert_eq!(a_len, 4);
assert_eq!(b_len, 4);
assert_eq!(dest_len, 4);
let a = read(this, &a_reg)?;
let b = read(this, &b_reg)?;
let result = sha256msg1(a, b);
write(this, &dest, result)?;
}
// Used to implement the _mm_sha256msg2_epu32 function.
"256msg2" => {
let [a, b] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
let (a_reg, a_len) = this.operand_to_simd(a)?;
let (b_reg, b_len) = this.operand_to_simd(b)?;
let (dest, dest_len) = this.mplace_to_simd(dest)?;
assert_eq!(a_len, 4);
assert_eq!(b_len, 4);
assert_eq!(dest_len, 4);
let a = read(this, &a_reg)?;
let b = read(this, &b_reg)?;
let result = sha256msg2(a, b);
write(this, &dest, result)?;
}
_ => return Ok(EmulateItemResult::NotSupported),
}
Ok(EmulateItemResult::NeedsReturn)
}
}
#[inline(always)]
fn shr(v: [u32; 4], o: u32) -> [u32; 4] {
[v[0] >> o, v[1] >> o, v[2] >> o, v[3] >> o]
}
#[inline(always)]
fn shl(v: [u32; 4], o: u32) -> [u32; 4] {
[v[0] << o, v[1] << o, v[2] << o, v[3] << o]
}
#[inline(always)]
fn or(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
[a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]]
}
#[inline(always)]
fn xor(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
[a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]]
}
#[inline(always)]
fn add(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
[
a[0].wrapping_add(b[0]),
a[1].wrapping_add(b[1]),
a[2].wrapping_add(b[2]),
a[3].wrapping_add(b[3]),
]
}
fn sha256load(v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
[v3[3], v2[0], v2[1], v2[2]]
}
fn sha256_digest_round_x2(cdgh: [u32; 4], abef: [u32; 4], wk: [u32; 4]) -> [u32; 4] {
macro_rules! big_sigma0 {
($a:expr) => {
($a.rotate_right(2) ^ $a.rotate_right(13) ^ $a.rotate_right(22))
};
}
macro_rules! big_sigma1 {
($a:expr) => {
($a.rotate_right(6) ^ $a.rotate_right(11) ^ $a.rotate_right(25))
};
}
macro_rules! bool3ary_202 {
($a:expr, $b:expr, $c:expr) => {
$c ^ ($a & ($b ^ $c))
};
} // Choose, MD5F, SHA1C
macro_rules! bool3ary_232 {
($a:expr, $b:expr, $c:expr) => {
($a & $b) ^ ($a & $c) ^ ($b & $c)
};
} // Majority, SHA1M
let [_, _, wk1, wk0] = wk;
let [a0, b0, e0, f0] = abef;
let [c0, d0, g0, h0] = cdgh;
// a round
let x0 =
big_sigma1!(e0).wrapping_add(bool3ary_202!(e0, f0, g0)).wrapping_add(wk0).wrapping_add(h0);
let y0 = big_sigma0!(a0).wrapping_add(bool3ary_232!(a0, b0, c0));
let (a1, b1, c1, d1, e1, f1, g1, h1) =
(x0.wrapping_add(y0), a0, b0, c0, x0.wrapping_add(d0), e0, f0, g0);
// a round
let x1 =
big_sigma1!(e1).wrapping_add(bool3ary_202!(e1, f1, g1)).wrapping_add(wk1).wrapping_add(h1);
let y1 = big_sigma0!(a1).wrapping_add(bool3ary_232!(a1, b1, c1));
let (a2, b2, _, _, e2, f2, _, _) =
(x1.wrapping_add(y1), a1, b1, c1, x1.wrapping_add(d1), e1, f1, g1);
[a2, b2, e2, f2]
}
fn sha256msg1(v0: [u32; 4], v1: [u32; 4]) -> [u32; 4] {
// sigma 0 on vectors
#[inline]
fn sigma0x4(x: [u32; 4]) -> [u32; 4] {
let t1 = or(shr(x, 7), shl(x, 25));
let t2 = or(shr(x, 18), shl(x, 14));
let t3 = shr(x, 3);
xor(xor(t1, t2), t3)
}
add(v0, sigma0x4(sha256load(v0, v1)))
}
fn sha256msg2(v4: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
macro_rules! sigma1 {
($a:expr) => {
$a.rotate_right(17) ^ $a.rotate_right(19) ^ ($a >> 10)
};
}
let [x3, x2, x1, x0] = v4;
let [w15, w14, _, _] = v3;
let w16 = x0.wrapping_add(sigma1!(w14));
let w17 = x1.wrapping_add(sigma1!(w15));
let w18 = x2.wrapping_add(sigma1!(w16));
let w19 = x3.wrapping_add(sigma1!(w17));
[w19, w18, w17, w16]
}

View File

@ -119,9 +119,9 @@ dependencies = [
[[package]]
name = "libc"
version = "0.2.155"
version = "0.2.158"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c"
checksum = "d8adc4bb1803a324070e64a98ae98f38934d91957a99cfb3a43dcbc01bc56439"
[[package]]
name = "linux-raw-sys"

View File

@ -13,7 +13,5 @@ use std::alloc::{Allocator, Global, Layout, System};
fn main() {
let l = Layout::from_size_align(1, 1).unwrap();
let ptr = Global.allocate(l).unwrap().as_non_null_ptr();
unsafe {
System.deallocate(ptr, l);
}
unsafe { System.deallocate(ptr, l) };
}

View File

@ -12,7 +12,7 @@ LL | FREE();
note: inside `main`
--> $DIR/global_system_mixup.rs:LL:CC
|
LL | System.deallocate(ptr, l);
LL | unsafe { System.deallocate(ptr, l) };
| ^
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

View File

@ -0,0 +1,19 @@
//@revisions: stack tree
//@[tree]compile-flags: -Zmiri-tree-borrows
//@[tree]error-in-other-file: /deallocation .* is forbidden/
use std::alloc::{alloc, dealloc, Layout};
// `x` is strongly protected but covers zero bytes.
// Let's see if deallocating the allocation x points to is UB:
// in TB, it is UB, but in SB it is not.
fn test(_x: &mut (), ptr: *mut u8, l: Layout) {
unsafe { dealloc(ptr, l) };
}
fn main() {
let l = Layout::from_size_align(1, 1).unwrap();
let ptr = unsafe { alloc(l) };
unsafe { test(&mut *ptr.cast::<()>(), ptr, l) };
// In SB the test would pass if it weren't for this line.
unsafe { std::hint::unreachable_unchecked() }; //~[stack] ERROR: unreachable
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: entering unreachable code
--> $DIR/zero-sized-protected.rs:LL:CC
|
LL | unsafe { std::hint::unreachable_unchecked() };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ entering unreachable code
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/zero-sized-protected.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,36 @@
error: Undefined Behavior: deallocation through <TAG> (root of the allocation) at ALLOC[0x0] is forbidden
--> RUSTLIB/alloc/src/alloc.rs:LL:CC
|
LL | unsafe { __rust_dealloc(ptr, layout.size(), layout.align()) }
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ deallocation through <TAG> (root of the allocation) at ALLOC[0x0] is forbidden
|
= help: this indicates a potential bug in the program: it performed an invalid operation, but the Tree Borrows rules it violated are still experimental
= help: the allocation of the accessed tag <TAG> (root of the allocation) also contains the strongly protected tag <TAG>
= help: the strongly protected tag <TAG> disallows deallocations
help: the accessed tag <TAG> was created here
--> $DIR/zero-sized-protected.rs:LL:CC
|
LL | let ptr = unsafe { alloc(l) };
| ^^^^^^^^
help: the strongly protected tag <TAG> was created here, in the initial state Reserved
--> $DIR/zero-sized-protected.rs:LL:CC
|
LL | fn test(_x: &mut (), ptr: *mut u8, l: Layout) {
| ^^
= note: BACKTRACE (of the first span):
= note: inside `std::alloc::dealloc` at RUSTLIB/alloc/src/alloc.rs:LL:CC
note: inside `test`
--> $DIR/zero-sized-protected.rs:LL:CC
|
LL | unsafe { dealloc(ptr, l) };
| ^^^^^^^^^^^^^^^
note: inside `main`
--> $DIR/zero-sized-protected.rs:LL:CC
|
LL | unsafe { test(&mut *ptr.cast::<()>(), ptr, l) };
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -1,8 +1,7 @@
//@only-target-linux
#![feature(exposed_provenance)] // Needed for fn test_pointer()
#![feature(strict_provenance)]
use std::convert::TryInto;
use std::mem::MaybeUninit;
fn main() {
test_epoll_socketpair();
@ -17,7 +16,6 @@ fn main() {
test_no_notification_for_unregister_flag();
test_epoll_ctl_mod();
test_epoll_ctl_del();
test_pointer();
test_two_same_fd_in_same_epoll_instance();
test_epoll_wait_maxevent_zero();
test_socketpair_epollerr();
@ -261,24 +259,6 @@ fn test_epoll_eventfd() {
check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]);
}
fn test_pointer() {
// Create an epoll instance.
let epfd = unsafe { libc::epoll_create1(0) };
assert_ne!(epfd, -1);
// Create a socketpair instance.
let mut fds = [-1, -1];
let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) };
assert_eq!(res, 0);
// Register fd[1] with EPOLLIN|EPOLLOUT|EPOLLET
let data = MaybeUninit::<u64>::uninit().as_ptr();
let mut ev =
libc::epoll_event { events: EPOLL_IN_OUT_ET, u64: data.expose_provenance() as u64 };
let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) };
assert_eq!(res, 0);
}
// When read/write happened on one side of the socketpair, only the other side will be notified.
fn test_epoll_socketpair_both_sides() {
// Create an epoll instance.
@ -543,9 +523,9 @@ fn test_epoll_wait_maxevent_zero() {
// Create an epoll instance.
let epfd = unsafe { libc::epoll_create1(0) };
assert_ne!(epfd, -1);
// It is ok to use uninitialised pointer here because it will error out before the
// pointer actually get accessed.
let array_ptr = MaybeUninit::<libc::epoll_event>::uninit().as_mut_ptr();
// It is ok to use a dangling pointer here because it will error out before the
// pointer actually gets accessed.
let array_ptr = std::ptr::without_provenance_mut::<libc::epoll_event>(0x100);
let res = unsafe { libc::epoll_wait(epfd, array_ptr, 0, 0) };
let e = std::io::Error::last_os_error();
assert_eq!(e.raw_os_error(), Some(libc::EINVAL));

View File

@ -0,0 +1,270 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+sha,+sse2,+ssse3,+sse4.1
#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;
macro_rules! rounds4 {
($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
let k = K32X4[$i];
let kv = _mm_set_epi32(k[0] as i32, k[1] as i32, k[2] as i32, k[3] as i32);
let t1 = _mm_add_epi32($rest, kv);
$cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, t1);
let t2 = _mm_shuffle_epi32(t1, 0x0E);
$abef = _mm_sha256rnds2_epu32($abef, $cdgh, t2);
}};
}
macro_rules! schedule_rounds4 {
(
$abef:ident, $cdgh:ident,
$w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
$i: expr
) => {{
$w4 = schedule($w0, $w1, $w2, $w3);
rounds4!($abef, $cdgh, $w4, $i);
}};
}
fn main() {
assert!(is_x86_feature_detected!("sha"));
assert!(is_x86_feature_detected!("sse2"));
assert!(is_x86_feature_detected!("ssse3"));
assert!(is_x86_feature_detected!("sse4.1"));
unsafe {
test_sha256rnds2();
test_sha256msg1();
test_sha256msg2();
test_sha256();
}
}
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn test_sha256rnds2() {
let test_vectors = [
(
[0x3c6ef372, 0xa54ff53a, 0x1f83d9ab, 0x5be0cd19],
[0x6a09e667, 0xbb67ae85, 0x510e527f, 0x9b05688c],
[0x592340c6, 0x17386142, 0x91a0b7b1, 0x94ffa30c],
[0xeef39c6c, 0x4e7dfbc1, 0x467a98f3, 0xeb3d5616],
),
(
[0x6a09e667, 0xbb67ae85, 0x510e527f, 0x9b05688c],
[0xeef39c6c, 0x4e7dfbc1, 0x467a98f3, 0xeb3d5616],
[0x91a0b7b1, 0x94ffa30c, 0x592340c6, 0x17386142],
[0x7e7f3c9d, 0x78db9a20, 0xd82fe6ed, 0xaf1f2704],
),
(
[0xeef39c6c, 0x4e7dfbc1, 0x467a98f3, 0xeb3d5616],
[0x7e7f3c9d, 0x78db9a20, 0xd82fe6ed, 0xaf1f2704],
[0x1a89c3f6, 0xf3b6e817, 0x7a5a8511, 0x8bcc35cf],
[0xc9292f7e, 0x49137bd9, 0x7e5f9e08, 0xd10f9247],
),
];
for (cdgh, abef, wk, expected) in test_vectors {
let output_reg = _mm_sha256rnds2_epu32(set_arr(cdgh), set_arr(abef), set_arr(wk));
let mut output = [0u32; 4];
_mm_storeu_si128(output.as_mut_ptr().cast(), output_reg);
// The values are stored as little endian, so we need to reverse them
output.reverse();
assert_eq!(output, expected);
}
}
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn test_sha256msg1() {
let test_vectors = [
(
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0x2da4b536, 0x77f29328, 0x541a4d59, 0x6afb680c],
),
(
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0x2da4b536, 0x77f29328, 0x541a4d59, 0x6afb680c],
),
(
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0x2da4b536, 0x77f29328, 0x541a4d59, 0x6afb680c],
),
];
for (v0, v1, expected) in test_vectors {
let output_reg = _mm_sha256msg1_epu32(set_arr(v0), set_arr(v1));
let mut output = [0u32; 4];
_mm_storeu_si128(output.as_mut_ptr().cast(), output_reg);
// The values are stored as little endian, so we need to reverse them
output.reverse();
assert_eq!(output, expected);
}
}
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn test_sha256msg2() {
let test_vectors = [
(
[0x801a28aa, 0xe75ff849, 0xb591b2cc, 0x8b64db2c],
[0x6f6d6521, 0x61776573, 0x20697320, 0x52757374],
[0xe7c46c4e, 0x8ce92ccc, 0xd3c0f3ce, 0xe9745c78],
),
(
[0x171911ae, 0xe75ff849, 0xb591b2cc, 0x8b64db2c],
[0xe7c46c4e, 0x8ce92ccc, 0xd3c0f3ce, 0xe9745c78],
[0xc17c6ea3, 0xc4d10083, 0x712910cd, 0x3f41c8ce],
),
(
[0x6ce67e04, 0x5fb6ff76, 0xe1037a25, 0x3ebc5bda],
[0xc17c6ea3, 0xc4d10083, 0x712910cd, 0x3f41c8ce],
[0xf5ab4eff, 0x83d732a5, 0x9bb941af, 0xdf1d0a8c],
),
];
for (v4, v3, expected) in test_vectors {
let output_reg = _mm_sha256msg2_epu32(set_arr(v4), set_arr(v3));
let mut output = [0u32; 4];
_mm_storeu_si128(output.as_mut_ptr().cast(), output_reg);
// The values are stored as little endian, so we need to reverse them
output.reverse();
assert_eq!(output, expected);
}
}
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn set_arr(x: [u32; 4]) -> __m128i {
_mm_set_epi32(x[0] as i32, x[1] as i32, x[2] as i32, x[3] as i32)
}
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn test_sha256() {
use std::fmt::Write;
/// The initial state of the hash engine.
const INITIAL_STATE: [u32; 8] = [
0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab,
0x5be0cd19,
];
// We don't want to bother with hash finalization algorithm so we just feed constant data.
// This is the content that's being hashed - you can feed it to sha256sum and it'll output
// the same hash (beware of newlines though).
let first_block = *b"Rust is awesome!Rust is awesome!Rust is awesome!Rust is awesome!";
// sha256 is fianlized by appending 0x80, then zeros and finally the data lenght at the
// end.
let mut final_block = [0; 64];
final_block[0] = 0x80;
final_block[(64 - 8)..].copy_from_slice(&(8u64 * 64).to_be_bytes());
let mut state = INITIAL_STATE;
digest_blocks(&mut state, &[first_block, final_block]);
// We compare strings because it's easier to check the hex and the output of panic.
let mut hash = String::new();
for chunk in &state {
write!(hash, "{:08x}", chunk).expect("writing to String doesn't fail");
}
assert_eq!(hash, "1b2293d21b17a0cb0c18737307c37333dea775eded18cefed45e50389f9f8184");
}
// Almost full SHA256 implementation copied from RustCrypto's sha2 crate
// https://github.com/RustCrypto/hashes/blob/6be8466247e936c415d8aafb848697f39894a386/sha2/src/sha256/x86.rs
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
let t1 = _mm_sha256msg1_epu32(v0, v1);
let t2 = _mm_alignr_epi8(v3, v2, 4);
let t3 = _mm_add_epi32(t1, t2);
_mm_sha256msg2_epu32(t3, v3)
}
// we use unaligned loads with `__m128i` pointers
#[allow(clippy::cast_ptr_alignment)]
#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
#[allow(non_snake_case)]
let MASK: __m128i =
_mm_set_epi64x(0x0C0D_0E0F_0809_0A0Bu64 as i64, 0x0405_0607_0001_0203u64 as i64);
let state_ptr: *const __m128i = state.as_ptr().cast();
let dcba = _mm_loadu_si128(state_ptr.add(0));
let efgh = _mm_loadu_si128(state_ptr.add(1));
let cdab = _mm_shuffle_epi32(dcba, 0xB1);
let efgh = _mm_shuffle_epi32(efgh, 0x1B);
let mut abef = _mm_alignr_epi8(cdab, efgh, 8);
let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0);
for block in blocks {
let abef_save = abef;
let cdgh_save = cdgh;
let block_ptr: *const __m128i = block.as_ptr().cast();
let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(0)), MASK);
let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(1)), MASK);
let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(2)), MASK);
let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.add(3)), MASK);
let mut w4;
rounds4!(abef, cdgh, w0, 0);
rounds4!(abef, cdgh, w1, 1);
rounds4!(abef, cdgh, w2, 2);
rounds4!(abef, cdgh, w3, 3);
schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
abef = _mm_add_epi32(abef, abef_save);
cdgh = _mm_add_epi32(cdgh, cdgh_save);
}
let feba = _mm_shuffle_epi32(abef, 0x1B);
let dchg = _mm_shuffle_epi32(cdgh, 0xB1);
let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
let hgef = _mm_alignr_epi8(dchg, feba, 8);
let state_ptr_mut: *mut __m128i = state.as_mut_ptr().cast();
_mm_storeu_si128(state_ptr_mut.add(0), dcba);
_mm_storeu_si128(state_ptr_mut.add(1), hgef);
}
/// Swapped round constants for SHA-256 family of digests
pub static K32X4: [[u32; 4]; 16] = {
let mut res = [[0u32; 4]; 16];
let mut i = 0;
while i < 16 {
res[i] = [K32[4 * i + 3], K32[4 * i + 2], K32[4 * i + 1], K32[4 * i]];
i += 1;
}
res
};
/// Round constants for SHA-256 family of digests
pub static K32: [u32; 64] = [
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2,
];