mirror of
https://github.com/rust-lang/rust.git
synced 2025-01-26 14:43:24 +00:00
Auto merge of #93671 - Kobzol:stable-hash-const, r=the8472
Use const generics in SipHasher128's short_write This was proposed by `@michaelwoerister` [here](https://github.com/rust-lang/rust/pull/93615#discussion_r799485554). A few comments: 1) I tried to pass `&[u8; LEN]` instead of `[u8; LEN]`. Locally, it resulted in small icount regressions (about 0.5 %). When passing by value, there were no regressions (and no improvements). 2) I wonder if we should use `to_ne_bytes()` in `SipHasher128` to keep it generic and only use `to_le_bytes()` in `StableHasher`. However, currently `SipHasher128` is only used in `StableHasher` and the `short_write` method was private, so I couldn't use it directly from `StableHasher`. Using `to_le()` in the `StableHasher` was breaking this abstraction boundary before slightly. ```rust debug_assert!(LEN <= 8); ``` This could be done at compile time, but actually I think that now we can remove this assert altogether. r? `@the8472`
This commit is contained in:
commit
fc323035ac
@ -202,28 +202,26 @@ impl SipHasher128 {
|
||||
hasher
|
||||
}
|
||||
|
||||
// A specialized write function for values with size <= 8.
|
||||
#[inline]
|
||||
fn short_write<T>(&mut self, x: T) {
|
||||
let size = mem::size_of::<T>();
|
||||
pub fn short_write<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
|
||||
let nbuf = self.nbuf;
|
||||
debug_assert!(size <= 8);
|
||||
debug_assert!(LEN <= 8);
|
||||
debug_assert!(nbuf < BUFFER_SIZE);
|
||||
debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
|
||||
debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
|
||||
|
||||
if nbuf + size < BUFFER_SIZE {
|
||||
if nbuf + LEN < BUFFER_SIZE {
|
||||
unsafe {
|
||||
// The memcpy call is optimized away because the size is known.
|
||||
let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
|
||||
ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
|
||||
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
|
||||
}
|
||||
|
||||
self.nbuf = nbuf + size;
|
||||
self.nbuf = nbuf + LEN;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
unsafe { self.short_write_process_buffer(x) }
|
||||
unsafe { self.short_write_process_buffer(bytes) }
|
||||
}
|
||||
|
||||
// A specialized write function for values with size <= 8 that should only
|
||||
@ -233,18 +231,17 @@ impl SipHasher128 {
|
||||
// `self.nbuf` must cause `self.buf` to become fully initialized (and not
|
||||
// overflow) if it wasn't already.
|
||||
#[inline(never)]
|
||||
unsafe fn short_write_process_buffer<T>(&mut self, x: T) {
|
||||
let size = mem::size_of::<T>();
|
||||
unsafe fn short_write_process_buffer<const LEN: usize>(&mut self, bytes: [u8; LEN]) {
|
||||
let nbuf = self.nbuf;
|
||||
debug_assert!(size <= 8);
|
||||
debug_assert!(LEN <= 8);
|
||||
debug_assert!(nbuf < BUFFER_SIZE);
|
||||
debug_assert!(nbuf + size >= BUFFER_SIZE);
|
||||
debug_assert!(nbuf + size < BUFFER_WITH_SPILL_SIZE);
|
||||
debug_assert!(nbuf + LEN >= BUFFER_SIZE);
|
||||
debug_assert!(nbuf + LEN < BUFFER_WITH_SPILL_SIZE);
|
||||
|
||||
// Copy first part of input into end of buffer, possibly into spill
|
||||
// element. The memcpy call is optimized away because the size is known.
|
||||
let dst = (self.buf.as_mut_ptr() as *mut u8).add(nbuf);
|
||||
ptr::copy_nonoverlapping(&x as *const _ as *const u8, dst, size);
|
||||
ptr::copy_nonoverlapping(bytes.as_ptr(), dst, LEN);
|
||||
|
||||
// Process buffer.
|
||||
for i in 0..BUFFER_CAPACITY {
|
||||
@ -254,17 +251,17 @@ impl SipHasher128 {
|
||||
self.state.v0 ^= elem;
|
||||
}
|
||||
|
||||
// Copy remaining input into start of buffer by copying size - 1
|
||||
// elements from spill (at most size - 1 bytes could have overflowed
|
||||
// Copy remaining input into start of buffer by copying LEN - 1
|
||||
// elements from spill (at most LEN - 1 bytes could have overflowed
|
||||
// into the spill). The memcpy call is optimized away because the size
|
||||
// is known. And the whole copy is optimized away for size == 1.
|
||||
// is known. And the whole copy is optimized away for LEN == 1.
|
||||
let src = self.buf.get_unchecked(BUFFER_SPILL_INDEX) as *const _ as *const u8;
|
||||
ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, size - 1);
|
||||
ptr::copy_nonoverlapping(src, self.buf.as_mut_ptr() as *mut u8, LEN - 1);
|
||||
|
||||
// This function should only be called when the write fills the buffer.
|
||||
// Therefore, when size == 1, the new `self.nbuf` must be zero. The size
|
||||
// is statically known, so the branch is optimized away.
|
||||
self.nbuf = if size == 1 { 0 } else { nbuf + size - BUFFER_SIZE };
|
||||
// Therefore, when LEN == 1, the new `self.nbuf` must be zero.
|
||||
// LEN is statically known, so the branch is optimized away.
|
||||
self.nbuf = if LEN == 1 { 0 } else { nbuf + LEN - BUFFER_SIZE };
|
||||
self.processed += BUFFER_SIZE;
|
||||
}
|
||||
|
||||
@ -412,52 +409,52 @@ impl SipHasher128 {
|
||||
impl Hasher for SipHasher128 {
|
||||
#[inline]
|
||||
fn write_u8(&mut self, i: u8) {
|
||||
self.short_write(i);
|
||||
self.short_write(i.to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_u16(&mut self, i: u16) {
|
||||
self.short_write(i);
|
||||
self.short_write(i.to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_u32(&mut self, i: u32) {
|
||||
self.short_write(i);
|
||||
self.short_write(i.to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_u64(&mut self, i: u64) {
|
||||
self.short_write(i);
|
||||
self.short_write(i.to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_usize(&mut self, i: usize) {
|
||||
self.short_write(i);
|
||||
self.short_write(i.to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i8(&mut self, i: i8) {
|
||||
self.short_write(i as u8);
|
||||
self.short_write((i as u8).to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i16(&mut self, i: i16) {
|
||||
self.short_write(i as u16);
|
||||
self.short_write((i as u16).to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i32(&mut self, i: i32) {
|
||||
self.short_write(i as u32);
|
||||
self.short_write((i as u32).to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i64(&mut self, i: i64) {
|
||||
self.short_write(i as u64);
|
||||
self.short_write((i as u64).to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_isize(&mut self, i: isize) {
|
||||
self.short_write(i as usize);
|
||||
self.short_write((i as usize).to_ne_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -80,22 +80,22 @@ impl Hasher for StableHasher {
|
||||
|
||||
#[inline]
|
||||
fn write_u16(&mut self, i: u16) {
|
||||
self.state.write_u16(i.to_le());
|
||||
self.state.short_write(i.to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_u32(&mut self, i: u32) {
|
||||
self.state.write_u32(i.to_le());
|
||||
self.state.short_write(i.to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_u64(&mut self, i: u64) {
|
||||
self.state.write_u64(i.to_le());
|
||||
self.state.short_write(i.to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_u128(&mut self, i: u128) {
|
||||
self.state.write_u128(i.to_le());
|
||||
self.state.write(&i.to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -103,7 +103,7 @@ impl Hasher for StableHasher {
|
||||
// Always treat usize as u64 so we get the same results on 32 and 64 bit
|
||||
// platforms. This is important for symbol hashes when cross compiling,
|
||||
// for example.
|
||||
self.state.write_u64((i as u64).to_le());
|
||||
self.state.short_write((i as u64).to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -113,22 +113,22 @@ impl Hasher for StableHasher {
|
||||
|
||||
#[inline]
|
||||
fn write_i16(&mut self, i: i16) {
|
||||
self.state.write_i16(i.to_le());
|
||||
self.state.short_write((i as u16).to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i32(&mut self, i: i32) {
|
||||
self.state.write_i32(i.to_le());
|
||||
self.state.short_write((i as u32).to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i64(&mut self, i: i64) {
|
||||
self.state.write_i64(i.to_le());
|
||||
self.state.short_write((i as u64).to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn write_i128(&mut self, i: i128) {
|
||||
self.state.write_i128(i.to_le());
|
||||
self.state.write(&(i as u128).to_le_bytes());
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@ -144,7 +144,7 @@ impl Hasher for StableHasher {
|
||||
#[inline(never)]
|
||||
fn hash_value(state: &mut SipHasher128, value: u64) {
|
||||
state.write_u8(0xFF);
|
||||
state.write_u64(value.to_le());
|
||||
state.short_write(value.to_le_bytes());
|
||||
}
|
||||
|
||||
// `isize` values often seem to have a small (positive) numeric value in practice.
|
||||
@ -161,10 +161,6 @@ impl Hasher for StableHasher {
|
||||
// 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
|
||||
// `isize`s that fit within a different amount of bytes, they should always produce a different
|
||||
// byte stream for the hasher.
|
||||
//
|
||||
// To ensure that this optimization hashes the exact same bytes on both little-endian and
|
||||
// big-endian architectures, we compare the value with 0xFF before we convert the number
|
||||
// into a unified representation (little-endian).
|
||||
if value < 0xFF {
|
||||
self.state.write_u8(value as u8);
|
||||
} else {
|
||||
|
Loading…
Reference in New Issue
Block a user