mirror of
https://github.com/rust-lang/rust.git
synced 2025-05-14 02:49:40 +00:00

Add a dedicated length-prefixing method to `Hasher` This accomplishes two main goals: - Make it clear who is responsible for prefix-freedom, including how they should do it - Make it feasible for a `Hasher` that *doesn't* care about Hash-DoS resistance to get better performance by not hashing lengths This does not change rustc-hash, since that's in an external crate, but that could potentially use it in future. Fixes #94026 r? rust-lang/libs --- The core of this change is the following two new methods on `Hasher`: ```rust pub trait Hasher { /// Writes a length prefix into this hasher, as part of being prefix-free. /// /// If you're implementing [`Hash`] for a custom collection, call this before /// writing its contents to this `Hasher`. That way /// `(collection![1, 2, 3], collection![4, 5])` and /// `(collection![1, 2], collection![3, 4, 5])` will provide different /// sequences of values to the `Hasher` /// /// The `impl<T> Hash for [T]` includes a call to this method, so if you're /// hashing a slice (or array or vector) via its `Hash::hash` method, /// you should **not** call this yourself. /// /// This method is only for providing domain separation. If you want to /// hash a `usize` that represents part of the *data*, then it's important /// that you pass it to [`Hasher::write_usize`] instead of to this method. 
/// /// # Examples /// /// ``` /// #![feature(hasher_prefixfree_extras)] /// # // Stubs to make the `impl` below pass the compiler /// # struct MyCollection<T>(Option<T>); /// # impl<T> MyCollection<T> { /// # fn len(&self) -> usize { todo!() } /// # } /// # impl<'a, T> IntoIterator for &'a MyCollection<T> { /// # type Item = T; /// # type IntoIter = std::iter::Empty<T>; /// # fn into_iter(self) -> Self::IntoIter { todo!() } /// # } /// /// use std::hash::{Hash, Hasher}; /// impl<T: Hash> Hash for MyCollection<T> { /// fn hash<H: Hasher>(&self, state: &mut H) { /// state.write_length_prefix(self.len()); /// for elt in self { /// elt.hash(state); /// } /// } /// } /// ``` /// /// # Note to Implementers /// /// If you've decided that your `Hasher` is willing to be susceptible to /// Hash-DoS attacks, then you might consider skipping hashing some or all /// of the `len` provided in the name of increased performance. #[inline] #[unstable(feature = "hasher_prefixfree_extras", issue = "88888888")] fn write_length_prefix(&mut self, len: usize) { self.write_usize(len); } /// Writes a single `str` into this hasher. /// /// If you're implementing [`Hash`], you generally do not need to call this, /// as the `impl Hash for str` does, so you can just use that. /// /// This includes the domain separator for prefix-freedom, so you should /// **not** call `Self::write_length_prefix` before calling this. /// /// # Note to Implementers /// /// The default implementation of this method includes a call to /// [`Self::write_length_prefix`], so if your implementation of `Hasher` /// doesn't care about prefix-freedom and you've thus overridden /// that method to do nothing, there's no need to override this one. /// /// This method is available to be overridden separately from the others /// as `str` being UTF-8 means that it never contains `0xFF` bytes, which /// can be used to provide prefix-freedom cheaper than hashing a length. 
/// /// For example, if your `Hasher` works byte-by-byte (perhaps by accumulating /// them into a buffer), then you can hash the bytes of the `str` followed /// by a single `0xFF` byte. /// /// If your `Hasher` works in chunks, you can also do this by being careful /// about how you pad partial chunks. If the chunks are padded with `0x00` /// bytes then just hashing an extra `0xFF` byte doesn't necessarily /// provide prefix-freedom, as `"ab"` and `"ab\u{0}"` would likely hash /// the same sequence of chunks. But if you pad with `0xFF` bytes instead, /// ensuring at least one padding byte, then it can often provide /// prefix-freedom cheaper than hashing the length would. #[inline] #[unstable(feature = "hasher_prefixfree_extras", issue = "88888888")] fn write_str(&mut self, s: &str) { self.write_length_prefix(s.len()); self.write(s.as_bytes()); } } ``` With updates to the `Hash` implementations for slices and containers to call `write_length_prefix` instead of `write_usize`. `write_str` defaults to using `write_length_prefix` since, as was pointed out in the issue, the `write_u8(0xFF)` approach is insufficient for hashers that work in chunks, as those would hash `"a\u{0}"` and `"a"` to the same thing. But since `SipHash` works byte-wise (there's an internal buffer to accumulate bytes until a full chunk is available) it overrides `write_str` to continue to use the add-non-UTF-8-byte approach. --- Compatibility: Because the default implementation of `write_length_prefix` calls `write_usize`, the changed hash implementation for slices will do the same thing the old one did on existing `Hasher`s.
161 lines
3.6 KiB
Rust
161 lines
3.6 KiB
Rust
//! Various data structures used by the Rust compiler. The intention
|
|
//! is that code in here should not be *specific* to rustc, so that
|
|
//! it can be easily unit tested and so forth.
|
|
//!
|
|
//! # Note
|
|
//!
|
|
//! This API is completely unstable and subject to change.
|
|
|
|
#![doc(html_root_url = "https://doc.rust-lang.org/nightly/nightly-rustc/")]
|
|
#![feature(array_windows)]
|
|
#![feature(associated_type_bounds)]
|
|
#![feature(auto_traits)]
|
|
#![feature(control_flow_enum)]
|
|
#![feature(core_intrinsics)]
|
|
#![feature(extend_one)]
|
|
#![feature(generator_trait)]
|
|
#![feature(generators)]
|
|
#![feature(let_else)]
|
|
#![feature(hash_raw_entry)]
|
|
#![feature(hasher_prefixfree_extras)]
|
|
#![feature(maybe_uninit_uninit_array)]
|
|
#![feature(min_specialization)]
|
|
#![feature(never_type)]
|
|
#![feature(type_alias_impl_trait)]
|
|
#![feature(new_uninit)]
|
|
#![feature(once_cell)]
|
|
#![feature(rustc_attrs)]
|
|
#![feature(test)]
|
|
#![feature(thread_id_value)]
|
|
#![feature(vec_into_raw_parts)]
|
|
#![allow(rustc::default_hash_types)]
|
|
#![allow(rustc::potential_query_instability)]
|
|
|
|
#[macro_use]
|
|
extern crate tracing;
|
|
#[macro_use]
|
|
extern crate cfg_if;
|
|
#[macro_use]
|
|
extern crate rustc_macros;
|
|
|
|
pub use rustc_index::static_assert_size;
|
|
|
|
/// Invokes `f` and hands back whatever it returns.
///
/// The function itself is marked `#[cold]` and `#[inline(never)]`, so routing
/// a rarely-taken code path through it keeps that path out of line and tells
/// the optimizer the call is unlikely to happen.
#[inline(never)]
#[cold]
pub fn cold_path<F, R>(f: F) -> R
where
    F: FnOnce() -> R,
{
    f()
}
|
|
|
|
/// Evaluates `$e` and passes the value through `std::intrinsics::likely`,
/// yielding whatever that intrinsic returns.
///
/// `$e` is the scrutinee of the `match`, so it is evaluated *outside* the
/// `unsafe` block that wraps the intrinsic call; only the intrinsic call
/// itself sits inside `unsafe`.
#[macro_export]
macro_rules! likely {
    ($e:expr) => {
        match $e {
            // Presumably the `unsafe` block can be redundant at some expansion
            // sites (e.g. inside an existing `unsafe` context), hence the
            // lint is silenced here.
            #[allow(unused_unsafe)]
            cond => unsafe { std::intrinsics::likely(cond) },
        }
    };
}
|
|
|
|
/// Evaluates `$e` and passes the value through `std::intrinsics::unlikely`,
/// yielding whatever that intrinsic returns.
///
/// `$e` is the scrutinee of the `match`, so it is evaluated *outside* the
/// `unsafe` block that wraps the intrinsic call; only the intrinsic call
/// itself sits inside `unsafe`.
#[macro_export]
macro_rules! unlikely {
    ($e:expr) => {
        match $e {
            // Presumably the `unsafe` block can be redundant at some expansion
            // sites (e.g. inside an existing `unsafe` context), hence the
            // lint is silenced here.
            #[allow(unused_unsafe)]
            cond => unsafe { std::intrinsics::unlikely(cond) },
        }
    };
}
|
|
|
|
pub mod base_n;
|
|
pub mod binary_search_util;
|
|
pub mod captures;
|
|
pub mod flock;
|
|
pub mod functor;
|
|
pub mod fx;
|
|
pub mod graph;
|
|
pub mod intern;
|
|
pub mod jobserver;
|
|
pub mod macros;
|
|
pub mod map_in_place;
|
|
pub mod obligation_forest;
|
|
pub mod owning_ref;
|
|
pub mod sip128;
|
|
pub mod small_c_str;
|
|
pub mod small_str;
|
|
pub mod snapshot_map;
|
|
pub mod stable_map;
|
|
pub mod svh;
|
|
pub use ena::snapshot_vec;
|
|
pub mod memmap;
|
|
pub mod sorted_map;
|
|
pub mod stable_set;
|
|
#[macro_use]
|
|
pub mod stable_hasher;
|
|
mod atomic_ref;
|
|
pub mod fingerprint;
|
|
pub mod profiling;
|
|
pub mod sharded;
|
|
pub mod stack;
|
|
pub mod sync;
|
|
pub mod thin_vec;
|
|
pub mod tiny_list;
|
|
pub mod transitive_relation;
|
|
pub mod vec_linked_list;
|
|
pub mod vec_map;
|
|
pub mod work_queue;
|
|
pub use atomic_ref::AtomicRef;
|
|
pub mod frozen;
|
|
pub mod sso;
|
|
pub mod steal;
|
|
pub mod tagged_ptr;
|
|
pub mod temp_dir;
|
|
pub mod unhash;
|
|
|
|
pub use ena::undo_log;
|
|
pub use ena::unify;
|
|
|
|
use std::ops::{Generator, GeneratorState};
|
|
use std::pin::Pin;
|
|
|
|
/// Guard that calls the wrapped function when it goes out of scope.
pub struct OnDrop<F>(pub F)
where
    F: Fn();

impl<F> OnDrop<F>
where
    F: Fn(),
{
    /// Consumes the guard without ever calling the wrapped function.
    ///
    /// The guard is handed to `std::mem::forget`, so the function's own
    /// destructor is skipped as well: make sure it owns no memory, otherwise
    /// that memory is leaked.
    #[inline]
    pub fn disable(self) {
        std::mem::forget(self);
    }
}

impl<F> Drop for OnDrop<F>
where
    F: Fn(),
{
    /// Calls the wrapped function once as the guard is dropped.
    #[inline]
    fn drop(&mut self) {
        let callback = &self.0;
        callback();
    }
}
|
|
|
|
struct IterFromGenerator<G>(G);
|
|
|
|
impl<G: Generator<Return = ()> + Unpin> Iterator for IterFromGenerator<G> {
|
|
type Item = G::Yield;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
match Pin::new(&mut self.0).resume(()) {
|
|
GeneratorState::Yielded(n) => Some(n),
|
|
GeneratorState::Complete(_) => None,
|
|
}
|
|
}
|
|
}
|
|
|
|
/// An adapter for turning a generator closure into an iterator, similar to `iter::from_fn`.
|
|
pub fn iter_from_generator<G: Generator<Return = ()> + Unpin>(
|
|
generator: G,
|
|
) -> impl Iterator<Item = G::Yield> {
|
|
IterFromGenerator(generator)
|
|
}
|
|
|
|
// See the comments in src/librustc_middle/lib.rs.
#[doc(hidden)]
pub fn __noop_fix_for_27438() {
    // Intentionally empty.
}
|