std::to_bytes: Delimit sequences &[A] and ~str when hashing

Address issue #5257. For example, these values all had the same hash value:

	("aaa", "bbb", "ccc")
	("aaab", "bb", "ccc")
	("aaabbb", "", "ccc")

IterBytes for &[A] now includes the length, before calling iter_bytes on
each element.

IterBytes for &str is now terminated by a byte that does not appear in
UTF-8. This way only one more byte is processed when hashing strings.
This commit is contained in:
blake2-ppc 2013-08-15 05:23:33 +02:00
parent 0a238288d3
commit 6066118b29
3 changed files with 47 additions and 42 deletions

View File

@ -409,6 +409,14 @@ mod tests {
use uint;
// Hash just the bytes of the slice, without length prefix
// Test-only wrapper: `test_siphash` hashes its input through `Bytes(..)` so
// the wrapped bytes reach the hasher exactly as given.
struct Bytes<'self>(&'self [u8]);
impl<'self> IterBytes for Bytes<'self> {
// Hands the wrapped slice to `f` in a single call and returns `f`'s result.
// `_lsb0` is ignored.
fn iter_bytes(&self, _lsb0: bool, f: &fn(&[u8]) -> bool) -> bool {
f(**self)
}
}
#[test]
fn test_siphash() {
let vecs : [[u8, ..8], ..64] = [
@ -496,7 +504,7 @@ mod tests {
while t < 64 {
debug!("siphash test %?", t);
let vec = u8to64_le!(vecs[t], 0);
let out = buf.hash_keyed(k0, k1);
let out = Bytes(buf.as_slice()).hash_keyed(k0, k1);
debug!("got %?, expected %?", out, vec);
assert_eq!(vec, out);

View File

@ -376,7 +376,6 @@ static ASCII_UPPER_MAP: &'static [u8] = &[
#[cfg(test)]
mod tests {
use super::*;
use to_bytes::ToBytes;
use str::from_char;
macro_rules! v2ascii (
@ -445,7 +444,6 @@ mod tests {
// Builds a vector with `v2ascii!` from the values 40, 32, 59 and expects both
// `to_bytes(false)` and `into_bytes()` to give back those same values as `u8`.
// NOTE(review): `v2ascii!` is defined outside this view; presumably it maps
// each value to an `Ascii` element -- confirm.
#[test]
fn test_ascii_to_bytes() {
assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
}

View File

@ -15,37 +15,43 @@ The `ToBytes` and `IterBytes` traits
*/
use cast;
use container::Container;
use io;
use io::Writer;
use iterator::Iterator;
use option::{None, Option, Some};
use str::StrSlice;
use vec::ImmutableVector;
use str::{Str, StrSlice};
use vec::{Vector, ImmutableVector};
/// Callback type taken by `IterBytes::iter_bytes`: it receives one byte
/// slice per call and returns a `bool`.
pub type Cb<'self> = &'self fn(buf: &[u8]) -> bool;
/**
* A trait to implement in order to make a type hashable;
* This works in combination with the trait `Hash::Hash`, and
* may in the future be merged with that trait or otherwise
* modified when default methods and trait inheritance are
* completed.
*/
///
/// A trait to implement in order to make a type hashable.
/// This works in combination with the trait `std::hash::Hash`, and
/// may in the future be merged with that trait or otherwise
/// modified when default methods and trait inheritance are
/// completed.
///
/// IterBytes should be implemented so that the extent of the
/// produced byte stream can be discovered, given the original
/// type.
/// For example, the IterBytes implementation for vectors emits
/// its length first, and enums should emit their discriminant.
///
pub trait IterBytes {
/**
* Call the provided callback `f` one or more times with
* byte-slices that should be used when computing a hash
* value or otherwise "flattening" the structure into
* a sequence of bytes. The `lsb0` parameter conveys
* whether the caller is asking for little-endian bytes
* (`true`) or big-endian (`false`); this should only be
* relevant in implementations that represent a single
* multi-byte datum such as a 32 bit integer or 64 bit
* floating-point value. It can be safely ignored for
* larger structured types as they are usually processed
* left-to-right in declaration order, regardless of
* underlying memory endianness.
*/
/// Call the provided callback `f` one or more times with
/// byte-slices that should be used when computing a hash
/// value or otherwise "flattening" the structure into
/// a sequence of bytes. The `lsb0` parameter conveys
/// whether the caller is asking for little-endian bytes
/// (`true`) or big-endian (`false`); this should only be
/// relevant in implementations that represent a single
/// multi-byte datum such as a 32 bit integer or 64 bit
/// floating-point value. It can be safely ignored for
/// larger structured types as they are usually processed
/// left-to-right in declaration order, regardless of
/// underlying memory endianness.
///
/// The slice and string implementations in this file chain the
/// `bool` results with `&&`: once a call returns `false`, the
/// remaining calls in the chain are skipped and `iter_bytes`
/// itself returns `false`.
///
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool;
}
@ -224,6 +230,7 @@ impl IterBytes for f64 {
impl<'self,A:IterBytes> IterBytes for &'self [A] {
/// Emit the slice as its length followed by every element, in order.
///
/// The length goes first so that the extent of the sequence is part of
/// the byte stream. If emitting the length reports `false`, the
/// elements are not visited and `false` is returned.
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
// Length prefix first; stop here if the callback declined it.
if !self.len().iter_bytes(lsb0, |chunk| f(chunk)) {
return false;
}
// Then each element in turn, forwarding its bytes to the same callback.
self.iter().advance(|item| item.iter_bytes(lsb0, |chunk| f(chunk)))
}
}
@ -251,47 +258,39 @@ impl<A:IterBytes,B:IterBytes,C:IterBytes> IterBytes for (A,B,C) {
}
}
/// Returns its argument unchanged, typed as a borrowed slice `&[A]`.
///
/// The `~[A]` and `@[A]` implementations call this to view themselves as a
/// slice before forwarding to the slice implementation.
// Move this to vec, probably.
fn borrow<'x,A>(a: &'x [A]) -> &'x [A] {
let slice: &'x [A] = a;
slice
}
// Owned vectors do no hashing work of their own: the vector is viewed as a
// slice and `iter_bytes` is forwarded to it with the same `lsb0` and `f`.
impl<A:IterBytes> IterBytes for ~[A] {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
// NOTE(review): the two lines below look like the before/after forms of
// the same statement in this change view (`borrow(*self)` versus
// `self.as_slice()`); confirm which one the file should keep.
borrow(*self).iter_bytes(lsb0, f)
self.as_slice().iter_bytes(lsb0, f)
}
}
// Managed vectors do no hashing work of their own: the vector is viewed as a
// slice and `iter_bytes` is forwarded to it with the same `lsb0` and `f`.
impl<A:IterBytes> IterBytes for @[A] {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
// NOTE(review): the two lines below look like the before/after forms of
// the same statement in this change view (`borrow(*self)` versus
// `self.as_slice()`); confirm which one the file should keep.
borrow(*self).iter_bytes(lsb0, f)
self.as_slice().iter_bytes(lsb0, f)
}
}
// String slices are emitted as their bytes; the terminated form below then
// sends one extra 0xFF byte. `_lsb0` is ignored.
impl<'self> IterBytes for &'self str {
#[inline]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
// NOTE(review): the bare call on the next line looks like the earlier,
// unterminated form of the statement that follows it in this change
// view; confirm that only the terminated form should remain.
f(self.as_bytes())
// Terminate the string with a byte that does not appear in UTF-8
// The terminator is only sent if the first call to `f` returned `true`.
f(self.as_bytes()) && f([0xFF])
}
}
// Owned strings: the older body passes the string's bytes straight to `f`;
// the newer body forwards to `self.as_slice().iter_bytes(..)`.
// NOTE(review): presumably `as_slice` yields a `&str`, so the newer body goes
// through the `&'self str` implementation -- confirm.
impl IterBytes for ~str {
#[inline]
// NOTE(review): two `fn iter_bytes` headers appear below; they look like the
// before/after forms from this change view. Confirm which one the file keeps.
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
// this should possibly include the null terminator, but that
// breaks .find_equiv on hashmaps.
f(self.as_bytes())
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
self.as_slice().iter_bytes(lsb0, f)
}
}
// Managed strings: the older body passes the string's bytes straight to `f`;
// the newer body forwards to `self.as_slice().iter_bytes(..)`.
// NOTE(review): presumably `as_slice` yields a `&str`, so the newer body goes
// through the `&'self str` implementation -- confirm.
impl IterBytes for @str {
#[inline]
// NOTE(review): two `fn iter_bytes` headers appear below; they look like the
// before/after forms from this change view. Confirm which one the file keeps.
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
// this should possibly include the null terminator, but that
// breaks .find_equiv on hashmaps.
f(self.as_bytes())
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
self.as_slice().iter_bytes(lsb0, f)
}
}