mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-13 15:33:53 +00:00
auto merge of #11497 : huonw/rust/trie-internal-iter, r=alexcrichton
This stores the stack of iterators inline (we have a maximum depth with `uint` keys), and then uses direct pointer offsetting to manipulate it, in a blazing fast way: Before: bench_iter_large ... bench: 43187 ns/iter (+/- 3082) bench_iter_small ... bench: 618 ns/iter (+/- 288) After: bench_iter_large ... bench: 13497 ns/iter (+/- 1575) bench_iter_small ... bench: 220 ns/iter (+/- 91) Also, removes `.each_{key,value}_reverse` as an offering to placate the gods of external iterators for my heinous sin of attempting to add new internal ones (in a previous version of this PR).
This commit is contained in:
commit
f4498c71e2
@ -11,14 +11,17 @@
|
|||||||
//! Ordered containers with integer keys, implemented as radix tries (`TrieSet` and `TrieMap` types)
|
//! Ordered containers with integer keys, implemented as radix tries (`TrieSet` and `TrieMap` types)
|
||||||
|
|
||||||
use prelude::*;
|
use prelude::*;
|
||||||
|
use mem;
|
||||||
use uint;
|
use uint;
|
||||||
use util::replace;
|
use util::replace;
|
||||||
|
use unstable::intrinsics::init;
|
||||||
use vec;
|
use vec;
|
||||||
|
|
||||||
// FIXME: #5244: need to manually update the TrieNode constructor
|
// FIXME: #5244: need to manually update the TrieNode constructor
|
||||||
static SHIFT: uint = 4;
|
static SHIFT: uint = 4;
|
||||||
static SIZE: uint = 1 << SHIFT;
|
static SIZE: uint = 1 << SHIFT;
|
||||||
static MASK: uint = SIZE - 1;
|
static MASK: uint = SIZE - 1;
|
||||||
|
static NUM_CHUNKS: uint = uint::bits / SHIFT;
|
||||||
|
|
||||||
enum Child<T> {
|
enum Child<T> {
|
||||||
Internal(~TrieNode<T>),
|
Internal(~TrieNode<T>),
|
||||||
@ -111,35 +114,27 @@ impl<T> TrieMap<T> {
|
|||||||
self.root.each_reverse(f)
|
self.root.each_reverse(f)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Visit all keys in reverse order
|
|
||||||
#[inline]
|
|
||||||
pub fn each_key_reverse(&self, f: |&uint| -> bool) -> bool {
|
|
||||||
self.each_reverse(|k, _| f(k))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Visit all values in reverse order
|
|
||||||
#[inline]
|
|
||||||
pub fn each_value_reverse(&self, f: |&T| -> bool) -> bool {
|
|
||||||
self.each_reverse(|_, v| f(v))
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get an iterator over the key-value pairs in the map
|
/// Get an iterator over the key-value pairs in the map
|
||||||
pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, T> {
|
pub fn iter<'a>(&'a self) -> TrieMapIterator<'a, T> {
|
||||||
TrieMapIterator {
|
let mut iter = unsafe {TrieMapIterator::new()};
|
||||||
stack: ~[self.root.children.iter()],
|
iter.stack[0] = self.root.children.iter();
|
||||||
remaining_min: self.length,
|
iter.length = 1;
|
||||||
remaining_max: self.length
|
iter.remaining_min = self.length;
|
||||||
}
|
iter.remaining_max = self.length;
|
||||||
|
|
||||||
|
iter
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an iterator over the key-value pairs in the map, with the
|
/// Get an iterator over the key-value pairs in the map, with the
|
||||||
/// ability to mutate the values.
|
/// ability to mutate the values.
|
||||||
pub fn mut_iter<'a>(&'a mut self) -> TrieMapMutIterator<'a, T> {
|
pub fn mut_iter<'a>(&'a mut self) -> TrieMapMutIterator<'a, T> {
|
||||||
TrieMapMutIterator {
|
let mut iter = unsafe {TrieMapMutIterator::new()};
|
||||||
stack: ~[self.root.children.mut_iter()],
|
iter.stack[0] = self.root.children.mut_iter();
|
||||||
remaining_min: self.length,
|
iter.length = 1;
|
||||||
remaining_max: self.length
|
iter.remaining_min = self.length;
|
||||||
}
|
iter.remaining_max = self.length;
|
||||||
|
|
||||||
|
iter
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -188,16 +183,16 @@ macro_rules! bound {
|
|||||||
|
|
||||||
let key = $key;
|
let key = $key;
|
||||||
|
|
||||||
let mut idx = 0;
|
let mut it = unsafe {$iterator_name::new()};
|
||||||
let mut it = $iterator_name {
|
// everything else is zero'd, as we want.
|
||||||
stack: ~[],
|
it.remaining_max = this.length;
|
||||||
remaining_min: 0,
|
|
||||||
remaining_max: this.length
|
|
||||||
};
|
|
||||||
// this addr is necessary for the `Internal` pattern.
|
// this addr is necessary for the `Internal` pattern.
|
||||||
addr!(loop {
|
addr!(loop {
|
||||||
let children = unsafe {addr!(& $($mut_)* (*node).children)};
|
let children = unsafe {addr!(& $($mut_)* (*node).children)};
|
||||||
let child_id = chunk(key, idx);
|
// it.length is the current depth in the iterator and the
|
||||||
|
// current depth through the `uint` key we've traversed.
|
||||||
|
let child_id = chunk(key, it.length);
|
||||||
let (slice_idx, ret) = match children[child_id] {
|
let (slice_idx, ret) = match children[child_id] {
|
||||||
Internal(ref $($mut_)* n) => {
|
Internal(ref $($mut_)* n) => {
|
||||||
node = addr!(& $($mut_)* **n as * $($mut_)* TrieNode<T>);
|
node = addr!(& $($mut_)* **n as * $($mut_)* TrieNode<T>);
|
||||||
@ -214,9 +209,10 @@ macro_rules! bound {
|
|||||||
(child_id + 1, true)
|
(child_id + 1, true)
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
it.stack.push(children.$slice_from(slice_idx).$iter());
|
// push to the stack.
|
||||||
|
it.stack[it.length] = children.$slice_from(slice_idx).$iter();
|
||||||
|
it.length += 1;
|
||||||
if ret { return it }
|
if ret { return it }
|
||||||
idx += 1;
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -328,7 +324,7 @@ impl TrieSet {
|
|||||||
/// Visit all values in reverse order
|
/// Visit all values in reverse order
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn each_reverse(&self, f: |&uint| -> bool) -> bool {
|
pub fn each_reverse(&self, f: |&uint| -> bool) -> bool {
|
||||||
self.map.each_key_reverse(f)
|
self.map.each_reverse(|k, _| f(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Get an iterator over the values in the set
|
/// Get an iterator over the values in the set
|
||||||
@ -479,7 +475,8 @@ fn remove<T>(count: &mut uint, child: &mut Child<T>, key: uint,
|
|||||||
|
|
||||||
/// Forward iterator over a map
|
/// Forward iterator over a map
|
||||||
pub struct TrieMapIterator<'a, T> {
|
pub struct TrieMapIterator<'a, T> {
|
||||||
priv stack: ~[vec::VecIterator<'a, Child<T>>],
|
priv stack: [vec::VecIterator<'a, Child<T>>, .. NUM_CHUNKS],
|
||||||
|
priv length: uint,
|
||||||
priv remaining_min: uint,
|
priv remaining_min: uint,
|
||||||
priv remaining_max: uint
|
priv remaining_max: uint
|
||||||
}
|
}
|
||||||
@ -487,7 +484,8 @@ pub struct TrieMapIterator<'a, T> {
|
|||||||
/// Forward iterator over the key-value pairs of a map, with the
|
/// Forward iterator over the key-value pairs of a map, with the
|
||||||
/// values being mutable.
|
/// values being mutable.
|
||||||
pub struct TrieMapMutIterator<'a, T> {
|
pub struct TrieMapMutIterator<'a, T> {
|
||||||
priv stack: ~[vec::VecMutIterator<'a, Child<T>>],
|
priv stack: [vec::VecMutIterator<'a, Child<T>>, .. NUM_CHUNKS],
|
||||||
|
priv length: uint,
|
||||||
priv remaining_min: uint,
|
priv remaining_min: uint,
|
||||||
priv remaining_max: uint
|
priv remaining_max: uint
|
||||||
}
|
}
|
||||||
@ -499,23 +497,91 @@ macro_rules! iterator_impl {
|
|||||||
($name:ident,
|
($name:ident,
|
||||||
iter = $iter:ident,
|
iter = $iter:ident,
|
||||||
mutability = $($mut_:tt)*) => {
|
mutability = $($mut_:tt)*) => {
|
||||||
item!(impl<'a, T> Iterator<(uint, &'a $($mut_)* T)> for $name<'a, T> {
|
impl<'a, T> $name<'a, T> {
|
||||||
fn next(&mut self) -> Option<(uint, &'a $($mut_)* T)> {
|
// Create new zero'd iterator. We have a thin gilding of safety by
|
||||||
while !self.stack.is_empty() {
|
// using init rather than uninit, so that the worst that can happen
|
||||||
match self.stack[self.stack.len() - 1].next() {
|
// from failing to initialise correctly after calling these is a
|
||||||
None => {
|
// segfault.
|
||||||
self.stack.pop();
|
#[cfg(target_word_size="32")]
|
||||||
|
unsafe fn new() -> $name<'a, T> {
|
||||||
|
$name {
|
||||||
|
remaining_min: 0,
|
||||||
|
remaining_max: 0,
|
||||||
|
length: 0,
|
||||||
|
// ick :( ... at least the compiler will tell us if we screwed up.
|
||||||
|
stack: [init(), init(), init(), init(), init(), init(), init(), init()]
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(target_word_size="64")]
|
||||||
|
unsafe fn new() -> $name<'a, T> {
|
||||||
|
$name {
|
||||||
|
remaining_min: 0,
|
||||||
|
remaining_max: 0,
|
||||||
|
length: 0,
|
||||||
|
stack: [init(), init(), init(), init(), init(), init(), init(), init(),
|
||||||
|
init(), init(), init(), init(), init(), init(), init(), init()]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
item!(impl<'a, T> Iterator<(uint, &'a $($mut_)* T)> for $name<'a, T> {
|
||||||
|
// you might wonder why we're not even trying to act within the
|
||||||
|
// rules, and are just manipulating raw pointers like there's no
|
||||||
|
// such thing as invalid pointers and memory unsafety. The
|
||||||
|
// reason is performance, without doing this we can get the
|
||||||
|
// bench_iter_large microbenchmark down to about 30000 ns/iter
|
||||||
|
// (using .unsafe_ref to index self.stack directly, 38000
|
||||||
|
// ns/iter with [] checked indexing), but this smashes that down
|
||||||
|
// to 13500 ns/iter.
|
||||||
|
//
|
||||||
|
// Fortunately, it's still safe...
|
||||||
|
//
|
||||||
|
// We have an invariant that every Internal node
|
||||||
|
// corresponds to one push to self.stack, and one pop,
|
||||||
|
// nested appropriately. self.stack has enough storage
|
||||||
|
// to store the maximum depth of Internal nodes in the
|
||||||
|
// trie (8 on 32-bit platforms, 16 on 64-bit).
|
||||||
|
fn next(&mut self) -> Option<(uint, &'a $($mut_)* T)> {
|
||||||
|
let start_ptr = self.stack.as_mut_ptr();
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// write_ptr is the next place to write to the stack.
|
||||||
|
// invariant: start_ptr <= write_ptr < end of the
|
||||||
|
// vector.
|
||||||
|
let mut write_ptr = start_ptr.offset(self.length as int);
|
||||||
|
while write_ptr != start_ptr {
|
||||||
|
// indexing back one is safe, since write_ptr >
|
||||||
|
// start_ptr now.
|
||||||
|
match (*write_ptr.offset(-1)).next() {
|
||||||
|
// exhausted this iterator (i.e. finished this
|
||||||
|
// Internal node), so pop from the stack.
|
||||||
|
//
|
||||||
|
// don't bother clearing the memory, because the
|
||||||
|
// next time we use it we'll've written to it
|
||||||
|
// first.
|
||||||
|
None => write_ptr = write_ptr.offset(-1),
|
||||||
Some(child) => {
|
Some(child) => {
|
||||||
addr!(match *child {
|
addr!(match *child {
|
||||||
Internal(ref $($mut_)* node) => {
|
Internal(ref $($mut_)* node) => {
|
||||||
self.stack.push(node.children.$iter());
|
// going down a level, so push
|
||||||
|
// to the stack (this is the
|
||||||
|
// write referenced above)
|
||||||
|
*write_ptr = node.children.$iter();
|
||||||
|
write_ptr = write_ptr.offset(1);
|
||||||
}
|
}
|
||||||
External(key, ref $($mut_)* value) => {
|
External(key, ref $($mut_)* value) => {
|
||||||
self.remaining_max -= 1;
|
self.remaining_max -= 1;
|
||||||
if self.remaining_min > 0 {
|
if self.remaining_min > 0 {
|
||||||
self.remaining_min -= 1;
|
self.remaining_min -= 1;
|
||||||
}
|
}
|
||||||
|
// store the new length of the
|
||||||
|
// stack, based on our current
|
||||||
|
// position.
|
||||||
|
self.length = (write_ptr as uint
|
||||||
|
- start_ptr as uint) /
|
||||||
|
mem::size_of_val(&*write_ptr);
|
||||||
|
|
||||||
return Some((key, value));
|
return Some((key, value));
|
||||||
}
|
}
|
||||||
Nothing => {}
|
Nothing => {}
|
||||||
@ -523,6 +589,7 @@ macro_rules! iterator_impl {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user