BTreeMap::from_iter: use bulk building to improve the performance

Bulk building is a common technique for speeding up the construction of
a fresh B-tree map. Instead of inserting items one by one, we sort all
the items beforehand and then create the BTreeMap in bulk.
This commit is contained in:
Cheng XU 2021-08-28 16:48:45 -07:00
parent 6a6885c6bd
commit cf814d60f8
No known key found for this signature in database
GPG Key ID: 8794B5D7A3C67F70
3 changed files with 79 additions and 5 deletions

View File

@ -0,0 +1,47 @@
use core::iter::Peekable;
/// An iterator adaptor that collapses runs of equal keys in a sorted iterator.
/// When several consecutive entries share a key, only the last key-value
/// pair of that run is yielded.
///
/// Used by [`BTreeMap::bulk_build_from_sorted_iter`].
pub struct DedupSortedIter<K, V, I>
where
    I: Iterator<Item = (K, V)>,
{
    // Peekable so that `next` can compare an entry's key with its successor's.
    iter: Peekable<I>,
}

impl<K, V, I> DedupSortedIter<K, V, I>
where
    I: Iterator<Item = (K, V)>,
{
    /// Wraps `iter` so that adjacent entries with equal keys are deduplicated.
    pub fn new(iter: I) -> Self {
        let iter = iter.peekable();
        Self { iter }
    }
}

impl<K, V, I> Iterator for DedupSortedIter<K, V, I>
where
    K: Eq,
    I: Iterator<Item = (K, V)>,
{
    type Item = (K, V);

    /// Returns the next entry whose key differs from the key of the entry
    /// that follows it, or `None` once the underlying iterator is exhausted.
    fn next(&mut self) -> Option<(K, V)> {
        loop {
            // Running out of input ends the iteration.
            let candidate = self.iter.next()?;
            // The candidate closes its run when nothing follows it or when
            // the following key is different; otherwise it is superseded
            // and dropped in favour of a later entry with the same key.
            let closes_run =
                self.iter.peek().map_or(true, |following| candidate.0 != following.0);
            if closes_run {
                return Some(candidate);
            }
        }
    }
}

View File

@ -1,3 +1,4 @@
use crate::vec::Vec;
use core::borrow::Borrow;
use core::cmp::Ordering;
use core::fmt::{self, Debug};
@ -9,6 +10,7 @@ use core::ops::{Index, RangeBounds};
use core::ptr;
use super::borrow::DormantMutRef;
use super::dedup_sorted_iter::DedupSortedIter;
use super::navigate::{LazyLeafRange, LeafRange};
use super::node::{self, marker, ForceResult::*, Handle, NodeRef, Root};
use super::search::SearchResult::*;
@ -1290,6 +1292,18 @@ impl<K, V> BTreeMap<K, V> {
pub fn into_values(self) -> IntoValues<K, V> {
// Consume the map through `into_iter` and wrap that iterator in `IntoValues`.
IntoValues { inner: self.into_iter() }
}
/// Makes a `BTreeMap` from a sorted iterator.
pub(crate) fn bulk_build_from_sorted_iter<I>(iter: I) -> Self
where
K: Ord,
I: Iterator<Item = (K, V)>,
{
let mut root = Root::new();
let mut length = 0;
root.bulk_push(DedupSortedIter::new(iter), &mut length);
BTreeMap { root: Some(root), length }
}
}
#[stable(feature = "rust1", since = "1.0.0")]
@ -1914,9 +1928,15 @@ impl<K, V> FusedIterator for RangeMut<'_, K, V> {}
#[stable(feature = "rust1", since = "1.0.0")]
impl<K: Ord, V> FromIterator<(K, V)> for BTreeMap<K, V> {
// Builds a map from an iterator of key-value pairs.
// NOTE(review): this hunk appears to show the removed body and the added body
// back to back with the diff markers stripped; confirm against the real file.
fn from_iter<T: IntoIterator<Item = (K, V)>>(iter: T) -> BTreeMap<K, V> {
// Apparently the pre-change body: start from an empty map and insert every pair via `extend`.
let mut map = BTreeMap::new();
map.extend(iter);
map
// Apparently the post-change body: buffer every pair so they can be sorted up front.
let mut inputs: Vec<_> = iter.into_iter().collect();
// An empty input needs neither sorting nor bulk building.
if inputs.is_empty() {
return BTreeMap::new();
}
// use stable sort to preserve the insertion order.
// `sort_by` is stable, so pairs with equal keys keep their input order; the
// bulk builder's dedup step then keeps the last pair of each equal-key run.
inputs.sort_by(|a, b| a.0.cmp(&b.0));
// Hand the sorted pairs, by value, to the bulk builder.
BTreeMap::bulk_build_from_sorted_iter(inputs.into_iter())
}
}
@ -2025,8 +2045,14 @@ impl<K: Ord, V, const N: usize> From<[(K, V); N]> for BTreeMap<K, V> {
/// let map2: BTreeMap<_, _> = [(1, 2), (3, 4)].into();
/// assert_eq!(map1, map2);
/// ```
// NOTE(review): this hunk appears to show the removed signature/body followed by
// the added ones with the diff markers stripped; confirm against the real file.
// Apparently the pre-change version: feed the array's by-value iterator through `collect`.
fn from(arr: [(K, V); N]) -> Self {
core::array::IntoIter::new(arr).collect()
// Apparently the post-change version: `arr` is taken as `mut` so it can be sorted in place.
fn from(mut arr: [(K, V); N]) -> Self {
// A zero-length array yields an empty map without sorting or bulk building.
if N == 0 {
return BTreeMap::new();
}
// use stable sort to preserve the insertion order.
// `sort_by` is stable, so pairs with equal keys keep their array order; the
// bulk builder's dedup step then keeps the last pair of each equal-key run.
arr.sort_by(|a, b| a.0.cmp(&b.0));
// Hand the sorted pairs, by value, to the bulk builder.
BTreeMap::bulk_build_from_sorted_iter(core::array::IntoIter::new(arr))
}
}

View File

@ -1,5 +1,6 @@
mod append;
mod borrow;
mod dedup_sorted_iter;
mod fix;
pub mod map;
mod mem;