From e3adad658763ce79753e6f6e63c6f24964d03ea0 Mon Sep 17 00:00:00 2001 From: Andrey Tonkih Date: Sun, 29 May 2016 13:30:13 +0300 Subject: [PATCH] Implement split_off for BTreeMap and BTreeSet (RFC 509) --- src/libcollections/btree/map.rs | 172 +++++++++++++++- src/libcollections/btree/node.rs | 296 +++++++++++++++++++--------- src/libcollections/btree/set.rs | 37 ++++ src/libcollectionstest/btree/map.rs | 48 +++++ src/libcollectionstest/btree/mod.rs | 30 +++ src/libcollectionstest/btree/set.rs | 48 +++++ src/libcollectionstest/lib.rs | 1 + 7 files changed, 539 insertions(+), 93 deletions(-) diff --git a/src/libcollections/btree/map.rs b/src/libcollections/btree/map.rs index ec2f4a9f7f0..29f3e4b1b61 100644 --- a/src/libcollections/btree/map.rs +++ b/src/libcollections/btree/map.rs @@ -842,13 +842,13 @@ impl BTreeMap { // Check if right-most child is underfull. let mut last_edge = internal.last_edge(); let right_child_len = last_edge.reborrow().descend().len(); - if right_child_len < node::CAPACITY / 2 { + if right_child_len < node::MIN_LEN { // We need to steal. let mut last_kv = match last_edge.left_kv() { Ok(left) => left, Err(_) => unreachable!(), }; - last_kv.bulk_steal_left(node::CAPACITY/2 - right_child_len); + last_kv.bulk_steal_left(node::MIN_LEN - right_child_len); last_edge = last_kv.right_edge(); } @@ -856,6 +856,174 @@ impl BTreeMap { cur_node = last_edge.descend(); } } + + /// Splits the collection into two at the given key. Returns everything after the given key, + /// including the key. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(btree_split_off)] + /// use std::collections::BTreeMap; + /// + /// let mut a = BTreeMap::new(); + /// a.insert(1, "a"); + /// a.insert(2, "b"); + /// a.insert(3, "c"); + /// a.insert(17, "d"); + /// a.insert(41, "e"); + /// + /// let b = a.split_off(&3); + /// + /// assert_eq!(a.len(), 2); + /// assert_eq!(b.len(), 3); + /// + /// assert_eq!(a[&1], "a"); + /// assert_eq!(a[&2], "b"); + /// + /// assert_eq!(b[&3], "c"); + /// assert_eq!(b[&17], "d"); + /// assert_eq!(b[&41], "e"); + /// ``` + #[unstable(feature = "btree_split_off", + reason = "recently added as part of collections reform 2", + issue = "19986")] + pub fn split_off(&mut self, key: &Q) -> Self where K: Borrow { + if self.is_empty() { + return Self::new(); + } + + let total_num = self.len(); + + let mut right = Self::new(); + for _ in 0..(self.root.as_ref().height()) { + right.root.push_level(); + } + + { + let mut left_node = self.root.as_mut(); + let mut right_node = right.root.as_mut(); + + loop { + let mut split_edge = match search::search_node(left_node, key) { + // key is going to the right tree + Found(handle) => handle.left_edge(), + GoDown(handle) => handle + }; + + split_edge.move_suffix(&mut right_node); + + match (split_edge.force(), right_node.force()) { + (Internal(edge), Internal(node)) => { + left_node = edge.descend(); + right_node = node.first_edge().descend(); + } + (Leaf(_), Leaf(_)) => { break; }, + _ => { unreachable!(); } + } + } + } + + self.fix_right_border(); + right.fix_left_border(); + + if self.root.as_ref().height() < right.root.as_ref().height() { + self.recalc_length(); + right.length = total_num - self.len(); + } else { + right.recalc_length(); + self.length = total_num - right.len(); + } + + right + } + + /// Calculates the number of elements if it is incorrect. + fn recalc_length(&mut self) { + fn dfs(node: NodeRef) -> usize { + let mut res = node.len(); + + if let Internal(node) = node.force() { + let mut edge = node.first_edge(); + loop { + res += dfs(edge.reborrow().descend()); + match edge.right_kv() { + Ok(right_kv) => { edge = right_kv.right_edge(); }, + Err(_) => { break; } + } + } + } + + res + } + + self.length = dfs(self.root.as_ref()); + } + + /// Removes empty levels on the top. + fn fix_top(&mut self) { + loop { + { + let node = self.root.as_ref(); + if node.height() == 0 || node.len() > 0 { + break; + } + } + self.root.pop_level(); + } + } + + fn fix_right_border(&mut self) { + self.fix_top(); + + { + let mut cur_node = self.root.as_mut(); + + while let Internal(node) = cur_node.force() { + let mut last_kv = node.last_kv(); + + if last_kv.can_merge() { + cur_node = last_kv.merge().descend(); + } else { + let right_len = last_kv.reborrow().right_edge().descend().len(); + // `MINLEN + 1` to avoid readjust if merge happens on the next level. + if right_len < node::MIN_LEN + 1 { + last_kv.bulk_steal_left(node::MIN_LEN + 1 - right_len); + } + cur_node = last_kv.right_edge().descend(); + } + } + } + + self.fix_top(); + } + + /// The symmetric clone of `fix_right_border`. + fn fix_left_border(&mut self) { + self.fix_top(); + + { + let mut cur_node = self.root.as_mut(); + + while let Internal(node) = cur_node.force() { + let mut first_kv = node.first_kv(); + + if first_kv.can_merge() { + cur_node = first_kv.merge().descend(); + } else { + let left_len = first_kv.reborrow().left_edge().descend().len(); + if left_len < node::MIN_LEN + 1 { + first_kv.bulk_steal_right(node::MIN_LEN + 1 - left_len); + } + cur_node = first_kv.left_edge().descend(); + } + } + } + + self.fix_top(); + } } impl<'a, K: 'a, V: 'a> IntoIterator for &'a BTreeMap { diff --git a/src/libcollections/btree/node.rs b/src/libcollections/btree/node.rs index ca1cf6bcc50..e9bc29118d5 100644 --- a/src/libcollections/btree/node.rs +++ b/src/libcollections/btree/node.rs @@ -51,6 +51,7 @@ use core::slice; use boxed::Box; const B: usize = 6; +pub const MIN_LEN: usize = B - 1; pub const CAPACITY: usize = 2 * B - 1; /// The underlying representation of leaf nodes. Note that it is often unsafe to actually store @@ -413,6 +414,19 @@ impl NodeRef { let len = self.len(); Handle::new_edge(self, len) } + + /// Note that `self` must be nonempty. + pub fn first_kv(self) -> Handle { + debug_assert!(self.len() > 0); + Handle::new_kv(self, 0) + } + + /// Note that `self` must be nonempty. + pub fn last_kv(self) -> Handle { + let len = self.len(); + debug_assert!(len > 0); + Handle::new_kv(self, len - 1) + } } impl NodeRef { @@ -602,6 +616,17 @@ impl<'a, K, V> NodeRef, K, V, marker::Internal> { } } + fn correct_childrens_parent_links(&mut self, first: usize, after_last: usize) { + for i in first..after_last { + Handle::new_edge(unsafe { self.reborrow_mut() }, i).correct_parent_link(); + } + } + + fn correct_all_childrens_parent_links(&mut self) { + let len = self.len(); + self.correct_childrens_parent_links(0, len + 1); + } + /// Adds a key/value pair and an edge to go to the left of that pair to /// the beginning of the node. pub fn push_front(&mut self, key: K, val: V, edge: Root) { @@ -623,11 +648,8 @@ impl<'a, K, V> NodeRef, K, V, marker::Internal> { self.as_leaf_mut().len += 1; - for i in 0..self.len()+1 { - Handle::new_edge(self.reborrow_mut(), i).correct_parent_link(); - } + self.correct_all_childrens_parent_links(); } - } } @@ -696,6 +718,13 @@ impl<'a, K, V> NodeRef, K, V, marker::LeafOrInternal> { (key, val, edge) } } + + fn into_kv_pointers_mut(mut self) -> (*mut K, *mut V) { + ( + self.keys_mut().as_mut_ptr(), + self.vals_mut().as_mut_ptr() + ) + } } impl NodeRef { @@ -1275,103 +1304,153 @@ impl<'a, K, V> Handle, K, V, marker::Internal>, marker:: } /// This does stealing similar to `steal_left` but steals multiple elements at once. - pub fn bulk_steal_left(&mut self, n: usize) { + pub fn bulk_steal_left(&mut self, count: usize) { unsafe { - // Get raw pointers to left child's keys, values and edges. - let (left_len, left_k, left_v, left_e) = { - let mut left = self.reborrow_mut().left_edge().descend(); - - (left.len(), - left.keys_mut().as_mut_ptr(), - left.vals_mut().as_mut_ptr(), - match left.force() { - ForceResult::Leaf(_) => None, - ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()), - }) - }; - - // Get raw pointers to right child's keys, values and edges. - let (right_len, right_k, right_v, right_e) = { - let mut right = self.reborrow_mut().right_edge().descend(); - - (right.len(), - right.keys_mut().as_mut_ptr(), - right.vals_mut().as_mut_ptr(), - match right.force() { - ForceResult::Leaf(_) => None, - ForceResult::Internal(mut i) => Some(i.as_internal_mut().edges.as_mut_ptr()), - }) - }; - - // Get raw pointers to parent's key and value. - let (parent_k, parent_v) = { - let kv = self.reborrow_mut().into_kv_mut(); - (kv.0 as *mut K, kv.1 as *mut V) - }; + let mut left_node = ptr::read(self).left_edge().descend(); + let left_len = left_node.len(); + let mut right_node = ptr::read(self).right_edge().descend(); + let right_len = right_node.len(); // Make sure that we may steal safely. - debug_assert!(right_len + n <= CAPACITY); - debug_assert!(left_len >= n); + debug_assert!(right_len + count <= CAPACITY); + debug_assert!(left_len >= count); - // Make room for stolen elements in right child. - ptr::copy(right_k, - right_k.offset(n as isize), - right_len); - ptr::copy(right_v, - right_v.offset(n as isize), - right_len); - if let Some(edges) = right_e { - ptr::copy(edges, - edges.offset(n as isize), - right_len+1); + let new_left_len = left_len - count; + + // Move data. + { + let left_kv = left_node.reborrow_mut().into_kv_pointers_mut(); + let right_kv = right_node.reborrow_mut().into_kv_pointers_mut(); + let parent_kv = { + let kv = self.reborrow_mut().into_kv_mut(); + (kv.0 as *mut K, kv.1 as *mut V) + }; + + // Make room for stolen elements in the right child. + ptr::copy(right_kv.0, + right_kv.0.offset(count as isize), + right_len); + ptr::copy(right_kv.1, + right_kv.1.offset(count as isize), + right_len); + + // Move elements from the left child to the right one. + move_kv(left_kv, new_left_len + 1, right_kv, 0, count - 1); + + // Move parent's key/value pair to the right child. + move_kv(parent_kv, 0, right_kv, count - 1, 1); + + // Move the left-most stolen pair to the parent. + move_kv(left_kv, new_left_len, parent_kv, 0, 1); } - // Move elements from the left child to the right one. - let left_ind = (left_len - n) as isize; - ptr::copy_nonoverlapping(left_k.offset(left_ind + 1), - right_k, - n - 1); - ptr::copy_nonoverlapping(left_v.offset(left_ind + 1), - right_v, - n - 1); - match (left_e, right_e) { - (Some(left), Some(right)) => { - ptr::copy_nonoverlapping(left.offset(left_ind + 1), - right, - n); + left_node.reborrow_mut().as_leaf_mut().len -= count as u16; + right_node.reborrow_mut().as_leaf_mut().len += count as u16; + + match (left_node.force(), right_node.force()) { + (ForceResult::Internal(left), ForceResult::Internal(mut right)) => { + // Make room for stolen edges. + let right_edges = right.reborrow_mut().as_internal_mut().edges.as_mut_ptr(); + ptr::copy(right_edges, + right_edges.offset(count as isize), + right_len + 1); + right.correct_childrens_parent_links(count, count + right_len + 1); + + move_edges(left, new_left_len + 1, right, 0, count); }, - (Some(_), None) => unreachable!(), - (None, Some(_)) => unreachable!(), - (None, None) => {}, - } - - // Copy parent key/value pair to right child. - ptr::copy_nonoverlapping(parent_k, - right_k.offset(n as isize - 1), - 1); - ptr::copy_nonoverlapping(parent_v, - right_v.offset(n as isize - 1), - 1); - // Copy left-most stolen pair to parent. - ptr::copy_nonoverlapping(left_k.offset(left_ind), - parent_k, - 1); - ptr::copy_nonoverlapping(left_v.offset(left_ind), - parent_v, - 1); - - // Fix lengths of left and right child and parent pointers in children of the right - // child. - self.reborrow_mut().left_edge().descend().as_leaf_mut().len -= n as u16; - let mut right = self.reborrow_mut().right_edge().descend(); - right.as_leaf_mut().len += n as u16; - if let ForceResult::Internal(mut node) = right.force() { - for i in 0..(right_len+n+1) { - Handle::new_edge(node.reborrow_mut(), i as usize).correct_parent_link(); - } + (ForceResult::Leaf(_), ForceResult::Leaf(_)) => { } + _ => { unreachable!(); } } } } + + /// The symmetric clone of `bulk_steal_left`. + pub fn bulk_steal_right(&mut self, count: usize) { + unsafe { + let mut left_node = ptr::read(self).left_edge().descend(); + let left_len = left_node.len(); + let mut right_node = ptr::read(self).right_edge().descend(); + let right_len = right_node.len(); + + // Make sure that we may steal safely. + debug_assert!(left_len + count <= CAPACITY); + debug_assert!(right_len >= count); + + let new_right_len = right_len - count; + + // Move data. + { + let left_kv = left_node.reborrow_mut().into_kv_pointers_mut(); + let right_kv = right_node.reborrow_mut().into_kv_pointers_mut(); + let parent_kv = { + let kv = self.reborrow_mut().into_kv_mut(); + (kv.0 as *mut K, kv.1 as *mut V) + }; + + // Move parent's key/value pair to the left child. + move_kv(parent_kv, 0, left_kv, left_len, 1); + + // Move elements from the right child to the left one. + move_kv(right_kv, 0, left_kv, left_len + 1, count - 1); + + // Move the right-most stolen pair to the parent. + move_kv(right_kv, count - 1, parent_kv, 0, 1); + + // Fix right indexing + ptr::copy(right_kv.0.offset(count as isize), + right_kv.0, + new_right_len); + ptr::copy(right_kv.1.offset(count as isize), + right_kv.1, + new_right_len); + } + + left_node.reborrow_mut().as_leaf_mut().len += count as u16; + right_node.reborrow_mut().as_leaf_mut().len -= count as u16; + + match (left_node.force(), right_node.force()) { + (ForceResult::Internal(left), ForceResult::Internal(mut right)) => { + move_edges(right.reborrow_mut(), 0, left, left_len + 1, count); + + // Fix right indexing. + let right_edges = right.reborrow_mut().as_internal_mut().edges.as_mut_ptr(); + ptr::copy(right_edges.offset(count as isize), + right_edges, + new_right_len + 1); + right.correct_childrens_parent_links(0, new_right_len + 1); + }, + (ForceResult::Leaf(_), ForceResult::Leaf(_)) => { } + _ => { unreachable!(); } + } + } + } +} + +unsafe fn move_kv( + source: (*mut K, *mut V), source_offset: usize, + dest: (*mut K, *mut V), dest_offset: usize, + count: usize) +{ + ptr::copy_nonoverlapping(source.0.offset(source_offset as isize), + dest.0.offset(dest_offset as isize), + count); + ptr::copy_nonoverlapping(source.1.offset(source_offset as isize), + dest.1.offset(dest_offset as isize), + count); +} + +// Source and destination must have the same height. +unsafe fn move_edges( + mut source: NodeRef, source_offset: usize, + mut dest: NodeRef, dest_offset: usize, + count: usize) +{ + let source_ptr = source.as_internal_mut().edges.as_mut_ptr(); + let dest_ptr = dest.as_internal_mut().edges.as_mut_ptr(); + ptr::copy_nonoverlapping(source_ptr.offset(source_offset as isize), + dest_ptr.offset(dest_offset as isize), + count); + dest.correct_childrens_parent_links(dest_offset, dest_offset + count); } impl @@ -1397,6 +1476,41 @@ impl } } +impl<'a, K, V> Handle, K, V, marker::LeafOrInternal>, marker::Edge> { + /// Move the suffix after `self` from one node to another one. `right` must be empty. + /// The first edge of `right` remains unchanged. + pub fn move_suffix(&mut self, + right: &mut NodeRef, K, V, marker::LeafOrInternal>) { + unsafe { + let left_new_len = self.idx; + let mut left_node = self.reborrow_mut().into_node(); + + let right_new_len = left_node.len() - left_new_len; + let mut right_node = right.reborrow_mut(); + + debug_assert!(right_node.len() == 0); + debug_assert!(left_node.height == right_node.height); + + let left_kv = left_node.reborrow_mut().into_kv_pointers_mut(); + let right_kv = right_node.reborrow_mut().into_kv_pointers_mut(); + + + move_kv(left_kv, left_new_len, right_kv, 0, right_new_len); + + left_node.reborrow_mut().as_leaf_mut().len = left_new_len as u16; + right_node.reborrow_mut().as_leaf_mut().len = right_new_len as u16; + + match (left_node.force(), right_node.force()) { + (ForceResult::Internal(left), ForceResult::Internal(right)) => { + move_edges(left, left_new_len + 1, right, 1, right_new_len); + }, + (ForceResult::Leaf(_), ForceResult::Leaf(_)) => { } + _ => { unreachable!(); } + } + } + } +} + pub enum ForceResult { Leaf(Leaf), Internal(Internal) diff --git a/src/libcollections/btree/set.rs b/src/libcollections/btree/set.rs index 3ee42499a38..765595be317 100644 --- a/src/libcollections/btree/set.rs +++ b/src/libcollections/btree/set.rs @@ -580,6 +580,43 @@ impl BTreeSet { pub fn append(&mut self, other: &mut Self) { self.map.append(&mut other.map); } + + /// Splits the collection into two at the given key. Returns everything after the given key, + /// including the key. + /// + /// # Examples + /// + /// Basic usage: + /// + /// ``` + /// #![feature(btree_split_off)] + /// use std::collections::BTreeMap; + /// + /// let mut a = BTreeMap::new(); + /// a.insert(1, "a"); + /// a.insert(2, "b"); + /// a.insert(3, "c"); + /// a.insert(17, "d"); + /// a.insert(41, "e"); + /// + /// let b = a.split_off(&3); + /// + /// assert_eq!(a.len(), 2); + /// assert_eq!(b.len(), 3); + /// + /// assert_eq!(a[&1], "a"); + /// assert_eq!(a[&2], "b"); + /// + /// assert_eq!(b[&3], "c"); + /// assert_eq!(b[&17], "d"); + /// assert_eq!(b[&41], "e"); + /// ``` + #[unstable(feature = "btree_split_off", + reason = "recently added as part of collections reform 2", + issue = "19986")] + pub fn split_off(&mut self, key: &Q) -> Self where T: Borrow { + BTreeSet { map: self.map.split_off(key) } + } } #[stable(feature = "rust1", since = "1.0.0")] diff --git a/src/libcollectionstest/btree/map.rs b/src/libcollectionstest/btree/map.rs index e19090c7599..49fce68d15e 100644 --- a/src/libcollectionstest/btree/map.rs +++ b/src/libcollectionstest/btree/map.rs @@ -13,6 +13,9 @@ use std::collections::Bound::{self, Excluded, Included, Unbounded}; use std::collections::btree_map::Entry::{Occupied, Vacant}; use std::rc::Rc; +use std::iter::FromIterator; +use super::DeterministicRng; + #[test] fn test_basic_large() { let mut map = BTreeMap::new(); @@ -528,6 +531,51 @@ create_append_test!(test_append_181, 181); create_append_test!(test_append_239, 239); create_append_test!(test_append_1700, 1700); +fn rand_data(len: usize) -> Vec<(u32, u32)> { + let mut rng = DeterministicRng::new(); + Vec::from_iter( + (0..len).map(|_| (rng.next(), rng.next())) + ) +} + +#[test] +fn test_split_off_empty_right() { + let mut data = rand_data(173); + + let mut map = BTreeMap::from_iter(data.clone()); + let right = map.split_off(&(data.iter().max().unwrap().0 + 1)); + + data.sort(); + assert!(map.into_iter().eq(data)); + assert!(right.into_iter().eq(None)); +} + +#[test] +fn test_split_off_empty_left() { + let mut data = rand_data(314); + + let mut map = BTreeMap::from_iter(data.clone()); + let right = map.split_off(&data.iter().min().unwrap().0); + + data.sort(); + assert!(map.into_iter().eq(None)); + assert!(right.into_iter().eq(data)); +} + +#[test] +fn test_split_off_large_random_sorted() { + let mut data = rand_data(1529); + // special case with maximum height. + data.sort(); + + let mut map = BTreeMap::from_iter(data.clone()); + let key = data[data.len() / 2].0; + let right = map.split_off(&key); + + assert!(map.into_iter().eq(data.clone().into_iter().filter(|x| x.0 < key))); + assert!(right.into_iter().eq(data.into_iter().filter(|x| x.0 >= key))); +} + mod bench { use std::collections::BTreeMap; use std::__rand::{Rng, thread_rng}; diff --git a/src/libcollectionstest/btree/mod.rs b/src/libcollectionstest/btree/mod.rs index 0db48f3ce9e..ea43f423b7c 100644 --- a/src/libcollectionstest/btree/mod.rs +++ b/src/libcollectionstest/btree/mod.rs @@ -10,3 +10,33 @@ mod map; mod set; + +/// XorShiftRng +struct DeterministicRng { + x: u32, + y: u32, + z: u32, + w: u32, +} + +impl DeterministicRng { + fn new() -> Self { + DeterministicRng { + x: 0x193a6754, + y: 0xa8a7d469, + z: 0x97830e05, + w: 0x113ba7bb + } + } + + fn next(&mut self) -> u32 { + let x = self.x; + let t = x ^ (x << 11); + self.x = self.y; + self.y = self.z; + self.z = self.w; + let w_ = self.w; + self.w = w_ ^ (w_ >> 19) ^ (t ^ (t >> 8)); + self.w + } +} diff --git a/src/libcollectionstest/btree/set.rs b/src/libcollectionstest/btree/set.rs index 53ccfd5b4e2..f7b647d7772 100644 --- a/src/libcollectionstest/btree/set.rs +++ b/src/libcollectionstest/btree/set.rs @@ -10,6 +10,9 @@ use std::collections::BTreeSet; +use std::iter::FromIterator; +use super::DeterministicRng; + #[test] fn test_clone_eq() { let mut m = BTreeSet::new(); @@ -289,3 +292,48 @@ fn test_append() { assert_eq!(a.contains(&4), true); assert_eq!(a.contains(&5), true); } + +fn rand_data(len: usize) -> Vec { + let mut rng = DeterministicRng::new(); + Vec::from_iter( + (0..len).map(|_| rng.next()) + ) +} + +#[test] +fn test_split_off_empty_right() { + let mut data = rand_data(173); + + let mut set = BTreeSet::from_iter(data.clone()); + let right = set.split_off(&(data.iter().max().unwrap() + 1)); + + data.sort(); + assert!(set.into_iter().eq(data)); + assert!(right.into_iter().eq(None)); +} + +#[test] +fn test_split_off_empty_left() { + let mut data = rand_data(314); + + let mut set = BTreeSet::from_iter(data.clone()); + let right = set.split_off(data.iter().min().unwrap()); + + data.sort(); + assert!(set.into_iter().eq(None)); + assert!(right.into_iter().eq(data)); +} + +#[test] +fn test_split_off_large_random_sorted() { + let mut data = rand_data(1529); + // special case with maximum height. + data.sort(); + + let mut set = BTreeSet::from_iter(data.clone()); + let key = data[data.len() / 2]; + let right = set.split_off(&key); + + assert!(set.into_iter().eq(data.clone().into_iter().filter(|x| *x < key))); + assert!(right.into_iter().eq(data.into_iter().filter(|x| *x >= key))); +} diff --git a/src/libcollectionstest/lib.rs b/src/libcollectionstest/lib.rs index bae21f1bd9b..840a83f048b 100644 --- a/src/libcollectionstest/lib.rs +++ b/src/libcollectionstest/lib.rs @@ -14,6 +14,7 @@ #![feature(binary_heap_append)] #![feature(box_syntax)] #![feature(btree_append)] +#![feature(btree_split_off)] #![feature(btree_range)] #![feature(collections)] #![feature(collections_bound)]