Auto merge of #97862 - SparrowLii:superset, r=lcnr

optimize `superset` method of `IntervalSet`

Given that the intervals in an `IntervalSet` are sorted and strictly separated (that is, the `end` of the previous interval is never equal to the `start` of the next interval), we can reduce the complexity of the `superset` method from O(NMlogN) to O(2N), where N is the number of intervals and M is the length of each interval.
This commit is contained in:
bors 2022-06-09 07:13:46 +00:00
commit 6dc598a01b
2 changed files with 58 additions and 13 deletions

View File

@ -1,7 +1,7 @@
use std::iter::Step; use std::iter::Step;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::ops::Bound;
use std::ops::RangeBounds; use std::ops::RangeBounds;
use std::ops::{Bound, Range};
use crate::vec::Idx; use crate::vec::Idx;
use crate::vec::IndexVec; use crate::vec::IndexVec;
@ -11,6 +11,10 @@ use smallvec::SmallVec;
mod tests; mod tests;
/// Stores a set of intervals on the indices. /// Stores a set of intervals on the indices.
///
/// The elements in `map` are sorted and non-adjacent, which means
/// the second value of the previous element, plus one, is *less* than
/// the first value of the following element.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct IntervalSet<I> { pub struct IntervalSet<I> {
// Start, end // Start, end
@ -84,7 +88,7 @@ impl<I: Idx> IntervalSet<I> {
// continue to the next range. We're looking here for the first // continue to the next range. We're looking here for the first
// range which starts *non-adjacently* to our end. // range which starts *non-adjacently* to our end.
let next = self.map.partition_point(|r| r.0 <= end + 1); let next = self.map.partition_point(|r| r.0 <= end + 1);
if let Some(right) = next.checked_sub(1) { let result = if let Some(right) = next.checked_sub(1) {
let (prev_start, prev_end) = self.map[right]; let (prev_start, prev_end) = self.map[right];
if prev_end + 1 >= start { if prev_end + 1 >= start {
// If the start for the inserted range is adjacent to the // If the start for the inserted range is adjacent to the
@ -99,7 +103,7 @@ impl<I: Idx> IntervalSet<I> {
if left != right { if left != right {
self.map.drain(left..right); self.map.drain(left..right);
} }
return true; true
} else { } else {
// We overlap with the previous range, increase it to // We overlap with the previous range, increase it to
// include us. // include us.
@ -107,17 +111,17 @@ impl<I: Idx> IntervalSet<I> {
// Make sure we're actually going to *increase* it though -- // Make sure we're actually going to *increase* it though --
// it may be that end is just inside the previously existing // it may be that end is just inside the previously existing
// set. // set.
return if end > prev_end { if end > prev_end {
self.map[right].1 = end; self.map[right].1 = end;
true true
} else { } else {
false false
}; }
} }
} else { } else {
// Otherwise, we don't overlap, so just insert // Otherwise, we don't overlap, so just insert
self.map.insert(right + 1, (start, end)); self.map.insert(right + 1, (start, end));
return true; true
} }
} else { } else {
if self.map.is_empty() { if self.map.is_empty() {
@ -127,8 +131,16 @@ impl<I: Idx> IntervalSet<I> {
} else { } else {
self.map.insert(next, (start, end)); self.map.insert(next, (start, end));
} }
return true; true
} };
debug_assert!(
self.check_invariants(),
"wrong intervals after insert {:?}..={:?} to {:?}",
start,
end,
self
);
result
} }
pub fn contains(&self, needle: I) -> bool { pub fn contains(&self, needle: I) -> bool {
@ -145,9 +157,26 @@ impl<I: Idx> IntervalSet<I> {
where where
I: Step, I: Step,
{ {
// FIXME: Performance here is probably not great. We will be doing a lot let mut sup_iter = self.iter_intervals();
// of pointless tree traversals. let mut current = None;
other.iter().all(|elem| self.contains(elem)) let contains = |sup: Range<I>, sub: Range<I>, current: &mut Option<Range<I>>| {
if sup.end < sub.start {
// if `sup.end == sub.start`, the next sup doesn't contain `sub.start`
None // continue to the next sup
} else if sup.end >= sub.end && sup.start <= sub.start {
*current = Some(sup); // save the current sup
Some(true)
} else {
Some(false)
}
};
other.iter_intervals().all(|sub| {
current
.take()
.and_then(|sup| contains(sup, sub.clone(), &mut current))
.or_else(|| sup_iter.find_map(|sup| contains(sup, sub.clone(), &mut current)))
.unwrap_or(false)
})
} }
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
@ -174,7 +203,10 @@ impl<I: Idx> IntervalSet<I> {
pub fn insert_all(&mut self) { pub fn insert_all(&mut self) {
self.clear(); self.clear();
self.map.push((0, self.domain.try_into().unwrap())); if let Some(end) = self.domain.checked_sub(1) {
self.map.push((0, end.try_into().unwrap()));
}
debug_assert!(self.check_invariants());
} }
pub fn union(&mut self, other: &IntervalSet<I>) -> bool pub fn union(&mut self, other: &IntervalSet<I>) -> bool
@ -186,8 +218,21 @@ impl<I: Idx> IntervalSet<I> {
for range in other.iter_intervals() { for range in other.iter_intervals() {
did_insert |= self.insert_range(range); did_insert |= self.insert_range(range);
} }
debug_assert!(self.check_invariants());
did_insert did_insert
} }
// Check the intervals are valid, sorted and non-adjacent
//
// Returns `true` when every `(start, end)` pair in `self.map` has
// `start <= end`, every pair after the first begins more than one past the
// end of the pair before it, and the end of the last pair is below
// `self.domain`. An empty `map` yields `true`.
fn check_invariants(&self) -> bool {
// End of the most recently visited pair; `None` until the first pair is seen.
let mut current: Option<u32> = None;
for (start, end) in &self.map {
// Fail on an inverted pair, or on a pair that overlaps, touches, or
// comes before the previous one (previous end + 1 >= this start).
if start > end || current.map_or(false, |x| x + 1 >= *start) {
return false;
}
current = Some(*end);
}
// Only the final end is compared against the domain; because the loop
// already established ascending order, it is the largest end in `map`.
current.map_or(true, |x| x < self.domain as u32)
}
} }
/// This data structure optimizes for cases where the stored bits in each row /// This data structure optimizes for cases where the stored bits in each row

View File

@ -2,7 +2,7 @@ use super::*;
#[test] #[test]
fn insert_collapses() { fn insert_collapses() {
let mut set = IntervalSet::<u32>::new(3000); let mut set = IntervalSet::<u32>::new(10000);
set.insert_range(9831..=9837); set.insert_range(9831..=9837);
set.insert_range(43..=9830); set.insert_range(43..=9830);
assert_eq!(set.iter_intervals().collect::<Vec<_>>(), [43..9838]); assert_eq!(set.iter_intervals().collect::<Vec<_>>(), [43..9838]);