Auto merge of #133891 - nnethercote:MixedBitSet, r=Mark-Simulacrum

Introduce `MixedBitSet`

`ChunkedBitSet` is good at avoiding excessive memory usage for programs with very large functions, where dataflow bitsets have very large domain sizes. But it's overly heavyweight for small bitsets, because any non-empty `ChunkedBitSet` takes up at least 256 bytes.

This PR introduces `MixedBitSet`, which is a simple bitset that uses `BitSet` for small/medium bitsets and `ChunkedBitSet` for large bitsets. It's a speed and memory usage win.
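
A minimal usage sketch (not part of this PR; the function name is illustrative, and it simply exercises the `new_empty`/`insert`/`contains` API added below, where `CHUNK_BITS` is the threshold between the two representations):

use rustc_index::bit_set::MixedBitSet;

fn sketch() {
    // Small domain: backed by a dense `BitSet`, only a few words of storage.
    let mut small: MixedBitSet<usize> = MixedBitSet::new_empty(100);
    small.insert(3);
    assert!(small.contains(3));

    // Large domain (more than one chunk): backed by a `ChunkedBitSet`, which
    // keeps all-zero chunks in a compact form.
    let mut large: MixedBitSet<usize> = MixedBitSet::new_empty(100_000);
    large.insert(99_999);
    assert!(!large.is_empty());
}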

r? `@Mark-Simulacrum`
bors 2024-12-09 07:13:11 +00:00
commit f6cb952dc1
9 changed files with 347 additions and 169 deletions


@ -26,7 +26,7 @@ use rustc_data_structures::graph::dominators::Dominators;
use rustc_errors::Diag;
use rustc_hir as hir;
use rustc_hir::def_id::LocalDefId;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, MixedBitSet};
use rustc_index::{IndexSlice, IndexVec};
use rustc_infer::infer::{
InferCtxt, NllRegionVariableOrigin, RegionVariableOrigin, TyCtxtInferExt,
@ -1797,7 +1797,7 @@ impl<'a, 'tcx> MirBorrowckCtxt<'a, '_, 'tcx> {
location: Location,
desired_action: InitializationRequiringAction,
place_span: (PlaceRef<'tcx>, Span),
maybe_uninits: &ChunkedBitSet<MovePathIndex>,
maybe_uninits: &MixedBitSet<MovePathIndex>,
from: u64,
to: u64,
) {


@ -296,6 +296,111 @@ impl<T: Idx> From<GrowableBitSet<T>> for BitSet<T> {
}
}
impl<T> Clone for BitSet<T> {
fn clone(&self) -> Self {
BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData }
}
fn clone_from(&mut self, from: &Self) {
self.domain_size = from.domain_size;
self.words.clone_from(&from.words);
}
}
impl<T: Idx> fmt::Debug for BitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
w.debug_list().entries(self.iter()).finish()
}
}
impl<T: Idx> ToString for BitSet<T> {
fn to_string(&self) -> String {
let mut result = String::new();
let mut sep = '[';
// Note: this is a little endian printout of bytes.
// i tracks how many bits we have printed so far.
let mut i = 0;
for word in &self.words {
let mut word = *word;
for _ in 0..WORD_BYTES {
// for each byte in `word`:
let remain = self.domain_size - i;
// If less than a byte remains, then mask just that many bits.
let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF };
assert!(mask <= 0xFF);
let byte = word & mask;
result.push_str(&format!("{sep}{byte:02x}"));
if remain <= 8 {
break;
}
word >>= 8;
i += 8;
sep = '-';
}
sep = '|';
}
result.push(']');
result
}
}
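A worked illustration of this output format (an aside, not part of the diff):

// A 16-bit set containing {0, 9} is stored in a single word, 0x201.
let mut set: BitSet<usize> = BitSet::new_empty(16);
set.insert(0);
set.insert(9);
// Bytes print little-endian, low byte first, separated by '-': 0x01, then 0x02.
assert_eq!(set.to_string(), "[01-02]");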
pub struct BitIter<'a, T: Idx> {
/// A copy of the current word, but with any already-visited bits cleared.
/// (This lets us use `trailing_zeros()` to find the next set bit.) When it
/// is reduced to 0, we move onto the next word.
word: Word,
/// The offset (measured in bits) of the current word.
offset: usize,
/// Underlying iterator over the words.
iter: slice::Iter<'a, Word>,
marker: PhantomData<T>,
}
impl<'a, T: Idx> BitIter<'a, T> {
#[inline]
fn new(words: &'a [Word]) -> BitIter<'a, T> {
// We initialize `word` and `offset` to degenerate values. On the first
// call to `next()` we will fall through to getting the first word from
// `iter`, which sets `word` to the first word (if there is one) and
// `offset` to 0. Doing it this way saves us from having to maintain
// additional state about whether we have started.
BitIter {
word: 0,
offset: usize::MAX - (WORD_BITS - 1),
iter: words.iter(),
marker: PhantomData,
}
}
}
impl<'a, T: Idx> Iterator for BitIter<'a, T> {
type Item = T;
fn next(&mut self) -> Option<T> {
loop {
if self.word != 0 {
// Get the position of the next set bit in the current word,
// then clear the bit.
let bit_pos = self.word.trailing_zeros() as usize;
self.word ^= 1 << bit_pos;
return Some(T::new(bit_pos + self.offset));
}
// Move onto the next word. `wrapping_add()` is needed to handle
// the degenerate initial value given to `offset` in `new()`.
self.word = *self.iter.next()?;
self.offset = self.offset.wrapping_add(WORD_BITS);
}
}
}
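The arithmetic behind the degenerate initial `offset` (a side note, assuming 64-bit `Word`s as in this crate): the first `wrapping_add(WORD_BITS)` lands exactly on 0, so no extra "have we started?" flag is needed.

const WORD_BITS: usize = 64; // assumption: Word = u64
let offset: usize = usize::MAX - (WORD_BITS - 1);
assert_eq!(offset.wrapping_add(WORD_BITS), 0); // first word starts at bit offset 0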
/// A fixed-size bitset type with a partially dense, partially sparse
/// representation. The bitset is broken into chunks, and chunks that are all
/// zeros or all ones are represented and handled very efficiently.
@ -305,6 +410,9 @@ impl<T: Idx> From<GrowableBitSet<T>> for BitSet<T> {
/// some stretches with lots of 0s and 1s mixed in a way that causes trouble
/// for `IntervalSet`.
///
/// Best used via `MixedBitSet`, rather than directly, because `MixedBitSet`
/// has better performance for small bitsets.
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`.
///
@ -958,117 +1066,12 @@ fn sequential_update<T: Idx>(
it.fold(false, |changed, elem| self_update(elem) | changed)
}
impl<T> Clone for BitSet<T> {
fn clone(&self) -> Self {
BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData }
}
fn clone_from(&mut self, from: &Self) {
self.domain_size = from.domain_size;
self.words.clone_from(&from.words);
}
}
impl<T: Idx> fmt::Debug for BitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
w.debug_list().entries(self.iter()).finish()
}
}
impl<T: Idx> fmt::Debug for ChunkedBitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
w.debug_list().entries(self.iter()).finish()
}
}
impl<T: Idx> ToString for BitSet<T> {
fn to_string(&self) -> String {
let mut result = String::new();
let mut sep = '[';
// Note: this is a little endian printout of bytes.
// i tracks how many bits we have printed so far.
let mut i = 0;
for word in &self.words {
let mut word = *word;
for _ in 0..WORD_BYTES {
// for each byte in `word`:
let remain = self.domain_size - i;
// If less than a byte remains, then mask just that many bits.
let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF };
assert!(mask <= 0xFF);
let byte = word & mask;
result.push_str(&format!("{sep}{byte:02x}"));
if remain <= 8 {
break;
}
word >>= 8;
i += 8;
sep = '-';
}
sep = '|';
}
result.push(']');
result
}
}
pub struct BitIter<'a, T: Idx> {
/// A copy of the current word, but with any already-visited bits cleared.
/// (This lets us use `trailing_zeros()` to find the next set bit.) When it
/// is reduced to 0, we move onto the next word.
word: Word,
/// The offset (measured in bits) of the current word.
offset: usize,
/// Underlying iterator over the words.
iter: slice::Iter<'a, Word>,
marker: PhantomData<T>,
}
impl<'a, T: Idx> BitIter<'a, T> {
#[inline]
fn new(words: &'a [Word]) -> BitIter<'a, T> {
// We initialize `word` and `offset` to degenerate values. On the first
// call to `next()` we will fall through to getting the first word from
// `iter`, which sets `word` to the first word (if there is one) and
// `offset` to 0. Doing it this way saves us from having to maintain
// additional state about whether we have started.
BitIter {
word: 0,
offset: usize::MAX - (WORD_BITS - 1),
iter: words.iter(),
marker: PhantomData,
}
}
}
impl<'a, T: Idx> Iterator for BitIter<'a, T> {
type Item = T;
fn next(&mut self) -> Option<T> {
loop {
if self.word != 0 {
// Get the position of the next set bit in the current word,
// then clear the bit.
let bit_pos = self.word.trailing_zeros() as usize;
self.word ^= 1 << bit_pos;
return Some(T::new(bit_pos + self.offset));
}
// Move onto the next word. `wrapping_add()` is needed to handle
// the degenerate initial value given to `offset` in `new()`.
self.word = *self.iter.next()?;
self.offset = self.offset.wrapping_add(WORD_BITS);
}
}
}
#[inline]
fn bitwise<Op>(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool
where
@ -1106,6 +1109,158 @@ where
false
}
/// A bitset with a mixed representation, using `BitSet` for small and medium
/// bitsets, and `ChunkedBitSet` for large bitsets, i.e. those with enough bits
/// for at least two chunks. This is a good choice for many bitsets that can
/// have large domain sizes (e.g. 5000+).
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
/// just be `usize`.
///
/// All operations that involve an element will panic if the element is equal
/// to or greater than the domain size. All operations that involve two bitsets
/// will panic if the bitsets have differing domain sizes.
#[derive(PartialEq, Eq)]
pub enum MixedBitSet<T> {
Small(BitSet<T>),
Large(ChunkedBitSet<T>),
}
impl<T> MixedBitSet<T> {
pub fn domain_size(&self) -> usize {
match self {
MixedBitSet::Small(set) => set.domain_size(),
MixedBitSet::Large(set) => set.domain_size(),
}
}
}
impl<T: Idx> MixedBitSet<T> {
#[inline]
pub fn new_empty(domain_size: usize) -> MixedBitSet<T> {
if domain_size <= CHUNK_BITS {
MixedBitSet::Small(BitSet::new_empty(domain_size))
} else {
MixedBitSet::Large(ChunkedBitSet::new_empty(domain_size))
}
}
#[inline]
pub fn is_empty(&self) -> bool {
match self {
MixedBitSet::Small(set) => set.is_empty(),
MixedBitSet::Large(set) => set.is_empty(),
}
}
#[inline]
pub fn contains(&self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.contains(elem),
MixedBitSet::Large(set) => set.contains(elem),
}
}
#[inline]
pub fn insert(&mut self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.insert(elem),
MixedBitSet::Large(set) => set.insert(elem),
}
}
pub fn insert_all(&mut self) {
match self {
MixedBitSet::Small(set) => set.insert_all(),
MixedBitSet::Large(set) => set.insert_all(),
}
}
#[inline]
pub fn remove(&mut self, elem: T) -> bool {
match self {
MixedBitSet::Small(set) => set.remove(elem),
MixedBitSet::Large(set) => set.remove(elem),
}
}
pub fn iter(&self) -> MixedBitIter<'_, T> {
match self {
MixedBitSet::Small(set) => MixedBitIter::Small(set.iter()),
MixedBitSet::Large(set) => MixedBitIter::Large(set.iter()),
}
}
bit_relations_inherent_impls! {}
}
impl<T> Clone for MixedBitSet<T> {
fn clone(&self) -> Self {
match self {
MixedBitSet::Small(set) => MixedBitSet::Small(set.clone()),
MixedBitSet::Large(set) => MixedBitSet::Large(set.clone()),
}
}
/// WARNING: this implementation of clone_from may panic if the two
/// bitsets have different domain sizes. This constraint is not inherent to
/// `clone_from`, but it works with the existing call sites and allows a
/// faster implementation, which is important because this function is hot.
fn clone_from(&mut self, from: &Self) {
match (self, from) {
(MixedBitSet::Small(set), MixedBitSet::Small(from)) => set.clone_from(from),
(MixedBitSet::Large(set), MixedBitSet::Large(from)) => set.clone_from(from),
_ => panic!("MixedBitSet size mismatch"),
}
}
}
impl<T: Idx> BitRelations<MixedBitSet<T>> for MixedBitSet<T> {
fn union(&mut self, other: &MixedBitSet<T>) -> bool {
match (self, other) {
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.union(other),
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.union(other),
_ => panic!("MixedBitSet size mismatch"),
}
}
fn subtract(&mut self, other: &MixedBitSet<T>) -> bool {
match (self, other) {
(MixedBitSet::Small(set), MixedBitSet::Small(other)) => set.subtract(other),
(MixedBitSet::Large(set), MixedBitSet::Large(other)) => set.subtract(other),
_ => panic!("MixedBitSet size mismatch"),
}
}
fn intersect(&mut self, _other: &MixedBitSet<T>) -> bool {
unimplemented!("implement if/when necessary");
}
}
impl<T: Idx> fmt::Debug for MixedBitSet<T> {
fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MixedBitSet::Small(set) => set.fmt(w),
MixedBitSet::Large(set) => set.fmt(w),
}
}
}
pub enum MixedBitIter<'a, T: Idx> {
Small(BitIter<'a, T>),
Large(ChunkedBitIter<'a, T>),
}
impl<'a, T: Idx> Iterator for MixedBitIter<'a, T> {
type Item = T;
fn next(&mut self) -> Option<T> {
match self {
MixedBitIter::Small(iter) => iter.next(),
MixedBitIter::Large(iter) => iter.next(),
}
}
}
/// A resizable bitset type with a dense representation.
///
/// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
@ -1374,7 +1529,7 @@ impl<R: Idx, C: Idx> fmt::Debug for BitMatrix<R, C> {
/// sparse representation.
///
/// Initially, every row has no explicit representation. If any bit within a
/// row is set, the entire row is instantiated as `Some(<ChunkedBitSet>)`.
/// row is set, the entire row is instantiated as `Some(<BitSet>)`.
/// Furthermore, any previously uninstantiated rows prior to it will be
/// instantiated as `None`. Those prior rows may themselves become fully
/// instantiated later on if any of their bits are set.
@ -1388,7 +1543,7 @@ where
C: Idx,
{
num_columns: usize,
rows: IndexVec<R, Option<ChunkedBitSet<C>>>,
rows: IndexVec<R, Option<BitSet<C>>>,
}
impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
@ -1397,10 +1552,10 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
Self { num_columns, rows: IndexVec::new() }
}
fn ensure_row(&mut self, row: R) -> &mut ChunkedBitSet<C> {
// Instantiate any missing rows up to and including row `row` with an empty ChunkedBitSet.
// Then replace row `row` with a full ChunkedBitSet if necessary.
self.rows.get_or_insert_with(row, || ChunkedBitSet::new_empty(self.num_columns))
fn ensure_row(&mut self, row: R) -> &mut BitSet<C> {
// Instantiate any missing rows up to and including row `row` with an empty `BitSet`.
// Then replace row `row` with a full `BitSet` if necessary.
self.rows.get_or_insert_with(row, || BitSet::new_empty(self.num_columns))
}
/// Sets the cell at `(row, column)` to true. Put another way, insert
@ -1474,7 +1629,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
self.row(row).into_iter().flat_map(|r| r.iter())
}
pub fn row(&self, row: R) -> Option<&ChunkedBitSet<C>> {
pub fn row(&self, row: R) -> Option<&BitSet<C>> {
self.rows.get(row)?.as_ref()
}
@ -1484,7 +1639,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
/// Returns true if the row was changed.
pub fn intersect_row<Set>(&mut self, row: R, set: &Set) -> bool
where
ChunkedBitSet<C>: BitRelations<Set>,
BitSet<C>: BitRelations<Set>,
{
match self.rows.get_mut(row) {
Some(Some(row)) => row.intersect(set),
@ -1498,7 +1653,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
/// Returns true if the row was changed.
pub fn subtract_row<Set>(&mut self, row: R, set: &Set) -> bool
where
ChunkedBitSet<C>: BitRelations<Set>,
BitSet<C>: BitRelations<Set>,
{
match self.rows.get_mut(row) {
Some(Some(row)) => row.subtract(set),
@ -1512,7 +1667,7 @@ impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
/// Returns true if the row was changed.
pub fn union_row<Set>(&mut self, row: R, set: &Set) -> bool
where
ChunkedBitSet<C>: BitRelations<Set>,
BitSet<C>: BitRelations<Set>,
{
self.ensure_row(row).union(set)
}


@ -503,15 +503,15 @@ fn sparse_matrix_operations() {
matrix.insert(2, 99);
matrix.insert(4, 0);
let mut disjoint: ChunkedBitSet<usize> = ChunkedBitSet::new_empty(100);
let mut disjoint: BitSet<usize> = BitSet::new_empty(100);
disjoint.insert(33);
let mut superset = ChunkedBitSet::new_empty(100);
let mut superset = BitSet::new_empty(100);
superset.insert(22);
superset.insert(75);
superset.insert(33);
let mut subset = ChunkedBitSet::new_empty(100);
let mut subset = BitSet::new_empty(100);
subset.insert(22);
// SparseBitMatrix::remove


@ -4,7 +4,7 @@
use std::fmt;
use rustc_index::Idx;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, ChunkedBitSet, MixedBitSet};
use super::lattice::MaybeReachable;
@ -85,8 +85,8 @@ where
let size = self.domain_size();
assert_eq!(size, old.domain_size());
let mut set_in_self = ChunkedBitSet::new_empty(size);
let mut cleared_in_self = ChunkedBitSet::new_empty(size);
let mut set_in_self = MixedBitSet::new_empty(size);
let mut cleared_in_self = MixedBitSet::new_empty(size);
for i in (0..size).map(T::new) {
match (self.contains(i), old.contains(i)) {
@ -112,8 +112,8 @@ where
let size = self.domain_size();
assert_eq!(size, old.domain_size());
let mut set_in_self = ChunkedBitSet::new_empty(size);
let mut cleared_in_self = ChunkedBitSet::new_empty(size);
let mut set_in_self = MixedBitSet::new_empty(size);
let mut cleared_in_self = MixedBitSet::new_empty(size);
for i in (0..size).map(T::new) {
match (self.contains(i), old.contains(i)) {
@ -127,6 +127,26 @@ where
}
}
impl<T, C> DebugWithContext<C> for MixedBitSet<T>
where
T: Idx + DebugWithContext<C>,
{
fn fmt_with(&self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
MixedBitSet::Small(set) => set.fmt_with(ctxt, f),
MixedBitSet::Large(set) => set.fmt_with(ctxt, f),
}
}
fn fmt_diff_with(&self, old: &Self, ctxt: &C, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match (self, old) {
(MixedBitSet::Small(set), MixedBitSet::Small(old)) => set.fmt_diff_with(old, ctxt, f),
(MixedBitSet::Large(set), MixedBitSet::Large(old)) => set.fmt_diff_with(old, ctxt, f),
_ => panic!("MixedBitSet size mismatch"),
}
}
}
impl<S, C> DebugWithContext<C> for MaybeReachable<S>
where
S: DebugWithContext<C>,
@ -159,8 +179,8 @@ where
}
fn fmt_diff<T, C>(
inserted: &ChunkedBitSet<T>,
removed: &ChunkedBitSet<T>,
inserted: &MixedBitSet<T>,
removed: &MixedBitSet<T>,
ctxt: &C,
f: &mut fmt::Formatter<'_>,
) -> fmt::Result


@ -40,7 +40,7 @@
use std::iter;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, MixedBitSet};
use rustc_index::{Idx, IndexVec};
use crate::framework::BitSetExt;
@ -126,7 +126,7 @@ impl<T: Idx> JoinSemiLattice for BitSet<T> {
}
}
impl<T: Idx> JoinSemiLattice for ChunkedBitSet<T> {
impl<T: Idx> JoinSemiLattice for MixedBitSet<T> {
fn join(&mut self, other: &Self) -> bool {
self.union(other)
}


@ -35,7 +35,7 @@
use std::cmp::Ordering;
use rustc_data_structures::work_queue::WorkQueue;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, MixedBitSet};
use rustc_index::{Idx, IndexVec};
use rustc_middle::bug;
use rustc_middle::mir::{self, BasicBlock, CallReturnPlaces, Location, TerminatorEdges, traversal};
@ -71,7 +71,7 @@ impl<T: Idx> BitSetExt<T> for BitSet<T> {
}
}
impl<T: Idx> BitSetExt<T> for ChunkedBitSet<T> {
impl<T: Idx> BitSetExt<T> for MixedBitSet<T> {
fn contains(&self, elem: T) -> bool {
self.contains(elem)
}
@ -327,7 +327,7 @@ impl<T: Idx> GenKill<T> for BitSet<T> {
}
}
impl<T: Idx> GenKill<T> for ChunkedBitSet<T> {
impl<T: Idx> GenKill<T> for MixedBitSet<T> {
fn gen_(&mut self, elem: T) {
self.insert(elem);
}


@ -1,7 +1,7 @@
use std::assert_matches::assert_matches;
use rustc_index::Idx;
use rustc_index::bit_set::{BitSet, ChunkedBitSet};
use rustc_index::bit_set::{BitSet, MixedBitSet};
use rustc_middle::bug;
use rustc_middle::mir::{self, Body, CallReturnPlaces, Location, TerminatorEdges};
use rustc_middle::ty::{self, TyCtxt};
@ -70,7 +70,7 @@ impl<'a, 'tcx> MaybeInitializedPlaces<'a, 'tcx> {
pub fn is_unwind_dead(
&self,
place: mir::Place<'tcx>,
state: &MaybeReachable<ChunkedBitSet<MovePathIndex>>,
state: &MaybeReachable<MixedBitSet<MovePathIndex>>,
) -> bool {
if let LookupResult::Exact(path) = self.move_data().rev_lookup.find(place.as_ref()) {
let mut maybe_live = false;
@ -244,8 +244,8 @@ impl<'tcx> MaybeUninitializedPlaces<'_, 'tcx> {
impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
/// There can be many more `MovePathIndex` than there are locals in a MIR body.
/// We use a chunked bitset to avoid paying too high a memory footprint.
type Domain = MaybeReachable<ChunkedBitSet<MovePathIndex>>;
/// We use a mixed bitset to avoid paying too high a memory footprint.
type Domain = MaybeReachable<MixedBitSet<MovePathIndex>>;
const NAME: &'static str = "maybe_init";
@ -256,7 +256,7 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
fn initialize_start_block(&self, _: &mir::Body<'tcx>, state: &mut Self::Domain) {
*state =
MaybeReachable::Reachable(ChunkedBitSet::new_empty(self.move_data().move_paths.len()));
MaybeReachable::Reachable(MixedBitSet::new_empty(self.move_data().move_paths.len()));
drop_flag_effects_for_function_entry(self.body, self.move_data, |path, s| {
assert!(s == DropFlagState::Present);
state.gen_(path);
@ -371,14 +371,14 @@ impl<'tcx> Analysis<'tcx> for MaybeInitializedPlaces<'_, 'tcx> {
impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> {
/// There can be many more `MovePathIndex` than there are locals in a MIR body.
/// We use a chunked bitset to avoid paying too high a memory footprint.
type Domain = ChunkedBitSet<MovePathIndex>;
/// We use a mixed bitset to avoid paying too high a memory footprint.
type Domain = MixedBitSet<MovePathIndex>;
const NAME: &'static str = "maybe_uninit";
fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain {
// bottom = initialized (start_block_effect counters this at outset)
ChunkedBitSet::new_empty(self.move_data().move_paths.len())
MixedBitSet::new_empty(self.move_data().move_paths.len())
}
// sets on_entry bits for Arg places
@ -492,14 +492,14 @@ impl<'tcx> Analysis<'tcx> for MaybeUninitializedPlaces<'_, 'tcx> {
impl<'tcx> Analysis<'tcx> for EverInitializedPlaces<'_, 'tcx> {
/// There can be many more `InitIndex` than there are locals in a MIR body.
/// We use a chunked bitset to avoid paying too high a memory footprint.
type Domain = ChunkedBitSet<InitIndex>;
/// We use a mixed bitset to avoid paying too high a memory footprint.
type Domain = MixedBitSet<InitIndex>;
const NAME: &'static str = "ever_init";
fn bottom_value(&self, _: &mir::Body<'tcx>) -> Self::Domain {
// bottom = no initialized variables by default
ChunkedBitSet::new_empty(self.move_data().inits.len())
MixedBitSet::new_empty(self.move_data().inits.len())
}
fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut Self::Domain) {


@ -7,7 +7,7 @@ use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_errors::Subdiagnostic;
use rustc_hir::CRATE_HIR_ID;
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_index::bit_set::ChunkedBitSet;
use rustc_index::bit_set::MixedBitSet;
use rustc_index::{IndexSlice, IndexVec};
use rustc_macros::{LintDiagnostic, Subdiagnostic};
use rustc_middle::bug;
@ -49,24 +49,24 @@ struct DropsReachable<'a, 'mir, 'tcx> {
move_data: &'a MoveData<'tcx>,
maybe_init: &'a mut ResultsCursor<'mir, 'tcx, MaybeInitializedPlaces<'mir, 'tcx>>,
block_drop_value_info: &'a mut IndexSlice<BasicBlock, MovePathIndexAtBlock>,
collected_drops: &'a mut ChunkedBitSet<MovePathIndex>,
visited: FxHashMap<BasicBlock, Rc<RefCell<ChunkedBitSet<MovePathIndex>>>>,
collected_drops: &'a mut MixedBitSet<MovePathIndex>,
visited: FxHashMap<BasicBlock, Rc<RefCell<MixedBitSet<MovePathIndex>>>>,
}
impl<'a, 'mir, 'tcx> DropsReachable<'a, 'mir, 'tcx> {
fn visit(&mut self, block: BasicBlock) {
let move_set_size = self.move_data.move_paths.len();
let make_new_path_set = || Rc::new(RefCell::new(ChunkedBitSet::new_empty(move_set_size)));
let make_new_path_set = || Rc::new(RefCell::new(MixedBitSet::new_empty(move_set_size)));
let data = &self.body.basic_blocks[block];
let Some(terminator) = &data.terminator else { return };
// Given that we observe these dropped locals here at `block` so far,
// we will try to update the successor blocks.
// An occupied entry at `block` in `self.visited` signals that we have visited `block` before.
// Given that we observe these dropped locals here at `block` so far, we will try to update
// the successor blocks. An occupied entry at `block` in `self.visited` signals that we
// have visited `block` before.
let dropped_local_here =
Rc::clone(self.visited.entry(block).or_insert_with(make_new_path_set));
// We could have invoked reverse lookup for a `MovePathIndex` every time, but unfortunately it is expensive.
// Let's cache them in `self.block_drop_value_info`.
// We could have invoked reverse lookup for a `MovePathIndex` every time, but unfortunately
// it is expensive. Let's cache them in `self.block_drop_value_info`.
match self.block_drop_value_info[block] {
MovePathIndexAtBlock::Some(dropped) => {
dropped_local_here.borrow_mut().insert(dropped);
@ -76,23 +76,24 @@ impl<'a, 'mir, 'tcx> DropsReachable<'a, 'mir, 'tcx> {
&& let LookupResult::Exact(idx) | LookupResult::Parent(Some(idx)) =
self.move_data.rev_lookup.find(place.as_ref())
{
// Since we are working with MIRs at a very early stage,
// observing a `drop` terminator is not indicative enough that
// the drop will definitely happen.
// That is decided in the drop elaboration pass instead.
// Therefore, we need to consult with the maybe-initialization information.
// Since we are working with MIRs at a very early stage, observing a `drop`
// terminator is not indicative enough that the drop will definitely happen.
// That is decided in the drop elaboration pass instead. Therefore, we need to
// consult with the maybe-initialization information.
self.maybe_init.seek_before_primary_effect(Location {
block,
statement_index: data.statements.len(),
});
// Check if the drop of `place` under inspection is really in effect.
// This is true only when `place` may have been initialized along a control flow path from a BID to the drop program point today.
// In other words, this is where the drop of `place` will happen in the future instead.
// Check if the drop of `place` under inspection is really in effect. This is
// true only when `place` may have been initialized along a control flow path
// from a BID to the drop program point today. In other words, this is where
// the drop of `place` will happen in the future instead.
if let MaybeReachable::Reachable(maybe_init) = self.maybe_init.get()
&& maybe_init.contains(idx)
{
// We also cache the drop information, so that we do not need to check on data-flow cursor again
// We also cache the drop information, so that we do not need to check on
// data-flow cursor again.
self.block_drop_value_info[block] = MovePathIndexAtBlock::Some(idx);
dropped_local_here.borrow_mut().insert(idx);
} else {
@ -139,8 +140,9 @@ impl<'a, 'mir, 'tcx> DropsReachable<'a, 'mir, 'tcx> {
// Let's check the observed dropped places in.
self.collected_drops.union(&*dropped_local_there.borrow());
if self.drop_span.is_none() {
// FIXME(@dingxiangfei2009): it turns out that `self.body.source_scopes` are still a bit wonky.
// There is a high chance that this span still points to a block rather than a statement semicolon.
// FIXME(@dingxiangfei2009): it turns out that `self.body.source_scopes` are
// still a bit wonky. There is a high chance that this span still points to a
// block rather than a statement semicolon.
*self.drop_span = Some(terminator.source_info.span);
}
// Now we have discovered a simple control flow path from a future drop point
@ -394,10 +396,10 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<
for (&block, candidates) in &bid_per_block {
// We will collect drops on locals on paths between BID points to their actual drop locations
// into `all_locals_dropped`.
let mut all_locals_dropped = ChunkedBitSet::new_empty(move_data.move_paths.len());
let mut all_locals_dropped = MixedBitSet::new_empty(move_data.move_paths.len());
let mut drop_span = None;
for &(_, place) in candidates.iter() {
let mut collected_drops = ChunkedBitSet::new_empty(move_data.move_paths.len());
let mut collected_drops = MixedBitSet::new_empty(move_data.move_paths.len());
// ## On detecting change in relative drop order ##
// Iterate through each BID-containing block `block`.
// If the place `P` targeted by the BID is "maybe initialized",
@ -425,8 +427,9 @@ pub(crate) fn run_lint<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId, body: &Body<
// We shall now exclude some local bindings for the following cases.
{
let mut to_exclude = ChunkedBitSet::new_empty(all_locals_dropped.domain_size());
// We will now do subtraction from the candidate dropped locals, because of the following reasons.
let mut to_exclude = MixedBitSet::new_empty(all_locals_dropped.domain_size());
// We will now do subtraction from the candidate dropped locals, because of the
// following reasons.
for path_idx in all_locals_dropped.iter() {
let move_path = &move_data.move_paths[path_idx];
let dropped_local = move_path.place.local;


@ -1,5 +1,5 @@
use rustc_abi::FieldIdx;
use rustc_index::bit_set::ChunkedBitSet;
use rustc_index::bit_set::MixedBitSet;
use rustc_middle::mir::{Body, TerminatorKind};
use rustc_middle::ty::{self, GenericArgsRef, Ty, TyCtxt, VariantDef};
use rustc_mir_dataflow::impls::MaybeInitializedPlaces;
@ -67,7 +67,7 @@ impl<'tcx> crate::MirPass<'tcx> for RemoveUninitDrops {
fn is_needs_drop_and_init<'tcx>(
tcx: TyCtxt<'tcx>,
typing_env: ty::TypingEnv<'tcx>,
maybe_inits: &ChunkedBitSet<MovePathIndex>,
maybe_inits: &MixedBitSet<MovePathIndex>,
move_data: &MoveData<'tcx>,
ty: Ty<'tcx>,
mpi: MovePathIndex,