coverage: Move most span processing back into coverage::spans

Zalathar 2024-06-16 01:11:32 +10:00
parent e102d2dbd6
commit bf74fb1d2f
2 changed files with 157 additions and 155 deletions
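(For orientation: the heart of the logic moved in this commit is a pass that carves coverage spans around "hole" spans and groups the resulting pieces into buckets separated by those holes. The following self-contained sketch illustrates the idea with plain (lo, hi) ranges in place of rustc's `Span`; it is an illustrative approximation, not part of the diff below.)

use std::collections::VecDeque;

// Hypothetical stand-in for rustc's `Span`, used only for this sketch.
#[derive(Clone, Copy, Debug, PartialEq)]
struct Range { lo: u32, hi: u32 }

fn carve_into_buckets(mut spans: Vec<Range>, holes: &[Range]) -> Vec<Vec<Range>> {
    // Sort by start ascending, end descending (mirrors `compare_spans` below).
    spans.sort_by_key(|s| (s.lo, std::cmp::Reverse(s.hi)));
    let mut input = VecDeque::from(spans);
    let mut fragments: Vec<Range> = vec![];
    let mut buckets: Vec<Vec<Range>> = vec![vec![]; holes.len()];
    for (hole, bucket) in holes.iter().zip(&mut buckets) {
        let carried = std::mem::take(&mut fragments);
        // Only spans that start before the end of this hole are relevant here.
        let mut relevant = carried;
        while input.front().is_some_and(|s| s.lo < hole.hi) {
            relevant.push(input.pop_front().unwrap());
        }
        for s in relevant {
            // The part strictly before the hole lands in this hole's bucket.
            if s.lo < hole.lo {
                bucket.push(Range { lo: s.lo, hi: s.hi.min(hole.lo) });
            }
            // The part strictly after the hole is carried on to the next hole.
            if s.hi > hole.hi {
                fragments.push(Range { lo: s.lo.max(hole.hi), hi: s.hi });
            }
        }
    }
    // Anything left over forms a final bucket after the last hole.
    fragments.extend(input);
    buckets.push(fragments);
    buckets
}

fn main() {
    let spans = vec![Range { lo: 0, hi: 10 }, Range { lo: 12, hi: 20 }];
    let holes = vec![Range { lo: 4, hi: 6 }];
    let buckets = carve_into_buckets(spans, &holes);
    // Bucket 0: the piece before the hole; bucket 1: everything after it.
    assert_eq!(buckets[0], vec![Range { lo: 0, hi: 4 }]);
    assert_eq!(buckets[1], vec![Range { lo: 6, hi: 10 }, Range { lo: 12, hi: 20 }]);
}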

View File

@@ -1,9 +1,15 @@
use std::collections::VecDeque;
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fx::FxHashSet;
use rustc_middle::mir;
use rustc_span::Span;
use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph};
use crate::coverage::mappings;
use crate::coverage::spans::from_mir::SpanFromMir;
use crate::coverage::spans::from_mir::{
extract_covspans_and_holes_from_mir, ExtractedCovspans, SpanFromMir,
};
use crate::coverage::ExtractedHirInfo;
mod from_mir;
@@ -19,9 +25,68 @@ pub(super) fn extract_refined_covspans(
basic_coverage_blocks: &CoverageGraph,
code_mappings: &mut impl Extend<mappings::CodeMapping>,
) {
let buckets =
from_mir::mir_to_initial_sorted_coverage_spans(mir_body, hir_info, basic_coverage_blocks);
for covspans in buckets {
let ExtractedCovspans { mut covspans, mut holes } =
extract_covspans_and_holes_from_mir(mir_body, hir_info, basic_coverage_blocks);
covspans.sort_by(|a, b| basic_coverage_blocks.cmp_in_dominator_order(a.bcb, b.bcb));
remove_unwanted_macro_spans(&mut covspans);
split_visible_macro_spans(&mut covspans);
let compare_covspans = |a: &SpanFromMir, b: &SpanFromMir| {
compare_spans(a.span, b.span)
// After deduplication, we want to keep only the most-dominated BCB.
.then_with(|| basic_coverage_blocks.cmp_in_dominator_order(a.bcb, b.bcb).reverse())
};
covspans.sort_by(compare_covspans);
// Among covspans with the same span, keep only one,
// preferring the one with the most-dominated BCB.
// (Ideally we should try to preserve _all_ non-dominating BCBs, but that
// requires a lot more complexity in the span refiner, for little benefit.)
covspans.dedup_by(|b, a| a.span.source_equal(b.span));
// Sort the holes, and merge overlapping/adjacent holes.
holes.sort_by(|a, b| compare_spans(a.span, b.span));
holes.dedup_by(|b, a| a.merge_if_overlapping_or_adjacent(b));
// Now we're ready to start carving holes out of the initial coverage spans,
// and grouping them in buckets separated by the holes.
let mut input_covspans = VecDeque::from(covspans);
let mut fragments: Vec<SpanFromMir> = vec![];
// For each hole:
// - Identify the spans that are entirely or partly before the hole.
// - Put those spans in a corresponding bucket, truncated to the start of the hole.
// - If one of those spans also extends after the hole, put the rest of it
// in a "fragments" vector that is processed by the next hole.
let mut buckets = (0..holes.len()).map(|_| vec![]).collect::<Vec<_>>();
for (hole, bucket) in holes.iter().zip(&mut buckets) {
let fragments_from_prev = std::mem::take(&mut fragments);
// Only inspect spans that precede or overlap this hole,
// leaving the rest to be inspected by later holes.
// (This relies on the spans and holes both being sorted.)
let relevant_input_covspans =
drain_front_while(&mut input_covspans, |c| c.span.lo() < hole.span.hi());
for covspan in fragments_from_prev.into_iter().chain(relevant_input_covspans) {
let (before, after) = covspan.split_around_hole_span(hole.span);
bucket.extend(before);
fragments.extend(after);
}
}
// After finding the spans before each hole, any remaining fragments/spans
// form their own final bucket, after the final hole.
// (If there were no holes, this will just be all of the initial spans.)
fragments.extend(input_covspans);
buckets.push(fragments);
for mut covspans in buckets {
// Make sure each individual bucket is internally sorted.
covspans.sort_by(compare_covspans);
let covspans = refine_sorted_spans(covspans);
code_mappings.extend(covspans.into_iter().map(|RefinedCovspan { span, bcb }| {
// Each span produced by the refiner represents an ordinary code region.
@@ -30,6 +95,56 @@ pub(super) fn extract_refined_covspans(
}
}
/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate
/// multiple condition/consequent blocks that have the span of the whole macro
/// invocation, which is unhelpful. Keeping only the first such span seems to
/// give better mappings, so remove the others.
///
/// (The input spans should be sorted in BCB dominator order, so that the
/// retained "first" span is likely to dominate the others.)
fn remove_unwanted_macro_spans(covspans: &mut Vec<SpanFromMir>) {
let mut seen_macro_spans = FxHashSet::default();
covspans.retain(|covspan| {
// Ignore (retain) non-macro-expansion spans.
if covspan.visible_macro.is_none() {
return true;
}
// Retain only the first macro-expanded covspan with this span.
seen_macro_spans.insert(covspan.span)
});
}
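(The retain-plus-set pattern above keeps only the first covspan seen for each macro span. A minimal standalone sketch of that same pattern, using std's HashSet in place of FxHashSet and plain strings in place of spans; illustrative only, not part of the diff:)

use std::collections::HashSet;

fn main() {
    // (span text, is_macro_expansion) pairs standing in for covspans.
    let mut covspans = vec![("assert!(x)", true), ("x + 1", false), ("assert!(x)", true)];
    let mut seen = HashSet::new();
    covspans.retain(|&(span, is_macro)| {
        // Non-macro spans are always kept.
        if !is_macro {
            return true;
        }
        // `insert` returns true only the first time this span is seen.
        seen.insert(span)
    });
    assert_eq!(covspans, vec![("assert!(x)", true), ("x + 1", false)]);
}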
/// When a span corresponds to a macro invocation that is visible from the
/// function body, split it into two parts. The first part covers just the
/// macro name plus `!`, and the second part covers the rest of the macro
/// invocation. This seems to give better results for code that uses macros.
fn split_visible_macro_spans(covspans: &mut Vec<SpanFromMir>) {
let mut extra_spans = vec![];
covspans.retain(|covspan| {
let Some(visible_macro) = covspan.visible_macro else { return true };
let split_len = visible_macro.as_str().len() as u32 + 1;
let (before, after) = covspan.span.split_at(split_len);
if !covspan.span.contains(before) || !covspan.span.contains(after) {
// Something is unexpectedly wrong with the split point.
// The debug assertion in `split_at` will have already caught this,
// but in release builds it's safer to do nothing and maybe get a
// bug report for unexpected coverage, rather than risk an ICE.
return true;
}
extra_spans.push(SpanFromMir::new(before, covspan.visible_macro, covspan.bcb));
extra_spans.push(SpanFromMir::new(after, covspan.visible_macro, covspan.bcb));
false // Discard the original covspan that we just split.
});
// The newly-split spans are added at the end, so any previous sorting
// is not preserved.
covspans.extend(extra_spans);
}
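(For intuition about the split point used above: the span is cut just after the macro name plus its `!`. A hedged sketch with plain strings instead of spans; the real code splits `Span`s, not text:)

fn main() {
    let invocation = "assert_eq!(a, b)";
    let visible_macro = "assert_eq";
    // Split after the macro name plus the trailing `!`.
    let split_len = visible_macro.len() + 1;
    let (before, after) = invocation.split_at(split_len);
    assert_eq!(before, "assert_eq!");
    assert_eq!(after, "(a, b)");
}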
#[derive(Debug)]
struct RefinedCovspan {
span: Span,
@@ -47,6 +162,15 @@ impl RefinedCovspan {
}
}
/// Similar to `.drain(..)`, but stops just before it would remove an item not
/// satisfying the predicate.
fn drain_front_while<'a, T>(
queue: &'a mut VecDeque<T>,
mut pred_fn: impl FnMut(&T) -> bool,
) -> impl Iterator<Item = T> + Captures<'a> {
std::iter::from_fn(move || if pred_fn(queue.front()?) { queue.pop_front() } else { None })
}
/// Takes one of the buckets of (sorted) spans extracted from MIR, and "refines"
/// those spans by removing spans that overlap in unwanted ways, and by merging
/// compatible adjacent spans.
@@ -94,3 +218,15 @@ fn refine_sorted_spans(sorted_spans: Vec<SpanFromMir>) -> Vec<RefinedCovspan> {
refined
}
/// Compares two spans in (lo ascending, hi descending) order.
fn compare_spans(a: Span, b: Span) -> std::cmp::Ordering {
// First sort by span start.
Ord::cmp(&a.lo(), &b.lo())
// If span starts are the same, sort by span end in reverse order.
// This ensures that if spans A and B are adjacent in the list,
// and they overlap but are not equal, then either:
// - Span A extends further left, or
// - Both have the same start and span A extends further right
.then_with(|| Ord::cmp(&a.hi(), &b.hi()).reverse())
}
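(To illustrate the (lo ascending, hi descending) ordering that `compare_spans` implements, here is a small sketch using plain (lo, hi) pairs rather than rustc `Span`s; illustrative only:)

fn compare_ranges(a: (u32, u32), b: (u32, u32)) -> std::cmp::Ordering {
    // First by start ascending, then by end descending.
    Ord::cmp(&a.0, &b.0).then_with(|| Ord::cmp(&a.1, &b.1).reverse())
}

fn main() {
    let mut spans = vec![(4, 8), (2, 5), (2, 10)];
    spans.sort_by(|a, b| compare_ranges(*a, *b));
    // Equal starts put the wider span first: [(2, 10), (2, 5), (4, 8)].
    assert_eq!(spans, vec![(2, 10), (2, 5), (4, 8)]);
}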

View File

@@ -1,7 +1,3 @@
use std::collections::VecDeque;
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fx::FxHashSet;
use rustc_middle::bug;
use rustc_middle::mir::coverage::CoverageKind;
use rustc_middle::mir::{
@@ -15,20 +11,19 @@ use crate::coverage::graph::{
};
use crate::coverage::ExtractedHirInfo;
pub(crate) struct ExtractedCovspans {
pub(crate) covspans: Vec<SpanFromMir>,
pub(crate) holes: Vec<Hole>,
}
/// Traverses the MIR body to produce an initial collection of coverage-relevant
/// spans, each associated with a node in the coverage graph (BCB) and possibly
/// other metadata.
///
/// The returned spans are divided into one or more buckets, such that:
/// - The spans in each bucket are strictly after all spans in previous buckets,
/// and strictly before all spans in subsequent buckets.
/// - The contents of each bucket are also sorted, in a specific order that is
/// expected by the subsequent span-refinement step.
pub(super) fn mir_to_initial_sorted_coverage_spans(
pub(crate) fn extract_covspans_and_holes_from_mir(
mir_body: &mir::Body<'_>,
hir_info: &ExtractedHirInfo,
basic_coverage_blocks: &CoverageGraph,
) -> Vec<Vec<SpanFromMir>> {
) -> ExtractedCovspans {
let &ExtractedHirInfo { body_span, .. } = hir_info;
let mut covspans = vec![];
@@ -54,136 +49,7 @@ pub(super) fn mir_to_initial_sorted_coverage_spans(
covspans.push(SpanFromMir::for_fn_sig(fn_sig_span));
}
covspans.sort_by(|a, b| basic_coverage_blocks.cmp_in_dominator_order(a.bcb, b.bcb));
remove_unwanted_macro_spans(&mut covspans);
split_visible_macro_spans(&mut covspans);
let compare_covspans = |a: &SpanFromMir, b: &SpanFromMir| {
compare_spans(a.span, b.span)
// After deduplication, we want to keep only the most-dominated BCB.
.then_with(|| basic_coverage_blocks.cmp_in_dominator_order(a.bcb, b.bcb).reverse())
};
covspans.sort_by(compare_covspans);
// Among covspans with the same span, keep only one,
// preferring the one with the most-dominated BCB.
// (Ideally we should try to preserve _all_ non-dominating BCBs, but that
// requires a lot more complexity in the span refiner, for little benefit.)
covspans.dedup_by(|b, a| a.span.source_equal(b.span));
// Sort the holes, and merge overlapping/adjacent holes.
holes.sort_by(|a, b| compare_spans(a.span, b.span));
holes.dedup_by(|b, a| a.merge_if_overlapping_or_adjacent(b));
// Now we're ready to start carving holes out of the initial coverage spans,
// and grouping them in buckets separated by the holes.
let mut input_covspans = VecDeque::from(covspans);
let mut fragments: Vec<SpanFromMir> = vec![];
// For each hole:
// - Identify the spans that are entirely or partly before the hole.
// - Put those spans in a corresponding bucket, truncated to the start of the hole.
// - If one of those spans also extends after the hole, put the rest of it
// in a "fragments" vector that is processed by the next hole.
let mut buckets = (0..holes.len()).map(|_| vec![]).collect::<Vec<_>>();
for (hole, bucket) in holes.iter().zip(&mut buckets) {
let fragments_from_prev = std::mem::take(&mut fragments);
// Only inspect spans that precede or overlap this hole,
// leaving the rest to be inspected by later holes.
// (This relies on the spans and holes both being sorted.)
let relevant_input_covspans =
drain_front_while(&mut input_covspans, |c| c.span.lo() < hole.span.hi());
for covspan in fragments_from_prev.into_iter().chain(relevant_input_covspans) {
let (before, after) = covspan.split_around_hole_span(hole.span);
bucket.extend(before);
fragments.extend(after);
}
}
// After finding the spans before each hole, any remaining fragments/spans
// form their own final bucket, after the final hole.
// (If there were no holes, this will just be all of the initial spans.)
fragments.extend(input_covspans);
buckets.push(fragments);
// Make sure each individual bucket is still internally sorted.
for covspans in &mut buckets {
covspans.sort_by(compare_covspans);
}
buckets
}
fn compare_spans(a: Span, b: Span) -> std::cmp::Ordering {
// First sort by span start.
Ord::cmp(&a.lo(), &b.lo())
// If span starts are the same, sort by span end in reverse order.
// This ensures that if spans A and B are adjacent in the list,
// and they overlap but are not equal, then either:
// - Span A extends further left, or
// - Both have the same start and span A extends further right
.then_with(|| Ord::cmp(&a.hi(), &b.hi()).reverse())
}
/// Similar to `.drain(..)`, but stops just before it would remove an item not
/// satisfying the predicate.
fn drain_front_while<'a, T>(
queue: &'a mut VecDeque<T>,
mut pred_fn: impl FnMut(&T) -> bool,
) -> impl Iterator<Item = T> + Captures<'a> {
std::iter::from_fn(move || if pred_fn(queue.front()?) { queue.pop_front() } else { None })
}
/// Macros that expand into branches (e.g. `assert!`, `trace!`) tend to generate
/// multiple condition/consequent blocks that have the span of the whole macro
/// invocation, which is unhelpful. Keeping only the first such span seems to
/// give better mappings, so remove the others.
///
/// (The input spans should be sorted in BCB dominator order, so that the
/// retained "first" span is likely to dominate the others.)
fn remove_unwanted_macro_spans(covspans: &mut Vec<SpanFromMir>) {
let mut seen_macro_spans = FxHashSet::default();
covspans.retain(|covspan| {
// Ignore (retain) non-macro-expansion spans.
if covspan.visible_macro.is_none() {
return true;
}
// Retain only the first macro-expanded covspan with this span.
seen_macro_spans.insert(covspan.span)
});
}
/// When a span corresponds to a macro invocation that is visible from the
/// function body, split it into two parts. The first part covers just the
/// macro name plus `!`, and the second part covers the rest of the macro
/// invocation. This seems to give better results for code that uses macros.
fn split_visible_macro_spans(covspans: &mut Vec<SpanFromMir>) {
let mut extra_spans = vec![];
covspans.retain(|covspan| {
let Some(visible_macro) = covspan.visible_macro else { return true };
let split_len = visible_macro.as_str().len() as u32 + 1;
let (before, after) = covspan.span.split_at(split_len);
if !covspan.span.contains(before) || !covspan.span.contains(after) {
// Something is unexpectedly wrong with the split point.
// The debug assertion in `split_at` will have already caught this,
// but in release builds it's safer to do nothing and maybe get a
// bug report for unexpected coverage, rather than risk an ICE.
return true;
}
extra_spans.push(SpanFromMir::new(before, covspan.visible_macro, covspan.bcb));
extra_spans.push(SpanFromMir::new(after, covspan.visible_macro, covspan.bcb));
false // Discard the original covspan that we just split.
});
// The newly-split spans are added at the end, so any previous sorting
// is not preserved.
covspans.extend(extra_spans);
ExtractedCovspans { covspans, holes }
}
// Generate a set of coverage spans from the filtered set of `Statement`s and `Terminator`s of
@@ -402,12 +268,12 @@ fn unexpand_into_body_span_with_prev(
}
#[derive(Debug)]
struct Hole {
span: Span,
pub(crate) struct Hole {
pub(crate) span: Span,
}
impl Hole {
fn merge_if_overlapping_or_adjacent(&mut self, other: &mut Self) -> bool {
pub(crate) fn merge_if_overlapping_or_adjacent(&mut self, other: &mut Self) -> bool {
if !self.span.overlaps_or_adjacent(other.span) {
return false;
}
@@ -418,7 +284,7 @@ impl Hole {
}
#[derive(Debug)]
pub(super) struct SpanFromMir {
pub(crate) struct SpanFromMir {
/// A span that has been extracted from MIR and then "un-expanded" back to
/// within the current function's `body_span`. After various intermediate
/// processing steps, this span is emitted as part of the final coverage
@@ -426,9 +292,9 @@ pub(super) struct SpanFromMir {
///
/// With the exception of `fn_sig_span`, this should always be contained
/// within `body_span`.
pub(super) span: Span,
visible_macro: Option<Symbol>,
pub(super) bcb: BasicCoverageBlock,
pub(crate) span: Span,
pub(crate) visible_macro: Option<Symbol>,
pub(crate) bcb: BasicCoverageBlock,
}
impl SpanFromMir {
@@ -436,14 +302,14 @@ impl SpanFromMir {
Self::new(fn_sig_span, None, START_BCB)
}
fn new(span: Span, visible_macro: Option<Symbol>, bcb: BasicCoverageBlock) -> Self {
pub(crate) fn new(span: Span, visible_macro: Option<Symbol>, bcb: BasicCoverageBlock) -> Self {
Self { span, visible_macro, bcb }
}
/// Splits this span into 0-2 parts:
/// - The part that is strictly before the hole span, if any.
/// - The part that is strictly after the hole span, if any.
fn split_around_hole_span(&self, hole_span: Span) -> (Option<Self>, Option<Self>) {
pub(crate) fn split_around_hole_span(&self, hole_span: Span) -> (Option<Self>, Option<Self>) {
let before = try {
let span = self.span.trim_end(hole_span)?;
Self { span, ..*self }