coverage: Use a query to identify which counter/expression IDs are used

2025-01-19 11:12:43 +00:00 · 2024-11-24 18:26:05 +11:00 · 2024-11-24 18:26:05 +11:00 · 6fc0fe76e8
commit 6fc0fe76e8
parent 121a17ccc3
7 changed files with 93 additions and 92 deletions
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/map_data.rs
@ -1,12 +1,13 @@
 use rustc_data_structures::captures::Captures;
 use rustc_data_structures::fx::FxIndexSet;
 use rustc_index::bit_set::BitSet;
+use rustc_middle::mir::CoverageIdsInfo;
 use rustc_middle::mir::coverage::{
    CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, MappingKind, Op,
    SourceRegion,
 };
 use rustc_middle::ty::Instance;
-use tracing::{debug, instrument};
+use tracing::debug;

 use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};

@ -16,17 +17,8 @@ use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
 pub(crate) struct FunctionCoverageCollector<'tcx> {
    /// Coverage info that was attached to this function by the instrumentor.
    function_coverage_info: &'tcx FunctionCoverageInfo,
+    ids_info: &'tcx CoverageIdsInfo,
    is_used: bool,
-
-    /// Tracks which counters have been seen, so that we can identify mappings
-    /// to counters that were optimized out, and set them to zero.
-    counters_seen: BitSet<CounterId>,
-    /// Contains all expression IDs that have been seen in an `ExpressionUsed`
-    /// coverage statement, plus all expression IDs that aren't directly used
-    /// by any mappings (and therefore do not have expression-used statements).
-    /// After MIR traversal is finished, we can conclude that any IDs missing
-    /// from this set must have had their statements deleted by MIR opts.
-    expressions_seen: BitSet<ExpressionId>,
 }

 impl<'tcx> FunctionCoverageCollector<'tcx> {
@ -34,21 +26,24 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
    pub(crate) fn new(
        instance: Instance<'tcx>,
        function_coverage_info: &'tcx FunctionCoverageInfo,
+        ids_info: &'tcx CoverageIdsInfo,
    ) -> Self {
-        Self::create(instance, function_coverage_info, true)
+        Self::create(instance, function_coverage_info, ids_info, true)
    }

    /// Creates a new set of coverage data for an unused (never called) function.
    pub(crate) fn unused(
        instance: Instance<'tcx>,
        function_coverage_info: &'tcx FunctionCoverageInfo,
+        ids_info: &'tcx CoverageIdsInfo,
    ) -> Self {
-        Self::create(instance, function_coverage_info, false)
+        Self::create(instance, function_coverage_info, ids_info, false)
    }

    fn create(
        instance: Instance<'tcx>,
        function_coverage_info: &'tcx FunctionCoverageInfo,
+        ids_info: &'tcx CoverageIdsInfo,
        is_used: bool,
    ) -> Self {
        let num_counters = function_coverage_info.num_counters;
@ -58,44 +53,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
            num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}"
        );

-        // Create a filled set of expression IDs, so that expressions not
-        // directly used by mappings will be treated as "seen".
-        // (If they end up being unused, LLVM will delete them for us.)
-        let mut expressions_seen = BitSet::new_filled(num_expressions);
-        // For each expression ID that is directly used by one or more mappings,
-        // mark it as not-yet-seen. This indicates that we expect to see a
-        // corresponding `ExpressionUsed` statement during MIR traversal.
-        for mapping in function_coverage_info.mappings.iter() {
-            // Currently we only worry about ordinary code mappings.
-            // For branch and MC/DC mappings, expressions might not correspond
-            // to any particular point in the control-flow graph.
-            // (Keep this in sync with the injection of `ExpressionUsed`
-            // statements in the `InstrumentCoverage` MIR pass.)
-            if let MappingKind::Code(term) = mapping.kind
-                && let CovTerm::Expression(id) = term
-            {
-                expressions_seen.remove(id);
-            }
-        }
-
-        Self {
-            function_coverage_info,
-            is_used,
-            counters_seen: BitSet::new_empty(num_counters),
-            expressions_seen,
-        }
-    }
-
-    /// Marks a counter ID as having been seen in a counter-increment statement.
-    #[instrument(level = "debug", skip(self))]
-    pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) {
-        self.counters_seen.insert(id);
-    }
-
-    /// Marks an expression ID as having been seen in an expression-used statement.
-    #[instrument(level = "debug", skip(self))]
-    pub(crate) fn mark_expression_id_seen(&mut self, id: ExpressionId) {
-        self.expressions_seen.insert(id);
+        Self { function_coverage_info, ids_info, is_used }
    }

    /// Identify expressions that will always have a value of zero, and note
@ -117,7 +75,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
        // (By construction, expressions can only refer to other expressions
        // that have lower IDs, so one pass is sufficient.)
        for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() {
-            if !self.is_used || !self.expressions_seen.contains(id) {
+            if !self.is_used || !self.ids_info.expressions_seen.contains(id) {
                // If an expression was not seen, it must have been optimized away,
                // so any operand that refers to it can be replaced with zero.
                zero_expressions.insert(id);
@ -146,7 +104,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
                    assert_operand_expression_is_lower(id);
                }

-                if is_zero_term(&self.counters_seen, &zero_expressions, *operand) {
+                if is_zero_term(&self.ids_info.counters_seen, &zero_expressions, *operand) {
                    *operand = CovTerm::Zero;
                }
            };
@ -172,17 +130,17 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {

    pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
        let zero_expressions = self.identify_zero_expressions();
-        let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self;
+        let FunctionCoverageCollector { function_coverage_info, ids_info, is_used, .. } = self;

-        FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions }
+        FunctionCoverage { function_coverage_info, ids_info, is_used, zero_expressions }
    }
 }

 pub(crate) struct FunctionCoverage<'tcx> {
    pub(crate) function_coverage_info: &'tcx FunctionCoverageInfo,
+    ids_info: &'tcx CoverageIdsInfo,
    is_used: bool,

-    counters_seen: BitSet<CounterId>,
    zero_expressions: ZeroExpressions,
 }

@ -238,7 +196,7 @@ impl<'tcx> FunctionCoverage<'tcx> {
    }

    fn is_zero_term(&self, term: CovTerm) -> bool {
-        !self.is_used || is_zero_term(&self.counters_seen, &self.zero_expressions, term)
+        !self.is_used || is_zero_term(&self.ids_info.counters_seen, &self.zero_expressions, term)
    }
 }

--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mapgen.rs
@ -536,7 +536,11 @@ fn add_unused_function_coverage<'tcx>(
    );

    // An unused function's mappings will all be rewritten to map to zero.
-    let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info);
+    let function_coverage = FunctionCoverageCollector::unused(
+        instance,
+        function_coverage_info,
+        tcx.coverage_ids_info(instance.def),
+    );

    cx.coverage_cx().function_coverage_map.borrow_mut().insert(instance, function_coverage);
 }
--- a/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/coverageinfo/mod.rs
@ -157,27 +157,28 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
            return;
        };

-        let mut coverage_map = coverage_cx.function_coverage_map.borrow_mut();
-        let func_coverage = coverage_map
-            .entry(instance)
-            .or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info));
+        // Mark the instance as used in this CGU, for coverage purposes.
+        // This includes functions that were not partitioned into this CGU,
+        // but were MIR-inlined into one of this CGU's functions.
+        coverage_cx.function_coverage_map.borrow_mut().entry(instance).or_insert_with(|| {
+            FunctionCoverageCollector::new(
+                instance,
+                function_coverage_info,
+                bx.tcx.coverage_ids_info(instance.def),
+            )
+        });

        match *kind {
            CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!(
                "marker statement {kind:?} should have been removed by CleanupPostBorrowck"
            ),
            CoverageKind::CounterIncrement { id } => {
-                func_coverage.mark_counter_id_seen(id);
-                // We need to explicitly drop the `RefMut` before calling into
-                // `instrprof_increment`, as that needs an exclusive borrow.
-                drop(coverage_map);
-
                // The number of counters passed to `llvm.instrprof.increment` might
                // be smaller than the number originally inserted by the instrumentor,
                // if some high-numbered counters were removed by MIR optimizations.
                // If so, LLVM's profiler runtime will use fewer physical counters.
                let num_counters =
-                    bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1;
+                    bx.tcx().coverage_ids_info(instance.def).num_counters_after_mir_opts();
                assert!(
                    num_counters as usize <= function_coverage_info.num_counters,
                    "num_counters disagreement: query says {num_counters} but function info only has {}",
@ -194,11 +195,11 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                );
                bx.instrprof_increment(fn_name, hash, num_counters, index);
            }
-            CoverageKind::ExpressionUsed { id } => {
-                func_coverage.mark_expression_id_seen(id);
+            CoverageKind::ExpressionUsed { id: _ } => {
+                // Expression-used statements are markers that are handled by
+                // `coverage_ids_info`, so there's nothing to codegen here.
            }
            CoverageKind::CondBitmapUpdate { index, decision_depth } => {
-                drop(coverage_map);
                let cond_bitmap = coverage_cx
                    .try_get_mcdc_condition_bitmap(&instance, decision_depth)
                    .expect("mcdc cond bitmap should have been allocated for updating");
@ -206,7 +207,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
                bx.mcdc_condbitmap_update(cond_index, cond_bitmap);
            }
            CoverageKind::TestVectorBitmapUpdate { bitmap_idx, decision_depth } => {
-                drop(coverage_map);
                let cond_bitmap =
                    coverage_cx.try_get_mcdc_condition_bitmap(&instance, decision_depth).expect(
                        "mcdc cond bitmap should have been allocated for merging \
--- a/compiler/rustc_middle/src/mir/query.rs
+++ b/compiler/rustc_middle/src/mir/query.rs
@ -8,7 +8,7 @@ use rustc_abi::{FieldIdx, VariantIdx};
 use rustc_data_structures::fx::FxIndexMap;
 use rustc_errors::ErrorGuaranteed;
 use rustc_hir::def_id::LocalDefId;
-use rustc_index::bit_set::BitMatrix;
+use rustc_index::bit_set::{BitMatrix, BitSet};
 use rustc_index::{Idx, IndexVec};
 use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable};
 use rustc_span::Span;
@ -358,12 +358,22 @@ pub struct DestructuredConstant<'tcx> {
 /// Used by the `coverage_ids_info` query.
 #[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable)]
 pub struct CoverageIdsInfo {
-    /// Coverage codegen needs to know the highest counter ID that is ever
+    pub counters_seen: BitSet<mir::coverage::CounterId>,
+    pub expressions_seen: BitSet<mir::coverage::ExpressionId>,
+}
+
+impl CoverageIdsInfo {
+    /// Coverage codegen needs to know how many coverage counters are ever
    /// incremented within a function, so that it can set the `num-counters`
    /// argument of the `llvm.instrprof.increment` intrinsic.
    ///
    /// This may be less than the highest counter ID emitted by the
    /// InstrumentCoverage MIR pass, if the highest-numbered counter increments
    /// were removed by MIR optimizations.
-    pub max_counter_id: mir::coverage::CounterId,
+    pub fn num_counters_after_mir_opts(&self) -> u32 {
+        // FIXME(Zalathar): Currently this treats an unused counter as "used"
+        // if its ID is less than that of the highest counter that really is
+        // used. Fixing this would require adding a renumbering step somewhere.
+        self.counters_seen.last_set_in(..).map_or(0, |max| max.as_u32() + 1)
+    }
 }
--- a/compiler/rustc_mir_transform/src/coverage/query.rs
+++ b/compiler/rustc_mir_transform/src/coverage/query.rs
@ -1,6 +1,7 @@
 use rustc_data_structures::captures::Captures;
+use rustc_index::bit_set::BitSet;
 use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
-use rustc_middle::mir::coverage::{CounterId, CoverageKind};
+use rustc_middle::mir::coverage::{CovTerm, CoverageKind, MappingKind};
 use rustc_middle::mir::{Body, CoverageIdsInfo, Statement, StatementKind};
 use rustc_middle::query::TyCtxtAt;
 use rustc_middle::ty::{self, TyCtxt};
@ -86,15 +87,43 @@ fn coverage_ids_info<'tcx>(
 ) -> CoverageIdsInfo {
    let mir_body = tcx.instance_mir(instance_def);

-    let max_counter_id = all_coverage_in_mir_body(mir_body)
-        .filter_map(|kind| match *kind {
-            CoverageKind::CounterIncrement { id } => Some(id),
-            _ => None,
-        })
-        .max()
-        .unwrap_or(CounterId::ZERO);
+    let Some(fn_cov_info) = mir_body.function_coverage_info.as_ref() else {
+        return CoverageIdsInfo {
+            counters_seen: BitSet::new_empty(0),
+            expressions_seen: BitSet::new_empty(0),
+        };
+    };

-    CoverageIdsInfo { max_counter_id }
+    let mut counters_seen = BitSet::new_empty(fn_cov_info.num_counters);
+    let mut expressions_seen = BitSet::new_filled(fn_cov_info.expressions.len());
+
+    // For each expression ID that is directly used by one or more mappings,
+    // mark it as not-yet-seen. This indicates that we expect to see a
+    // corresponding `ExpressionUsed` statement during MIR traversal.
+    for mapping in fn_cov_info.mappings.iter() {
+        // Currently we only worry about ordinary code mappings.
+        // For branch and MC/DC mappings, expressions might not correspond
+        // to any particular point in the control-flow graph.
+        // (Keep this in sync with the injection of `ExpressionUsed`
+        // statements in the `InstrumentCoverage` MIR pass.)
+        if let MappingKind::Code(CovTerm::Expression(id)) = mapping.kind {
+            expressions_seen.remove(id);
+        }
+    }
+
+    for kind in all_coverage_in_mir_body(mir_body) {
+        match *kind {
+            CoverageKind::CounterIncrement { id } => {
+                counters_seen.insert(id);
+            }
+            CoverageKind::ExpressionUsed { id } => {
+                expressions_seen.insert(id);
+            }
+            _ => {}
+        }
+    }
+
+    CoverageIdsInfo { counters_seen, expressions_seen }
 }

 fn all_coverage_in_mir_body<'a, 'tcx>(
--- a/tests/coverage/inline-dead.cov-map
+++ b/tests/coverage/inline-dead.cov-map
@ -8,18 +8,18 @@ Number of file 0 mappings: 1
 Highest counter ID seen: (none)

 Function name: inline_dead::live::<false>
-Raw bytes (26): 0x[01, 01, 01, 01, 00, 04, 01, 0e, 01, 01, 09, 00, 02, 09, 00, 0f, 02, 02, 09, 00, 0a, 01, 02, 01, 00, 02]
+Raw bytes (26): 0x[01, 01, 01, 01, 05, 04, 01, 0e, 01, 01, 09, 05, 02, 09, 00, 0f, 02, 02, 09, 00, 0a, 01, 02, 01, 00, 02]
 Number of files: 1
 - file 0 => global file 1
 Number of expressions: 1
- expression 0 operands: lhs = Counter(0), rhs = Zero
+- expression 0 operands: lhs = Counter(0), rhs = Counter(1)
 Number of file 0 mappings: 4
 - Code(Counter(0)) at (prev + 14, 1) to (start + 1, 9)
- Code(Zero) at (prev + 2, 9) to (start + 0, 15)
+- Code(Counter(1)) at (prev + 2, 9) to (start + 0, 15)
 - Code(Expression(0, Sub)) at (prev + 2, 9) to (start + 0, 10)
-    = (c0 - Zero)
+    = (c0 - c1)
 - Code(Counter(0)) at (prev + 2, 1) to (start + 0, 2)
-Highest counter ID seen: c0
+Highest counter ID seen: c1

 Function name: inline_dead::main
 Raw bytes (14): 0x[01, 01, 00, 02, 01, 04, 01, 03, 0a, 01, 06, 05, 01, 02]
--- a/tests/coverage/let_else_loop.cov-map
+++ b/tests/coverage/let_else_loop.cov-map
@ -21,13 +21,13 @@ Number of file 0 mappings: 3
 Highest counter ID seen: (none)

 Function name: let_else_loop::loopy
-Raw bytes (19): 0x[01, 01, 00, 03, 01, 09, 01, 01, 14, 00, 01, 1c, 00, 23, 05, 01, 01, 00, 02]
+Raw bytes (19): 0x[01, 01, 00, 03, 01, 09, 01, 01, 14, 09, 01, 1c, 00, 23, 05, 01, 01, 00, 02]
 Number of files: 1
 - file 0 => global file 1
 Number of expressions: 0
 Number of file 0 mappings: 3
 - Code(Counter(0)) at (prev + 9, 1) to (start + 1, 20)
- Code(Zero) at (prev + 1, 28) to (start + 0, 35)
+- Code(Counter(2)) at (prev + 1, 28) to (start + 0, 35)
 - Code(Counter(1)) at (prev + 1, 1) to (start + 0, 2)
-Highest counter ID seen: c1
+Highest counter ID seen: c2