Rollup merge of #133446 - Zalathar:querify, r=cjgillot

coverage: Use a query to identify which counter/expression IDs are used

Given that we already have a query to identify the highest-numbered counter ID in a MIR body, we can extend that query to also build bitsets of used counter/expression IDs. That lets us avoid some messy coverage bookkeeping during the main MIR traversal for codegen.

This does mean that we fail to treat some IDs as used in certain MIR-inlining scenarios, but I think that's fine, because it means that the results will be consistent across all instantiations of a function.

---

There's some more cleanup I want to do in the function coverage collector, since it isn't really collecting anything any more, but I'll leave that for future work.
This commit is contained in:
Jacob Pratt 2024-12-01 21:38:25 -05:00 committed by GitHub
commit fa2edee758
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 114 additions and 113 deletions

View File

@ -82,8 +82,8 @@ pub(crate) struct CodegenCx<'ll, 'tcx> {
pub isize_ty: &'ll Type,
/// Extra codegen state needed when coverage instrumentation is enabled.
pub coverage_cx: Option<coverageinfo::CrateCoverageContext<'ll, 'tcx>>,
/// Extra per-CGU codegen state needed when coverage instrumentation is enabled.
pub coverage_cx: Option<coverageinfo::CguCoverageContext<'ll, 'tcx>>,
pub dbg_cx: Option<debuginfo::CodegenUnitDebugContext<'ll, 'tcx>>,
eh_personality: Cell<Option<&'ll Value>>,
@ -525,7 +525,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
let (llcx, llmod) = (&*llvm_module.llcx, llvm_module.llmod());
let coverage_cx =
tcx.sess.instrument_coverage().then(coverageinfo::CrateCoverageContext::new);
tcx.sess.instrument_coverage().then(coverageinfo::CguCoverageContext::new);
let dbg_cx = if tcx.sess.opts.debuginfo != DebugInfo::None {
let dctx = debuginfo::CodegenUnitDebugContext::new(llmod);
@ -576,7 +576,7 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
/// Extra state that is only available when coverage instrumentation is enabled.
#[inline]
#[track_caller]
pub(crate) fn coverage_cx(&self) -> &coverageinfo::CrateCoverageContext<'ll, 'tcx> {
pub(crate) fn coverage_cx(&self) -> &coverageinfo::CguCoverageContext<'ll, 'tcx> {
self.coverage_cx.as_ref().expect("only called when coverage instrumentation is enabled")
}

View File

@ -1,12 +1,13 @@
use rustc_data_structures::captures::Captures;
use rustc_data_structures::fx::FxIndexSet;
use rustc_index::bit_set::BitSet;
use rustc_middle::mir::CoverageIdsInfo;
use rustc_middle::mir::coverage::{
CounterId, CovTerm, Expression, ExpressionId, FunctionCoverageInfo, Mapping, MappingKind, Op,
SourceRegion,
};
use rustc_middle::ty::Instance;
use tracing::{debug, instrument};
use tracing::debug;
use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
@ -16,17 +17,8 @@ use crate::coverageinfo::ffi::{Counter, CounterExpression, ExprKind};
pub(crate) struct FunctionCoverageCollector<'tcx> {
/// Coverage info that was attached to this function by the instrumentor.
function_coverage_info: &'tcx FunctionCoverageInfo,
ids_info: &'tcx CoverageIdsInfo,
is_used: bool,
/// Tracks which counters have been seen, so that we can identify mappings
/// to counters that were optimized out, and set them to zero.
counters_seen: BitSet<CounterId>,
/// Contains all expression IDs that have been seen in an `ExpressionUsed`
/// coverage statement, plus all expression IDs that aren't directly used
/// by any mappings (and therefore do not have expression-used statements).
/// After MIR traversal is finished, we can conclude that any IDs missing
/// from this set must have had their statements deleted by MIR opts.
expressions_seen: BitSet<ExpressionId>,
}
impl<'tcx> FunctionCoverageCollector<'tcx> {
@ -34,21 +26,24 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
pub(crate) fn new(
instance: Instance<'tcx>,
function_coverage_info: &'tcx FunctionCoverageInfo,
ids_info: &'tcx CoverageIdsInfo,
) -> Self {
Self::create(instance, function_coverage_info, true)
Self::create(instance, function_coverage_info, ids_info, true)
}
/// Creates a new set of coverage data for an unused (never called) function.
pub(crate) fn unused(
instance: Instance<'tcx>,
function_coverage_info: &'tcx FunctionCoverageInfo,
ids_info: &'tcx CoverageIdsInfo,
) -> Self {
Self::create(instance, function_coverage_info, false)
Self::create(instance, function_coverage_info, ids_info, false)
}
fn create(
instance: Instance<'tcx>,
function_coverage_info: &'tcx FunctionCoverageInfo,
ids_info: &'tcx CoverageIdsInfo,
is_used: bool,
) -> Self {
let num_counters = function_coverage_info.num_counters;
@ -58,44 +53,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
num_counters={num_counters}, num_expressions={num_expressions}, is_used={is_used}"
);
// Create a filled set of expression IDs, so that expressions not
// directly used by mappings will be treated as "seen".
// (If they end up being unused, LLVM will delete them for us.)
let mut expressions_seen = BitSet::new_filled(num_expressions);
// For each expression ID that is directly used by one or more mappings,
// mark it as not-yet-seen. This indicates that we expect to see a
// corresponding `ExpressionUsed` statement during MIR traversal.
for mapping in function_coverage_info.mappings.iter() {
// Currently we only worry about ordinary code mappings.
// For branch and MC/DC mappings, expressions might not correspond
// to any particular point in the control-flow graph.
// (Keep this in sync with the injection of `ExpressionUsed`
// statements in the `InstrumentCoverage` MIR pass.)
if let MappingKind::Code(term) = mapping.kind
&& let CovTerm::Expression(id) = term
{
expressions_seen.remove(id);
}
}
Self {
function_coverage_info,
is_used,
counters_seen: BitSet::new_empty(num_counters),
expressions_seen,
}
}
/// Marks a counter ID as having been seen in a counter-increment statement.
#[instrument(level = "debug", skip(self))]
pub(crate) fn mark_counter_id_seen(&mut self, id: CounterId) {
self.counters_seen.insert(id);
}
/// Marks an expression ID as having been seen in an expression-used statement.
#[instrument(level = "debug", skip(self))]
pub(crate) fn mark_expression_id_seen(&mut self, id: ExpressionId) {
self.expressions_seen.insert(id);
Self { function_coverage_info, ids_info, is_used }
}
/// Identify expressions that will always have a value of zero, and note
@ -117,7 +75,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
// (By construction, expressions can only refer to other expressions
// that have lower IDs, so one pass is sufficient.)
for (id, expression) in self.function_coverage_info.expressions.iter_enumerated() {
if !self.expressions_seen.contains(id) {
if !self.is_used || !self.ids_info.expressions_seen.contains(id) {
// If an expression was not seen, it must have been optimized away,
// so any operand that refers to it can be replaced with zero.
zero_expressions.insert(id);
@ -146,7 +104,7 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
assert_operand_expression_is_lower(id);
}
if is_zero_term(&self.counters_seen, &zero_expressions, *operand) {
if is_zero_term(&self.ids_info.counters_seen, &zero_expressions, *operand) {
*operand = CovTerm::Zero;
}
};
@ -172,17 +130,17 @@ impl<'tcx> FunctionCoverageCollector<'tcx> {
pub(crate) fn into_finished(self) -> FunctionCoverage<'tcx> {
let zero_expressions = self.identify_zero_expressions();
let FunctionCoverageCollector { function_coverage_info, is_used, counters_seen, .. } = self;
let FunctionCoverageCollector { function_coverage_info, ids_info, is_used, .. } = self;
FunctionCoverage { function_coverage_info, is_used, counters_seen, zero_expressions }
FunctionCoverage { function_coverage_info, ids_info, is_used, zero_expressions }
}
}
pub(crate) struct FunctionCoverage<'tcx> {
pub(crate) function_coverage_info: &'tcx FunctionCoverageInfo,
ids_info: &'tcx CoverageIdsInfo,
is_used: bool,
counters_seen: BitSet<CounterId>,
zero_expressions: ZeroExpressions,
}
@ -238,7 +196,7 @@ impl<'tcx> FunctionCoverage<'tcx> {
}
fn is_zero_term(&self, term: CovTerm) -> bool {
is_zero_term(&self.counters_seen, &self.zero_expressions, term)
!self.is_used || is_zero_term(&self.ids_info.counters_seen, &self.zero_expressions, term)
}
}

View File

@ -535,9 +535,12 @@ fn add_unused_function_coverage<'tcx>(
}),
);
// An unused function's mappings will automatically be rewritten to map to
// zero, because none of its counters/expressions are marked as seen.
let function_coverage = FunctionCoverageCollector::unused(instance, function_coverage_info);
// An unused function's mappings will all be rewritten to map to zero.
let function_coverage = FunctionCoverageCollector::unused(
instance,
function_coverage_info,
tcx.coverage_ids_info(instance.def),
);
cx.coverage_cx().function_coverage_map.borrow_mut().insert(instance, function_coverage);
}

View File

@ -21,8 +21,8 @@ mod llvm_cov;
pub(crate) mod map_data;
mod mapgen;
/// A context object for maintaining all state needed by the coverageinfo module.
pub(crate) struct CrateCoverageContext<'ll, 'tcx> {
/// Extra per-CGU context/state needed for coverage instrumentation.
pub(crate) struct CguCoverageContext<'ll, 'tcx> {
/// Coverage data for each instrumented function identified by DefId.
pub(crate) function_coverage_map:
RefCell<FxIndexMap<Instance<'tcx>, FunctionCoverageCollector<'tcx>>>,
@ -32,7 +32,7 @@ pub(crate) struct CrateCoverageContext<'ll, 'tcx> {
covfun_section_name: OnceCell<CString>,
}
impl<'ll, 'tcx> CrateCoverageContext<'ll, 'tcx> {
impl<'ll, 'tcx> CguCoverageContext<'ll, 'tcx> {
pub(crate) fn new() -> Self {
Self {
function_coverage_map: Default::default(),
@ -143,6 +143,13 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
let bx = self;
// Due to LocalCopy instantiation or MIR inlining, coverage statements
// can end up in a crate that isn't doing coverage instrumentation.
// When that happens, we currently just discard those statements, so
// the corresponding code will be undercounted.
// FIXME(Zalathar): Find a better solution for mixed-coverage builds.
let Some(coverage_cx) = &bx.cx.coverage_cx else { return };
let Some(function_coverage_info) =
bx.tcx.instance_mir(instance.def).function_coverage_info.as_deref()
else {
@ -150,32 +157,28 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
return;
};
// FIXME(#132395): Unwrapping `coverage_cx` here has led to ICEs in the
// wild, so keep this early-return until we understand why.
let mut coverage_map = match bx.coverage_cx {
Some(ref cx) => cx.function_coverage_map.borrow_mut(),
None => return,
};
let func_coverage = coverage_map
.entry(instance)
.or_insert_with(|| FunctionCoverageCollector::new(instance, function_coverage_info));
// Mark the instance as used in this CGU, for coverage purposes.
// This includes functions that were not partitioned into this CGU,
// but were MIR-inlined into one of this CGU's functions.
coverage_cx.function_coverage_map.borrow_mut().entry(instance).or_insert_with(|| {
FunctionCoverageCollector::new(
instance,
function_coverage_info,
bx.tcx.coverage_ids_info(instance.def),
)
});
match *kind {
CoverageKind::SpanMarker | CoverageKind::BlockMarker { .. } => unreachable!(
"marker statement {kind:?} should have been removed by CleanupPostBorrowck"
),
CoverageKind::CounterIncrement { id } => {
func_coverage.mark_counter_id_seen(id);
// We need to explicitly drop the `RefMut` before calling into
// `instrprof_increment`, as that needs an exclusive borrow.
drop(coverage_map);
// The number of counters passed to `llvm.instrprof.increment` might
// be smaller than the number originally inserted by the instrumentor,
// if some high-numbered counters were removed by MIR optimizations.
// If so, LLVM's profiler runtime will use fewer physical counters.
let num_counters =
bx.tcx().coverage_ids_info(instance.def).max_counter_id.as_u32() + 1;
bx.tcx().coverage_ids_info(instance.def).num_counters_after_mir_opts();
assert!(
num_counters as usize <= function_coverage_info.num_counters,
"num_counters disagreement: query says {num_counters} but function info only has {}",
@ -192,23 +195,23 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
);
bx.instrprof_increment(fn_name, hash, num_counters, index);
}
CoverageKind::ExpressionUsed { id } => {
func_coverage.mark_expression_id_seen(id);
CoverageKind::ExpressionUsed { id: _ } => {
// Expression-used statements are markers that are handled by
// `coverage_ids_info`, so there's nothing to codegen here.
}
CoverageKind::CondBitmapUpdate { index, decision_depth } => {
drop(coverage_map);
let cond_bitmap = bx
.coverage_cx()
let cond_bitmap = coverage_cx
.try_get_mcdc_condition_bitmap(&instance, decision_depth)
.expect("mcdc cond bitmap should have been allocated for updating");
let cond_index = bx.const_i32(index as i32);
bx.mcdc_condbitmap_update(cond_index, cond_bitmap);
}
CoverageKind::TestVectorBitmapUpdate { bitmap_idx, decision_depth } => {
drop(coverage_map);
let cond_bitmap = bx.coverage_cx()
.try_get_mcdc_condition_bitmap(&instance, decision_depth)
.expect("mcdc cond bitmap should have been allocated for merging into the global bitmap");
let cond_bitmap =
coverage_cx.try_get_mcdc_condition_bitmap(&instance, decision_depth).expect(
"mcdc cond bitmap should have been allocated for merging \
into the global bitmap",
);
assert!(
bitmap_idx as usize <= function_coverage_info.mcdc_bitmap_bits,
"bitmap index of the decision out of range"

View File

@ -28,7 +28,6 @@ rustc_index::newtype_index! {
#[derive(HashStable)]
#[encodable]
#[orderable]
#[max = 0xFFFF_FFFF]
#[debug_format = "CounterId({})"]
pub struct CounterId {}
}
@ -46,7 +45,6 @@ rustc_index::newtype_index! {
#[derive(HashStable)]
#[encodable]
#[orderable]
#[max = 0xFFFF_FFFF]
#[debug_format = "ExpressionId({})"]
pub struct ExpressionId {}
}

View File

@ -8,7 +8,7 @@ use rustc_abi::{FieldIdx, VariantIdx};
use rustc_data_structures::fx::FxIndexMap;
use rustc_errors::ErrorGuaranteed;
use rustc_hir::def_id::LocalDefId;
use rustc_index::bit_set::BitMatrix;
use rustc_index::bit_set::{BitMatrix, BitSet};
use rustc_index::{Idx, IndexVec};
use rustc_macros::{HashStable, TyDecodable, TyEncodable, TypeFoldable, TypeVisitable};
use rustc_span::Span;
@ -359,12 +359,22 @@ pub struct DestructuredConstant<'tcx> {
/// Used by the `coverage_ids_info` query.
#[derive(Clone, TyEncodable, TyDecodable, Debug, HashStable)]
pub struct CoverageIdsInfo {
/// Coverage codegen needs to know the highest counter ID that is ever
pub counters_seen: BitSet<mir::coverage::CounterId>,
pub expressions_seen: BitSet<mir::coverage::ExpressionId>,
}
impl CoverageIdsInfo {
/// Coverage codegen needs to know how many coverage counters are ever
/// incremented within a function, so that it can set the `num-counters`
/// argument of the `llvm.instrprof.increment` intrinsic.
///
/// This may be less than the highest counter ID emitted by the
/// InstrumentCoverage MIR pass, if the highest-numbered counter increments
/// were removed by MIR optimizations.
pub max_counter_id: mir::coverage::CounterId,
pub fn num_counters_after_mir_opts(&self) -> u32 {
// FIXME(Zalathar): Currently this treats an unused counter as "used"
// if its ID is less than that of the highest counter that really is
// used. Fixing this would require adding a renumbering step somewhere.
self.counters_seen.last_set_in(..).map_or(0, |max| max.as_u32() + 1)
}
}

View File

@ -1,6 +1,7 @@
use rustc_data_structures::captures::Captures;
use rustc_index::bit_set::BitSet;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::mir::coverage::{CounterId, CoverageKind};
use rustc_middle::mir::coverage::{CovTerm, CoverageKind, MappingKind};
use rustc_middle::mir::{Body, CoverageIdsInfo, Statement, StatementKind};
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty::{self, TyCtxt};
@ -86,15 +87,43 @@ fn coverage_ids_info<'tcx>(
) -> CoverageIdsInfo {
let mir_body = tcx.instance_mir(instance_def);
let max_counter_id = all_coverage_in_mir_body(mir_body)
.filter_map(|kind| match *kind {
CoverageKind::CounterIncrement { id } => Some(id),
_ => None,
})
.max()
.unwrap_or(CounterId::ZERO);
let Some(fn_cov_info) = mir_body.function_coverage_info.as_ref() else {
return CoverageIdsInfo {
counters_seen: BitSet::new_empty(0),
expressions_seen: BitSet::new_empty(0),
};
};
CoverageIdsInfo { max_counter_id }
let mut counters_seen = BitSet::new_empty(fn_cov_info.num_counters);
let mut expressions_seen = BitSet::new_filled(fn_cov_info.expressions.len());
// For each expression ID that is directly used by one or more mappings,
// mark it as not-yet-seen. This indicates that we expect to see a
// corresponding `ExpressionUsed` statement during MIR traversal.
for mapping in fn_cov_info.mappings.iter() {
// Currently we only worry about ordinary code mappings.
// For branch and MC/DC mappings, expressions might not correspond
// to any particular point in the control-flow graph.
// (Keep this in sync with the injection of `ExpressionUsed`
// statements in the `InstrumentCoverage` MIR pass.)
if let MappingKind::Code(CovTerm::Expression(id)) = mapping.kind {
expressions_seen.remove(id);
}
}
for kind in all_coverage_in_mir_body(mir_body) {
match *kind {
CoverageKind::CounterIncrement { id } => {
counters_seen.insert(id);
}
CoverageKind::ExpressionUsed { id } => {
expressions_seen.insert(id);
}
_ => {}
}
}
CoverageIdsInfo { counters_seen, expressions_seen }
}
fn all_coverage_in_mir_body<'a, 'tcx>(

View File

@ -8,18 +8,18 @@ Number of file 0 mappings: 1
Highest counter ID seen: (none)
Function name: inline_dead::live::<false>
Raw bytes (26): 0x[01, 01, 01, 01, 00, 04, 01, 0e, 01, 01, 09, 00, 02, 09, 00, 0f, 02, 02, 09, 00, 0a, 01, 02, 01, 00, 02]
Raw bytes (26): 0x[01, 01, 01, 01, 05, 04, 01, 0e, 01, 01, 09, 05, 02, 09, 00, 0f, 02, 02, 09, 00, 0a, 01, 02, 01, 00, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 1
- expression 0 operands: lhs = Counter(0), rhs = Zero
- expression 0 operands: lhs = Counter(0), rhs = Counter(1)
Number of file 0 mappings: 4
- Code(Counter(0)) at (prev + 14, 1) to (start + 1, 9)
- Code(Zero) at (prev + 2, 9) to (start + 0, 15)
- Code(Counter(1)) at (prev + 2, 9) to (start + 0, 15)
- Code(Expression(0, Sub)) at (prev + 2, 9) to (start + 0, 10)
= (c0 - Zero)
= (c0 - c1)
- Code(Counter(0)) at (prev + 2, 1) to (start + 0, 2)
Highest counter ID seen: c0
Highest counter ID seen: c1
Function name: inline_dead::main
Raw bytes (14): 0x[01, 01, 00, 02, 01, 04, 01, 03, 0a, 01, 06, 05, 01, 02]

View File

@ -21,13 +21,13 @@ Number of file 0 mappings: 3
Highest counter ID seen: (none)
Function name: let_else_loop::loopy
Raw bytes (19): 0x[01, 01, 00, 03, 01, 09, 01, 01, 14, 00, 01, 1c, 00, 23, 05, 01, 01, 00, 02]
Raw bytes (19): 0x[01, 01, 00, 03, 01, 09, 01, 01, 14, 09, 01, 1c, 00, 23, 05, 01, 01, 00, 02]
Number of files: 1
- file 0 => global file 1
Number of expressions: 0
Number of file 0 mappings: 3
- Code(Counter(0)) at (prev + 9, 1) to (start + 1, 20)
- Code(Zero) at (prev + 1, 28) to (start + 0, 35)
- Code(Counter(2)) at (prev + 1, 28) to (start + 0, 35)
- Code(Counter(1)) at (prev + 1, 1) to (start + 0, 2)
Highest counter ID seen: c1
Highest counter ID seen: c2