pub(super) mod query;

mod counters;
mod graph;
mod mappings;
mod spans;
#[cfg(test)]
mod tests;
mod unexpand;

use rustc_hir as hir;
use rustc_hir::intravisit::{Visitor, walk_expr};
use rustc_middle::hir::map::Map;
use rustc_middle::hir::nested_filter;
use rustc_middle::mir::coverage::{
    CoverageKind, DecisionInfo, FunctionCoverageInfo, Mapping, MappingKind, SourceRegion,
};
use rustc_middle::mir::{
    self, BasicBlock, BasicBlockData, SourceInfo, Statement, StatementKind, Terminator,
    TerminatorKind,
};
use rustc_middle::ty::TyCtxt;
use rustc_span::def_id::LocalDefId;
use rustc_span::source_map::SourceMap;
use rustc_span::{BytePos, Pos, RelativeBytePos, Span, Symbol};
use tracing::{debug, debug_span, instrument, trace};

use crate::coverage::counters::{CounterIncrementSite, CoverageCounters};
use crate::coverage::graph::CoverageGraph;
use crate::coverage::mappings::ExtractedMappings;

/// Inserts `StatementKind::Coverage` statements that instrument the binary with injected
/// counters (via the `llvm.instrprof.increment` intrinsic) and/or inject metadata used during
/// codegen to construct the coverage map.
pub(super) struct InstrumentCoverage;

impl<'tcx> crate::MirPass<'tcx> for InstrumentCoverage {
    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
        sess.instrument_coverage()
    }

    fn run_pass(&self, tcx: TyCtxt<'tcx>, mir_body: &mut mir::Body<'tcx>) {
        let mir_source = mir_body.source;

        // This pass runs after MIR promotion, but before promoted MIR starts to
        // be transformed, so it should never see promoted MIR.
        assert!(mir_source.promoted.is_none());

        let def_id = mir_source.def_id().expect_local();

        if !tcx.is_eligible_for_coverage(def_id) {
            trace!("InstrumentCoverage skipped for {def_id:?} (not eligible)");
            return;
        }

        // An otherwise-eligible function is still skipped if its start block
        // is known to be unreachable.
        match mir_body.basic_blocks[mir::START_BLOCK].terminator().kind {
            TerminatorKind::Unreachable => {
                trace!("InstrumentCoverage skipped for unreachable `START_BLOCK`");
                return;
            }
            _ => {}
        }

        instrument_function_for_coverage(tcx, mir_body);
    }
}

fn instrument_function_for_coverage<'tcx>(tcx: TyCtxt<'tcx>, mir_body: &mut mir::Body<'tcx>) {
    let def_id = mir_body.source.def_id();
    let _span = debug_span!("instrument_function_for_coverage", ?def_id).entered();

    let hir_info = extract_hir_info(tcx, def_id.expect_local());
    let basic_coverage_blocks = CoverageGraph::from_mir(mir_body);

    ////////////////////////////////////////////////////
    // Extract coverage spans and other mapping info from MIR.
    let extracted_mappings = mappings::extract_all_mapping_info_from_mir(
        tcx,
        mir_body,
        &hir_info,
        &basic_coverage_blocks,
    );

    ////////////////////////////////////////////////////
    // Create an optimized mix of `Counter`s and `Expression`s for the `CoverageGraph`. Ensure
    // every coverage span has a `Counter` or `Expression` assigned to its `BasicCoverageBlock`
    // and all `Expression` dependencies (operands) are also generated, for any other
    // `BasicCoverageBlock`s not already associated with a coverage span.
    let bcbs_with_counter_mappings = extracted_mappings.all_bcbs_with_counter_mappings();
    if bcbs_with_counter_mappings.is_empty() {
        // No relevant spans were found in MIR, so skip instrumenting this function.
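        // (Returning here means `function_coverage_info` is never attached to this body,
        // so no coverage statements or mappings will be emitted for it.)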
        return;
    }

    let coverage_counters =
        CoverageCounters::make_bcb_counters(&basic_coverage_blocks, &bcbs_with_counter_mappings);

    let mappings = create_mappings(tcx, &hir_info, &extracted_mappings, &coverage_counters);
    if mappings.is_empty() {
        // No spans could be converted into valid mappings, so skip this function.
        debug!("no spans could be converted into valid mappings; skipping");
        return;
    }

    inject_coverage_statements(
        mir_body,
        &basic_coverage_blocks,
        &extracted_mappings,
        &coverage_counters,
    );

    inject_mcdc_statements(mir_body, &basic_coverage_blocks, &extracted_mappings);

    let mcdc_num_condition_bitmaps = extracted_mappings
        .mcdc_mappings
        .iter()
        .map(|&(mappings::MCDCDecision { decision_depth, .. }, _)| decision_depth)
        .max()
        .map_or(0, |max| usize::from(max) + 1);

    mir_body.function_coverage_info = Some(Box::new(FunctionCoverageInfo {
        function_source_hash: hir_info.function_source_hash,
        num_counters: coverage_counters.num_counters(),
        mcdc_bitmap_bits: extracted_mappings.mcdc_bitmap_bits,
        expressions: coverage_counters.into_expressions(),
        mappings,
        mcdc_num_condition_bitmaps,
    }));
}

/// For each coverage span extracted from MIR, create a corresponding
/// mapping.
///
/// Precondition: All BCBs corresponding to those spans have been given
/// coverage counters.
fn create_mappings<'tcx>(
    tcx: TyCtxt<'tcx>,
    hir_info: &ExtractedHirInfo,
    extracted_mappings: &ExtractedMappings,
    coverage_counters: &CoverageCounters,
) -> Vec<Mapping> {
    let source_map = tcx.sess.source_map();
    let body_span = hir_info.body_span;

    let source_file = source_map.lookup_source_file(body_span.lo());

    use rustc_session::RemapFileNameExt;
    use rustc_session::config::RemapPathScopeComponents;
    let file_name = Symbol::intern(
        &source_file.name.for_scope(tcx.sess, RemapPathScopeComponents::MACRO).to_string_lossy(),
    );

    let term_for_bcb =
        |bcb| coverage_counters.term_for_bcb(bcb).expect("all BCBs with spans were given counters");
    let region_for_span = |span: Span| make_source_region(source_map, file_name, span, body_span);

    // Fully destructure the mappings struct to make sure we don't miss any kinds.
    let ExtractedMappings {
        num_bcbs: _,
        code_mappings,
        branch_pairs,
        mcdc_bitmap_bits: _,
        mcdc_degraded_branches,
        mcdc_mappings,
    } = extracted_mappings;
    let mut mappings = Vec::new();

    mappings.extend(code_mappings.iter().filter_map(
        // Ordinary code mappings are the simplest kind.
        |&mappings::CodeMapping { span, bcb }| {
            let source_region = region_for_span(span)?;
            let kind = MappingKind::Code(term_for_bcb(bcb));
            Some(Mapping { kind, source_region })
        },
    ));

    mappings.extend(branch_pairs.iter().filter_map(
        |&mappings::BranchPair { span, true_bcb, false_bcb }| {
            let true_term = term_for_bcb(true_bcb);
            let false_term = term_for_bcb(false_bcb);
            let kind = MappingKind::Branch { true_term, false_term };
            let source_region = region_for_span(span)?;
            Some(Mapping { kind, source_region })
        },
    ));

    let term_for_bcb =
        |bcb| coverage_counters.term_for_bcb(bcb).expect("all BCBs with spans were given counters");

    // MCDC branch mappings are appended with their decisions in case decisions were ignored.
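    // (Degraded branches belong to decisions that could not be kept as MC/DC, so below
    // they are emitted as ordinary `Branch` mappings rather than `MCDCBranch` mappings.)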
    mappings.extend(mcdc_degraded_branches.iter().filter_map(
        |&mappings::MCDCBranch {
             span,
             true_bcb,
             false_bcb,
             condition_info: _,
             true_index: _,
             false_index: _,
         }| {
            let source_region = region_for_span(span)?;
            let true_term = term_for_bcb(true_bcb);
            let false_term = term_for_bcb(false_bcb);
            Some(Mapping { kind: MappingKind::Branch { true_term, false_term }, source_region })
        },
    ));

    for (decision, branches) in mcdc_mappings {
        let num_conditions = branches.len() as u16;
        let conditions = branches
            .into_iter()
            .filter_map(
                |&mappings::MCDCBranch {
                     span,
                     true_bcb,
                     false_bcb,
                     condition_info,
                     true_index: _,
                     false_index: _,
                 }| {
                    let source_region = region_for_span(span)?;
                    let true_term = term_for_bcb(true_bcb);
                    let false_term = term_for_bcb(false_bcb);
                    Some(Mapping {
                        kind: MappingKind::MCDCBranch {
                            true_term,
                            false_term,
                            mcdc_params: condition_info,
                        },
                        source_region,
                    })
                },
            )
            .collect::<Vec<_>>();

        if conditions.len() == num_conditions as usize
            && let Some(source_region) = region_for_span(decision.span)
        {
            // LLVM requires end index for counter mapping regions.
            let kind = MappingKind::MCDCDecision(DecisionInfo {
                bitmap_idx: (decision.bitmap_idx + decision.num_test_vectors) as u32,
                num_conditions,
            });
            mappings.extend(
                std::iter::once(Mapping { kind, source_region }).chain(conditions.into_iter()),
            );
        } else {
            mappings.extend(conditions.into_iter().map(|mapping| {
                let MappingKind::MCDCBranch { true_term, false_term, mcdc_params: _ } =
                    mapping.kind
                else {
                    unreachable!("all mappings here are MCDCBranch as shown above");
                };
                Mapping {
                    kind: MappingKind::Branch { true_term, false_term },
                    source_region: mapping.source_region,
                }
            }))
        }
    }

    mappings
}

/// For each BCB node or BCB edge that has an associated coverage counter,
/// inject any necessary coverage statements into MIR.
fn inject_coverage_statements<'tcx>(
    mir_body: &mut mir::Body<'tcx>,
    basic_coverage_blocks: &CoverageGraph,
    extracted_mappings: &ExtractedMappings,
    coverage_counters: &CoverageCounters,
) {
    // Inject counter-increment statements into MIR.
    for (id, counter_increment_site) in coverage_counters.counter_increment_sites() {
        // Determine the block to inject a counter-increment statement into.
        // For BCB nodes this is just their first block, but for edges we need
        // to create a new block between the two BCBs, and inject into that.
        let target_bb = match *counter_increment_site {
            CounterIncrementSite::Node { bcb } => basic_coverage_blocks[bcb].leader_bb(),
            CounterIncrementSite::Edge { from_bcb, to_bcb } => {
                // Create a new block between the last block of `from_bcb` and
                // the first block of `to_bcb`.
                let from_bb = basic_coverage_blocks[from_bcb].last_bb();
                let to_bb = basic_coverage_blocks[to_bcb].leader_bb();

                let new_bb = inject_edge_counter_basic_block(mir_body, from_bb, to_bb);
                debug!(
                    "Edge {from_bcb:?} (last {from_bb:?}) -> {to_bcb:?} (leader {to_bb:?}) \
                    requires a new MIR BasicBlock {new_bb:?} for counter increment {id:?}",
                );
                new_bb
            }
        };

        inject_statement(mir_body, CoverageKind::CounterIncrement { id }, target_bb);
    }

    // For each counter expression that is directly associated with at least one
    // span, we inject an "expression-used" statement, so that coverage codegen
    // can check whether the injected statement survived MIR optimization.
    // (BCB edges can't have spans, so we only need to process BCB nodes here.)
    //
    // We only do this for ordinary `Code` mappings, because branch and MC/DC
    // mappings might have expressions that don't correspond to any single
    // point in the control-flow graph.
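    //
    // (An expression that never gets marked as used in this way is treated during
    // codegen as if its value were zero.)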
    //
    // See the code in `rustc_codegen_llvm::coverageinfo::map_data` that deals
    // with "expressions seen" and "zero terms".
    let eligible_bcbs = extracted_mappings.bcbs_with_ordinary_code_mappings();
    for (bcb, expression_id) in coverage_counters
        .bcb_nodes_with_coverage_expressions()
        .filter(|&(bcb, _)| eligible_bcbs.contains(bcb))
    {
        inject_statement(
            mir_body,
            CoverageKind::ExpressionUsed { id: expression_id },
            basic_coverage_blocks[bcb].leader_bb(),
        );
    }
}

/// For each condition, inject statements to update the condition bitmap after it has been
/// evaluated.
/// For each decision, inject statements to update the test vector bitmap after it has been
/// evaluated.
fn inject_mcdc_statements<'tcx>(
    mir_body: &mut mir::Body<'tcx>,
    basic_coverage_blocks: &CoverageGraph,
    extracted_mappings: &ExtractedMappings,
) {
    for (decision, conditions) in &extracted_mappings.mcdc_mappings {
        // Inject the test vector update first, because `inject_statement` always inserts
        // new statements at the head of the block.
        for &end in &decision.end_bcbs {
            let end_bb = basic_coverage_blocks[end].leader_bb();
            inject_statement(
                mir_body,
                CoverageKind::TestVectorBitmapUpdate {
                    bitmap_idx: decision.bitmap_idx as u32,
                    decision_depth: decision.decision_depth,
                },
                end_bb,
            );
        }

        for &mappings::MCDCBranch {
            span: _,
            true_bcb,
            false_bcb,
            condition_info: _,
            true_index,
            false_index,
        } in conditions
        {
            for (index, bcb) in [(false_index, false_bcb), (true_index, true_bcb)] {
                let bb = basic_coverage_blocks[bcb].leader_bb();
                inject_statement(
                    mir_body,
                    CoverageKind::CondBitmapUpdate {
                        index: index as u32,
                        decision_depth: decision.decision_depth,
                    },
                    bb,
                );
            }
        }
    }
}

/// Given two basic blocks that have a control-flow edge between them, creates
/// and returns a new block that sits between those blocks.
fn inject_edge_counter_basic_block(
    mir_body: &mut mir::Body<'_>,
    from_bb: BasicBlock,
    to_bb: BasicBlock,
) -> BasicBlock {
    let span = mir_body[from_bb].terminator().source_info.span.shrink_to_hi();
    let new_bb = mir_body.basic_blocks_mut().push(BasicBlockData {
        statements: vec![], // counter will be injected here
        terminator: Some(Terminator {
            source_info: SourceInfo::outermost(span),
            kind: TerminatorKind::Goto { target: to_bb },
        }),
        is_cleanup: false,
    });
    let edge_ref = mir_body[from_bb]
        .terminator_mut()
        .successors_mut()
        .find(|successor| **successor == to_bb)
        .expect("from_bb should have a successor for to_bb");
    *edge_ref = new_bb;
    new_bb
}

fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb: BasicBlock) {
    debug!(" injecting statement {counter_kind:?} for {bb:?}");
    let data = &mut mir_body[bb];
    let source_info = data.terminator().source_info;
    let statement = Statement { source_info, kind: StatementKind::Coverage(counter_kind) };
    data.statements.insert(0, statement);
}

/// Convert the Span into its file name, start line and column, and end line and column.
///
/// Line numbers and column numbers are 1-based. Unlike most column numbers emitted by
/// the compiler, these column numbers are denoted in **bytes**, because that's what
/// LLVM's `llvm-cov` tool expects to see in coverage maps.
///
/// Returns `None` if the conversion failed for some reason. This shouldn't happen,
/// but it's hard to rule out entirely (especially in the presence of complex macros
/// or other expansions), and if it does happen then skipping a span or function is
/// better than an ICE or `llvm-cov` failure that the user might have no way to avoid.
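///
/// For example (purely illustrative): a span covering the two-byte UTF-8 character `é`
/// at the very start of line 3 would produce a region starting at (3, 1) and ending at
/// (3, 3), because the end column counts bytes rather than characters.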
#[instrument(level = "debug", skip(source_map))] fn make_source_region( source_map: &SourceMap, file_name: Symbol, span: Span, body_span: Span, ) -> Option { let lo = span.lo(); let hi = span.hi(); let file = source_map.lookup_source_file(lo); if !file.contains(hi) { debug!(?span, ?file, ?lo, ?hi, "span crosses multiple files; skipping"); return None; } // Column numbers need to be in bytes, so we can't use the more convenient // `SourceMap` methods for looking up file coordinates. let rpos_and_line_and_byte_column = |pos: BytePos| -> Option<(RelativeBytePos, usize, usize)> { let rpos = file.relative_position(pos); let line_index = file.lookup_line(rpos)?; let line_start = file.lines()[line_index]; // Line numbers and column numbers are 1-based, so add 1 to each. Some((rpos, line_index + 1, (rpos - line_start).to_usize() + 1)) }; let (lo_rpos, mut start_line, mut start_col) = rpos_and_line_and_byte_column(lo)?; let (hi_rpos, mut end_line, mut end_col) = rpos_and_line_and_byte_column(hi)?; // If the span is empty, try to expand it horizontally by one character's // worth of bytes, so that it is more visible in `llvm-cov` reports. // We do this after resolving line/column numbers, so that empty spans at the // end of a line get an extra column instead of wrapping to the next line. if span.is_empty() && body_span.contains(span) && let Some(src) = &file.src { // Prefer to expand the end position, if it won't go outside the body span. if hi < body_span.hi() { let hi_rpos = hi_rpos.to_usize(); let nudge_bytes = src.ceil_char_boundary(hi_rpos + 1) - hi_rpos; end_col += nudge_bytes; } else if lo > body_span.lo() { let lo_rpos = lo_rpos.to_usize(); let nudge_bytes = lo_rpos - src.floor_char_boundary(lo_rpos - 1); // Subtract the nudge, but don't go below column 1. start_col = start_col.saturating_sub(nudge_bytes).max(1); } // If neither nudge could be applied, stick with the empty span coordinates. } // Apply an offset so that code in doctests has correct line numbers. // FIXME(#79417): Currently we have no way to offset doctest _columns_. start_line = source_map.doctest_offset_line(&file.name, start_line); end_line = source_map.doctest_offset_line(&file.name, end_line); check_source_region(SourceRegion { file_name, start_line: start_line as u32, start_col: start_col as u32, end_line: end_line as u32, end_col: end_col as u32, }) } /// If `llvm-cov` sees a source region that is improperly ordered (end < start), /// it will immediately exit with a fatal error. To prevent that from happening, /// discard regions that are improperly ordered, or might be interpreted in a /// way that makes them improperly ordered. fn check_source_region(source_region: SourceRegion) -> Option { let SourceRegion { file_name: _, start_line, start_col, end_line, end_col } = source_region; // Line/column coordinates are supposed to be 1-based. If we ever emit // coordinates of 0, `llvm-cov` might misinterpret them. let all_nonzero = [start_line, start_col, end_line, end_col].into_iter().all(|x| x != 0); // Coverage mappings use the high bit of `end_col` to indicate that a // region is actually a "gap" region, so make sure it's unset. let end_col_has_high_bit_unset = (end_col & (1 << 31)) == 0; // If a region is improperly ordered (end < start), `llvm-cov` will exit // with a fatal error, which is inconvenient for users and hard to debug. 
    let is_ordered = (start_line, start_col) <= (end_line, end_col);

    if all_nonzero && end_col_has_high_bit_unset && is_ordered {
        Some(source_region)
    } else {
        debug!(
            ?source_region,
            ?all_nonzero,
            ?end_col_has_high_bit_unset,
            ?is_ordered,
            "Skipping source region that would be misinterpreted or rejected by LLVM"
        );
        // If this happens in a debug build, ICE to make it easier to notice.
        debug_assert!(false, "Improper source region: {source_region:?}");
        None
    }
}

/// Function information extracted from HIR by the coverage instrumentor.
#[derive(Debug)]
struct ExtractedHirInfo {
    function_source_hash: u64,
    is_async_fn: bool,
    /// The span of the function's signature, extended to the start of `body_span`.
    /// Must have the same context and filename as the body span.
    fn_sig_span_extended: Option<Span>,
    body_span: Span,
    /// "Holes" are regions within the body span that should not be included in
    /// coverage spans for this function (e.g. closures and nested items).
    hole_spans: Vec<Span>,
}

fn extract_hir_info<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> ExtractedHirInfo {
    // FIXME(#79625): Consider improving MIR to provide the information needed, to avoid going back
    // to HIR for it.

    // HACK: For synthetic MIR bodies (async closures), use the def id of the HIR body.
    if tcx.is_synthetic_mir(def_id) {
        return extract_hir_info(tcx, tcx.local_parent(def_id));
    }

    let hir_node = tcx.hir_node_by_def_id(def_id);
    let fn_body_id = hir_node.body_id().expect("HIR node is a function with body");
    let hir_body = tcx.hir().body(fn_body_id);

    let maybe_fn_sig = hir_node.fn_sig();
    let is_async_fn = maybe_fn_sig.is_some_and(|fn_sig| fn_sig.header.is_async());

    let mut body_span = hir_body.value.span;

    use hir::{Closure, Expr, ExprKind, Node};
    // Unexpand a closure's body span back to the context of its declaration.
    // This helps with closure bodies that consist of just a single bang-macro,
    // and also with closure bodies produced by async desugaring.
    if let Node::Expr(&Expr { kind: ExprKind::Closure(&Closure { fn_decl_span, .. }), .. }) =
        hir_node
    {
        body_span = body_span.find_ancestor_in_same_ctxt(fn_decl_span).unwrap_or(body_span);
    }

    // The actual signature span is only used if it has the same context and
    // filename as the body, and precedes the body.
    let fn_sig_span_extended = maybe_fn_sig
        .map(|fn_sig| fn_sig.span)
        .filter(|&fn_sig_span| {
            let source_map = tcx.sess.source_map();
            let file_idx = |span: Span| source_map.lookup_source_file_idx(span.lo());

            fn_sig_span.eq_ctxt(body_span)
                && fn_sig_span.hi() <= body_span.lo()
                && file_idx(fn_sig_span) == file_idx(body_span)
        })
        // If so, extend it to the start of the body span.
        .map(|fn_sig_span| fn_sig_span.with_hi(body_span.lo()));

    let function_source_hash = hash_mir_source(tcx, hir_body);

    let hole_spans = extract_hole_spans_from_hir(tcx, body_span, hir_body);

    ExtractedHirInfo {
        function_source_hash,
        is_async_fn,
        fn_sig_span_extended,
        body_span,
        hole_spans,
    }
}

fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx hir::Body<'tcx>) -> u64 {
    // FIXME(cjgillot) Stop hashing HIR manually here.
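    // The resulting value becomes `function_source_hash` in `FunctionCoverageInfo` (set in
    // `instrument_function_for_coverage` above), and is ultimately what lets coverage
    // tooling distinguish different versions of the same function in the emitted data.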
    let owner = hir_body.id().hir_id.owner;
    tcx.hir_owner_nodes(owner).opt_hash_including_bodies.unwrap().to_smaller_hash().as_u64()
}

fn extract_hole_spans_from_hir<'tcx>(
    tcx: TyCtxt<'tcx>,
    body_span: Span, // Usually `hir_body.value.span`, but not always
    hir_body: &hir::Body<'tcx>,
) -> Vec<Span> {
    struct HolesVisitor<'hir, F> {
        hir: Map<'hir>,
        visit_hole_span: F,
    }

    impl<'hir, F: FnMut(Span)> Visitor<'hir> for HolesVisitor<'hir, F> {
        /// - We need `NestedFilter::INTRA = true` so that `visit_item` will be called.
        /// - Bodies of nested items don't actually get visited, because of the
        ///   `visit_item` override.
        /// - For nested bodies that are not part of an item, we do want to visit any
        ///   items contained within them.
        type NestedFilter = nested_filter::All;

        fn nested_visit_map(&mut self) -> Self::Map {
            self.hir
        }

        fn visit_item(&mut self, item: &'hir hir::Item<'hir>) {
            (self.visit_hole_span)(item.span);
            // Having visited this item, we don't care about its children,
            // so don't call `walk_item`.
        }

        // We override `visit_expr` instead of the more specific expression
        // visitors, so that we have direct access to the expression span.
        fn visit_expr(&mut self, expr: &'hir hir::Expr<'hir>) {
            match expr.kind {
                hir::ExprKind::Closure(_) | hir::ExprKind::ConstBlock(_) => {
                    (self.visit_hole_span)(expr.span);
                    // Having visited this expression, we don't care about its
                    // children, so don't call `walk_expr`.
                }
                // For other expressions, recursively visit as normal.
                _ => walk_expr(self, expr),
            }
        }
    }

    let mut hole_spans = vec![];
    let mut visitor = HolesVisitor {
        hir: tcx.hir(),
        visit_hole_span: |hole_span| {
            // Discard any holes that aren't directly visible within the body span.
            if body_span.contains(hole_span) && body_span.eq_ctxt(hole_span) {
                hole_spans.push(hole_span);
            }
        },
    };

    visitor.visit_body(hir_body);
    hole_spans
}