Auto merge of #116922 - Zalathar:unused, r=cjgillot

coverage: Emit mappings for unused functions without generating stubs

For a while I've been annoyed by the fact that generating coverage maps for unused functions involves generating a stub function at the LLVM level.

As I suspected, generating that stub function isn't actually necessary, as long as we specifically tell LLVM about the symbol names of all the functions that have coverage mappings but weren't codegenned (due to being unused).

---

There is some helper code that gets moved around in the follow-up patches, so look at the first patch to see the most important functional changes.

---

`@rustbot` label +A-code-coverage
This commit is contained in:
bors 2023-10-21 16:47:30 +00:00
commit 0d1664674a
4 changed files with 103 additions and 150 deletions

View File

@ -4,14 +4,16 @@ use crate::coverageinfo::ffi::CounterMappingRegion;
use crate::coverageinfo::map_data::FunctionCoverage; use crate::coverageinfo::map_data::FunctionCoverage;
use crate::llvm; use crate::llvm;
use rustc_codegen_ssa::traits::ConstMethods; use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods};
use rustc_data_structures::fx::FxIndexSet; use rustc_data_structures::fx::FxIndexSet;
use rustc_hir::def::DefKind; use rustc_hir::def::DefKind;
use rustc_hir::def_id::DefId; use rustc_hir::def_id::DefId;
use rustc_index::IndexVec; use rustc_index::IndexVec;
use rustc_middle::bug; use rustc_middle::bug;
use rustc_middle::mir;
use rustc_middle::mir::coverage::CodeRegion; use rustc_middle::mir::coverage::CodeRegion;
use rustc_middle::ty::{self, TyCtxt}; use rustc_middle::ty::{self, TyCtxt};
use rustc_span::def_id::DefIdSet;
use rustc_span::Symbol; use rustc_span::Symbol;
/// Generates and exports the Coverage Map. /// Generates and exports the Coverage Map.
@ -94,8 +96,14 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
// Generate the LLVM IR representation of the coverage map and store it in a well-known global // Generate the LLVM IR representation of the coverage map and store it in a well-known global
let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val); let cov_data_val = generate_coverage_map(cx, version, filenames_size, filenames_val);
let mut unused_function_names = Vec::new();
let covfun_section_name = coverageinfo::covfun_section_name(cx); let covfun_section_name = coverageinfo::covfun_section_name(cx);
for (mangled_function_name, source_hash, is_used, coverage_mapping_buffer) in function_data { for (mangled_function_name, source_hash, is_used, coverage_mapping_buffer) in function_data {
if !is_used {
unused_function_names.push(mangled_function_name);
}
save_function_record( save_function_record(
cx, cx,
&covfun_section_name, &covfun_section_name,
@ -107,6 +115,25 @@ pub fn finalize(cx: &CodegenCx<'_, '_>) {
); );
} }
// For unused functions, we need to take their mangled names and store them
// in a specially-named global array. LLVM's `InstrProfiling` pass will
// detect this global and include those names in its `__llvm_prf_names`
// section. (See `llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp`.)
if !unused_function_names.is_empty() {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
let name_globals = unused_function_names
.into_iter()
.map(|mangled_function_name| cx.const_str(mangled_function_name).0)
.collect::<Vec<_>>();
let initializer = cx.const_array(cx.type_ptr(), &name_globals);
let array = llvm::add_global(cx.llmod, cx.val_ty(initializer), "__llvm_coverage_names");
llvm::set_global_constant(array, true);
llvm::set_linkage(array, llvm::Linkage::InternalLinkage);
llvm::set_initializer(array, initializer);
}
// Save the coverage data value to LLVM IR // Save the coverage data value to LLVM IR
coverageinfo::save_cov_data_to_mod(cx, cov_data_val); coverageinfo::save_cov_data_to_mod(cx, cov_data_val);
} }
@ -287,13 +314,12 @@ fn save_function_record(
/// `-Clink-dead-code` will not generate code for unused generic functions.) /// `-Clink-dead-code` will not generate code for unused generic functions.)
/// ///
/// We can find the unused functions (including generic functions) by the set difference of all MIR /// We can find the unused functions (including generic functions) by the set difference of all MIR
/// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`tcx` query /// `DefId`s (`tcx` query `mir_keys`) minus the codegenned `DefId`s (`codegenned_and_inlined_items`).
/// `codegened_and_inlined_items`).
/// ///
/// These unused functions are then codegen'd in one of the CGUs which is marked as the /// These unused functions don't need to be codegenned, but we do need to add them to the function
/// "code coverage dead code cgu" during the partitioning process. This prevents us from generating /// coverage map (in a single designated CGU) so that we still emit coverage mappings for them.
/// code regions for the same function more than once which can lead to linker errors regarding /// We also end up adding their symbol names to a special global array that LLVM will include in
/// duplicate symbols. /// its embedded coverage data.
fn add_unused_functions(cx: &CodegenCx<'_, '_>) { fn add_unused_functions(cx: &CodegenCx<'_, '_>) {
assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu()); assert!(cx.codegen_unit.is_code_coverage_dead_code_cgu());
@ -324,19 +350,80 @@ fn add_unused_functions(cx: &CodegenCx<'_, '_>) {
}) })
.collect(); .collect();
let codegenned_def_ids = tcx.codegened_and_inlined_items(()); let codegenned_def_ids = codegenned_and_inlined_items(tcx);
for non_codegenned_def_id in // For each `DefId` that should have coverage instrumentation but wasn't
eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id)) // codegenned, add it to the function coverage map as an unused function.
{ for def_id in eligible_def_ids.into_iter().filter(|id| !codegenned_def_ids.contains(id)) {
// Skip any function that didn't have coverage data added to it by the // Skip any function that didn't have coverage data added to it by the
// coverage instrumentor. // coverage instrumentor.
let body = tcx.instance_mir(ty::InstanceDef::Item(non_codegenned_def_id)); let body = tcx.instance_mir(ty::InstanceDef::Item(def_id));
let Some(function_coverage_info) = body.function_coverage_info.as_deref() else { let Some(function_coverage_info) = body.function_coverage_info.as_deref() else {
continue; continue;
}; };
debug!("generating unused fn: {:?}", non_codegenned_def_id); debug!("generating unused fn: {def_id:?}");
cx.define_unused_fn(non_codegenned_def_id, function_coverage_info); let instance = declare_unused_fn(tcx, def_id);
add_unused_function_coverage(cx, instance, function_coverage_info);
}
}
/// All items participating in code generation together with (instrumented)
/// items inlined into them.
fn codegenned_and_inlined_items(tcx: TyCtxt<'_>) -> DefIdSet {
let (items, cgus) = tcx.collect_and_partition_mono_items(());
let mut visited = DefIdSet::default();
let mut result = items.clone();
for cgu in cgus {
for item in cgu.items().keys() {
if let mir::mono::MonoItem::Fn(ref instance) = item {
let did = instance.def_id();
if !visited.insert(did) {
continue;
}
let body = tcx.instance_mir(instance.def);
for block in body.basic_blocks.iter() {
for statement in &block.statements {
let mir::StatementKind::Coverage(_) = statement.kind else { continue };
let scope = statement.source_info.scope;
if let Some(inlined) = scope.inlined_instance(&body.source_scopes) {
result.insert(inlined.def_id());
}
}
}
}
}
}
result
}
fn declare_unused_fn<'tcx>(tcx: TyCtxt<'tcx>, def_id: DefId) -> ty::Instance<'tcx> {
ty::Instance::new(
def_id,
ty::GenericArgs::for_item(tcx, def_id, |param, _| {
if let ty::GenericParamDefKind::Lifetime = param.kind {
tcx.lifetimes.re_erased.into()
} else {
tcx.mk_param_from_def(param)
}
}),
)
}
fn add_unused_function_coverage<'tcx>(
cx: &CodegenCx<'_, 'tcx>,
instance: ty::Instance<'tcx>,
function_coverage_info: &'tcx mir::coverage::FunctionCoverageInfo,
) {
// An unused function's mappings will automatically be rewritten to map to
// zero, because none of its counters/expressions are marked as seen.
let function_coverage = FunctionCoverage::unused(instance, function_coverage_info);
if let Some(coverage_context) = cx.coverage_context() {
coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage);
} else {
bug!("Could not get the `coverage_context`");
} }
} }

View File

@ -1,6 +1,5 @@
use crate::llvm; use crate::llvm;
use crate::abi::Abi;
use crate::builder::Builder; use crate::builder::Builder;
use crate::common::CodegenCx; use crate::common::CodegenCx;
use crate::coverageinfo::ffi::{CounterExpression, CounterMappingRegion}; use crate::coverageinfo::ffi::{CounterExpression, CounterMappingRegion};
@ -12,17 +11,12 @@ use rustc_codegen_ssa::traits::{
StaticMethods, StaticMethods,
}; };
use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::fx::FxHashMap;
use rustc_hir as hir;
use rustc_hir::def_id::DefId;
use rustc_llvm::RustString; use rustc_llvm::RustString;
use rustc_middle::bug; use rustc_middle::bug;
use rustc_middle::mir::coverage::{CounterId, CoverageKind, FunctionCoverageInfo}; use rustc_middle::mir::coverage::CoverageKind;
use rustc_middle::mir::Coverage; use rustc_middle::mir::Coverage;
use rustc_middle::ty; use rustc_middle::ty::layout::HasTyCtxt;
use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt};
use rustc_middle::ty::GenericArgs;
use rustc_middle::ty::Instance; use rustc_middle::ty::Instance;
use rustc_middle::ty::Ty;
use std::cell::RefCell; use std::cell::RefCell;
@ -30,8 +24,6 @@ pub(crate) mod ffi;
pub(crate) mod map_data; pub(crate) mod map_data;
pub mod mapgen; pub mod mapgen;
const UNUSED_FUNCTION_COUNTER_ID: CounterId = CounterId::START;
const VAR_ALIGN_BYTES: usize = 8; const VAR_ALIGN_BYTES: usize = 8;
/// A context object for maintaining all state needed by the coverageinfo module. /// A context object for maintaining all state needed by the coverageinfo module.
@ -76,25 +68,6 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> {
bug!("Could not get the `coverage_context`"); bug!("Could not get the `coverage_context`");
} }
} }
/// Functions with MIR-based coverage are normally codegenned _only_ if
/// called. LLVM coverage tools typically expect every function to be
/// defined (even if unused), with at least one call to LLVM intrinsic
/// `instrprof.increment`.
///
/// Codegen a small function that will never be called, with one counter
/// that will never be incremented.
///
/// For used/called functions, the coverageinfo was already added to the
/// `function_coverage_map` (keyed by function `Instance`) during codegen.
/// But in this case, since the unused function was _not_ previously
/// codegenned, collect the function coverage info from MIR and add an
/// "unused" entry to the function coverage map.
fn define_unused_fn(&self, def_id: DefId, function_coverage_info: &'tcx FunctionCoverageInfo) {
let instance = declare_unused_fn(self, def_id);
codegen_unused_fn_and_counter(self, instance);
add_unused_function_coverage(self, instance, function_coverage_info);
}
} }
impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> { impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
@ -159,76 +132,6 @@ impl<'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'_, '_, 'tcx> {
} }
} }
fn declare_unused_fn<'tcx>(cx: &CodegenCx<'_, 'tcx>, def_id: DefId) -> Instance<'tcx> {
let tcx = cx.tcx;
let instance = Instance::new(
def_id,
GenericArgs::for_item(tcx, def_id, |param, _| {
if let ty::GenericParamDefKind::Lifetime = param.kind {
tcx.lifetimes.re_erased.into()
} else {
tcx.mk_param_from_def(param)
}
}),
);
let llfn = cx.declare_fn(
tcx.symbol_name(instance).name,
cx.fn_abi_of_fn_ptr(
ty::Binder::dummy(tcx.mk_fn_sig(
[Ty::new_unit(tcx)],
Ty::new_unit(tcx),
false,
hir::Unsafety::Unsafe,
Abi::Rust,
)),
ty::List::empty(),
),
None,
);
llvm::set_linkage(llfn, llvm::Linkage::PrivateLinkage);
llvm::set_visibility(llfn, llvm::Visibility::Default);
assert!(cx.instances.borrow_mut().insert(instance, llfn).is_none());
instance
}
fn codegen_unused_fn_and_counter<'tcx>(cx: &CodegenCx<'_, 'tcx>, instance: Instance<'tcx>) {
let llfn = cx.get_fn(instance);
let llbb = Builder::append_block(cx, llfn, "unused_function");
let mut bx = Builder::build(cx, llbb);
let fn_name = bx.get_pgo_func_name_var(instance);
let hash = bx.const_u64(0);
let num_counters = bx.const_u32(1);
let index = bx.const_u32(u32::from(UNUSED_FUNCTION_COUNTER_ID));
debug!(
"codegen intrinsic instrprof.increment(fn_name={:?}, hash={:?}, num_counters={:?},
index={:?}) for unused function: {:?}",
fn_name, hash, num_counters, index, instance
);
bx.instrprof_increment(fn_name, hash, num_counters, index);
bx.ret_void();
}
fn add_unused_function_coverage<'tcx>(
cx: &CodegenCx<'_, 'tcx>,
instance: Instance<'tcx>,
function_coverage_info: &'tcx FunctionCoverageInfo,
) {
// An unused function's mappings will automatically be rewritten to map to
// zero, because none of its counters/expressions are marked as seen.
let function_coverage = FunctionCoverage::unused(instance, function_coverage_info);
if let Some(coverage_context) = cx.coverage_context() {
coverage_context.function_coverage_map.borrow_mut().insert(instance, function_coverage);
} else {
bug!("Could not get the `coverage_context`");
}
}
/// Calls llvm::createPGOFuncNameVar() with the given function instance's /// Calls llvm::createPGOFuncNameVar() with the given function instance's
/// mangled function name. The LLVM API returns an llvm::GlobalVariable /// mangled function name. The LLVM API returns an llvm::GlobalVariable
/// containing the function name, with the specific variable name and linkage /// containing the function name, with the specific variable name and linkage

View File

@ -1882,12 +1882,6 @@ rustc_queries! {
desc { |tcx| "determining whether `{}` needs codegen", tcx.def_path_str(def_id) } desc { |tcx| "determining whether `{}` needs codegen", tcx.def_path_str(def_id) }
} }
/// All items participating in code generation together with items inlined into them.
query codegened_and_inlined_items(_: ()) -> &'tcx DefIdSet {
eval_always
desc { "collecting codegened and inlined items" }
}
query codegen_unit(sym: Symbol) -> &'tcx CodegenUnit<'tcx> { query codegen_unit(sym: Symbol) -> &'tcx CodegenUnit<'tcx> {
desc { "getting codegen unit `{sym}`" } desc { "getting codegen unit `{sym}`" }
} }

View File

@ -105,7 +105,6 @@ use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE};
use rustc_hir::definitions::DefPathDataName; use rustc_hir::definitions::DefPathDataName;
use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel};
use rustc_middle::mir;
use rustc_middle::mir::mono::{ use rustc_middle::mir::mono::{
CodegenUnit, CodegenUnitNameBuilder, InstantiationMode, Linkage, MonoItem, MonoItemData, CodegenUnit, CodegenUnitNameBuilder, InstantiationMode, Linkage, MonoItem, MonoItemData,
Visibility, Visibility,
@ -1279,38 +1278,8 @@ fn dump_mono_items_stats<'tcx>(
Ok(()) Ok(())
} }
fn codegened_and_inlined_items(tcx: TyCtxt<'_>, (): ()) -> &DefIdSet {
let (items, cgus) = tcx.collect_and_partition_mono_items(());
let mut visited = DefIdSet::default();
let mut result = items.clone();
for cgu in cgus {
for item in cgu.items().keys() {
if let MonoItem::Fn(ref instance) = item {
let did = instance.def_id();
if !visited.insert(did) {
continue;
}
let body = tcx.instance_mir(instance.def);
for block in body.basic_blocks.iter() {
for statement in &block.statements {
let mir::StatementKind::Coverage(_) = statement.kind else { continue };
let scope = statement.source_info.scope;
if let Some(inlined) = scope.inlined_instance(&body.source_scopes) {
result.insert(inlined.def_id());
}
}
}
}
}
}
tcx.arena.alloc(result)
}
pub fn provide(providers: &mut Providers) { pub fn provide(providers: &mut Providers) {
providers.collect_and_partition_mono_items = collect_and_partition_mono_items; providers.collect_and_partition_mono_items = collect_and_partition_mono_items;
providers.codegened_and_inlined_items = codegened_and_inlined_items;
providers.is_codegened_item = |tcx, def_id| { providers.is_codegened_item = |tcx, def_id| {
let (all_mono_items, _) = tcx.collect_and_partition_mono_items(()); let (all_mono_items, _) = tcx.collect_and_partition_mono_items(());