Auto merge of #125928 - michaelwoerister:fix-cgu-hashstable, r=oli-obk

Stabilize order of MonoItems in CGUs and disallow query_instability lint for rustc_monomorphize

The HashStable impl for `CodegenUnit` was incorrect as described in [MCP 533](https://github.com/rust-lang/compiler-team/issues/533). This PR removes all nondeterminism from the way codegen units are built. The changes are pretty straightforward.

Part of https://github.com/rust-lang/rust/issues/84447 and [MCP 533](https://github.com/rust-lang/compiler-team/issues/533).
This commit is contained in:
bors 2024-06-07 04:02:59 +00:00
commit b74702fbb2
4 changed files with 74 additions and 74 deletions

View File

@ -5,9 +5,9 @@ use rustc_data_structures::base_n::BaseNString;
use rustc_data_structures::base_n::ToBaseN; use rustc_data_structures::base_n::ToBaseN;
use rustc_data_structures::base_n::CASE_INSENSITIVE; use rustc_data_structures::base_n::CASE_INSENSITIVE;
use rustc_data_structures::fingerprint::Fingerprint; use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::fx::FxIndexMap;
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher}; use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher, ToStableHashKey};
use rustc_data_structures::unord::UnordMap;
use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE}; use rustc_hir::def_id::{CrateNum, DefId, LOCAL_CRATE};
use rustc_hir::ItemId; use rustc_hir::ItemId;
use rustc_index::Idx; use rustc_index::Idx;
@ -241,7 +241,17 @@ impl<'tcx> fmt::Display for MonoItem<'tcx> {
} }
} }
#[derive(Debug)] impl ToStableHashKey<StableHashingContext<'_>> for MonoItem<'_> {
type KeyType = Fingerprint;
fn to_stable_hash_key(&self, hcx: &StableHashingContext<'_>) -> Self::KeyType {
let mut hasher = StableHasher::new();
self.hash_stable(&mut hcx.clone(), &mut hasher);
hasher.finish()
}
}
#[derive(Debug, HashStable)]
pub struct CodegenUnit<'tcx> { pub struct CodegenUnit<'tcx> {
/// A name for this CGU. Incremental compilation requires that /// A name for this CGU. Incremental compilation requires that
/// name be unique amongst **all** crates. Therefore, it should /// name be unique amongst **all** crates. Therefore, it should
@ -430,38 +440,19 @@ impl<'tcx> CodegenUnit<'tcx> {
} }
} }
impl<'a, 'tcx> HashStable<StableHashingContext<'a>> for CodegenUnit<'tcx> { impl ToStableHashKey<StableHashingContext<'_>> for CodegenUnit<'_> {
fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) { type KeyType = String;
let CodegenUnit {
ref items,
name,
// The size estimate is not relevant to the hash
size_estimate: _,
primary: _,
is_code_coverage_dead_code_cgu,
} = *self;
name.hash_stable(hcx, hasher); fn to_stable_hash_key(&self, _: &StableHashingContext<'_>) -> Self::KeyType {
is_code_coverage_dead_code_cgu.hash_stable(hcx, hasher); // Codegen unit names are conceptually required to be stable across
// compilation session so that object file names match up.
let mut items: Vec<(Fingerprint, _)> = items self.name.to_string()
.iter()
.map(|(mono_item, &attrs)| {
let mut hasher = StableHasher::new();
mono_item.hash_stable(hcx, &mut hasher);
let mono_item_fingerprint = hasher.finish();
(mono_item_fingerprint, attrs)
})
.collect();
items.sort_unstable_by_key(|i| i.0);
items.hash_stable(hcx, hasher);
} }
} }
pub struct CodegenUnitNameBuilder<'tcx> { pub struct CodegenUnitNameBuilder<'tcx> {
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
cache: FxHashMap<CrateNum, String>, cache: UnordMap<CrateNum, String>,
} }
impl<'tcx> CodegenUnitNameBuilder<'tcx> { impl<'tcx> CodegenUnitNameBuilder<'tcx> {

View File

@ -207,8 +207,8 @@
mod move_check; mod move_check;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_data_structures::sync::{par_for_each_in, LRef, MTLock}; use rustc_data_structures::sync::{par_for_each_in, LRef, MTLock};
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_hir as hir; use rustc_hir as hir;
use rustc_hir::def::DefKind; use rustc_hir::def::DefKind;
use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId}; use rustc_hir::def_id::{DefId, DefIdMap, LocalDefId};
@ -251,10 +251,10 @@ pub enum MonoItemCollectionStrategy {
pub struct UsageMap<'tcx> { pub struct UsageMap<'tcx> {
// Maps every mono item to the mono items used by it. // Maps every mono item to the mono items used by it.
used_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>, used_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
// Maps every mono item to the mono items that use it. // Maps every mono item to the mono items that use it.
user_map: FxHashMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>, user_map: UnordMap<MonoItem<'tcx>, Vec<MonoItem<'tcx>>>,
} }
type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>; type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
@ -262,10 +262,10 @@ type MonoItems<'tcx> = Vec<Spanned<MonoItem<'tcx>>>;
/// The state that is shared across the concurrent threads that are doing collection. /// The state that is shared across the concurrent threads that are doing collection.
struct SharedState<'tcx> { struct SharedState<'tcx> {
/// Items that have been or are currently being recursively collected. /// Items that have been or are currently being recursively collected.
visited: MTLock<FxHashSet<MonoItem<'tcx>>>, visited: MTLock<UnordSet<MonoItem<'tcx>>>,
/// Items that have been or are currently being recursively treated as "mentioned", i.e., their /// Items that have been or are currently being recursively treated as "mentioned", i.e., their
/// consts are evaluated but nothing is added to the collection. /// consts are evaluated but nothing is added to the collection.
mentioned: MTLock<FxHashSet<MonoItem<'tcx>>>, mentioned: MTLock<UnordSet<MonoItem<'tcx>>>,
/// Which items are being used where, for better errors. /// Which items are being used where, for better errors.
usage_map: MTLock<UsageMap<'tcx>>, usage_map: MTLock<UsageMap<'tcx>>,
} }
@ -290,7 +290,7 @@ enum CollectionMode {
impl<'tcx> UsageMap<'tcx> { impl<'tcx> UsageMap<'tcx> {
fn new() -> UsageMap<'tcx> { fn new() -> UsageMap<'tcx> {
UsageMap { used_map: FxHashMap::default(), user_map: FxHashMap::default() } UsageMap { used_map: Default::default(), user_map: Default::default() }
} }
fn record_used<'a>( fn record_used<'a>(
@ -668,7 +668,7 @@ struct MirUsedCollector<'a, 'tcx> {
used_items: &'a mut MonoItems<'tcx>, used_items: &'a mut MonoItems<'tcx>,
/// See the comment in `collect_items_of_instance` for the purpose of this set. /// See the comment in `collect_items_of_instance` for the purpose of this set.
/// Note that this contains *not-monomorphized* items! /// Note that this contains *not-monomorphized* items!
used_mentioned_items: &'a mut FxHashSet<MentionedItem<'tcx>>, used_mentioned_items: &'a mut UnordSet<MentionedItem<'tcx>>,
instance: Instance<'tcx>, instance: Instance<'tcx>,
visiting_call_terminator: bool, visiting_call_terminator: bool,
move_check: move_check::MoveCheckState, move_check: move_check::MoveCheckState,
@ -1272,7 +1272,7 @@ fn collect_items_of_instance<'tcx>(
// mentioned item. So instead we collect all pre-monomorphized `MentionedItem` that were already // mentioned item. So instead we collect all pre-monomorphized `MentionedItem` that were already
// added to `used_items` in a hash set, which can efficiently query in the // added to `used_items` in a hash set, which can efficiently query in the
// `body.mentioned_items` loop below without even having to monomorphize the item. // `body.mentioned_items` loop below without even having to monomorphize the item.
let mut used_mentioned_items = FxHashSet::<MentionedItem<'tcx>>::default(); let mut used_mentioned_items = Default::default();
let mut collector = MirUsedCollector { let mut collector = MirUsedCollector {
tcx, tcx,
body, body,
@ -1628,10 +1628,10 @@ fn create_mono_items_for_default_impls<'tcx>(
//=----------------------------------------------------------------------------- //=-----------------------------------------------------------------------------
#[instrument(skip(tcx, strategy), level = "debug")] #[instrument(skip(tcx, strategy), level = "debug")]
pub fn collect_crate_mono_items( pub(crate) fn collect_crate_mono_items<'tcx>(
tcx: TyCtxt<'_>, tcx: TyCtxt<'tcx>,
strategy: MonoItemCollectionStrategy, strategy: MonoItemCollectionStrategy,
) -> (FxHashSet<MonoItem<'_>>, UsageMap<'_>) { ) -> (Vec<MonoItem<'tcx>>, UsageMap<'tcx>) {
let _prof_timer = tcx.prof.generic_activity("monomorphization_collector"); let _prof_timer = tcx.prof.generic_activity("monomorphization_collector");
let roots = tcx let roots = tcx
@ -1641,8 +1641,8 @@ pub fn collect_crate_mono_items(
debug!("building mono item graph, beginning at roots"); debug!("building mono item graph, beginning at roots");
let mut state = SharedState { let mut state = SharedState {
visited: MTLock::new(FxHashSet::default()), visited: MTLock::new(UnordSet::default()),
mentioned: MTLock::new(FxHashSet::default()), mentioned: MTLock::new(UnordSet::default()),
usage_map: MTLock::new(UsageMap::new()), usage_map: MTLock::new(UsageMap::new()),
}; };
let recursion_limit = tcx.recursion_limit(); let recursion_limit = tcx.recursion_limit();
@ -1665,5 +1665,11 @@ pub fn collect_crate_mono_items(
}); });
} }
(state.visited.into_inner(), state.usage_map.into_inner()) // The set of MonoItems was created in an inherently indeterministic order because
// of parallelism. We sort it here to ensure that the output is deterministic.
let mono_items = tcx.with_stable_hashing_context(move |ref hcx| {
state.visited.into_inner().into_sorted(hcx, true)
});
(mono_items, state.usage_map.into_inner())
} }

View File

@ -1,6 +1,5 @@
#![feature(array_windows)] #![feature(array_windows)]
#![feature(is_sorted)] #![feature(is_sorted)]
#![allow(rustc::potential_query_instability)]
use rustc_hir::lang_items::LangItem; use rustc_hir::lang_items::LangItem;
use rustc_middle::bug; use rustc_middle::bug;

View File

@ -98,8 +98,9 @@ use std::fs::{self, File};
use std::io::{BufWriter, Write}; use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_data_structures::fx::{FxIndexMap, FxIndexSet};
use rustc_data_structures::sync; use rustc_data_structures::sync;
use rustc_data_structures::unord::{UnordMap, UnordSet};
use rustc_hir::def::DefKind; use rustc_hir::def::DefKind;
use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE}; use rustc_hir::def_id::{DefId, DefIdSet, LOCAL_CRATE};
use rustc_hir::definitions::DefPathDataName; use rustc_hir::definitions::DefPathDataName;
@ -131,7 +132,7 @@ struct PlacedMonoItems<'tcx> {
/// The codegen units, sorted by name to make things deterministic. /// The codegen units, sorted by name to make things deterministic.
codegen_units: Vec<CodegenUnit<'tcx>>, codegen_units: Vec<CodegenUnit<'tcx>>,
internalization_candidates: FxHashSet<MonoItem<'tcx>>, internalization_candidates: UnordSet<MonoItem<'tcx>>,
} }
// The output CGUs are sorted by name. // The output CGUs are sorted by name.
@ -197,9 +198,9 @@ fn place_mono_items<'tcx, I>(cx: &PartitioningCx<'_, 'tcx>, mono_items: I) -> Pl
where where
I: Iterator<Item = MonoItem<'tcx>>, I: Iterator<Item = MonoItem<'tcx>>,
{ {
let mut codegen_units = FxHashMap::default(); let mut codegen_units = UnordMap::default();
let is_incremental_build = cx.tcx.sess.opts.incremental.is_some(); let is_incremental_build = cx.tcx.sess.opts.incremental.is_some();
let mut internalization_candidates = FxHashSet::default(); let mut internalization_candidates = UnordSet::default();
// Determine if monomorphizations instantiated in this crate will be made // Determine if monomorphizations instantiated in this crate will be made
// available to downstream crates. This depends on whether we are in // available to downstream crates. This depends on whether we are in
@ -209,7 +210,7 @@ where
cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics(); cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx); let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
let cgu_name_cache = &mut FxHashMap::default(); let cgu_name_cache = &mut UnordMap::default();
for mono_item in mono_items { for mono_item in mono_items {
// Handle only root (GloballyShared) items directly here. Inlined (LocalCopy) items // Handle only root (GloballyShared) items directly here. Inlined (LocalCopy) items
@ -260,7 +261,7 @@ where
// going via another root item. This includes drop-glue, functions from // going via another root item. This includes drop-glue, functions from
// external crates, and local functions the definition of which is // external crates, and local functions the definition of which is
// marked with `#[inline]`. // marked with `#[inline]`.
let mut reachable_inlined_items = FxHashSet::default(); let mut reachable_inlined_items = FxIndexSet::default();
get_reachable_inlined_items(cx.tcx, mono_item, cx.usage_map, &mut reachable_inlined_items); get_reachable_inlined_items(cx.tcx, mono_item, cx.usage_map, &mut reachable_inlined_items);
// Add those inlined items. It's possible an inlined item is reachable // Add those inlined items. It's possible an inlined item is reachable
@ -284,8 +285,9 @@ where
codegen_units.insert(cgu_name, CodegenUnit::new(cgu_name)); codegen_units.insert(cgu_name, CodegenUnit::new(cgu_name));
} }
let mut codegen_units: Vec<_> = codegen_units.into_values().collect(); let mut codegen_units: Vec<_> = cx.tcx.with_stable_hashing_context(|ref hcx| {
codegen_units.sort_by(|a, b| a.name().as_str().cmp(b.name().as_str())); codegen_units.into_items().map(|(_, cgu)| cgu).collect_sorted(hcx, true)
});
for cgu in codegen_units.iter_mut() { for cgu in codegen_units.iter_mut() {
cgu.compute_size_estimate(); cgu.compute_size_estimate();
@ -297,7 +299,7 @@ where
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
item: MonoItem<'tcx>, item: MonoItem<'tcx>,
usage_map: &UsageMap<'tcx>, usage_map: &UsageMap<'tcx>,
visited: &mut FxHashSet<MonoItem<'tcx>>, visited: &mut FxIndexSet<MonoItem<'tcx>>,
) { ) {
usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| { usage_map.for_each_inlined_used_item(tcx, item, |inlined_item| {
let is_new = visited.insert(inlined_item); let is_new = visited.insert(inlined_item);
@ -320,7 +322,7 @@ fn merge_codegen_units<'tcx>(
assert!(codegen_units.is_sorted_by(|a, b| a.name().as_str() <= b.name().as_str())); assert!(codegen_units.is_sorted_by(|a, b| a.name().as_str() <= b.name().as_str()));
// This map keeps track of what got merged into what. // This map keeps track of what got merged into what.
let mut cgu_contents: FxHashMap<Symbol, Vec<Symbol>> = let mut cgu_contents: UnordMap<Symbol, Vec<Symbol>> =
codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect(); codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name()])).collect();
// If N is the maximum number of CGUs, and the CGUs are sorted from largest // If N is the maximum number of CGUs, and the CGUs are sorted from largest
@ -422,22 +424,24 @@ fn merge_codegen_units<'tcx>(
// For CGUs that contain the code of multiple modules because of the // For CGUs that contain the code of multiple modules because of the
// merging done above, we use a concatenation of the names of all // merging done above, we use a concatenation of the names of all
// contained CGUs. // contained CGUs.
let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents let new_cgu_names = UnordMap::from(
.into_iter() cgu_contents
// This `filter` makes sure we only update the name of CGUs that .items()
// were actually modified by merging. // This `filter` makes sure we only update the name of CGUs that
.filter(|(_, cgu_contents)| cgu_contents.len() > 1) // were actually modified by merging.
.map(|(current_cgu_name, cgu_contents)| { .filter(|(_, cgu_contents)| cgu_contents.len() > 1)
let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| s.as_str()).collect(); .map(|(current_cgu_name, cgu_contents)| {
let mut cgu_contents: Vec<&str> =
cgu_contents.iter().map(|s| s.as_str()).collect();
// Sort the names, so things are deterministic and easy to // Sort the names, so things are deterministic and easy to
// predict. We are sorting primitive `&str`s here so we can // predict. We are sorting primitive `&str`s here so we can
// use unstable sort. // use unstable sort.
cgu_contents.sort_unstable(); cgu_contents.sort_unstable();
(current_cgu_name, cgu_contents.join("--")) (*current_cgu_name, cgu_contents.join("--"))
}) }),
.collect(); );
for cgu in codegen_units.iter_mut() { for cgu in codegen_units.iter_mut() {
if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) { if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) {
@ -511,7 +515,7 @@ fn compute_inlined_overlap<'tcx>(cgu1: &CodegenUnit<'tcx>, cgu2: &CodegenUnit<'t
fn internalize_symbols<'tcx>( fn internalize_symbols<'tcx>(
cx: &PartitioningCx<'_, 'tcx>, cx: &PartitioningCx<'_, 'tcx>,
codegen_units: &mut [CodegenUnit<'tcx>], codegen_units: &mut [CodegenUnit<'tcx>],
internalization_candidates: FxHashSet<MonoItem<'tcx>>, internalization_candidates: UnordSet<MonoItem<'tcx>>,
) { ) {
/// For symbol internalization, we need to know whether a symbol/mono-item /// For symbol internalization, we need to know whether a symbol/mono-item
/// is used from outside the codegen unit it is defined in. This type is /// is used from outside the codegen unit it is defined in. This type is
@ -522,7 +526,7 @@ fn internalize_symbols<'tcx>(
MultipleCgus, MultipleCgus,
} }
let mut mono_item_placements = FxHashMap::default(); let mut mono_item_placements = UnordMap::default();
let single_codegen_unit = codegen_units.len() == 1; let single_codegen_unit = codegen_units.len() == 1;
if !single_codegen_unit { if !single_codegen_unit {
@ -739,7 +743,7 @@ fn mono_item_linkage_and_visibility<'tcx>(
(Linkage::External, vis) (Linkage::External, vis)
} }
type CguNameCache = FxHashMap<(DefId, bool), Symbol>; type CguNameCache = UnordMap<(DefId, bool), Symbol>;
fn static_visibility<'tcx>( fn static_visibility<'tcx>(
tcx: TyCtxt<'tcx>, tcx: TyCtxt<'tcx>,
@ -932,7 +936,7 @@ fn debug_dump<'a, 'tcx: 'a>(tcx: TyCtxt<'tcx>, label: &str, cgus: &[CodegenUnit<
// //
// Also, unreached inlined items won't be counted here. This is fine. // Also, unreached inlined items won't be counted here. This is fine.
let mut inlined_items = FxHashSet::default(); let mut inlined_items = UnordSet::default();
let mut root_items = 0; let mut root_items = 0;
let mut unique_inlined_items = 0; let mut unique_inlined_items = 0;
@ -1164,7 +1168,7 @@ fn collect_and_partition_mono_items(tcx: TyCtxt<'_>, (): ()) -> (&DefIdSet, &[Co
} }
if tcx.sess.opts.unstable_opts.print_mono_items.is_some() { if tcx.sess.opts.unstable_opts.print_mono_items.is_some() {
let mut item_to_cgus: FxHashMap<_, Vec<_>> = Default::default(); let mut item_to_cgus: UnordMap<_, Vec<_>> = Default::default();
for cgu in codegen_units { for cgu in codegen_units {
for (&mono_item, &data) in cgu.items() { for (&mono_item, &data) in cgu.items() {
@ -1240,7 +1244,7 @@ fn dump_mono_items_stats<'tcx>(
let mut file = BufWriter::new(file); let mut file = BufWriter::new(file);
// Gather instantiated mono items grouped by def_id // Gather instantiated mono items grouped by def_id
let mut items_per_def_id: FxHashMap<_, Vec<_>> = Default::default(); let mut items_per_def_id: FxIndexMap<_, Vec<_>> = Default::default();
for cgu in codegen_units { for cgu in codegen_units {
cgu.items() cgu.items()
.keys() .keys()