From 1e5b4594e1aad47fccd855fa54dd40a84d07dbdf Mon Sep 17 00:00:00 2001 From: Michael Woerister Date: Thu, 19 Mar 2020 17:14:02 +0100 Subject: [PATCH] Make rustc respect the `-C codegen-units` flag in incremental mode. Before this commit `-C codegen-units` would just be silently ignored if `-C incremental` was specified too. After this commit one can control the number of codegen units generated during incremental compilation. The default is rather high at 256, so most crates won't see a difference unless explicitly opting into a lower count. --- src/librustc_mir/monomorphize/partitioning.rs | 84 +++++++++++++------ src/librustc_session/session.rs | 7 ++ 2 files changed, 67 insertions(+), 24 deletions(-) diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs index ba01e370aae..9068c0541a4 100644 --- a/src/librustc_mir/monomorphize/partitioning.rs +++ b/src/librustc_mir/monomorphize/partitioning.rs @@ -107,19 +107,11 @@ use rustc_middle::mir::mono::{InstantiationMode, MonoItem}; use rustc_middle::ty::print::characteristic_def_id_of_type; use rustc_middle::ty::query::Providers; use rustc_middle::ty::{self, DefIdTree, InstanceDef, TyCtxt}; -use rustc_span::symbol::Symbol; +use rustc_span::symbol::{Symbol, SymbolStr}; use crate::monomorphize::collector::InliningMap; use crate::monomorphize::collector::{self, MonoItemCollectionMode}; -pub enum PartitioningStrategy { - /// Generates one codegen unit per source-level module. - PerModule, - - /// Partition the whole crate into a fixed number of codegen units. - FixedUnitCount(usize), -} - // Anything we can't find a proper codegen unit for goes into this. 
fn fallback_cgu_name(name_builder: &mut CodegenUnitNameBuilder<'_>) -> Symbol { name_builder.build_cgu_name(LOCAL_CRATE, &["fallback"], Some("cgu")) @@ -128,7 +120,7 @@ fn fallback_cgu_name(name_builder: &mut CodegenUnitNameBuilder<'_>) -> Symbol { pub fn partition<'tcx, I>( tcx: TyCtxt<'tcx>, mono_items: I, - strategy: PartitioningStrategy, + max_cgu_count: usize, inlining_map: &InliningMap<'tcx>, ) -> Vec<CodegenUnit<'tcx>> where @@ -148,11 +140,10 @@ where debug_dump(tcx, "INITIAL PARTITIONING:", initial_partitioning.codegen_units.iter()); - // If the partitioning should produce a fixed count of codegen units, merge - // until that count is reached. - if let PartitioningStrategy::FixedUnitCount(count) = strategy { + // Merge until we have at most `max_cgu_count` codegen units. + { let _prof_timer = tcx.prof.generic_activity("cgu_partitioning_merge_cgus"); - merge_codegen_units(tcx, &mut initial_partitioning, count); + merge_codegen_units(tcx, &mut initial_partitioning, max_cgu_count); debug_dump(tcx, "POST MERGING:", initial_partitioning.codegen_units.iter()); } @@ -480,6 +471,10 @@ fn merge_codegen_units<'tcx>( // the stable sort below will keep everything nice and deterministic. codegen_units.sort_by_cached_key(|cgu| cgu.name().as_str()); + // This map keeps track of what got merged into what. + let mut cgu_contents: FxHashMap<Symbol, Vec<SymbolStr>> = + codegen_units.iter().map(|cgu| (cgu.name(), vec![cgu.name().as_str()])).collect(); + // Merge the two smallest codegen units until the target size is reached. 
while codegen_units.len() > target_cgu_count { // Sort small cgus to the back @@ -487,20 +482,67 @@ fn merge_codegen_units<'tcx>( let mut smallest = codegen_units.pop().unwrap(); let second_smallest = codegen_units.last_mut().unwrap(); + // Move the mono-items from `smallest` to `second_smallest` second_smallest.modify_size_estimate(smallest.size_estimate()); for (k, v) in smallest.items_mut().drain() { second_smallest.items_mut().insert(k, v); } + + // Record that `second_smallest` now contains all the stuff that was in + // `smallest` before. + let mut consumed_cgu_names = cgu_contents.remove(&smallest.name()).unwrap(); + cgu_contents.get_mut(&second_smallest.name()).unwrap().extend(consumed_cgu_names.drain(..)); + debug!( - "CodegenUnit {} merged in to CodegenUnit {}", + "CodegenUnit {} merged into CodegenUnit {}", smallest.name(), second_smallest.name() ); } let cgu_name_builder = &mut CodegenUnitNameBuilder::new(tcx); - for (index, cgu) in codegen_units.iter_mut().enumerate() { - cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index)); + + if tcx.sess.opts.incremental.is_some() { + // If we are doing incremental compilation, we want CGU names to + // reflect the path of the source level module they correspond to. + // For CGUs that contain the code of multiple modules because of the + // merging done above, we use a concatenation of the names of + // all contained CGUs. + let new_cgu_names: FxHashMap<Symbol, String> = cgu_contents + .into_iter() + // This `filter` makes sure we only update the name of CGUs that + // were actually modified by merging. + .filter(|(_, cgu_contents)| cgu_contents.len() > 1) + .map(|(current_cgu_name, cgu_contents)| { + let mut cgu_contents: Vec<&str> = cgu_contents.iter().map(|s| &s[..]).collect(); + + // Sort the names, so things are deterministic and easy to + // predict. 
+ cgu_contents.sort(); + + (current_cgu_name, cgu_contents.join("--")) + }) + .collect(); + + for cgu in codegen_units.iter_mut() { + if let Some(new_cgu_name) = new_cgu_names.get(&cgu.name()) { + if tcx.sess.opts.debugging_opts.human_readable_cgu_names { + cgu.set_name(Symbol::intern(&new_cgu_name)); + } else { + // If we don't require CGU names to be human-readable, we + // use a fixed length hash of the composite CGU name + // instead. + let new_cgu_name = CodegenUnit::mangle_name(&new_cgu_name); + cgu.set_name(Symbol::intern(&new_cgu_name)); + } + } + } + } else { + // If we are compiling non-incrementally we just generate simple CGU + // names containing an index. + for (index, cgu) in codegen_units.iter_mut().enumerate() { + cgu.set_name(numbered_codegen_unit_name(cgu_name_builder, index)); + } } } @@ -879,13 +921,7 @@ fn collect_and_partition_mono_items( let (codegen_units, _) = tcx.sess.time("partition_and_assert_distinct_symbols", || { sync::join( || { - let strategy = if tcx.sess.opts.incremental.is_some() { - PartitioningStrategy::PerModule - } else { - PartitioningStrategy::FixedUnitCount(tcx.sess.codegen_units()) - }; - - partition(tcx, items.iter().cloned(), strategy, &inlining_map) + partition(tcx, items.iter().cloned(), tcx.sess.codegen_units(), &inlining_map) .into_iter() .map(Arc::new) .collect::<Vec<_>>() diff --git a/src/librustc_session/session.rs b/src/librustc_session/session.rs index b3d75143c56..7f8b55d9d76 100644 --- a/src/librustc_session/session.rs +++ b/src/librustc_session/session.rs @@ -758,6 +758,13 @@ impl Session { return n as usize; } + // If incremental compilation is turned on, we default to a high number of + // codegen units in order to reduce the "collateral damage" small + // changes cause. + if self.opts.incremental.is_some() { + return 256; + } + // Why is 16 codegen units the default all the time? // // The main reason for enabling multiple codegen units by default is to