From d6156e8fe5619143c687983d3ffa5b7ccc37c77e Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Mon, 8 Jun 2020 09:02:57 -0700 Subject: [PATCH] Change how compiler-builtins gets many CGUs This commit intends to fix an accidental regression from #70846. The goal of #70846 was to build compiler-builtins with a maximal number of CGUs to ensure that each module in the source corresponds to an object file. This high degree of control for compiler-builtins is desirable to ensure that there's at most one exported symbol per CGU, ideally enabling compiler-builtins to not conflict with the system libgcc as often. In #70846, however, only part of the compiler understands that compiler-builtins is built with many CGUs. The rest of the compiler thinks it's building with `sess.codegen_units()`. Notably the calculation of `sess.lto()` consults `sess.codegen_units()`, which when there's only one CGU it disables ThinLTO. This means that compiler-builtins is built without ThinLTO, which is quite harmful to performance! This is the root of the cause from #73135 where intrinsics were found to not be inlining trivial functions. The fix applied in this commit is to remove the special-casing of compiler-builtins in the compiler. Instead the build system is now responsible for special-casing compiler-builtins. It doesn't know exactly how many CGUs will be needed but it passes a large number that is assumed to be much greater than the number of source-level modules needed. After reading the various locations in the compiler source, this seemed like the best solution rather than adding more and more special casing in the compiler for compiler-builtins. Closes #73135 --- Cargo.toml | 13 ++++++ src/librustc_mir/monomorphize/partitioning.rs | 9 +---- .../partitioning/compiler-builtins.rs | 40 ------------------- 3 files changed, 14 insertions(+), 48 deletions(-) delete mode 100644 src/test/codegen-units/partitioning/compiler-builtins.rs diff --git a/Cargo.toml b/Cargo.toml index f2177a99a9b..f10d539d829 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,19 @@ debug-assertions = false debug = false debug-assertions = false +[profile.release.package.compiler_builtins] +# For compiler-builtins we always use a high number of codegen units. +# The goal here is to place every single intrinsic into its own object +# file to avoid symbol clashes with the system libgcc if possible. Note +# that this number doesn't actually produce this many object files, we +# just don't create more than this number of object files. +# +# It's a bit of a bummer that we have to pass this here, unfortunately. +# Ideally this would be specified through an env var to Cargo so Cargo +# knows how many CGUs are for this specific crate, but for now +# per-crate configuration isn't specifiable in the environment. +codegen-units = 10000 + # We want the RLS to use the version of Cargo that we've got vendored in this # repository to ensure that the same exact version of Cargo is used by both the # RLS and the Cargo binary itself. The RLS depends on Cargo as a git repository diff --git a/src/librustc_mir/monomorphize/partitioning.rs b/src/librustc_mir/monomorphize/partitioning.rs index db1ea72c0a5..a945c1d626a 100644 --- a/src/librustc_mir/monomorphize/partitioning.rs +++ b/src/librustc_mir/monomorphize/partitioning.rs @@ -454,18 +454,11 @@ fn default_visibility(tcx: TyCtxt<'_>, id: DefId, is_generic: bool) -> Visibilit fn merge_codegen_units<'tcx>( tcx: TyCtxt<'tcx>, initial_partitioning: &mut PreInliningPartitioning<'tcx>, - mut target_cgu_count: usize, + target_cgu_count: usize, ) { assert!(target_cgu_count >= 1); let codegen_units = &mut initial_partitioning.codegen_units; - if tcx.is_compiler_builtins(LOCAL_CRATE) { - // Compiler builtins require some degree of control over how mono items - // are partitioned into compilation units. Provide it by keeping the - // original partitioning when compiling the compiler builtins crate. - target_cgu_count = codegen_units.len(); - } - // Note that at this point in time the `codegen_units` here may not be in a // deterministic order (but we know they're deterministically the same set). // We want this merging to produce a deterministic ordering of codegen units diff --git a/src/test/codegen-units/partitioning/compiler-builtins.rs b/src/test/codegen-units/partitioning/compiler-builtins.rs deleted file mode 100644 index 25195743b04..00000000000 --- a/src/test/codegen-units/partitioning/compiler-builtins.rs +++ /dev/null @@ -1,40 +0,0 @@ -// Verifies that during compiler_builtins compilation the codegen units are kept -// unmerged. Even when only a single codegen unit is requested with -Ccodegen-units=1. -// -// compile-flags: -Zprint-mono-items=eager -Ccodegen-units=1 - -#![compiler_builtins] -#![crate_type="lib"] -#![feature(compiler_builtins)] - -mod atomics { - //~ MONO_ITEM fn compiler_builtins::atomics[0]::sync_1[0] @@ compiler_builtins-cgu.0[External] - #[no_mangle] - pub extern "C" fn sync_1() {} - - //~ MONO_ITEM fn compiler_builtins::atomics[0]::sync_2[0] @@ compiler_builtins-cgu.0[External] - #[no_mangle] - pub extern "C" fn sync_2() {} - - //~ MONO_ITEM fn compiler_builtins::atomics[0]::sync_3[0] @@ compiler_builtins-cgu.0[External] - #[no_mangle] - pub extern "C" fn sync_3() {} -} - -mod x { - //~ MONO_ITEM fn compiler_builtins::x[0]::x[0] @@ compiler_builtins-cgu.1[External] - #[no_mangle] - pub extern "C" fn x() {} -} - -mod y { - //~ MONO_ITEM fn compiler_builtins::y[0]::y[0] @@ compiler_builtins-cgu.2[External] - #[no_mangle] - pub extern "C" fn y() {} -} - -mod z { - //~ MONO_ITEM fn compiler_builtins::z[0]::z[0] @@ compiler_builtins-cgu.3[External] - #[no_mangle] - pub extern "C" fn z() {} -}