Share inline(never) generics across crates

This reduces code size and better respects programmer intent when marking a generic function inline(never). Previously such a marking was essentially ignored for generic functions, as we'd still inline them in remote crates.
2024-12-04 04:39:16 +00:00 · 2024-11-23 13:19:17 -05:00 · 2024-11-23 13:19:17 -05:00 · 4a216a25d1
commit 4a216a25d1
parent 39cb3386dd
26 changed files with 127 additions and 52 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -4137,6 +4137,7 @@ name = "rustc_monomorphize"
 version = "0.0.0"
 dependencies = [
 "rustc_abi",
+ "rustc_attr",
 "rustc_data_structures",
 "rustc_errors",
 "rustc_fluent_macro",
--- a/compiler/rustc_codegen_llvm/src/callee.rs
+++ b/compiler/rustc_codegen_llvm/src/callee.rs
@ -104,7 +104,10 @@ pub(crate) fn get_fn<'ll, 'tcx>(cx: &CodegenCx<'ll, 'tcx>, instance: Instance<'t

            let is_hidden = if is_generic {
                // This is a monomorphization of a generic function.
-                if !cx.tcx.sess.opts.share_generics() {
+                if !(cx.tcx.sess.opts.share_generics()
+                    || tcx.codegen_fn_attrs(instance_def_id).inline
+                        == rustc_attr::InlineAttr::Never)
+                {
                    // When not sharing generics, all instances are in the same
                    // crate and have hidden visibility.
                    true
--- a/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
+++ b/compiler/rustc_codegen_ssa/src/back/symbol_export.rs
@ -282,7 +282,7 @@ fn exported_symbols_provider_local(
        }));
    }

-    if tcx.sess.opts.share_generics() && tcx.local_crate_exports_generics() {
+    if tcx.local_crate_exports_generics() {
        use rustc_middle::mir::mono::{Linkage, MonoItem, Visibility};
        use rustc_middle::ty::InstanceKind;

@ -310,6 +310,16 @@ fn exported_symbols_provider_local(
                continue;
            }

+            if !tcx.sess.opts.share_generics() {
+                if tcx.codegen_fn_attrs(mono_item.def_id()).inline == rustc_attr::InlineAttr::Never
+                {
+                    // This is OK: we explicitly allow sharing inline(never) across crates even
+                    // without share-generics.
+                } else {
+                    continue;
+                }
+            }
+
            match *mono_item {
                MonoItem::Fn(Instance { def: InstanceKind::Item(def), args }) => {
                    if args.non_erasable_generics().next().is_some() {
--- a/compiler/rustc_middle/src/mir/mono.rs
+++ b/compiler/rustc_middle/src/mir/mono.rs
@ -111,6 +111,13 @@ impl<'tcx> MonoItem<'tcx> {
                    return InstantiationMode::GloballyShared { may_conflict: false };
                }

+                if let InlineAttr::Never = tcx.codegen_fn_attrs(instance.def_id()).inline
+                    && self.is_generic_fn()
+                {
+                    // Upgrade inline(never) to a globally shared instance.
+                    return InstantiationMode::GloballyShared { may_conflict: true };
+                }
+
                // At this point we don't have explicit linkage and we're an
                // inlined function. If we're inlining into all CGUs then we'll
                // be creating a local copy per CGU.
--- a/compiler/rustc_middle/src/ty/context.rs
+++ b/compiler/rustc_middle/src/ty/context.rs
@ -1946,8 +1946,6 @@ impl<'tcx> TyCtxt<'tcx> {

    #[inline]
    pub fn local_crate_exports_generics(self) -> bool {
-        debug_assert!(self.sess.opts.share_generics());
-
        self.crate_types().iter().any(|crate_type| {
            match crate_type {
                CrateType::Executable
--- a/compiler/rustc_middle/src/ty/instance.rs
+++ b/compiler/rustc_middle/src/ty/instance.rs
@ -190,19 +190,23 @@ impl<'tcx> Instance<'tcx> {
    /// This method already takes into account the global `-Zshare-generics`
    /// setting, always returning `None` if `share-generics` is off.
    pub fn upstream_monomorphization(&self, tcx: TyCtxt<'tcx>) -> Option<CrateNum> {
-        // If we are not in share generics mode, we don't link to upstream
-        // monomorphizations but always instantiate our own internal versions
-        // instead.
-        if !tcx.sess.opts.share_generics() {
-            return None;
-        }
-
        // If this is an item that is defined in the local crate, no upstream
        // crate can know about it/provide a monomorphization.
        if self.def_id().is_local() {
            return None;
        }

+        // If we are not in share generics mode, we don't link to upstream
+        // monomorphizations but always instantiate our own internal versions
+        // instead.
+        if !tcx.sess.opts.share_generics()
+            // However, if the def_id is marked inline(never), then it's fine to just reuse the
+            // upstream monomorphization.
+            && tcx.codegen_fn_attrs(self.def_id()).inline != rustc_attr::InlineAttr::Never
+        {
+            return None;
+        }
+
        // If this a non-generic instance, it cannot be a shared monomorphization.
        self.args.non_erasable_generics().next()?;

--- a/compiler/rustc_monomorphize/Cargo.toml
+++ b/compiler/rustc_monomorphize/Cargo.toml
@ -6,6 +6,7 @@ edition = "2021"
 [dependencies]
 # tidy-alphabetical-start
 rustc_abi = { path = "../rustc_abi" }
+rustc_attr = { path = "../rustc_attr" }
 rustc_data_structures = { path = "../rustc_data_structures" }
 rustc_errors = { path = "../rustc_errors" }
 rustc_fluent_macro = { path = "../rustc_fluent_macro" }
--- a/compiler/rustc_monomorphize/src/partitioning.rs
+++ b/compiler/rustc_monomorphize/src/partitioning.rs
@ -208,8 +208,8 @@ where
    // available to downstream crates. This depends on whether we are in
    // share-generics mode and whether the current crate can even have
    // downstream crates.
-    let export_generics =
-        cx.tcx.sess.opts.share_generics() && cx.tcx.local_crate_exports_generics();
+    let can_export_generics = cx.tcx.local_crate_exports_generics();
+    let always_export_generics = can_export_generics && cx.tcx.sess.opts.share_generics();

    let cgu_name_builder = &mut CodegenUnitNameBuilder::new(cx.tcx);
    let cgu_name_cache = &mut UnordMap::default();
@ -249,7 +249,8 @@ where
            cx.tcx,
            &mono_item,
            &mut can_be_internalized,
-            export_generics,
+            can_export_generics,
+            always_export_generics,
        );
        if visibility == Visibility::Hidden && can_be_internalized {
            internalization_candidates.insert(mono_item);
@ -739,12 +740,19 @@ fn mono_item_linkage_and_visibility<'tcx>(
    tcx: TyCtxt<'tcx>,
    mono_item: &MonoItem<'tcx>,
    can_be_internalized: &mut bool,
-    export_generics: bool,
+    can_export_generics: bool,
+    always_export_generics: bool,
 ) -> (Linkage, Visibility) {
    if let Some(explicit_linkage) = mono_item.explicit_linkage(tcx) {
        return (explicit_linkage, Visibility::Default);
    }
-    let vis = mono_item_visibility(tcx, mono_item, can_be_internalized, export_generics);
+    let vis = mono_item_visibility(
+        tcx,
+        mono_item,
+        can_be_internalized,
+        can_export_generics,
+        always_export_generics,
+    );
    (Linkage::External, vis)
 }

@ -767,7 +775,8 @@ fn mono_item_visibility<'tcx>(
    tcx: TyCtxt<'tcx>,
    mono_item: &MonoItem<'tcx>,
    can_be_internalized: &mut bool,
-    export_generics: bool,
+    can_export_generics: bool,
+    always_export_generics: bool,
 ) -> Visibility {
    let instance = match mono_item {
        // This is pretty complicated; see below.
@ -826,7 +835,11 @@ fn mono_item_visibility<'tcx>(

    // Upstream `DefId` instances get different handling than local ones.
    let Some(def_id) = def_id.as_local() else {
-        return if export_generics && is_generic {
+        return if is_generic
+            && (always_export_generics
+                || (can_export_generics
+                    && tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never))
+        {
            // If it is an upstream monomorphization and we export generics, we must make
            // it available to downstream crates.
            *can_be_internalized = false;
@ -837,7 +850,10 @@ fn mono_item_visibility<'tcx>(
    };

    if is_generic {
-        if export_generics {
+        if always_export_generics
+            || (can_export_generics
+                && tcx.codegen_fn_attrs(def_id).inline == rustc_attr::InlineAttr::Never)
+        {
            if tcx.is_unreachable_local_definition(def_id) {
                // This instance cannot be used from another crate.
                Visibility::Hidden
--- a/library/alloc/src/raw_vec.rs
+++ b/library/alloc/src/raw_vec.rs
@ -757,7 +757,9 @@ impl<A: Allocator> RawVecInner<A> {
    }
 }

-#[inline(never)]
+// Not marked inline(never) since we want optimizers to be able to observe the specifics of this
+// function; see tests/codegen/vec-reserve-extend.rs.
+#[cold]
 fn finish_grow<A>(
    new_layout: Layout,
    current_memory: Option<(NonNull<u8>, Layout)>,
--- a/library/std/src/lib.rs
+++ b/library/std/src/lib.rs
@ -362,6 +362,7 @@
 #![feature(strict_provenance_atomic_ptr)]
 #![feature(sync_unsafe_cell)]
 #![feature(ub_checks)]
+#![feature(used_with_arg)]
 // tidy-alphabetical-end
 //
 // Library features (alloc):
--- a/library/std/src/panicking.rs
+++ b/library/std/src/panicking.rs
@ -27,6 +27,22 @@ use crate::sys::backtrace;
 use crate::sys::stdio::panic_output;
 use crate::{fmt, intrinsics, process, thread};

+// This forces codegen of the function called by panic!() inside the std crate, rather than in
+// downstream crates. Primarily this is useful for rustc's codegen tests, which rely on noticing
+// complete removal of panic from generated IR. Since begin_panic is inline(never), it's only
+// codegen'd once per crate-graph so this pushes that to std rather than our codegen test crates.
+//
+// (See https://github.com/rust-lang/rust/pull/123244 for more info on why).
+//
+// If this is causing problems we can also modify those codegen tests to use a crate type like
+// cdylib which doesn't export "Rust" symbols to downstream linkage units.
+#[unstable(feature = "libstd_sys_internals", reason = "used by the panic! macro", issue = "none")]
+#[doc(hidden)]
+#[allow(dead_code)]
+#[used(compiler)]
+pub static EMPTY_PANIC: fn(&'static str) -> ! =
+    begin_panic::<&'static str> as fn(&'static str) -> !;
+
 // Binary interface to the panic runtime that the standard library depends on.
 //
 // The standard library is tagged with `#![needs_panic_runtime]` (introduced in
--- a/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs
+++ b/tests/codegen-units/partitioning/auxiliary/cgu_generic_function.rs
@ -11,10 +11,21 @@ pub fn foo<T>(x: T) -> (T, u32, i8) {
 #[inline(never)]
 fn bar<T>(x: T) -> (T, Struct) {
    let _ = not_exported_and_not_generic(0);
+    exported_and_generic::<u32>(0);
    (x, Struct(1))
 }

+pub static F: fn(u32) -> u32 = exported_and_generic::<u32>;
+
 // These should not contribute to the codegen items of other crates.
+
+// This is generic, but it's only instantiated with a u32 argument and that instantiation is present
+// in the local crate (see F above).
+#[inline(never)]
+pub fn exported_and_generic<T>(x: T) -> T {
+    x
+}
+
 #[inline(never)]
 pub fn exported_but_not_generic(x: i32) -> i64 {
    x as i64
--- a/tests/codegen/avr/avr-func-addrspace.rs
+++ b/tests/codegen/avr/avr-func-addrspace.rs
@ -86,7 +86,7 @@ pub extern "C" fn test() {

    // A call through the Fn trait must use address space 1.
    //
-    // CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait()
+    // CHECK: call{{.+}}addrspace(1) void @call_through_fn_trait({{.*}})
    call_through_fn_trait(&mut update_bar_value);

    // A call through a global variable must use address space 1.
--- a/tests/codegen/issues/issue-13018.rs
+++ b/tests/codegen/issues/issue-13018.rs
@ -2,7 +2,10 @@

 // A drop([...].clone()) sequence on an Rc should be a no-op
 // In particular, no call to __rust_dealloc should be emitted
-#![crate_type = "lib"]
+//
+// We use a cdylib since it's a leaf unit for Rust purposes, so it doesn't codegen
+// -Zshare-generics code.
+#![crate_type = "cdylib"]
 use std::rc::Rc;

 pub fn foo(t: &Rc<Vec<usize>>) {
--- a/tests/run-make/naked-symbol-visibility/a_rust_dylib.rs
+++ b/tests/run-make/naked-symbol-visibility/a_rust_dylib.rs
@ -1,4 +1,4 @@
-#![feature(naked_functions, asm_const, linkage)]
+#![feature(naked_functions, linkage)]
 #![crate_type = "dylib"]

 use std::arch::naked_asm;
@ -38,7 +38,7 @@ pub extern "C" fn public_vanilla() -> u32 {

 #[naked]
 #[no_mangle]
-pub extern "C" fn public_naked() -> u32 {
+pub extern "C" fn public_naked_nongeneric() -> u32 {
    unsafe { naked_asm!("mov rax, 42", "ret") }
 }

--- a/tests/run-make/naked-symbol-visibility/rmake.rs
+++ b/tests/run-make/naked-symbol-visibility/rmake.rs
@ -17,10 +17,12 @@ fn main() {
    not_exported(&rdylib, "private_naked");

    global_function(&rdylib, "public_vanilla");
-    global_function(&rdylib, "public_naked");
+    global_function(&rdylib, "public_naked_nongeneric");

    not_exported(&rdylib, "public_vanilla_generic");
-    not_exported(&rdylib, "public_naked_generic");
+    // #[naked] functions are implicitly #[inline(never)], so they get shared regardless of
+    // -Zshare-generics.
+    global_function(&rdylib, "public_naked_generic");

    global_function(&rdylib, "vanilla_external_linkage");
    global_function(&rdylib, "naked_external_linkage");
--- a/tests/ui/panics/issue-47429-short-backtraces.rs
+++ b/tests/ui/panics/issue-47429-short-backtraces.rs
@ -6,6 +6,12 @@
 //@ check-run-results
 //@ exec-env:RUST_BACKTRACE=1

+// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
+// symbols.
+//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
+// And this is for differences between std with and without debuginfo.
+//@ normalize-stderr-test: "\n +at [^\n]+" -> ""
+
 //@ ignore-msvc see #62897 and `backtrace-debuginfo.rs` test
 //@ ignore-android FIXME #17520
 //@ ignore-openbsd no support for libbacktrace without filename
@ -14,11 +20,6 @@
 //@ ignore-sgx no subprocess support
 //@ ignore-fuchsia Backtraces not symbolized

-// NOTE(eddyb) output differs between symbol mangling schemes
-//@ revisions: legacy v0
-//@ [legacy] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
-//@     [v0] compile-flags: -Csymbol-mangling-version=v0
-
 fn main() {
    panic!()
 }
--- a/tests/ui/panics/issue-47429-short-backtraces.legacy.run.stderr
+++ b/tests/ui/panics/issue-47429-short-backtraces.legacy.run.stderr
@ -1,4 +1,4 @@
-thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:23:5:
+thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:24:5:
 explicit panic
 stack backtrace:
   0: std::panicking::begin_panic
--- a/tests/ui/panics/issue-47429-short-backtraces.v0.run.stderr
+++ b/tests/ui/panics/issue-47429-short-backtraces.v0.run.stderr
@ -1,6 +0,0 @@
-thread 'main' panicked at $DIR/issue-47429-short-backtraces.rs:23:5:
-explicit panic
-stack backtrace:
-   0: std::panicking::begin_panic::<&str>
-   1: issue_47429_short_backtraces::main
-note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
--- a/tests/ui/panics/runtime-switch.rs
+++ b/tests/ui/panics/runtime-switch.rs
@ -6,6 +6,12 @@
 //@ check-run-results
 //@ exec-env:RUST_BACKTRACE=0

+// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
+// symbols.
+//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
+// And this is for differences between std with and without debuginfo.
+//@ normalize-stderr-test: "\n +at [^\n]+" -> ""
+
 //@ ignore-msvc see #62897 and `backtrace-debuginfo.rs` test
 //@ ignore-android FIXME #17520
 //@ ignore-openbsd no support for libbacktrace without filename
@ -14,11 +20,6 @@
 //@ ignore-sgx no subprocess support
 //@ ignore-fuchsia Backtrace not symbolized

-// NOTE(eddyb) output differs between symbol mangling schemes
-//@ revisions: legacy v0
-//@ [legacy] compile-flags: -Zunstable-options -Csymbol-mangling-version=legacy
-//@     [v0] compile-flags: -Csymbol-mangling-version=v0
-
 #![feature(panic_backtrace_config)]

 fn main() {
--- a/tests/ui/panics/runtime-switch.legacy.run.stderr
+++ b/tests/ui/panics/runtime-switch.legacy.run.stderr
@ -1,4 +1,4 @@
-thread 'main' panicked at $DIR/runtime-switch.rs:26:5:
+thread 'main' panicked at $DIR/runtime-switch.rs:27:5:
 explicit panic
 stack backtrace:
   0: std::panicking::begin_panic
--- a/tests/ui/panics/runtime-switch.v0.run.stderr
+++ b/tests/ui/panics/runtime-switch.v0.run.stderr
@ -1,6 +0,0 @@
-thread 'main' panicked at $DIR/runtime-switch.rs:26:5:
-explicit panic
-stack backtrace:
-   0: std::panicking::begin_panic::<&str>
-   1: runtime_switch::main
-note: Some details are omitted, run with `RUST_BACKTRACE=full` for a verbose backtrace.
--- a/tests/ui/panics/short-ice-remove-middle-frames-2.rs
+++ b/tests/ui/panics/short-ice-remove-middle-frames-2.rs
@ -9,6 +9,11 @@
 //@ ignore-sgx Backtraces not symbolized
 //@ ignore-fuchsia Backtraces not symbolized
 //@ ignore-msvc the `__rust_{begin,end}_short_backtrace` symbols aren't reliable.
+// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
+// symbols.
+//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
+// And this is for differences between std with and without debuginfo.
+//@ normalize-stderr-test: "\n +at [^\n]+" -> ""

 /// This test case make sure that we can have multiple pairs of `__rust_{begin,end}_short_backtrace`

--- a/tests/ui/panics/short-ice-remove-middle-frames-2.run.stderr
+++ b/tests/ui/panics/short-ice-remove-middle-frames-2.run.stderr
@ -1,4 +1,4 @@
-thread 'main' panicked at $DIR/short-ice-remove-middle-frames-2.rs:56:5:
+thread 'main' panicked at $DIR/short-ice-remove-middle-frames-2.rs:61:5:
 debug!!!
 stack backtrace:
   0: std::panicking::begin_panic
--- a/tests/ui/panics/short-ice-remove-middle-frames.rs
+++ b/tests/ui/panics/short-ice-remove-middle-frames.rs
@ -10,6 +10,11 @@
 //@ ignore-fuchsia Backtraces not symbolized
 //@ ignore-msvc the `__rust_{begin,end}_short_backtrace` symbols aren't reliable.

+// This is needed to avoid test output differences across std being built with v0 symbols vs legacy
+// symbols.
+//@ normalize-stderr-test: "begin_panic::<&str>" -> "begin_panic"
+// And this is for differences between std with and without debuginfo.
+//@ normalize-stderr-test: "\n +at [^\n]+" -> ""

 #[inline(never)]
 fn __rust_begin_short_backtrace<T, F: FnOnce() -> T>(f: F) -> T {
--- a/tests/ui/panics/short-ice-remove-middle-frames.run.stderr
+++ b/tests/ui/panics/short-ice-remove-middle-frames.run.stderr
@ -1,4 +1,4 @@
-thread 'main' panicked at $DIR/short-ice-remove-middle-frames.rs:52:5:
+thread 'main' panicked at $DIR/short-ice-remove-middle-frames.rs:57:5:
 debug!!!
 stack backtrace:
   0: std::panicking::begin_panic