Auto merge of #76830 - Artoria2e5:tune, r=nagisa

Pass tune-cpu to LLVM. I think this is how it should work... See https://internals.rust-lang.org/t/expose-tune-cpu-from-llvm/13088 for the background, or the documentation diff.
2024-11-22 14:55:26 +00:00 · 2020-10-13 02:49:00 +00:00 · 2020-10-13 02:49:00 +00:00 · f54072bb81
commit f54072bb81
parent afb4514c09 a35a93f09c
9 changed files with 80 additions and 8 deletions
--- a/compiler/rustc_codegen_llvm/src/attributes.rs
+++ b/compiler/rustc_codegen_llvm/src/attributes.rs
@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
    );
 }
 /// Sets the `"tune-cpu"` string attribute on `llfn` when a tuning CPU is configured.
 ///
 /// The CPU name comes from `llvm_util::tune_cpu` for the current session. When
 /// that returns `None`, the function is left untouched.
 pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
    let cpu_name = match llvm_util::tune_cpu(cx.tcx.sess) {
        Some(name) => name,
        // No tuning CPU requested: nothing to annotate.
        None => return,
    };
    // The attribute API takes C strings, so convert the name first.
    let cpu_cstr = SmallCStr::new(cpu_name);
    llvm::AddFunctionAttrStringValue(
        llfn,
        llvm::AttributePlace::Function,
        const_cstr!("tune-cpu"),
        cpu_cstr.as_c_str(),
    );
 }
 /// Sets the `NonLazyBind` LLVM attribute on a given function,
 /// assuming the codegen options allow skipping the PLT.
 pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
@ -303,6 +315,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
    // Without this, ThinLTO won't inline Rust functions into Clang generated
    // functions (because Clang annotates functions this way too).
    apply_target_cpu_attr(cx, llfn);
    // tune-cpu is only conveyed through the attribute for our purpose.
    // The target doesn't care; the subtarget reads our attribute.
    apply_tune_cpu_attr(cx, llfn);
    let features = llvm_target_features(cx.tcx.sess)
        .map(|s| s.to_string())
--- a/compiler/rustc_codegen_llvm/src/context.rs
+++ b/compiler/rustc_codegen_llvm/src/context.rs
@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
    }
    fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
-        attributes::apply_target_cpu_attr(self, llfn)
+        attributes::apply_target_cpu_attr(self, llfn);
        attributes::apply_tune_cpu_attr(self, llfn);
    }
    fn create_used_variable(&self) {
--- a/compiler/rustc_codegen_llvm/src/lib.rs
+++ b/compiler/rustc_codegen_llvm/src/lib.rs
@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
    fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
        llvm_util::target_cpu(sess)
    }
    /// Returns the CPU name to tune for, as resolved by `llvm_util::tune_cpu`,
    /// or `None` when that function yields no name for this session.
    fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
        llvm_util::tune_cpu(sess)
    }
 }
 impl WriteBackendMethods for LlvmCodegenBackend {
--- a/compiler/rustc_codegen_llvm/src/llvm_util.rs
+++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs
@ -202,11 +202,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
    }
 }
-pub fn target_cpu(sess: &Session) -> &str {
+fn handle_native(name: &str) -> &str {
    let name = match sess.opts.cg.target_cpu {
        Some(ref s) => &**s,
        None => &*sess.target.target.options.cpu,
    };
    if name != "native" {
        return name;
    }
@ -217,3 +213,19 @@ pub fn target_cpu(sess: &Session) -> &str {
        str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
    }
 }
 /// Returns the name of the CPU to generate code for.
 ///
 /// The `target_cpu` codegen option takes precedence when it is set; otherwise
 /// the CPU from the session's target options is used. The chosen name is then
 /// passed through `handle_native`.
 pub fn target_cpu(sess: &Session) -> &str {
    let requested = sess.opts.cg.target_cpu.as_deref();
    let default_cpu = &*sess.target.target.options.cpu;
    handle_native(requested.unwrap_or(default_cpu))
 }
 pub fn tune_cpu(sess: &Session) -> Option<&str> {
    match sess.opts.debugging_opts.tune_cpu {
        Some(ref s) => Some(handle_native(&**s)),
        None => None,
    }
 }
--- a/compiler/rustc_codegen_ssa/src/traits/backend.rs
+++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs
@ -124,4 +124,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
        opt_level: config::OptLevel,
    ) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
    fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
    /// Name of the CPU the backend should tune generated code for, if one is configured.
    fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
 }
--- a/compiler/rustc_interface/src/tests.rs
+++ b/compiler/rustc_interface/src/tests.rs
@ -585,6 +585,7 @@ fn test_debugging_options_tracking_hash() {
    tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
    tracked!(teach, true);
    tracked!(thinlto, Some(true));
    tracked!(tune_cpu, Some(String::from("abc")));
    tracked!(tls_model, Some(TlsModel::GeneralDynamic));
    tracked!(treat_err_as_bug, Some(1));
    tracked!(unleash_the_miri_inside_of_you, true);
--- a/compiler/rustc_session/src/options.rs
+++ b/compiler/rustc_session/src/options.rs
@ -1078,6 +1078,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
        "show extended diagnostic help (default: no)"),
    terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
        "set the current terminal width"),
    tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
        "select processor to schedule for (`rustc --print target-cpus` for details)"),
    thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
        "enable ThinLTO when possible"),
    // We default to 1 here since we want to behave like
--- a/src/doc/rustc/src/codegen-options/index.md
+++ b/src/doc/rustc/src/codegen-options/index.md
@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
 This instructs `rustc` to generate code specifically for a particular processor.
 You can run `rustc --print target-cpus` to see the valid options to pass
-here. Additionally, `native` can be passed to use the processor of the host
+here. Each target has a default base CPU. Special values include:
-machine. Each target has a default base CPU.
+
 * `native` can be passed to use the processor of the host machine. 
 * `generic` refers to an LLVM target with minimal features but modern tuning.
 ## target-feature
@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
 Each target and [`target-cpu`](#target-cpu) has a default set of enabled
 features.
 ## tune-cpu
 This instructs `rustc` to schedule code specifically for a particular
 processor. This does not affect the compatibility (instruction sets or ABI),
 but should make your code slightly more efficient on the selected CPU.
 The valid options are the same as those for [`target-cpu`](#target-cpu).
 The default is `None`, in which case LLVM tunes for the selected `target-cpu`.
 This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
 Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
 effective only for x86 targets.
 [option-emit]: ../command-line-arguments.md#option-emit
 [option-o-optimize]: ../command-line-arguments.md#option-o-optimize
 [profile-guided optimization]: ../profile-guided-optimization.md
--- a/src/test/codegen/tune-cpu-on-functions.rs
+++ b/src/test/codegen/tune-cpu-on-functions.rs
@ -0,0 +1,21 @@
 // This test makes sure that functions get annotated with the proper
 // "tune-cpu" attribute in LLVM.
 // no-prefer-dynamic
 // ignore-tidy-linelength
 // compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
 #![crate_type = "staticlib"]
 // CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
 #[no_mangle]
 pub extern fn exported() {
    not_exported();
 }
 // CHECK-LABEL: ; tune_cpu_on_functions::not_exported
 // CHECK-NEXT: ; Function Attrs:
 // CHECK-NEXT: define {{.*}}() {{.*}} #0
 // Private, empty function that is only called from `exported`.
 fn not_exported() {}
 // CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"