Auto merge of #76830 - Artoria2e5:tune, r=nagisa

Pass tune-cpu to LLVM

I think this is how it should work...

See https://internals.rust-lang.org/t/expose-tune-cpu-from-llvm/13088 for the background. Or the documentation diff.
This commit is contained in:
bors 2020-10-13 02:49:00 +00:00
commit f54072bb81
9 changed files with 80 additions and 8 deletions

View File

@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
); );
} }
/// Attaches the `"tune-cpu"` string attribute to the function `llfn`.
///
/// The attribute value is whatever `llvm_util::tune_cpu` reports for the
/// current session. When that is `None`, the function is left untouched.
pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
    // Guard clause: nothing to annotate when no tuning CPU is available.
    let cpu_name = match llvm_util::tune_cpu(cx.tcx.sess) {
        Some(name) => name,
        None => return,
    };

    // The attribute API takes C strings, so convert the name first.
    let attr_value = SmallCStr::new(cpu_name);
    llvm::AddFunctionAttrStringValue(
        llfn,
        llvm::AttributePlace::Function,
        const_cstr!("tune-cpu"),
        attr_value.as_c_str(),
    );
}
/// Sets the `NonLazyBind` LLVM attribute on a given function, /// Sets the `NonLazyBind` LLVM attribute on a given function,
/// assuming the codegen options allow skipping the PLT. /// assuming the codegen options allow skipping the PLT.
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) { pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
@ -303,6 +315,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
// Without this, ThinLTO won't inline Rust functions into Clang generated // Without this, ThinLTO won't inline Rust functions into Clang generated
// functions (because Clang annotates functions this way too). // functions (because Clang annotates functions this way too).
apply_target_cpu_attr(cx, llfn); apply_target_cpu_attr(cx, llfn);
// tune-cpu is only conveyed through the attribute for our purpose.
// The target doesn't care; the subtarget reads our attribute.
apply_tune_cpu_attr(cx, llfn);
let features = llvm_target_features(cx.tcx.sess) let features = llvm_target_features(cx.tcx.sess)
.map(|s| s.to_string()) .map(|s| s.to_string())

View File

@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
} }
fn apply_target_cpu_attr(&self, llfn: &'ll Value) { fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
attributes::apply_target_cpu_attr(self, llfn) attributes::apply_target_cpu_attr(self, llfn);
attributes::apply_tune_cpu_attr(self, llfn);
} }
fn create_used_variable(&self) { fn create_used_variable(&self) {

View File

@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str { fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
llvm_util::target_cpu(sess) llvm_util::target_cpu(sess)
} }
/// Returns the CPU name to tune for in this session, if any.
///
/// Delegates directly to `llvm_util::tune_cpu`; its `None` result is passed
/// through unchanged.
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
llvm_util::tune_cpu(sess)
}
} }
impl WriteBackendMethods for LlvmCodegenBackend { impl WriteBackendMethods for LlvmCodegenBackend {

View File

@ -202,11 +202,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
} }
} }
pub fn target_cpu(sess: &Session) -> &str { fn handle_native(name: &str) -> &str {
let name = match sess.opts.cg.target_cpu {
Some(ref s) => &**s,
None => &*sess.target.target.options.cpu,
};
if name != "native" { if name != "native" {
return name; return name;
} }
@ -217,3 +213,19 @@ pub fn target_cpu(sess: &Session) -> &str {
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap() str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
} }
} }
/// Resolves the CPU name that code should be generated for.
///
/// The `target_cpu` codegen option (`sess.opts.cg`) takes precedence when it
/// is set; otherwise the CPU from the target's options is used. The chosen
/// name is passed through `handle_native` before being returned.
pub fn target_cpu(sess: &Session) -> &str {
    let requested = sess.opts.cg.target_cpu.as_deref();
    let target_default = &*sess.target.target.options.cpu;
    handle_native(requested.unwrap_or(target_default))
}
pub fn tune_cpu(sess: &Session) -> Option<&str> {
match sess.opts.debugging_opts.tune_cpu {
Some(ref s) => Some(handle_native(&**s)),
None => None,
}
}

View File

@ -124,4 +124,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
opt_level: config::OptLevel, opt_level: config::OptLevel,
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>; ) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str; fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
} }

View File

@ -585,6 +585,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(symbol_mangling_version, SymbolManglingVersion::V0); tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
tracked!(teach, true); tracked!(teach, true);
tracked!(thinlto, Some(true)); tracked!(thinlto, Some(true));
tracked!(tune_cpu, Some(String::from("abc")));
tracked!(tls_model, Some(TlsModel::GeneralDynamic)); tracked!(tls_model, Some(TlsModel::GeneralDynamic));
tracked!(treat_err_as_bug, Some(1)); tracked!(treat_err_as_bug, Some(1));
tracked!(unleash_the_miri_inside_of_you, true); tracked!(unleash_the_miri_inside_of_you, true);

View File

@ -1078,6 +1078,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"show extended diagnostic help (default: no)"), "show extended diagnostic help (default: no)"),
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED], terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
"set the current terminal width"), "set the current terminal width"),
tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
"select processor to schedule for (`rustc --print target-cpus` for details)"),
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED], thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable ThinLTO when possible"), "enable ThinLTO when possible"),
// We default to 1 here since we want to behave like // We default to 1 here since we want to behave like

View File

@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
This instructs `rustc` to generate code specifically for a particular processor. This instructs `rustc` to generate code specifically for a particular processor.
You can run `rustc --print target-cpus` to see the valid options to pass You can run `rustc --print target-cpus` to see the valid options to pass
here. Additionally, `native` can be passed to use the processor of the host here. Each target has a default base CPU. Special values include:
machine. Each target has a default base CPU.
* `native` can be passed to use the processor of the host machine.
* `generic` refers to an LLVM target with minimal features but modern tuning.
## target-feature ## target-feature
@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
Each target and [`target-cpu`](#target-cpu) has a default set of enabled Each target and [`target-cpu`](#target-cpu) has a default set of enabled
features. features.
## tune-cpu
This instructs `rustc` to schedule code specifically for a particular
processor. This does not affect the compatibility (instruction sets or ABI),
but should make your code slightly more efficient on the selected CPU.
The valid options are the same as those for [`target-cpu`](#target-cpu).
The default is `None`, in which case LLVM tunes for the CPU selected by `target-cpu`.
This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
effective only for x86 targets.
[option-emit]: ../command-line-arguments.md#option-emit [option-emit]: ../command-line-arguments.md#option-emit
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize [option-o-optimize]: ../command-line-arguments.md#option-o-optimize
[profile-guided optimization]: ../profile-guided-optimization.md [profile-guided optimization]: ../profile-guided-optimization.md

View File

@ -0,0 +1,21 @@
// This test makes sure that functions get annotated with the proper
// "tune-cpu" attribute in LLVM.
// no-prefer-dynamic
// ignore-tidy-linelength
// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
#![crate_type = "staticlib"]
// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
// Externally visible, unmangled entry point. It calls `not_exported`,
// presumably so that the private function is emitted as well and can be
// matched by the checks that follow -- TODO confirm.
#[no_mangle]
pub extern fn exported() {
not_exported();
}
// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
// CHECK-NEXT: ; Function Attrs:
// CHECK-NEXT: define {{.*}}() {{.*}} #0
// Private, empty function; only the attributes on its definition are checked.
fn not_exported() {}
// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"