mirror of
https://github.com/rust-lang/rust.git
synced 2024-11-22 14:55:26 +00:00
Auto merge of #76830 - Artoria2e5:tune, r=nagisa
Pass tune-cpu to LLVM I think this is how it should work... See https://internals.rust-lang.org/t/expose-tune-cpu-from-llvm/13088 for the background. Or the documentation diff.
This commit is contained in:
commit
f54072bb81
@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
|
||||||
|
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
|
||||||
|
let tune_cpu = SmallCStr::new(tune);
|
||||||
|
llvm::AddFunctionAttrStringValue(
|
||||||
|
llfn,
|
||||||
|
llvm::AttributePlace::Function,
|
||||||
|
const_cstr!("tune-cpu"),
|
||||||
|
tune_cpu.as_c_str(),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Sets the `NonLazyBind` LLVM attribute on a given function,
|
/// Sets the `NonLazyBind` LLVM attribute on a given function,
|
||||||
/// assuming the codegen options allow skipping the PLT.
|
/// assuming the codegen options allow skipping the PLT.
|
||||||
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
|
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
|
||||||
@ -303,6 +315,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
|
|||||||
// Without this, ThinLTO won't inline Rust functions into Clang generated
|
// Without this, ThinLTO won't inline Rust functions into Clang generated
|
||||||
// functions (because Clang annotates functions this way too).
|
// functions (because Clang annotates functions this way too).
|
||||||
apply_target_cpu_attr(cx, llfn);
|
apply_target_cpu_attr(cx, llfn);
|
||||||
|
// tune-cpu is only conveyed through the attribute for our purpose.
|
||||||
|
// The target doesn't care; the subtarget reads our attribute.
|
||||||
|
apply_tune_cpu_attr(cx, llfn);
|
||||||
|
|
||||||
let features = llvm_target_features(cx.tcx.sess)
|
let features = llvm_target_features(cx.tcx.sess)
|
||||||
.map(|s| s.to_string())
|
.map(|s| s.to_string())
|
||||||
|
@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
|
fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
|
||||||
attributes::apply_target_cpu_attr(self, llfn)
|
attributes::apply_target_cpu_attr(self, llfn);
|
||||||
|
attributes::apply_tune_cpu_attr(self, llfn);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn create_used_variable(&self) {
|
fn create_used_variable(&self) {
|
||||||
|
@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
|
|||||||
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
|
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
|
||||||
llvm_util::target_cpu(sess)
|
llvm_util::target_cpu(sess)
|
||||||
}
|
}
|
||||||
|
/// Returns the tuning CPU name for `sess`, if any, by delegating to
/// `llvm_util::tune_cpu`.
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
|
||||||
|
llvm_util::tune_cpu(sess)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl WriteBackendMethods for LlvmCodegenBackend {
|
impl WriteBackendMethods for LlvmCodegenBackend {
|
||||||
|
@ -202,11 +202,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn target_cpu(sess: &Session) -> &str {
|
fn handle_native(name: &str) -> &str {
|
||||||
let name = match sess.opts.cg.target_cpu {
|
|
||||||
Some(ref s) => &**s,
|
|
||||||
None => &*sess.target.target.options.cpu,
|
|
||||||
};
|
|
||||||
if name != "native" {
|
if name != "native" {
|
||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
@ -217,3 +213,19 @@ pub fn target_cpu(sess: &Session) -> &str {
|
|||||||
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
|
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn target_cpu(sess: &Session) -> &str {
|
||||||
|
let name = match sess.opts.cg.target_cpu {
|
||||||
|
Some(ref s) => &**s,
|
||||||
|
None => &*sess.target.target.options.cpu,
|
||||||
|
};
|
||||||
|
|
||||||
|
handle_native(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn tune_cpu(sess: &Session) -> Option<&str> {
|
||||||
|
match sess.opts.debugging_opts.tune_cpu {
|
||||||
|
Some(ref s) => Some(handle_native(&**s)),
|
||||||
|
None => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -124,4 +124,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
|
|||||||
opt_level: config::OptLevel,
|
opt_level: config::OptLevel,
|
||||||
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
|
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
|
||||||
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
|
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
|
||||||
|
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
|
||||||
}
|
}
|
||||||
|
@ -585,6 +585,7 @@ fn test_debugging_options_tracking_hash() {
|
|||||||
tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
|
tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
|
||||||
tracked!(teach, true);
|
tracked!(teach, true);
|
||||||
tracked!(thinlto, Some(true));
|
tracked!(thinlto, Some(true));
|
||||||
|
tracked!(tune_cpu, Some(String::from("abc")));
|
||||||
tracked!(tls_model, Some(TlsModel::GeneralDynamic));
|
tracked!(tls_model, Some(TlsModel::GeneralDynamic));
|
||||||
tracked!(treat_err_as_bug, Some(1));
|
tracked!(treat_err_as_bug, Some(1));
|
||||||
tracked!(unleash_the_miri_inside_of_you, true);
|
tracked!(unleash_the_miri_inside_of_you, true);
|
||||||
|
@ -1078,6 +1078,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
|
|||||||
"show extended diagnostic help (default: no)"),
|
"show extended diagnostic help (default: no)"),
|
||||||
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
|
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
|
||||||
"set the current terminal width"),
|
"set the current terminal width"),
|
||||||
|
tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
|
||||||
|
"select processor to schedule for (`rustc --print target-cpus` for details)"),
|
||||||
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
|
||||||
"enable ThinLTO when possible"),
|
"enable ThinLTO when possible"),
|
||||||
// We default to 1 here since we want to behave like
|
// We default to 1 here since we want to behave like
|
||||||
|
@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
|
|||||||
This instructs `rustc` to generate code specifically for a particular processor.
|
This instructs `rustc` to generate code specifically for a particular processor.
|
||||||
|
|
||||||
You can run `rustc --print target-cpus` to see the valid options to pass
|
You can run `rustc --print target-cpus` to see the valid options to pass
|
||||||
here. Additionally, `native` can be passed to use the processor of the host
|
here. Each target has a default base CPU. Special values include:
|
||||||
machine. Each target has a default base CPU.
|
|
||||||
|
* `native` can be passed to use the processor of the host machine.
|
||||||
|
* `generic` refers to an LLVM target with minimal features but modern tuning.
|
||||||
|
|
||||||
## target-feature
|
## target-feature
|
||||||
|
|
||||||
@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
|
|||||||
Each target and [`target-cpu`](#target-cpu) has a default set of enabled
|
Each target and [`target-cpu`](#target-cpu) has a default set of enabled
|
||||||
features.
|
features.
|
||||||
|
|
||||||
|
## tune-cpu
|
||||||
|
|
||||||
|
This instructs `rustc` to schedule code specifically for a particular
|
||||||
|
processor. This does not affect the compatibility (instruction sets or ABI),
|
||||||
|
but should make your code slightly more efficient on the selected CPU.
|
||||||
|
|
||||||
|
The valid options are the same as those for [`target-cpu`](#target-cpu).
|
||||||
|
The default is `None`, in which case LLVM tunes for the CPU selected by `target-cpu`.
|
||||||
|
|
||||||
|
This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
|
||||||
|
|
||||||
|
Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
|
||||||
|
effective only for x86 targets.
|
||||||
|
|
||||||
[option-emit]: ../command-line-arguments.md#option-emit
|
[option-emit]: ../command-line-arguments.md#option-emit
|
||||||
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize
|
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize
|
||||||
[profile-guided optimization]: ../profile-guided-optimization.md
|
[profile-guided optimization]: ../profile-guided-optimization.md
|
||||||
|
21
src/test/codegen/tune-cpu-on-functions.rs
Normal file
21
src/test/codegen/tune-cpu-on-functions.rs
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
// This test makes sure that functions get annotated with the proper
|
||||||
|
// "tune-cpu" attribute in LLVM.
|
||||||
|
|
||||||
|
// no-prefer-dynamic
|
||||||
|
// ignore-tidy-linelength
|
||||||
|
// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
|
||||||
|
|
||||||
|
#![crate_type = "staticlib"]
|
||||||
|
|
||||||
|
// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
|
||||||
|
#[no_mangle]
|
||||||
|
pub extern fn exported() {
|
||||||
|
not_exported();
|
||||||
|
}
|
||||||
|
|
||||||
|
// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
|
||||||
|
// CHECK-NEXT: ; Function Attrs:
|
||||||
|
// CHECK-NEXT: define {{.*}}() {{.*}} #0
|
||||||
|
// Private, empty function; it is called from `exported` above.
fn not_exported() {}
|
||||||
|
|
||||||
|
// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"
|
Loading…
Reference in New Issue
Block a user