Pass tune-cpu to LLVM

I think this is how it should work...
This commit is contained in:
Mingye Wang 2020-09-17 17:39:26 +08:00 committed by Mingye Wang
parent 285fc7d704
commit a35a93f09c
9 changed files with 80 additions and 8 deletions

View File

@ -194,6 +194,18 @@ pub fn apply_target_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
);
}
pub fn apply_tune_cpu_attr(cx: &CodegenCx<'ll, '_>, llfn: &'ll Value) {
if let Some(tune) = llvm_util::tune_cpu(cx.tcx.sess) {
let tune_cpu = SmallCStr::new(tune);
llvm::AddFunctionAttrStringValue(
llfn,
llvm::AttributePlace::Function,
const_cstr!("tune-cpu"),
tune_cpu.as_c_str(),
);
}
}
/// Sets the `NonLazyBind` LLVM attribute on a given function,
/// assuming the codegen options allow skipping the PLT.
pub fn non_lazy_bind(sess: &Session, llfn: &'ll Value) {
@ -300,6 +312,9 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
// Without this, ThinLTO won't inline Rust functions into Clang generated
// functions (because Clang annotates functions this way too).
apply_target_cpu_attr(cx, llfn);
// tune-cpu is only conveyed through the attribute for our purpose.
// The target doesn't care; the subtarget reads our attribute.
apply_tune_cpu_attr(cx, llfn);
let features = llvm_target_features(cx.tcx.sess)
.map(|s| s.to_string())

View File

@ -417,7 +417,8 @@ impl MiscMethods<'tcx> for CodegenCx<'ll, 'tcx> {
}
fn apply_target_cpu_attr(&self, llfn: &'ll Value) {
attributes::apply_target_cpu_attr(self, llfn)
attributes::apply_target_cpu_attr(self, llfn);
attributes::apply_tune_cpu_attr(self, llfn);
}
fn create_used_variable(&self) {

View File

@ -116,6 +116,9 @@ impl ExtraBackendMethods for LlvmCodegenBackend {
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str {
llvm_util::target_cpu(sess)
}
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> {
llvm_util::tune_cpu(sess)
}
}
impl WriteBackendMethods for LlvmCodegenBackend {

View File

@ -351,11 +351,7 @@ pub(crate) fn print(req: PrintRequest, sess: &Session) {
}
}
pub fn target_cpu(sess: &Session) -> &str {
let name = match sess.opts.cg.target_cpu {
Some(ref s) => &**s,
None => &*sess.target.target.options.cpu,
};
fn handle_native(name: &str) -> &str {
if name != "native" {
return name;
}
@ -366,3 +362,19 @@ pub fn target_cpu(sess: &Session) -> &str {
str::from_utf8(slice::from_raw_parts(ptr as *const u8, len)).unwrap()
}
}
pub fn target_cpu(sess: &Session) -> &str {
let name = match sess.opts.cg.target_cpu {
Some(ref s) => &**s,
None => &*sess.target.target.options.cpu,
};
handle_native(name)
}
pub fn tune_cpu(sess: &Session) -> Option<&str> {
match sess.opts.debugging_opts.tune_cpu {
Some(ref s) => Some(handle_native(&**s)),
None => None,
}
}

View File

@ -116,4 +116,5 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se
opt_level: config::OptLevel,
) -> Arc<dyn Fn() -> Result<Self::TargetMachine, String> + Send + Sync>;
fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str;
fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>;
}

View File

@ -583,6 +583,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(symbol_mangling_version, SymbolManglingVersion::V0);
tracked!(teach, true);
tracked!(thinlto, Some(true));
tracked!(tune_cpu, Some(String::from("abc")));
tracked!(tls_model, Some(TlsModel::GeneralDynamic));
tracked!(treat_err_as_bug, Some(1));
tracked!(unleash_the_miri_inside_of_you, true);

View File

@ -1079,6 +1079,8 @@ options! {DebuggingOptions, DebuggingSetter, basic_debugging_options,
"show extended diagnostic help (default: no)"),
terminal_width: Option<usize> = (None, parse_opt_uint, [UNTRACKED],
"set the current terminal width"),
tune_cpu: Option<String> = (None, parse_opt_string, [TRACKED],
"select processor to schedule for (`rustc --print target-cpus` for details)"),
thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable ThinLTO when possible"),
// We default to 1 here since we want to behave like

View File

@ -497,8 +497,10 @@ point instructions in software. It takes one of the following values:
This instructs `rustc` to generate code specifically for a particular processor.
You can run `rustc --print target-cpus` to see the valid options to pass
here. Additionally, `native` can be passed to use the processor of the host
machine. Each target has a default base CPU.
here. Each target has a default base CPU. Special values include:
* `native` can be passed to use the processor of the host machine.
* `generic` refers to an LLVM target with minimal features but modern tuning.
## target-feature
@ -530,6 +532,20 @@ This also supports the feature `+crt-static` and `-crt-static` to control
Each target and [`target-cpu`](#target-cpu) has a default set of enabled
features.
## tune-cpu
This instructs `rustc` to schedule code specifically for a particular
processor. This does not affect the compatibility (instruction sets or ABI),
but should make your code slightly more efficient on the selected CPU.
The valid options are the same as those for [`target-cpu`](#target-cpu).
The default is `None`, which LLVM translates as the `target-cpu`.
This is an unstable option. Use `-Z tune-cpu=machine` to specify a value.
Due to limitations in LLVM (12.0.0-git9218f92), this option is currently
effective only for x86 targets.
[option-emit]: ../command-line-arguments.md#option-emit
[option-o-optimize]: ../command-line-arguments.md#option-o-optimize
[profile-guided optimization]: ../profile-guided-optimization.md

View File

@ -0,0 +1,21 @@
// This test makes sure that functions get annotated with the proper
// "tune-cpu" attribute in LLVM.
// no-prefer-dynamic
// ignore-tidy-linelength
// compile-flags: -C no-prepopulate-passes -C panic=abort -C linker-plugin-lto -Cpasses=name-anon-globals -Z tune-cpu=generic
#![crate_type = "staticlib"]
// CHECK-LABEL: define {{.*}} @exported() {{.*}} #0
#[no_mangle]
pub extern fn exported() {
not_exported();
}
// CHECK-LABEL: ; tune_cpu_on_functions::not_exported
// CHECK-NEXT: ; Function Attrs:
// CHECK-NEXT: define {{.*}}() {{.*}} #0
fn not_exported() {}
// CHECK: attributes #0 = {{.*}} "tune-cpu"="{{.*}}"