Rollup merge of #87918 - mikebenfield:pr-afdo, r=nikic

Enable AutoFDO.

This largely involves implementing the options debug-info-for-profiling
and profile-sample-use and forwarding them on to LLVM.

AutoFDO can be used on x86-64 Linux like this:
rustc -O -Clink-arg='-Wl,--no-rosegment' -Zdebug-info-for-profiling main.rs -o main
perf record -b ./main
create_llvm_prof --binary=main --out=code.prof
rustc -O -Zprofile-sample-use=code.prof main.rs -o main2

Now `main2` will have feedback-directed optimization applied to it.

The create_llvm_prof tool can be obtained from this github repository:
https://github.com/google/autofdo

The option -Clink-arg='-Wl,--no-rosegment' is necessary to avoid lld
putting an extra RO segment before the executable code, which would make
the binary silently incompatible with create_llvm_prof.
This commit is contained in:
Jubilee 2021-10-07 20:26:09 -07:00 committed by GitHub
commit 6c2d4bf3f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 120 additions and 9 deletions

View File

@ -263,6 +263,10 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
attributes::emit_uwtable(llfn, true);
}
if cx.sess().opts.debugging_opts.profile_sample_use.is_some() {
llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile"));
}
// FIXME: none of these three functions interact with source level attributes.
set_frame_pointer_type(cx, llfn);
set_instrument_function(cx, llfn);

View File

@ -370,6 +370,13 @@ fn get_pgo_use_path(config: &ModuleConfig) -> Option<CString> {
.map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap())
}
/// Returns the path stored in `config.pgo_sample_use` as a `CString`, or
/// `None` when no path is set.
///
/// The path is converted with `to_string_lossy`, and the `unwrap` panics if
/// `CString::new` rejects the resulting bytes.
fn get_pgo_sample_use_path(config: &ModuleConfig) -> Option<CString> {
    let profile = config.pgo_sample_use.as_ref()?;
    let lossy = profile.to_string_lossy();
    Some(CString::new(lossy.as_bytes()).unwrap())
}
pub(crate) fn should_use_new_llvm_pass_manager(config: &ModuleConfig) -> bool {
// The new pass manager is enabled by default for LLVM >= 13.
// This matches Clang, which also enables it since Clang 13.
@ -389,6 +396,7 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
let is_lto = opt_stage == llvm::OptStage::ThinLTO || opt_stage == llvm::OptStage::FatLTO;
// Sanitizer instrumentation is only inserted during the pre-link optimization stage.
let sanitizer_options = if !is_lto {
@ -439,6 +447,8 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
config.instrument_coverage,
config.instrument_gcov,
pgo_sample_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
config.debug_info_for_profiling,
llvm_selfprofiler,
selfprofile_before_pass_callback,
selfprofile_after_pass_callback,
@ -544,6 +554,9 @@ pub(crate) unsafe fn optimize(
if config.instrument_coverage {
llvm::LLVMRustAddPass(mpm, find_pass("instrprof").unwrap());
}
if config.debug_info_for_profiling {
llvm::LLVMRustAddPass(mpm, find_pass("add-discriminators").unwrap());
}
add_sanitizer_passes(config, &mut extra_passes);
@ -1001,6 +1014,7 @@ pub unsafe fn with_llvm_pmb(
let inline_threshold = config.inline_threshold;
let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
llvm::LLVMRustConfigurePassManagerBuilder(
builder,
@ -1011,6 +1025,7 @@ pub unsafe fn with_llvm_pmb(
prepare_for_thin_lto,
pgo_gen_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
pgo_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
pgo_sample_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
);
llvm::LLVMPassManagerBuilderSetSizeLevel(builder, opt_size as u32);

View File

@ -2176,6 +2176,7 @@ extern "C" {
PrepareForThinLTO: bool,
PGOGenPath: *const c_char,
PGOUsePath: *const c_char,
PGOSampleUsePath: *const c_char,
);
pub fn LLVMRustAddLibraryInfo(
PM: &PassManager<'a>,
@ -2210,6 +2211,8 @@ extern "C" {
PGOUsePath: *const c_char,
InstrumentCoverage: bool,
InstrumentGCOV: bool,
PGOSampleUsePath: *const c_char,
DebugInfoForProfiling: bool,
llvm_selfprofiler: *mut c_void,
begin_callback: SelfProfileBeforePassCallback,
end_callback: SelfProfileAfterPassCallback,

View File

@ -286,6 +286,9 @@ impl<'a> GccLinker<'a> {
config::OptLevel::Aggressive => "O3",
};
if let Some(path) = &self.sess.opts.debugging_opts.profile_sample_use {
self.linker_arg(&format!("-plugin-opt=sample-profile={}", path.display()));
};
self.linker_arg(&format!("-plugin-opt={}", opt_level));
self.linker_arg(&format!("-plugin-opt=mcpu={}", self.target_cpu));
}

View File

@ -83,6 +83,8 @@ pub struct ModuleConfig {
pub pgo_gen: SwitchWithOptPath,
pub pgo_use: Option<PathBuf>,
pub pgo_sample_use: Option<PathBuf>,
pub debug_info_for_profiling: bool,
pub instrument_coverage: bool,
pub instrument_gcov: bool,
@ -176,6 +178,8 @@ impl ModuleConfig {
SwitchWithOptPath::Disabled
),
pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None),
pgo_sample_use: if_regular!(sess.opts.debugging_opts.profile_sample_use.clone(), None),
debug_info_for_profiling: sess.opts.debugging_opts.debug_info_for_profiling,
instrument_coverage: if_regular!(sess.instrument_coverage(), false),
instrument_gcov: if_regular!(
// compiler_builtins overrides the codegen-units settings,

View File

@ -715,6 +715,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(chalk, true);
tracked!(codegen_backend, Some("abc".to_string()));
tracked!(crate_attr, vec!["abc".to_string()]);
tracked!(debug_info_for_profiling, true);
tracked!(debug_macros, true);
tracked!(dep_info_omit_d_target, true);
tracked!(dual_proc_macros, true);
@ -752,6 +753,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(profile, true);
tracked!(profile_emit, Some(PathBuf::from("abc")));
tracked!(profiler_runtime, "abc".to_string());
tracked!(profile_sample_use, Some(PathBuf::from("abc")));
tracked!(relax_elf_relocations, Some(true));
tracked!(relro_level, Some(RelroLevel::Full));
tracked!(remap_cwd_prefix, Some(PathBuf::from("abc")));

View File

@ -25,6 +25,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/LTO/LTO.h"
#include "llvm-c/Transforms/PassManagerBuilder.h"
@ -39,6 +40,7 @@
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm;
@ -523,7 +525,7 @@ extern "C" void LLVMRustDisposeTargetMachine(LLVMTargetMachineRef TM) {
extern "C" void LLVMRustConfigurePassManagerBuilder(
LLVMPassManagerBuilderRef PMBR, LLVMRustCodeGenOptLevel OptLevel,
bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO,
const char* PGOGenPath, const char* PGOUsePath) {
const char* PGOGenPath, const char* PGOUsePath, const char* PGOSampleUsePath) {
unwrap(PMBR)->MergeFunctions = MergeFunctions;
unwrap(PMBR)->SLPVectorize = SLPVectorize;
unwrap(PMBR)->OptLevel = fromRust(OptLevel);
@ -531,13 +533,14 @@ extern "C" void LLVMRustConfigurePassManagerBuilder(
unwrap(PMBR)->PrepareForThinLTO = PrepareForThinLTO;
if (PGOGenPath) {
assert(!PGOUsePath);
assert(!PGOUsePath && !PGOSampleUsePath);
unwrap(PMBR)->EnablePGOInstrGen = true;
unwrap(PMBR)->PGOInstrGen = PGOGenPath;
}
if (PGOUsePath) {
assert(!PGOGenPath);
} else if (PGOUsePath) {
assert(!PGOSampleUsePath);
unwrap(PMBR)->PGOInstrUse = PGOUsePath;
} else if (PGOSampleUsePath) {
unwrap(PMBR)->PGOSampleUse = PGOSampleUsePath;
}
}
@ -759,6 +762,7 @@ LLVMRustOptimizeWithNewPassManager(
LLVMRustSanitizerOptions *SanitizerOptions,
const char *PGOGenPath, const char *PGOUsePath,
bool InstrumentCoverage, bool InstrumentGCOV,
const char *PGOSampleUsePath, bool DebugInfoForProfiling,
void* LlvmSelfProfiler,
LLVMRustSelfProfileBeforePassCallback BeforePassCallback,
LLVMRustSelfProfileAfterPassCallback AfterPassCallback,
@ -797,11 +801,19 @@ LLVMRustOptimizeWithNewPassManager(
Optional<PGOOptions> PGOOpt;
if (PGOGenPath) {
assert(!PGOUsePath);
PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr);
assert(!PGOUsePath && !PGOSampleUsePath);
PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} else if (PGOUsePath) {
assert(!PGOGenPath);
PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse);
assert(!PGOSampleUsePath);
PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} else if (PGOSampleUsePath) {
PGOOpt = PGOOptions(PGOSampleUsePath, "", "", PGOOptions::SampleUse,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} else if (DebugInfoForProfiling) {
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
PGOOptions::NoCSAction, DebugInfoForProfiling);
}
#if LLVM_VERSION_GE(12, 0) && !LLVM_VERSION_GE(13,0)

View File

@ -2009,6 +2009,15 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options {
);
}
if debugging_opts.profile_sample_use.is_some()
&& (cg.profile_generate.enabled() || cg.profile_use.is_some())
{
early_error(
error_format,
"option `-Z profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`",
);
}
if debugging_opts.instrument_coverage.is_some()
&& debugging_opts.instrument_coverage != Some(InstrumentCoverage::Off)
{

View File

@ -1040,6 +1040,8 @@ options! {
"combine CGUs into a single one"),
crate_attr: Vec<String> = (Vec::new(), parse_string_push, [TRACKED],
"inject the given attribute in the crate"),
debug_info_for_profiling: bool = (false, parse_bool, [TRACKED],
"emit discriminators and other data necessary for AutoFDO"),
debug_macros: bool = (false, parse_bool, [TRACKED],
"emit line numbers debug info inside macros (default: no)"),
deduplicate_diagnostics: bool = (true, parse_bool, [UNTRACKED],
@ -1242,6 +1244,8 @@ options! {
(default based on relative source path)"),
profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED],
"name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"),
profile_sample_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
"use the given `.prof` file for sampled profile-guided optimization (also known as AutoFDO)"),
query_dep_graph: bool = (false, parse_bool, [UNTRACKED],
"enable queries of the dependency graph for regression testing (default: no)"),
query_stats: bool = (false, parse_bool, [UNTRACKED],

View File

@ -1353,6 +1353,16 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
}
}
// Report an error if a sample profile was requested but the file is missing.
if let Some(ref path) = sess.opts.debugging_opts.profile_sample_use {
    if !path.exists() {
        // `profile-sample-use` is read from `debugging_opts`, i.e. it is a
        // `-Z` option, so the diagnostic names it as such.
        sess.err(&format!(
            "File `{}` passed to `-Z profile-sample-use` does not exist.",
            path.display()
        ));
    }
}
// Unwind tables cannot be disabled if the target requires them.
if let Some(include_uwtables) = sess.opts.cg.force_unwind_tables {
if sess.target.requires_uwtable && !include_uwtables {

View File

@ -0,0 +1,35 @@
# `debug-info-for-profiling`
---
## Introduction
Automatic Feedback Directed Optimization (AFDO) is a method for using
sampling-based profiles to guide optimizations. This is contrasted with other
methods of FDO or profile-guided optimization (PGO), which use instrumented
profiling.
Unlike PGO (controlled by the `rustc` flags `-Cprofile-generate` and
`-Cprofile-use`), AFDO does not make the binary being profiled perform
significantly worse, so it is possible to profile binaries used in real
workflows rather than having to construct artificial workflows.
## Use
In order to use AFDO, the target platform must be Linux running on an `x86_64`
architecture with the performance profiler `perf` available. In addition, the
external tool `create_llvm_prof` from [this repository] must be used.
Given a Rust file `main.rs`, we can produce an optimized binary as follows:
```shell
rustc -O -Zdebug-info-for-profiling main.rs -o main
perf record -b ./main
create_llvm_prof --binary=main --out=code.prof
rustc -O -Zprofile-sample-use=code.prof main.rs -o main2
```
The `perf` command produces a profile `perf.data`, which is then used by the
`create_llvm_prof` command to create `code.prof`. This final profile is then
used by `rustc` to guide optimizations in producing the binary `main2`.
[this repository]: https://github.com/google/autofdo

View File

@ -0,0 +1,10 @@
# `profile-sample-use`
---
`-Zprofile-sample-use=code.prof` directs `rustc` to use the profile
`code.prof` as a source for Automatic Feedback Directed Optimization (AFDO).
See the documentation of [`-Zdebug-info-for-profiling`] for more information
on using AFDO.
[`-Zdebug-info-for-profiling`]: debug_info_for_profiling.html