Rollup merge of #87918 - mikebenfield:pr-afdo, r=nikic

Enable AutoFDO.

This largely involves implementing the options debug-info-for-profiling
and profile-sample-use and forwarding them on to LLVM.

AutoFDO can be used on x86-64 Linux like this:
rustc -O -Clink-arg='-Wl,--no-rosegment' -Zdebug-info-for-profiling main.rs -o main
perf record -b ./main
create_llvm_prof --binary=main --out=code.prof
rustc -O -Zprofile-sample-use=code.prof main.rs -o main2

Now `main2` will have feedback directed optimization applied to it.

The create_llvm_prof tool can be obtained from this github repository:
https://github.com/google/autofdo

The option -Clink-arg='-Wl,--no-rosegment' is necessary to avoid lld
putting an extra RO segment before the executable code, which would make
the binary silently incompatible with create_llvm_prof.
This commit is contained in:
Jubilee 2021-10-07 20:26:09 -07:00 committed by GitHub
commit 6c2d4bf3f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 120 additions and 9 deletions

View File

@ -263,6 +263,10 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty::
attributes::emit_uwtable(llfn, true); attributes::emit_uwtable(llfn, true);
} }
if cx.sess().opts.debugging_opts.profile_sample_use.is_some() {
llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile"));
}
// FIXME: none of these three functions interact with source level attributes. // FIXME: none of these three functions interact with source level attributes.
set_frame_pointer_type(cx, llfn); set_frame_pointer_type(cx, llfn);
set_instrument_function(cx, llfn); set_instrument_function(cx, llfn);

View File

@ -370,6 +370,13 @@ fn get_pgo_use_path(config: &ModuleConfig) -> Option<CString> {
.map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap()) .map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap())
} }
/// Returns the sample-profile path stored in `config.pgo_sample_use` as a
/// `CString`, or `None` when no path is configured.
///
/// # Panics
///
/// Panics if the lossily converted path contains an interior NUL byte, since
/// `CString::new` rejects such input.
fn get_pgo_sample_use_path(config: &ModuleConfig) -> Option<CString> {
    let profile_path = config.pgo_sample_use.as_ref()?;
    // The path is converted lossily, so non-UTF-8 sequences are replaced
    // rather than reported as an error.
    let lossy = profile_path.to_string_lossy();
    Some(CString::new(lossy.as_bytes()).unwrap())
}
pub(crate) fn should_use_new_llvm_pass_manager(config: &ModuleConfig) -> bool { pub(crate) fn should_use_new_llvm_pass_manager(config: &ModuleConfig) -> bool {
// The new pass manager is enabled by default for LLVM >= 13. // The new pass manager is enabled by default for LLVM >= 13.
// This matches Clang, which also enables it since Clang 13. // This matches Clang, which also enables it since Clang 13.
@ -389,6 +396,7 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed(); let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed();
let pgo_gen_path = get_pgo_gen_path(config); let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config); let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
let is_lto = opt_stage == llvm::OptStage::ThinLTO || opt_stage == llvm::OptStage::FatLTO; let is_lto = opt_stage == llvm::OptStage::ThinLTO || opt_stage == llvm::OptStage::FatLTO;
// Sanitizer instrumentation is only inserted during the pre-link optimization stage. // Sanitizer instrumentation is only inserted during the pre-link optimization stage.
let sanitizer_options = if !is_lto { let sanitizer_options = if !is_lto {
@ -439,6 +447,8 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager(
pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()), pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
config.instrument_coverage, config.instrument_coverage,
config.instrument_gcov, config.instrument_gcov,
pgo_sample_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()),
config.debug_info_for_profiling,
llvm_selfprofiler, llvm_selfprofiler,
selfprofile_before_pass_callback, selfprofile_before_pass_callback,
selfprofile_after_pass_callback, selfprofile_after_pass_callback,
@ -544,6 +554,9 @@ pub(crate) unsafe fn optimize(
if config.instrument_coverage { if config.instrument_coverage {
llvm::LLVMRustAddPass(mpm, find_pass("instrprof").unwrap()); llvm::LLVMRustAddPass(mpm, find_pass("instrprof").unwrap());
} }
if config.debug_info_for_profiling {
llvm::LLVMRustAddPass(mpm, find_pass("add-discriminators").unwrap());
}
add_sanitizer_passes(config, &mut extra_passes); add_sanitizer_passes(config, &mut extra_passes);
@ -1001,6 +1014,7 @@ pub unsafe fn with_llvm_pmb(
let inline_threshold = config.inline_threshold; let inline_threshold = config.inline_threshold;
let pgo_gen_path = get_pgo_gen_path(config); let pgo_gen_path = get_pgo_gen_path(config);
let pgo_use_path = get_pgo_use_path(config); let pgo_use_path = get_pgo_use_path(config);
let pgo_sample_use_path = get_pgo_sample_use_path(config);
llvm::LLVMRustConfigurePassManagerBuilder( llvm::LLVMRustConfigurePassManagerBuilder(
builder, builder,
@ -1011,6 +1025,7 @@ pub unsafe fn with_llvm_pmb(
prepare_for_thin_lto, prepare_for_thin_lto,
pgo_gen_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()), pgo_gen_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
pgo_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()), pgo_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
pgo_sample_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()),
); );
llvm::LLVMPassManagerBuilderSetSizeLevel(builder, opt_size as u32); llvm::LLVMPassManagerBuilderSetSizeLevel(builder, opt_size as u32);

View File

@ -2176,6 +2176,7 @@ extern "C" {
PrepareForThinLTO: bool, PrepareForThinLTO: bool,
PGOGenPath: *const c_char, PGOGenPath: *const c_char,
PGOUsePath: *const c_char, PGOUsePath: *const c_char,
PGOSampleUsePath: *const c_char,
); );
pub fn LLVMRustAddLibraryInfo( pub fn LLVMRustAddLibraryInfo(
PM: &PassManager<'a>, PM: &PassManager<'a>,
@ -2210,6 +2211,8 @@ extern "C" {
PGOUsePath: *const c_char, PGOUsePath: *const c_char,
InstrumentCoverage: bool, InstrumentCoverage: bool,
InstrumentGCOV: bool, InstrumentGCOV: bool,
PGOSampleUsePath: *const c_char,
DebugInfoForProfiling: bool,
llvm_selfprofiler: *mut c_void, llvm_selfprofiler: *mut c_void,
begin_callback: SelfProfileBeforePassCallback, begin_callback: SelfProfileBeforePassCallback,
end_callback: SelfProfileAfterPassCallback, end_callback: SelfProfileAfterPassCallback,

View File

@ -286,6 +286,9 @@ impl<'a> GccLinker<'a> {
config::OptLevel::Aggressive => "O3", config::OptLevel::Aggressive => "O3",
}; };
if let Some(path) = &self.sess.opts.debugging_opts.profile_sample_use {
self.linker_arg(&format!("-plugin-opt=sample-profile={}", path.display()));
};
self.linker_arg(&format!("-plugin-opt={}", opt_level)); self.linker_arg(&format!("-plugin-opt={}", opt_level));
self.linker_arg(&format!("-plugin-opt=mcpu={}", self.target_cpu)); self.linker_arg(&format!("-plugin-opt=mcpu={}", self.target_cpu));
} }

View File

@ -83,6 +83,8 @@ pub struct ModuleConfig {
pub pgo_gen: SwitchWithOptPath, pub pgo_gen: SwitchWithOptPath,
pub pgo_use: Option<PathBuf>, pub pgo_use: Option<PathBuf>,
pub pgo_sample_use: Option<PathBuf>,
pub debug_info_for_profiling: bool,
pub instrument_coverage: bool, pub instrument_coverage: bool,
pub instrument_gcov: bool, pub instrument_gcov: bool,
@ -176,6 +178,8 @@ impl ModuleConfig {
SwitchWithOptPath::Disabled SwitchWithOptPath::Disabled
), ),
pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None), pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None),
pgo_sample_use: if_regular!(sess.opts.debugging_opts.profile_sample_use.clone(), None),
debug_info_for_profiling: sess.opts.debugging_opts.debug_info_for_profiling,
instrument_coverage: if_regular!(sess.instrument_coverage(), false), instrument_coverage: if_regular!(sess.instrument_coverage(), false),
instrument_gcov: if_regular!( instrument_gcov: if_regular!(
// compiler_builtins overrides the codegen-units settings, // compiler_builtins overrides the codegen-units settings,

View File

@ -715,6 +715,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(chalk, true); tracked!(chalk, true);
tracked!(codegen_backend, Some("abc".to_string())); tracked!(codegen_backend, Some("abc".to_string()));
tracked!(crate_attr, vec!["abc".to_string()]); tracked!(crate_attr, vec!["abc".to_string()]);
tracked!(debug_info_for_profiling, true);
tracked!(debug_macros, true); tracked!(debug_macros, true);
tracked!(dep_info_omit_d_target, true); tracked!(dep_info_omit_d_target, true);
tracked!(dual_proc_macros, true); tracked!(dual_proc_macros, true);
@ -752,6 +753,7 @@ fn test_debugging_options_tracking_hash() {
tracked!(profile, true); tracked!(profile, true);
tracked!(profile_emit, Some(PathBuf::from("abc"))); tracked!(profile_emit, Some(PathBuf::from("abc")));
tracked!(profiler_runtime, "abc".to_string()); tracked!(profiler_runtime, "abc".to_string());
tracked!(profile_sample_use, Some(PathBuf::from("abc")));
tracked!(relax_elf_relocations, Some(true)); tracked!(relax_elf_relocations, Some(true));
tracked!(relro_level, Some(RelroLevel::Full)); tracked!(relro_level, Some(RelroLevel::Full));
tracked!(remap_cwd_prefix, Some(PathBuf::from("abc"))); tracked!(remap_cwd_prefix, Some(PathBuf::from("abc")));

View File

@ -25,6 +25,7 @@
#include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/FunctionImport.h" #include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/LTO/LTO.h" #include "llvm/LTO/LTO.h"
#include "llvm-c/Transforms/PassManagerBuilder.h" #include "llvm-c/Transforms/PassManagerBuilder.h"
@ -39,6 +40,7 @@
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils.h"
using namespace llvm; using namespace llvm;
@ -523,7 +525,7 @@ extern "C" void LLVMRustDisposeTargetMachine(LLVMTargetMachineRef TM) {
extern "C" void LLVMRustConfigurePassManagerBuilder( extern "C" void LLVMRustConfigurePassManagerBuilder(
LLVMPassManagerBuilderRef PMBR, LLVMRustCodeGenOptLevel OptLevel, LLVMPassManagerBuilderRef PMBR, LLVMRustCodeGenOptLevel OptLevel,
bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO, bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO,
const char* PGOGenPath, const char* PGOUsePath) { const char* PGOGenPath, const char* PGOUsePath, const char* PGOSampleUsePath) {
unwrap(PMBR)->MergeFunctions = MergeFunctions; unwrap(PMBR)->MergeFunctions = MergeFunctions;
unwrap(PMBR)->SLPVectorize = SLPVectorize; unwrap(PMBR)->SLPVectorize = SLPVectorize;
unwrap(PMBR)->OptLevel = fromRust(OptLevel); unwrap(PMBR)->OptLevel = fromRust(OptLevel);
@ -531,13 +533,14 @@ extern "C" void LLVMRustConfigurePassManagerBuilder(
unwrap(PMBR)->PrepareForThinLTO = PrepareForThinLTO; unwrap(PMBR)->PrepareForThinLTO = PrepareForThinLTO;
if (PGOGenPath) { if (PGOGenPath) {
assert(!PGOUsePath); assert(!PGOUsePath && !PGOSampleUsePath);
unwrap(PMBR)->EnablePGOInstrGen = true; unwrap(PMBR)->EnablePGOInstrGen = true;
unwrap(PMBR)->PGOInstrGen = PGOGenPath; unwrap(PMBR)->PGOInstrGen = PGOGenPath;
} } else if (PGOUsePath) {
if (PGOUsePath) { assert(!PGOSampleUsePath);
assert(!PGOGenPath);
unwrap(PMBR)->PGOInstrUse = PGOUsePath; unwrap(PMBR)->PGOInstrUse = PGOUsePath;
} else if (PGOSampleUsePath) {
unwrap(PMBR)->PGOSampleUse = PGOSampleUsePath;
} }
} }
@ -759,6 +762,7 @@ LLVMRustOptimizeWithNewPassManager(
LLVMRustSanitizerOptions *SanitizerOptions, LLVMRustSanitizerOptions *SanitizerOptions,
const char *PGOGenPath, const char *PGOUsePath, const char *PGOGenPath, const char *PGOUsePath,
bool InstrumentCoverage, bool InstrumentGCOV, bool InstrumentCoverage, bool InstrumentGCOV,
const char *PGOSampleUsePath, bool DebugInfoForProfiling,
void* LlvmSelfProfiler, void* LlvmSelfProfiler,
LLVMRustSelfProfileBeforePassCallback BeforePassCallback, LLVMRustSelfProfileBeforePassCallback BeforePassCallback,
LLVMRustSelfProfileAfterPassCallback AfterPassCallback, LLVMRustSelfProfileAfterPassCallback AfterPassCallback,
@ -797,11 +801,19 @@ LLVMRustOptimizeWithNewPassManager(
Optional<PGOOptions> PGOOpt; Optional<PGOOptions> PGOOpt;
if (PGOGenPath) { if (PGOGenPath) {
assert(!PGOUsePath); assert(!PGOUsePath && !PGOSampleUsePath);
PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr); PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} else if (PGOUsePath) { } else if (PGOUsePath) {
assert(!PGOGenPath); assert(!PGOSampleUsePath);
PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse); PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} else if (PGOSampleUsePath) {
PGOOpt = PGOOptions(PGOSampleUsePath, "", "", PGOOptions::SampleUse,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} else if (DebugInfoForProfiling) {
PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
PGOOptions::NoCSAction, DebugInfoForProfiling);
} }
#if LLVM_VERSION_GE(12, 0) && !LLVM_VERSION_GE(13,0) #if LLVM_VERSION_GE(12, 0) && !LLVM_VERSION_GE(13,0)

View File

@ -2009,6 +2009,15 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options {
); );
} }
if debugging_opts.profile_sample_use.is_some()
&& (cg.profile_generate.enabled() || cg.profile_use.is_some())
{
early_error(
error_format,
"option `-Z profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`",
);
}
if debugging_opts.instrument_coverage.is_some() if debugging_opts.instrument_coverage.is_some()
&& debugging_opts.instrument_coverage != Some(InstrumentCoverage::Off) && debugging_opts.instrument_coverage != Some(InstrumentCoverage::Off)
{ {

View File

@ -1040,6 +1040,8 @@ options! {
"combine CGUs into a single one"), "combine CGUs into a single one"),
crate_attr: Vec<String> = (Vec::new(), parse_string_push, [TRACKED], crate_attr: Vec<String> = (Vec::new(), parse_string_push, [TRACKED],
"inject the given attribute in the crate"), "inject the given attribute in the crate"),
debug_info_for_profiling: bool = (false, parse_bool, [TRACKED],
"emit discriminators and other data necessary for AutoFDO"),
debug_macros: bool = (false, parse_bool, [TRACKED], debug_macros: bool = (false, parse_bool, [TRACKED],
"emit line numbers debug info inside macros (default: no)"), "emit line numbers debug info inside macros (default: no)"),
deduplicate_diagnostics: bool = (true, parse_bool, [UNTRACKED], deduplicate_diagnostics: bool = (true, parse_bool, [UNTRACKED],
@ -1242,6 +1244,8 @@ options! {
(default based on relative source path)"), (default based on relative source path)"),
profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED], profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED],
"name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"), "name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"),
profile_sample_use: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
"use the given `.prof` file for sampled profile-guided optimization (also known as AutoFDO)"),
query_dep_graph: bool = (false, parse_bool, [UNTRACKED], query_dep_graph: bool = (false, parse_bool, [UNTRACKED],
"enable queries of the dependency graph for regression testing (default: no)"), "enable queries of the dependency graph for regression testing (default: no)"),
query_stats: bool = (false, parse_bool, [UNTRACKED], query_stats: bool = (false, parse_bool, [UNTRACKED],

View File

@ -1353,6 +1353,16 @@ fn validate_commandline_args_with_session_available(sess: &Session) {
} }
} }
// Do the same for sample profile data: report an error if the file named by
// the option is missing. The option is declared among the debugging
// options, so the message refers to it as `-Z profile-sample-use`.
if let Some(ref path) = sess.opts.debugging_opts.profile_sample_use {
    if !path.exists() {
        sess.err(&format!(
            "File `{}` passed to `-Z profile-sample-use` does not exist.",
            path.display()
        ));
    }
}
// Unwind tables cannot be disabled if the target requires them. // Unwind tables cannot be disabled if the target requires them.
if let Some(include_uwtables) = sess.opts.cg.force_unwind_tables { if let Some(include_uwtables) = sess.opts.cg.force_unwind_tables {
if sess.target.requires_uwtable && !include_uwtables { if sess.target.requires_uwtable && !include_uwtables {

View File

@ -0,0 +1,35 @@
# `debug-info-for-profiling`
---
## Introduction
Automatic Feedback Directed Optimization (AFDO) is a method for using sampling
based profiles to guide optimizations. This is contrasted with other methods of
FDO or profile-guided optimization (PGO) which use instrumented profiling.
Unlike PGO (controlled by the `rustc` flags `-Cprofile-generate` and
`-Cprofile-use`), a binary being profiled for AFDO does not perform
significantly worse. It is therefore possible to profile binaries used in real
workflows, and it is not necessary to construct artificial workflows.
## Use
In order to use AFDO, the target platform must be Linux running on an `x86_64`
architecture with the performance profiler `perf` available. In addition, the
external tool `create_llvm_prof` from [this repository] must be used.
Given a Rust file `main.rs`, we can produce an optimized binary as follows:
```shell
rustc -O -Zdebug-info-for-profiling main.rs -o main
perf record -b ./main
create_llvm_prof --binary=main --out=code.prof
rustc -O -Zprofile-sample-use=code.prof main.rs -o main2
```
The `perf` command produces a profile `perf.data`, which is then used by the
`create_llvm_prof` command to create `code.prof`. This final profile is then
used by `rustc` to guide optimizations in producing the binary `main2`.
[this repository]: https://github.com/google/autofdo

View File

@ -0,0 +1,10 @@
# `profile-sample-use`
---
`-Zprofile-sample-use=code.prof` directs `rustc` to use the profile
`code.prof` as a source for Automatic Feedback Directed Optimization (AFDO).
See the documentation of [`-Zdebug-info-for-profiling`] for more information
on using AFDO.
[`-Zdebug-info-for-profiling`]: debug_info_for_profiling.html