From a17193dbb931ea0c8b66d82f640385bce8b4929a Mon Sep 17 00:00:00 2001 From: Michael Benfield Date: Fri, 7 May 2021 07:41:37 +0000 Subject: [PATCH] Enable AutoFDO. This largely involves implementing the options debug-info-for-profiling and profile-sample-use and forwarding them on to LLVM. AutoFDO can be used on x86-64 Linux like this: rustc -O -Cdebug-info-for-profiling main.rs -o main perf record -b ./main create_llvm_prof --binary=main --out=code.prof rustc -O -Cprofile-sample-use=code.prof main.rs -o main2 Now `main2` will have feedback directed optimization applied to it. The create_llvm_prof tool can be obtained from this github repository: https://github.com/google/autofdo Fixes #64892. --- compiler/rustc_codegen_llvm/src/attributes.rs | 4 +++ compiler/rustc_codegen_llvm/src/back/write.rs | 15 ++++++++ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 3 ++ compiler/rustc_codegen_ssa/src/back/linker.rs | 3 ++ compiler/rustc_codegen_ssa/src/back/write.rs | 4 +++ compiler/rustc_interface/src/tests.rs | 2 ++ .../rustc_llvm/llvm-wrapper/PassWrapper.cpp | 30 +++++++++++----- compiler/rustc_session/src/config.rs | 9 +++++ compiler/rustc_session/src/options.rs | 4 +++ compiler/rustc_session/src/session.rs | 10 ++++++ .../debug_info_for_profiling.md | 35 +++++++++++++++++++ .../src/compiler-flags/profile_sample_use.md | 10 ++++++ 12 files changed, 120 insertions(+), 9 deletions(-) create mode 100644 src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md create mode 100644 src/doc/unstable-book/src/compiler-flags/profile_sample_use.md diff --git a/compiler/rustc_codegen_llvm/src/attributes.rs b/compiler/rustc_codegen_llvm/src/attributes.rs index 51c70f0868f..659cf9ea070 100644 --- a/compiler/rustc_codegen_llvm/src/attributes.rs +++ b/compiler/rustc_codegen_llvm/src/attributes.rs @@ -263,6 +263,10 @@ pub fn from_fn_attrs(cx: &CodegenCx<'ll, 'tcx>, llfn: &'ll Value, instance: ty:: attributes::emit_uwtable(llfn, true); } + if cx.sess().opts.debugging_opts.profile_sample_use.is_some() { + llvm::AddFunctionAttrString(llfn, Function, cstr!("use-sample-profile")); + } + // FIXME: none of these three functions interact with source level attributes. set_frame_pointer_type(cx, llfn); set_instrument_function(cx, llfn); diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index ab48c56a626..ca78254f0c8 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -370,6 +370,13 @@ fn get_pgo_use_path(config: &ModuleConfig) -> Option { .map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap()) } +fn get_pgo_sample_use_path(config: &ModuleConfig) -> Option { + config + .pgo_sample_use + .as_ref() + .map(|path_buf| CString::new(path_buf.to_string_lossy().as_bytes()).unwrap()) +} + pub(crate) fn should_use_new_llvm_pass_manager(config: &ModuleConfig) -> bool { // The new pass manager is enabled by default for LLVM >= 13. // This matches Clang, which also enables it since Clang 13. @@ -389,6 +396,7 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager( let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed(); let pgo_gen_path = get_pgo_gen_path(config); let pgo_use_path = get_pgo_use_path(config); + let pgo_sample_use_path = get_pgo_sample_use_path(config); let is_lto = opt_stage == llvm::OptStage::ThinLTO || opt_stage == llvm::OptStage::FatLTO; // Sanitizer instrumentation is only inserted during the pre-link optimization stage. let sanitizer_options = if !is_lto { @@ -439,6 +447,8 @@ pub(crate) unsafe fn optimize_with_new_llvm_pass_manager( pgo_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()), config.instrument_coverage, config.instrument_gcov, + pgo_sample_use_path.as_ref().map_or(std::ptr::null(), |s| s.as_ptr()), + config.debug_info_for_profiling, llvm_selfprofiler, selfprofile_before_pass_callback, selfprofile_after_pass_callback, @@ -544,6 +554,9 @@ pub(crate) unsafe fn optimize( if config.instrument_coverage { llvm::LLVMRustAddPass(mpm, find_pass("instrprof").unwrap()); } + if config.debug_info_for_profiling { + llvm::LLVMRustAddPass(mpm, find_pass("add-discriminators").unwrap()); + } add_sanitizer_passes(config, &mut extra_passes); @@ -1001,6 +1014,7 @@ pub unsafe fn with_llvm_pmb( let inline_threshold = config.inline_threshold; let pgo_gen_path = get_pgo_gen_path(config); let pgo_use_path = get_pgo_use_path(config); + let pgo_sample_use_path = get_pgo_sample_use_path(config); llvm::LLVMRustConfigurePassManagerBuilder( builder, @@ -1011,6 +1025,7 @@ pub unsafe fn with_llvm_pmb( prepare_for_thin_lto, pgo_gen_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()), pgo_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()), + pgo_sample_use_path.as_ref().map_or(ptr::null(), |s| s.as_ptr()), ); llvm::LLVMPassManagerBuilderSetSizeLevel(builder, opt_size as u32); diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 436d906827b..5d437b2a068 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2176,6 +2176,7 @@ extern "C" { PrepareForThinLTO: bool, PGOGenPath: *const c_char, PGOUsePath: *const c_char, + PGOSampleUsePath: *const c_char, ); pub fn LLVMRustAddLibraryInfo( PM: &PassManager<'a>, @@ -2210,6 +2211,8 @@ extern "C" { PGOUsePath: *const c_char, InstrumentCoverage: bool, InstrumentGCOV: bool, + PGOSampleUsePath: *const c_char, + DebugInfoForProfiling: bool, llvm_selfprofiler: *mut c_void, begin_callback: SelfProfileBeforePassCallback, end_callback: SelfProfileAfterPassCallback, diff --git a/compiler/rustc_codegen_ssa/src/back/linker.rs b/compiler/rustc_codegen_ssa/src/back/linker.rs index e3b0eea0d89..429dc45d6a4 100644 --- a/compiler/rustc_codegen_ssa/src/back/linker.rs +++ b/compiler/rustc_codegen_ssa/src/back/linker.rs @@ -286,6 +286,9 @@ impl<'a> GccLinker<'a> { config::OptLevel::Aggressive => "O3", }; + if let Some(path) = &self.sess.opts.debugging_opts.profile_sample_use { + self.linker_arg(&format!("-plugin-opt=sample-profile={}", path.display())); + }; self.linker_arg(&format!("-plugin-opt={}", opt_level)); self.linker_arg(&format!("-plugin-opt=mcpu={}", self.target_cpu)); } diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 31722d07414..da34612ce76 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -83,6 +83,8 @@ pub struct ModuleConfig { pub pgo_gen: SwitchWithOptPath, pub pgo_use: Option, + pub pgo_sample_use: Option, + pub debug_info_for_profiling: bool, pub instrument_coverage: bool, pub instrument_gcov: bool, @@ -176,6 +178,8 @@ impl ModuleConfig { SwitchWithOptPath::Disabled ), pgo_use: if_regular!(sess.opts.cg.profile_use.clone(), None), + pgo_sample_use: if_regular!(sess.opts.debugging_opts.profile_sample_use.clone(), None), + debug_info_for_profiling: sess.opts.debugging_opts.debug_info_for_profiling, instrument_coverage: if_regular!(sess.instrument_coverage(), false), instrument_gcov: if_regular!( // compiler_builtins overrides the codegen-units settings, diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index cfe13b1fd4e..844e5ab56a4 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -715,6 +715,7 @@ fn test_debugging_options_tracking_hash() { tracked!(chalk, true); tracked!(codegen_backend, Some("abc".to_string())); tracked!(crate_attr, vec!["abc".to_string()]); + tracked!(debug_info_for_profiling, true); tracked!(debug_macros, true); tracked!(dep_info_omit_d_target, true); tracked!(dual_proc_macros, true); @@ -752,6 +753,7 @@ fn test_debugging_options_tracking_hash() { tracked!(profile, true); tracked!(profile_emit, Some(PathBuf::from("abc"))); tracked!(profiler_runtime, "abc".to_string()); + tracked!(profile_sample_use, Some(PathBuf::from("abc"))); tracked!(relax_elf_relocations, Some(true)); tracked!(relro_level, Some(RelroLevel::Full)); tracked!(remap_cwd_prefix, Some(PathBuf::from("abc"))); diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index ddb5f7dcebf..87f423fb2d5 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -25,6 +25,7 @@ #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/IPO/AlwaysInliner.h" #include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/Utils/AddDiscriminators.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/LTO/LTO.h" #include "llvm-c/Transforms/PassManagerBuilder.h" @@ -39,6 +40,7 @@ #include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils.h" using namespace llvm; @@ -523,7 +525,7 @@ extern "C" void LLVMRustDisposeTargetMachine(LLVMTargetMachineRef TM) { extern "C" void LLVMRustConfigurePassManagerBuilder( LLVMPassManagerBuilderRef PMBR, LLVMRustCodeGenOptLevel OptLevel, bool MergeFunctions, bool SLPVectorize, bool LoopVectorize, bool PrepareForThinLTO, - const char* PGOGenPath, const char* PGOUsePath) { + const char* PGOGenPath, const char* PGOUsePath, const char* PGOSampleUsePath) { unwrap(PMBR)->MergeFunctions = MergeFunctions; unwrap(PMBR)->SLPVectorize = SLPVectorize; unwrap(PMBR)->OptLevel = fromRust(OptLevel); @@ -531,13 +533,14 @@ extern "C" void LLVMRustConfigurePassManagerBuilder( unwrap(PMBR)->PrepareForThinLTO = PrepareForThinLTO; if (PGOGenPath) { - assert(!PGOUsePath); + assert(!PGOUsePath && !PGOSampleUsePath); unwrap(PMBR)->EnablePGOInstrGen = true; unwrap(PMBR)->PGOInstrGen = PGOGenPath; - } - if (PGOUsePath) { - assert(!PGOGenPath); + } else if (PGOUsePath) { + assert(!PGOSampleUsePath); unwrap(PMBR)->PGOInstrUse = PGOUsePath; + } else if (PGOSampleUsePath) { + unwrap(PMBR)->PGOSampleUse = PGOSampleUsePath; } } @@ -759,6 +762,7 @@ LLVMRustOptimizeWithNewPassManager( LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath, const char *PGOUsePath, bool InstrumentCoverage, bool InstrumentGCOV, + const char *PGOSampleUsePath, bool DebugInfoForProfiling, void* LlvmSelfProfiler, LLVMRustSelfProfileBeforePassCallback BeforePassCallback, LLVMRustSelfProfileAfterPassCallback AfterPassCallback, @@ -797,11 +801,19 @@ LLVMRustOptimizeWithNewPassManager( Optional PGOOpt; if (PGOGenPath) { - assert(!PGOUsePath); - PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr); + assert(!PGOUsePath && !PGOSampleUsePath); + PGOOpt = PGOOptions(PGOGenPath, "", "", PGOOptions::IRInstr, + PGOOptions::NoCSAction, DebugInfoForProfiling); } else if (PGOUsePath) { - assert(!PGOGenPath); - PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse); + assert(!PGOSampleUsePath); + PGOOpt = PGOOptions(PGOUsePath, "", "", PGOOptions::IRUse, + PGOOptions::NoCSAction, DebugInfoForProfiling); + } else if (PGOSampleUsePath) { + PGOOpt = PGOOptions(PGOSampleUsePath, "", "", PGOOptions::SampleUse, + PGOOptions::NoCSAction, DebugInfoForProfiling); + } else if (DebugInfoForProfiling) { + PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction, + PGOOptions::NoCSAction, DebugInfoForProfiling); } #if LLVM_VERSION_GE(12, 0) && !LLVM_VERSION_GE(13,0) diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index 32aa035e1cd..ac4bce7350b 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -2009,6 +2009,15 @@ pub fn build_session_options(matches: &getopts::Matches) -> Options { ); } + if debugging_opts.profile_sample_use.is_some() + && (cg.profile_generate.enabled() || cg.profile_use.is_some()) + { + early_error( + error_format, + "option `-Z profile-sample-use` cannot be used with `-C profile-generate` or `-C profile-use`", + ); + } + if debugging_opts.instrument_coverage.is_some() && debugging_opts.instrument_coverage != Some(InstrumentCoverage::Off) { diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 8ecb7a031ad..b3d36b396c5 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -1040,6 +1040,8 @@ options! { "combine CGUs into a single one"), crate_attr: Vec = (Vec::new(), parse_string_push, [TRACKED], "inject the given attribute in the crate"), + debug_info_for_profiling: bool = (false, parse_bool, [TRACKED], + "emit discriminators and other data necessary for AutoFDO"), debug_macros: bool = (false, parse_bool, [TRACKED], "emit line numbers debug info inside macros (default: no)"), deduplicate_diagnostics: bool = (true, parse_bool, [UNTRACKED], @@ -1242,6 +1244,8 @@ options! { (default based on relative source path)"), profiler_runtime: String = (String::from("profiler_builtins"), parse_string, [TRACKED], "name of the profiler runtime crate to automatically inject (default: `profiler_builtins`)"), + profile_sample_use: Option = (None, parse_opt_pathbuf, [TRACKED], + "use the given `.prof` file for sampled profile-guided optimization (also known as AutoFDO)"), query_dep_graph: bool = (false, parse_bool, [UNTRACKED], "enable queries of the dependency graph for regression testing (default: no)"), query_stats: bool = (false, parse_bool, [UNTRACKED], diff --git a/compiler/rustc_session/src/session.rs b/compiler/rustc_session/src/session.rs index bf04154a3da..b6ba6cc1dd6 100644 --- a/compiler/rustc_session/src/session.rs +++ b/compiler/rustc_session/src/session.rs @@ -1353,6 +1353,16 @@ fn validate_commandline_args_with_session_available(sess: &Session) { } } + // Do the same for sample profile data. + if let Some(ref path) = sess.opts.debugging_opts.profile_sample_use { + if !path.exists() { + sess.err(&format!( + "File `{}` passed to `-C profile-sample-use` does not exist.", + path.display() + )); + } + } + // Unwind tables cannot be disabled if the target requires them. if let Some(include_uwtables) = sess.opts.cg.force_unwind_tables { if sess.target.requires_uwtable && !include_uwtables { diff --git a/src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md b/src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md new file mode 100644 index 00000000000..44bd3baeeed --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/debug_info_for_profiling.md @@ -0,0 +1,35 @@ +# `debug-info-for-profiling + +--- + +## Introduction + +Automatic Feedback Directed Optimization (AFDO) is a method for using sampling +based profiles to guide optimizations. This is contrasted with other methods of +FDO or profile-guided optimization (PGO) which use instrumented profiling. + +Unlike PGO (controlled by the `rustc` flags `-Cprofile-generate` and +`-Cprofile-use`), a binary being profiled does not perform significantly worse, +and thus it's possible to profile binaries used in real workflows and not +necessary to construct artificial workflows. + +## Use + +In order to use AFDO, the target platform must be Linux running on an `x86_64` +architecture with the performance profiler `perf` available. In addition, the +external tool `create_llvm_prof` from [this repository] must be used. + +Given a Rust file `main.rs`, we can produce an optimized binary as follows: + +```shell +rustc -O -Zdebug-info-for-profiling main.rs -o main +perf record -b ./main +create_llvm_prof --binary=main --out=code.prof +rustc -O -Zprofile-sample-use=code.prof main.rs -o main2 +``` + +The `perf` command produces a profile `perf.data`, which is then used by the +`create_llvm_prof` command to create `code.prof`. This final profile is then +used by `rustc` to guide optimizations in producing the binary `main2`. + +[this repository]: https://github.com/google/autofdo diff --git a/src/doc/unstable-book/src/compiler-flags/profile_sample_use.md b/src/doc/unstable-book/src/compiler-flags/profile_sample_use.md new file mode 100644 index 00000000000..ce894ce6ac7 --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/profile_sample_use.md @@ -0,0 +1,10 @@ +# `profile-sample-use + +--- + +`-Zprofile-sample-use=code.prof` directs `rustc` to use the profile +`code.prof` as a source for Automatic Feedback Directed Optimization (AFDO). +See the documentation of [`-Zdebug-info-for-profiling`] for more information +on using AFDO. + +[`-Zdebug-info-for-profiling`]: debug_info_for_profiling.html