Auto merge of #113040 - Kobzol:llvm-remark-streamer, r=tmiasko

Add `-Zremark-dir` unstable flag to write LLVM optimization remarks to YAML

This PR adds an option for `rustc` to emit LLVM optimization remarks to a set of YAML files, which can then be digested by existing tools, like https://github.com/OfekShilon/optview2. When `-Cremark-dir` is passed, and remarks are enabled (`-Cremark=all`), the remarks will be now written to the specified directory, **instead** of being printed to standard error output.  The files are named based on the CGU from which they are being generated.

Currently, the remarks are written using the LLVM streaming machinery, directly in the diagnostics handler. It seemed easier than going back to Rust and then form there back to C++ to use the streamer from the diagnostics handler. But there are many ways to implement this, of course, so I'm open to suggestions :)

I included some comments with questions into the code. Also, I'm not sure how to test this.

r? `@tmiasko`
This commit is contained in:
bors 2023-07-02 12:48:44 +00:00
commit 131a03664e
11 changed files with 180 additions and 13 deletions

View File

@ -1,4 +1,4 @@
use crate::back::write::{self, save_temp_bitcode, DiagnosticHandlers};
use crate::back::write::{self, save_temp_bitcode, CodegenDiagnosticsStage, DiagnosticHandlers};
use crate::errors::{
DynamicLinkingWithLTO, LlvmError, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib,
};
@ -302,7 +302,13 @@ fn fat_lto(
// The linking steps below may produce errors and diagnostics within LLVM
// which we'd like to handle and print, so set up our diagnostic handlers
// (which get unregistered when they go out of scope below).
let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
let _handler = DiagnosticHandlers::new(
cgcx,
diag_handler,
llcx,
&module,
CodegenDiagnosticsStage::LTO,
);
// For all other modules we codegened we'll need to link them into our own
// bitcode. All modules were codegened in their own LLVM context, however,

View File

@ -268,6 +268,16 @@ pub(crate) fn save_temp_bitcode(
}
}
/// In what context is a dignostic handler being attached to a codegen unit?
pub enum CodegenDiagnosticsStage {
/// Prelink optimization stage.
Opt,
/// LTO/ThinLTO postlink optimization stage.
LTO,
/// Code generation.
Codegen,
}
pub struct DiagnosticHandlers<'a> {
data: *mut (&'a CodegenContext<LlvmCodegenBackend>, &'a Handler),
llcx: &'a llvm::Context,
@ -279,6 +289,8 @@ impl<'a> DiagnosticHandlers<'a> {
cgcx: &'a CodegenContext<LlvmCodegenBackend>,
handler: &'a Handler,
llcx: &'a llvm::Context,
module: &ModuleCodegen<ModuleLlvm>,
stage: CodegenDiagnosticsStage,
) -> Self {
let remark_passes_all: bool;
let remark_passes: Vec<CString>;
@ -295,6 +307,20 @@ impl<'a> DiagnosticHandlers<'a> {
};
let remark_passes: Vec<*const c_char> =
remark_passes.iter().map(|name: &CString| name.as_ptr()).collect();
let remark_file = cgcx
.remark_dir
.as_ref()
// Use the .opt.yaml file suffix, which is supported by LLVM's opt-viewer.
.map(|dir| {
let stage_suffix = match stage {
CodegenDiagnosticsStage::Codegen => "codegen",
CodegenDiagnosticsStage::Opt => "opt",
CodegenDiagnosticsStage::LTO => "lto",
};
dir.join(format!("{}.{stage_suffix}.opt.yaml", module.name))
})
.and_then(|dir| dir.to_str().and_then(|p| CString::new(p).ok()));
let data = Box::into_raw(Box::new((cgcx, handler)));
unsafe {
let old_handler = llvm::LLVMRustContextGetDiagnosticHandler(llcx);
@ -305,6 +331,9 @@ impl<'a> DiagnosticHandlers<'a> {
remark_passes_all,
remark_passes.as_ptr(),
remark_passes.len(),
// The `as_ref()` is important here, otherwise the `CString` will be dropped
// too soon!
remark_file.as_ref().map(|dir| dir.as_ptr()).unwrap_or(std::ptr::null()),
);
DiagnosticHandlers { data, llcx, old_handler }
}
@ -523,7 +552,8 @@ pub(crate) unsafe fn optimize(
let llmod = module.module_llvm.llmod();
let llcx = &*module.module_llvm.llcx;
let _handlers = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
let _handlers =
DiagnosticHandlers::new(cgcx, diag_handler, llcx, module, CodegenDiagnosticsStage::Opt);
let module_name = module.name.clone();
let module_name = Some(&module_name[..]);
@ -582,7 +612,13 @@ pub(crate) unsafe fn codegen(
let tm = &*module.module_llvm.tm;
let module_name = module.name.clone();
let module_name = Some(&module_name[..]);
let handlers = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
let _handlers = DiagnosticHandlers::new(
cgcx,
diag_handler,
llcx,
&module,
CodegenDiagnosticsStage::Codegen,
);
if cgcx.msvc_imps_needed {
create_msvc_imps(cgcx, llcx, llmod);
@ -775,7 +811,6 @@ pub(crate) unsafe fn codegen(
}
record_llvm_cgu_instructions_stats(&cgcx.prof, llmod);
drop(handlers);
}
// `.dwo` files are only emitted if:

View File

@ -2513,6 +2513,7 @@ extern "C" {
remark_all_passes: bool,
remark_passes: *const *const c_char,
remark_passes_len: usize,
remark_file: *const c_char,
);
#[allow(improper_ctypes)]

View File

@ -21,6 +21,8 @@ codegen_ssa_create_temp_dir = couldn't create a temp dir: {$error}
codegen_ssa_erroneous_constant = erroneous constant encountered
codegen_ssa_error_creating_remark_dir = failed to create remark directory: {$error}
codegen_ssa_expected_used_symbol = expected `used`, `used(compiler)` or `used(linker)`
codegen_ssa_extern_funcs_not_found = some `extern` functions couldn't be found; some native libraries may need to be installed or have their path specified

View File

@ -35,6 +35,7 @@ use rustc_span::symbol::sym;
use rustc_span::{BytePos, FileName, InnerSpan, Pos, Span};
use rustc_target::spec::{MergeFunctions, SanitizerSet};
use crate::errors::ErrorCreatingRemarkDir;
use std::any::Any;
use std::borrow::Cow;
use std::fs;
@ -345,6 +346,9 @@ pub struct CodegenContext<B: WriteBackendMethods> {
pub diag_emitter: SharedEmitter,
/// LLVM optimizations for which we want to print remarks.
pub remark: Passes,
/// Directory into which should the LLVM optimization remarks be written.
/// If `None`, they will be written to stderr.
pub remark_dir: Option<PathBuf>,
/// Worker thread number
pub worker: usize,
/// The incremental compilation session directory, or None if we are not
@ -1062,6 +1066,17 @@ fn start_executing_work<B: ExtraBackendMethods>(
tcx.backend_optimization_level(())
};
let backend_features = tcx.global_backend_features(());
let remark_dir = if let Some(ref dir) = sess.opts.unstable_opts.remark_dir {
let result = fs::create_dir_all(dir).and_then(|_| dir.canonicalize());
match result {
Ok(dir) => Some(dir),
Err(error) => sess.emit_fatal(ErrorCreatingRemarkDir { error }),
}
} else {
None
};
let cgcx = CodegenContext::<B> {
crate_types: sess.crate_types().to_vec(),
each_linked_rlib_for_lto,
@ -1073,6 +1088,7 @@ fn start_executing_work<B: ExtraBackendMethods>(
prof: sess.prof.clone(),
exported_symbols,
remark: sess.opts.cg.remark.clone(),
remark_dir,
worker: 0,
incr_comp_session_dir: sess.incr_comp_session_dir_opt().map(|r| r.clone()),
cgu_reuse_tracker: sess.cgu_reuse_tracker.clone(),

View File

@ -1023,3 +1023,9 @@ pub struct TargetFeatureSafeTrait {
#[label(codegen_ssa_label_def)]
pub def: Span,
}
#[derive(Diagnostic)]
#[diag(codegen_ssa_error_creating_remark_dir)]
pub struct ErrorCreatingRemarkDir {
pub error: std::io::Error,
}

View File

@ -7,7 +7,12 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/LLVMRemarkStreamer.h"
#include "llvm/IR/Mangler.h"
#include "llvm/Remarks/RemarkStreamer.h"
#include "llvm/Remarks/RemarkSerializer.h"
#include "llvm/Remarks/RemarkFormat.h"
#include "llvm/Support/ToolOutputFile.h"
#if LLVM_VERSION_GE(16, 0)
#include "llvm/Support/ModRef.h"
#endif
@ -1855,23 +1860,44 @@ using LLVMDiagnosticHandlerTy = DiagnosticHandler::DiagnosticHandlerTy;
// When RemarkAllPasses is true, remarks are enabled for all passes. Otherwise
// the RemarkPasses array specifies individual passes for which remarks will be
// enabled.
//
// If RemarkFilePath is not NULL, optimization remarks will be streamed directly into this file,
// bypassing the diagnostics handler.
extern "C" void LLVMRustContextConfigureDiagnosticHandler(
LLVMContextRef C, LLVMDiagnosticHandlerTy DiagnosticHandlerCallback,
void *DiagnosticHandlerContext, bool RemarkAllPasses,
const char * const * RemarkPasses, size_t RemarkPassesLen) {
const char * const * RemarkPasses, size_t RemarkPassesLen,
const char * RemarkFilePath
) {
class RustDiagnosticHandler final : public DiagnosticHandler {
public:
RustDiagnosticHandler(LLVMDiagnosticHandlerTy DiagnosticHandlerCallback,
void *DiagnosticHandlerContext,
bool RemarkAllPasses,
std::vector<std::string> RemarkPasses)
RustDiagnosticHandler(
LLVMDiagnosticHandlerTy DiagnosticHandlerCallback,
void *DiagnosticHandlerContext,
bool RemarkAllPasses,
std::vector<std::string> RemarkPasses,
std::unique_ptr<ToolOutputFile> RemarksFile,
std::unique_ptr<llvm::remarks::RemarkStreamer> RemarkStreamer,
std::unique_ptr<LLVMRemarkStreamer> LlvmRemarkStreamer
)
: DiagnosticHandlerCallback(DiagnosticHandlerCallback),
DiagnosticHandlerContext(DiagnosticHandlerContext),
RemarkAllPasses(RemarkAllPasses),
RemarkPasses(RemarkPasses) {}
RemarkPasses(std::move(RemarkPasses)),
RemarksFile(std::move(RemarksFile)),
RemarkStreamer(std::move(RemarkStreamer)),
LlvmRemarkStreamer(std::move(LlvmRemarkStreamer)) {}
virtual bool handleDiagnostics(const DiagnosticInfo &DI) override {
if (this->LlvmRemarkStreamer) {
if (auto *OptDiagBase = dyn_cast<DiagnosticInfoOptimizationBase>(&DI)) {
if (OptDiagBase->isEnabled()) {
this->LlvmRemarkStreamer->emit(*OptDiagBase);
return true;
}
}
}
if (DiagnosticHandlerCallback) {
DiagnosticHandlerCallback(DI, DiagnosticHandlerContext);
return true;
@ -1912,14 +1938,64 @@ extern "C" void LLVMRustContextConfigureDiagnosticHandler(
bool RemarkAllPasses = false;
std::vector<std::string> RemarkPasses;
// Since LlvmRemarkStreamer contains a pointer to RemarkStreamer, the ordering of the three
// members below is important.
std::unique_ptr<ToolOutputFile> RemarksFile;
std::unique_ptr<llvm::remarks::RemarkStreamer> RemarkStreamer;
std::unique_ptr<LLVMRemarkStreamer> LlvmRemarkStreamer;
};
std::vector<std::string> Passes;
for (size_t I = 0; I != RemarkPassesLen; ++I)
{
Passes.push_back(RemarkPasses[I]);
}
// We need to hold onto both the streamers and the opened file
std::unique_ptr<ToolOutputFile> RemarkFile;
std::unique_ptr<llvm::remarks::RemarkStreamer> RemarkStreamer;
std::unique_ptr<LLVMRemarkStreamer> LlvmRemarkStreamer;
if (RemarkFilePath != nullptr) {
std::error_code EC;
RemarkFile = std::make_unique<ToolOutputFile>(
RemarkFilePath,
EC,
llvm::sys::fs::OF_TextWithCRLF
);
if (EC) {
std::string Error = std::string("Cannot create remark file: ") +
toString(errorCodeToError(EC));
report_fatal_error(Twine(Error));
}
// Do not delete the file after we gather remarks
RemarkFile->keep();
auto RemarkSerializer = remarks::createRemarkSerializer(
llvm::remarks::Format::YAML,
remarks::SerializerMode::Separate,
RemarkFile->os()
);
if (Error E = RemarkSerializer.takeError())
{
std::string Error = std::string("Cannot create remark serializer: ") + toString(std::move(E));
report_fatal_error(Twine(Error));
}
RemarkStreamer = std::make_unique<llvm::remarks::RemarkStreamer>(std::move(*RemarkSerializer));
LlvmRemarkStreamer = std::make_unique<LLVMRemarkStreamer>(*RemarkStreamer);
}
unwrap(C)->setDiagnosticHandler(std::make_unique<RustDiagnosticHandler>(
DiagnosticHandlerCallback, DiagnosticHandlerContext, RemarkAllPasses, Passes));
DiagnosticHandlerCallback,
DiagnosticHandlerContext,
RemarkAllPasses,
Passes,
std::move(RemarkFile),
std::move(RemarkStreamer),
std::move(LlvmRemarkStreamer)
));
}
extern "C" void LLVMRustGetMangledName(LLVMValueRef V, RustStringRef Str) {

View File

@ -2707,6 +2707,10 @@ pub fn build_session_options(
handler.early_warn("-C remark requires \"-C debuginfo=n\" to show source locations");
}
if cg.remark.is_empty() && unstable_opts.remark_dir.is_some() {
handler.early_warn("using -Z remark-dir without enabling remarks using e.g. -C remark=all");
}
let externs = parse_externs(handler, matches, &unstable_opts);
let crate_name = matches.opt_str("crate-name");

View File

@ -1344,7 +1344,7 @@ options! {
"control generation of position-independent code (PIC) \
(`rustc --print relocation-models` for details)"),
remark: Passes = (Passes::Some(Vec::new()), parse_passes, [UNTRACKED],
"print remarks for these optimization passes (space separated, or \"all\")"),
"output remarks for these optimization passes (space separated, or \"all\")"),
rpath: bool = (false, parse_bool, [UNTRACKED],
"set rpath values in libs/exes (default: no)"),
save_temps: bool = (false, parse_bool, [UNTRACKED],
@ -1689,6 +1689,9 @@ options! {
"choose which RELRO level to use"),
remap_cwd_prefix: Option<PathBuf> = (None, parse_opt_pathbuf, [TRACKED],
"remap paths under the current working directory to this path prefix"),
remark_dir: Option<PathBuf> = (None, parse_opt_pathbuf, [UNTRACKED],
"directory into which to write optimization remarks (if not specified, they will be \
written to standard error output)"),
report_delayed_bugs: bool = (false, parse_bool, [TRACKED],
"immediately print bugs registered with `delay_span_bug` (default: no)"),
sanitizer: SanitizerSet = (SanitizerSet::empty(), parse_sanitizers, [TRACKED],

View File

@ -0,0 +1,12 @@
include ../tools.mk
PROFILE_DIR=$(TMPDIR)/profiles
all: check_inline check_filter
check_inline:
$(RUSTC) -O foo.rs --crate-type=lib -Cremark=all -Zremark-dir=$(PROFILE_DIR)
cat $(PROFILE_DIR)/*.opt.yaml | $(CGREP) -e "inline"
check_filter:
$(RUSTC) -O foo.rs --crate-type=lib -Cremark=foo -Zremark-dir=$(PROFILE_DIR)
cat $(PROFILE_DIR)/*.opt.yaml | $(CGREP) -e -v "inline"

View File

@ -0,0 +1,6 @@
#[inline(never)]
pub fn bar() {}
pub fn foo() {
bar();
}