From 1a99ca8da98061a52b9b429d1a859b1e1475cae9 Mon Sep 17 00:00:00 2001 From: DianQK Date: Sun, 17 Nov 2024 14:21:23 +0800 Subject: [PATCH] The embedded bitcode should always be prepared for LTO/ThinLTO --- .../rustc_codegen_cranelift/src/driver/aot.rs | 2 +- compiler/rustc_codegen_llvm/src/back/lto.rs | 10 +- compiler/rustc_codegen_llvm/src/back/write.rs | 133 ++++++++++++------ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 4 +- compiler/rustc_codegen_ssa/src/back/write.rs | 3 + .../rustc_llvm/llvm-wrapper/PassWrapper.cpp | 66 ++++++--- compiler/rustc_session/src/config.rs | 11 +- tests/codegen/overaligned-constant.rs | 4 +- tests/codegen/uninit-consts.rs | 8 +- .../run-make/pgo-embed-bc-lto/interesting.rs | 16 +++ tests/run-make/pgo-embed-bc-lto/main.rs | 5 + tests/run-make/pgo-embed-bc-lto/opaque.rs | 5 + tests/run-make/pgo-embed-bc-lto/rmake.rs | 67 +++++++++ 13 files changed, 265 insertions(+), 69 deletions(-) create mode 100644 tests/run-make/pgo-embed-bc-lto/interesting.rs create mode 100644 tests/run-make/pgo-embed-bc-lto/main.rs create mode 100644 tests/run-make/pgo-embed-bc-lto/opaque.rs create mode 100644 tests/run-make/pgo-embed-bc-lto/rmake.rs diff --git a/compiler/rustc_codegen_cranelift/src/driver/aot.rs b/compiler/rustc_codegen_cranelift/src/driver/aot.rs index a52b18573b1..366e83853e5 100644 --- a/compiler/rustc_codegen_cranelift/src/driver/aot.rs +++ b/compiler/rustc_codegen_cranelift/src/driver/aot.rs @@ -210,7 +210,7 @@ fn produce_final_output_artifacts( // to get rid of it. for output_type in crate_output.outputs.keys() { match *output_type { - OutputType::Bitcode | OutputType::ThinLinkBitcode => { + OutputType::Bitcode | OutputType::ThinLinkBitcode | OutputType::ThinBitcode => { // Cranelift doesn't have bitcode // user_wants_bitcode = true; // // Copy to .bc, but always keep the .0.bc. There is a later diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 99906ea7bce..6676afe489f 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -2,6 +2,7 @@ use std::collections::BTreeMap; use std::ffi::{CStr, CString}; use std::fs::File; use std::path::Path; +use std::ptr::NonNull; use std::sync::Arc; use std::{io, iter, slice}; @@ -655,14 +656,14 @@ pub(crate) fn run_pass_manager( } unsafe { - write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?; + write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?; } if cfg!(llvm_enzyme) && enable_ad { let opt_stage = llvm::OptStage::FatLTO; let stage = write::AutodiffStage::PostAD; unsafe { - write::llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, stage)?; + write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?; } // This is the final IR, so people should be able to inspect the optimized autodiff output. @@ -729,6 +730,11 @@ impl ThinBuffer { ThinBuffer(buffer) } } + + pub unsafe fn from_raw_ptr(ptr: *mut llvm::ThinLTOBuffer) -> ThinBuffer { + let mut ptr = NonNull::new(ptr).unwrap(); + ThinBuffer(unsafe { ptr.as_mut() }) + } } impl ThinBufferMethods for ThinBuffer { diff --git a/compiler/rustc_codegen_llvm/src/back/write.rs b/compiler/rustc_codegen_llvm/src/back/write.rs index b67890c0465..1ad256d3b75 100644 --- a/compiler/rustc_codegen_llvm/src/back/write.rs +++ b/compiler/rustc_codegen_llvm/src/back/write.rs @@ -1,6 +1,7 @@ use std::ffi::{CStr, CString}; use std::io::{self, Write}; use std::path::{Path, PathBuf}; +use std::ptr::null_mut; use std::sync::Arc; use std::{fs, slice, str}; @@ -15,7 +16,7 @@ use rustc_codegen_ssa::back::write::{ TargetMachineFactoryFn, }; use rustc_codegen_ssa::traits::*; -use rustc_codegen_ssa::{CompiledModule, ModuleCodegen}; +use rustc_codegen_ssa::{CompiledModule, ModuleCodegen, ModuleKind}; use rustc_data_structures::profiling::SelfProfilerRef; use rustc_data_structures::small_c_str::SmallCStr; use rustc_errors::{DiagCtxtHandle, FatalError, Level}; @@ -551,6 +552,7 @@ pub(crate) unsafe fn llvm_optimize( cgcx: &CodegenContext, dcx: DiagCtxtHandle<'_>, module: &ModuleCodegen, + thin_lto_buffer: Option<&mut *mut llvm::ThinLTOBuffer>, config: &ModuleConfig, opt_level: config::OptLevel, opt_stage: llvm::OptStage, @@ -584,7 +586,17 @@ pub(crate) unsafe fn llvm_optimize( vectorize_loop = config.vectorize_loop; } trace!(?unroll_loops, ?vectorize_slp, ?vectorize_loop, ?run_enzyme); - let using_thin_buffers = opt_stage == llvm::OptStage::PreLinkThinLTO || config.bitcode_needed(); + if thin_lto_buffer.is_some() { + assert!( + matches!( + opt_stage, + llvm::OptStage::PreLinkNoLTO + | llvm::OptStage::PreLinkFatLTO + | llvm::OptStage::PreLinkThinLTO + ), + "the bitcode for LTO can only be obtained at the pre-link stage" + ); + } let pgo_gen_path = get_pgo_gen_path(config); let pgo_use_path = get_pgo_use_path(config); let pgo_sample_use_path = get_pgo_sample_use_path(config); @@ -644,7 +656,9 @@ pub(crate) unsafe fn llvm_optimize( config.no_prepopulate_passes, config.verify_llvm_ir, config.lint_llvm_ir, - using_thin_buffers, + thin_lto_buffer, + config.emit_thin_lto, + config.emit_thin_lto_summary, config.merge_functions, unroll_loops, vectorize_slp, @@ -705,9 +719,56 @@ pub(crate) unsafe fn optimize( // Otherwise we pretend AD is already done and run the normal opt pipeline (=PostAD). let consider_ad = cfg!(llvm_enzyme) && config.autodiff.contains(&config::AutoDiff::Enable); let autodiff_stage = if consider_ad { AutodiffStage::PreAD } else { AutodiffStage::PostAD }; - return unsafe { - llvm_optimize(cgcx, dcx, module, config, opt_level, opt_stage, autodiff_stage) + // The embedded bitcode is used to run LTO/ThinLTO. + // The bitcode obtained during the `codegen` phase is no longer suitable for performing LTO. + // It may have undergone LTO due to ThinLocal, so we need to obtain the embedded bitcode at + // this point. + let mut thin_lto_buffer = if (module.kind == ModuleKind::Regular + && config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full)) + || config.emit_thin_lto_summary + { + Some(null_mut()) + } else { + None }; + unsafe { + llvm_optimize( + cgcx, + dcx, + module, + thin_lto_buffer.as_mut(), + config, + opt_level, + opt_stage, + autodiff_stage, + ) + }?; + if let Some(thin_lto_buffer) = thin_lto_buffer { + let thin_lto_buffer = unsafe { ThinBuffer::from_raw_ptr(thin_lto_buffer) }; + let thin_bc_out = cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name); + if let Err(err) = fs::write(&thin_bc_out, thin_lto_buffer.data()) { + dcx.emit_err(WriteBytecode { path: &thin_bc_out, err }); + } + let bc_summary_out = + cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name); + if config.emit_thin_lto_summary + && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name() + { + let summary_data = thin_lto_buffer.thin_link_data(); + cgcx.prof.artifact_size( + "llvm_bitcode_summary", + thin_link_bitcode_filename.to_string_lossy(), + summary_data.len() as u64, + ); + let _timer = cgcx.prof.generic_activity_with_arg( + "LLVM_module_codegen_emit_bitcode_summary", + &*module.name, + ); + if let Err(err) = fs::write(&bc_summary_out, summary_data) { + dcx.emit_err(WriteBytecode { path: &bc_summary_out, err }); + } + } + } } Ok(()) } @@ -760,59 +821,47 @@ pub(crate) unsafe fn codegen( // otherwise requested. let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); - let bc_summary_out = - cgcx.output_filenames.temp_path(OutputType::ThinLinkBitcode, module_name); let obj_out = cgcx.output_filenames.temp_path(OutputType::Object, module_name); if config.bitcode_needed() { - let _timer = cgcx - .prof - .generic_activity_with_arg("LLVM_module_codegen_make_bitcode", &*module.name); - let thin = ThinBuffer::new(llmod, config.emit_thin_lto, config.emit_thin_lto_summary); - let data = thin.data(); - - if let Some(bitcode_filename) = bc_out.file_name() { - cgcx.prof.artifact_size( - "llvm_bitcode", - bitcode_filename.to_string_lossy(), - data.len() as u64, - ); - } - - if config.emit_thin_lto_summary - && let Some(thin_link_bitcode_filename) = bc_summary_out.file_name() - { - let summary_data = thin.thin_link_data(); - cgcx.prof.artifact_size( - "llvm_bitcode_summary", - thin_link_bitcode_filename.to_string_lossy(), - summary_data.len() as u64, - ); - - let _timer = cgcx.prof.generic_activity_with_arg( - "LLVM_module_codegen_emit_bitcode_summary", - &*module.name, - ); - if let Err(err) = fs::write(&bc_summary_out, summary_data) { - dcx.emit_err(WriteBytecode { path: &bc_summary_out, err }); - } - } - if config.emit_bc || config.emit_obj == EmitObj::Bitcode { + let thin = { + let _timer = cgcx.prof.generic_activity_with_arg( + "LLVM_module_codegen_make_bitcode", + &*module.name, + ); + ThinBuffer::new(llmod, config.emit_thin_lto, false) + }; + let data = thin.data(); let _timer = cgcx .prof .generic_activity_with_arg("LLVM_module_codegen_emit_bitcode", &*module.name); + if let Some(bitcode_filename) = bc_out.file_name() { + cgcx.prof.artifact_size( + "llvm_bitcode", + bitcode_filename.to_string_lossy(), + data.len() as u64, + ); + } if let Err(err) = fs::write(&bc_out, data) { dcx.emit_err(WriteBytecode { path: &bc_out, err }); } } - if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) { + if config.emit_obj == EmitObj::ObjectCode(BitcodeSection::Full) + && module.kind == ModuleKind::Regular + { let _timer = cgcx .prof .generic_activity_with_arg("LLVM_module_codegen_embed_bitcode", &*module.name); + let thin_bc_out = + cgcx.output_filenames.temp_path(OutputType::ThinBitcode, module_name); + assert!(thin_bc_out.exists(), "cannot find {:?} as embedded bitcode", thin_bc_out); + let data = fs::read(&thin_bc_out).unwrap(); + debug!("removing embed bitcode file {:?}", thin_bc_out); + ensure_removed(dcx, &thin_bc_out); unsafe { - embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, data); + embed_bitcode(cgcx, llcx, llmod, &config.bc_cmdline, &data); } } } diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 3b0187b9d37..ac91a416edc 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -2421,7 +2421,9 @@ unsafe extern "C" { NoPrepopulatePasses: bool, VerifyIR: bool, LintIR: bool, - UseThinLTOBuffers: bool, + ThinLTOBuffer: Option<&mut *mut ThinLTOBuffer>, + EmitThinLTO: bool, + EmitThinLTOSummary: bool, MergeFunctions: bool, UnrollLoops: bool, SLPVectorize: bool, diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index d2548deb8c7..47e7dec48e4 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -626,6 +626,9 @@ fn produce_final_output_artifacts( // them for making an rlib. copy_if_one_unit(OutputType::Bitcode, true); } + OutputType::ThinBitcode => { + copy_if_one_unit(OutputType::ThinBitcode, true); + } OutputType::ThinLinkBitcode => { copy_if_one_unit(OutputType::ThinLinkBitcode, false); } diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index 9ce4abdb432..6e607baebb5 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -7,6 +7,7 @@ #include "llvm/Analysis/Lint.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Bitcode/BitcodeWriter.h" +#include "llvm/Bitcode/BitcodeWriterPass.h" #include "llvm/CodeGen/CommandFlags.h" #include "llvm/IR/AssemblyAnnotationWriter.h" #include "llvm/IR/AutoUpgrade.h" @@ -37,6 +38,7 @@ #include "llvm/Transforms/Instrumentation/InstrProfiling.h" #include "llvm/Transforms/Instrumentation/MemorySanitizer.h" #include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" +#include "llvm/Transforms/Scalar/AnnotationRemarks.h" #include "llvm/Transforms/Utils/CanonicalizeAliases.h" #include "llvm/Transforms/Utils/FunctionImportUtils.h" #include "llvm/Transforms/Utils/NameAnonGlobals.h" @@ -195,6 +197,19 @@ extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) { GEN_SUBTARGETS #undef SUBTARGET +// This struct and various functions are sort of a hack right now, but the +// problem is that we've got in-memory LLVM modules after we generate and +// optimize all codegen-units for one compilation in rustc. To be compatible +// with the LTO support above we need to serialize the modules plus their +// ThinLTO summary into memory. +// +// This structure is basically an owned version of a serialize module, with +// a ThinLTO summary attached. +struct LLVMRustThinLTOBuffer { + std::string data; + std::string thin_link_data; +}; + extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM, const char *Feature) { TargetMachine *Target = unwrap(TM); @@ -704,7 +719,8 @@ extern "C" LLVMRustResult LLVMRustOptimize( LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef, LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage, bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR, - bool LintIR, bool UseThinLTOBuffers, bool MergeFunctions, bool UnrollLoops, + bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO, + bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops, bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls, bool EmitLifetimeMarkers, bool RunEnzyme, LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath, @@ -952,7 +968,10 @@ extern "C" LLVMRustResult LLVMRustOptimize( } ModulePassManager MPM; - bool NeedThinLTOBufferPasses = UseThinLTOBuffers; + bool NeedThinLTOBufferPasses = EmitThinLTO; + auto ThinLTOBuffer = std::make_unique(); + raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data); + raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data); if (!NoPrepopulatePasses) { // The pre-link pipelines don't support O0 and require using // buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do @@ -976,7 +995,25 @@ extern "C" LLVMRustResult LLVMRustOptimize( switch (OptStage) { case LLVMRustOptStage::PreLinkNoLTO: - MPM = PB.buildPerModuleDefaultPipeline(OptLevel); + if (ThinLTOBufferRef) { + // This is similar to LLVM's `buildFatLTODefaultPipeline`, where the + // bitcode for embedding is obtained after performing + // `ThinLTOPreLinkDefaultPipeline`. + MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel)); + if (EmitThinLTO) { + MPM.addPass(ThinLTOBitcodeWriterPass( + ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr)); + } else { + MPM.addPass(BitcodeWriterPass(ThinLTODataOS)); + } + *ThinLTOBufferRef = ThinLTOBuffer.release(); + MPM.addPass(PB.buildModuleOptimizationPipeline( + OptLevel, ThinOrFullLTOPhase::None)); + MPM.addPass( + createModuleToFunctionPassAdaptor(AnnotationRemarksPass())); + } else { + MPM = PB.buildPerModuleDefaultPipeline(OptLevel); + } break; case LLVMRustOptStage::PreLinkThinLTO: MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel); @@ -1022,6 +1059,16 @@ extern "C" LLVMRustResult LLVMRustOptimize( MPM.addPass(CanonicalizeAliasesPass()); MPM.addPass(NameAnonGlobalPass()); } + // For `-Copt-level=0`, ThinLTO, or LTO. + if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) { + if (EmitThinLTO) { + MPM.addPass(ThinLTOBitcodeWriterPass( + ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr)); + } else { + MPM.addPass(BitcodeWriterPass(ThinLTODataOS)); + } + *ThinLTOBufferRef = ThinLTOBuffer.release(); + } // now load "-enzyme" pass: #ifdef ENZYME @@ -1500,19 +1547,6 @@ extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data, return true; } -// This struct and various functions are sort of a hack right now, but the -// problem is that we've got in-memory LLVM modules after we generate and -// optimize all codegen-units for one compilation in rustc. To be compatible -// with the LTO support above we need to serialize the modules plus their -// ThinLTO summary into memory. -// -// This structure is basically an owned version of a serialize module, with -// a ThinLTO summary attached. -struct LLVMRustThinLTOBuffer { - std::string data; - std::string thin_link_data; -}; - extern "C" LLVMRustThinLTOBuffer * LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) { auto Ret = std::make_unique(); diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index 8f0b17b5e88..d94338443b3 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -541,6 +541,7 @@ impl FromStr for SplitDwarfKind { pub enum OutputType { Bitcode, ThinLinkBitcode, + ThinBitcode, Assembly, LlvmAssembly, Mir, @@ -571,6 +572,7 @@ impl OutputType { OutputType::Exe | OutputType::DepInfo | OutputType::Metadata => true, OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Assembly | OutputType::LlvmAssembly | OutputType::Mir @@ -582,6 +584,7 @@ impl OutputType { match *self { OutputType::Bitcode => "llvm-bc", OutputType::ThinLinkBitcode => "thin-link-bitcode", + OutputType::ThinBitcode => "thin-llvm-bc", OutputType::Assembly => "asm", OutputType::LlvmAssembly => "llvm-ir", OutputType::Mir => "mir", @@ -599,6 +602,7 @@ impl OutputType { "mir" => OutputType::Mir, "llvm-bc" => OutputType::Bitcode, "thin-link-bitcode" => OutputType::ThinLinkBitcode, + "thin-llvm-bc" => OutputType::ThinBitcode, "obj" => OutputType::Object, "metadata" => OutputType::Metadata, "link" => OutputType::Exe, @@ -609,9 +613,10 @@ impl OutputType { fn shorthands_display() -> String { format!( - "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`", + "`{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`, `{}`", OutputType::Bitcode.shorthand(), OutputType::ThinLinkBitcode.shorthand(), + OutputType::ThinBitcode.shorthand(), OutputType::Assembly.shorthand(), OutputType::LlvmAssembly.shorthand(), OutputType::Mir.shorthand(), @@ -626,6 +631,7 @@ impl OutputType { match *self { OutputType::Bitcode => "bc", OutputType::ThinLinkBitcode => "indexing.o", + OutputType::ThinBitcode => "thin.bc", OutputType::Assembly => "s", OutputType::LlvmAssembly => "ll", OutputType::Mir => "mir", @@ -644,6 +650,7 @@ impl OutputType { | OutputType::DepInfo => true, OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Object | OutputType::Metadata | OutputType::Exe => false, @@ -731,6 +738,7 @@ impl OutputTypes { self.0.keys().any(|k| match *k { OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Assembly | OutputType::LlvmAssembly | OutputType::Mir @@ -745,6 +753,7 @@ impl OutputTypes { self.0.keys().any(|k| match *k { OutputType::Bitcode | OutputType::ThinLinkBitcode + | OutputType::ThinBitcode | OutputType::Assembly | OutputType::LlvmAssembly | OutputType::Mir diff --git a/tests/codegen/overaligned-constant.rs b/tests/codegen/overaligned-constant.rs index e5540aca387..0f5977880f2 100644 --- a/tests/codegen/overaligned-constant.rs +++ b/tests/codegen/overaligned-constant.rs @@ -9,14 +9,14 @@ struct S(i32); struct SmallStruct(f32, Option, &'static [f32]); -// CHECK: @0 = private unnamed_addr constant +// CHECK: [[const:@.*]] = private unnamed_addr constant // CHECK-SAME: , align 8 #[no_mangle] pub fn overaligned_constant() { // CHECK-LABEL: @overaligned_constant // CHECK: [[full:%_.*]] = alloca [32 x i8], align 8 - // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 @0, i64 32, i1 false) + // CHECK: call void @llvm.memcpy.p0.p0.i64(ptr align 8 [[full]], ptr align 8 [[const]], i64 32, i1 false) let mut s = S(1); s.0 = 3; diff --git a/tests/codegen/uninit-consts.rs b/tests/codegen/uninit-consts.rs index 649927b87b4..a58008e171e 100644 --- a/tests/codegen/uninit-consts.rs +++ b/tests/codegen/uninit-consts.rs @@ -11,15 +11,15 @@ pub struct PartiallyUninit { y: MaybeUninit<[u8; 10]>, } -// CHECK: [[FULLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [10 x i8] }> undef +// CHECK: [[FULLY_UNINIT:@.*]] = private unnamed_addr constant <{ [10 x i8] }> undef -// CHECK: [[PARTIALLY_UNINIT:@[0-9]+]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4 +// CHECK: [[PARTIALLY_UNINIT:@.*]] = private unnamed_addr constant <{ [4 x i8], [12 x i8] }> <{ [4 x i8] c"{{\\EF\\BE\\AD\\DE|\\DE\\AD\\BE\\EF}}", [12 x i8] undef }>, align 4 // This shouldn't contain undef, since it contains more chunks // than the default value of uninit_const_chunk_threshold. -// CHECK: [[UNINIT_PADDING_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 +// CHECK: [[UNINIT_PADDING_HUGE:@.*]] = private unnamed_addr constant <{ [32768 x i8] }> <{ [32768 x i8] c"{{.+}}" }>, align 4 -// CHECK: [[FULLY_UNINIT_HUGE:@[0-9]+]] = private unnamed_addr constant <{ [16384 x i8] }> undef +// CHECK: [[FULLY_UNINIT_HUGE:@.*]] = private unnamed_addr constant <{ [16384 x i8] }> undef // CHECK-LABEL: @fully_uninit #[no_mangle] diff --git a/tests/run-make/pgo-embed-bc-lto/interesting.rs b/tests/run-make/pgo-embed-bc-lto/interesting.rs new file mode 100644 index 00000000000..13105c17e12 --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/interesting.rs @@ -0,0 +1,16 @@ +#![crate_name = "interesting"] +#![crate_type = "rlib"] + +extern crate opaque; + +#[no_mangle] +#[inline(never)] +pub fn function_called_once() { + opaque::foo(); +} + +// CHECK-LABEL: @function_called_once +// CHECK-SAME: !prof [[function_called_once_id:![0-9]+]] { +// CHECK: "CG Profile" +// CHECK-NOT: "CG Profile" +// CHECK-DAG: [[function_called_once_id]] = !{!"function_entry_count", i64 1} diff --git a/tests/run-make/pgo-embed-bc-lto/main.rs b/tests/run-make/pgo-embed-bc-lto/main.rs new file mode 100644 index 00000000000..ce8747bef3c --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/main.rs @@ -0,0 +1,5 @@ +extern crate interesting; + +fn main() { + interesting::function_called_once(); +} diff --git a/tests/run-make/pgo-embed-bc-lto/opaque.rs b/tests/run-make/pgo-embed-bc-lto/opaque.rs new file mode 100644 index 00000000000..b4467dc7796 --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/opaque.rs @@ -0,0 +1,5 @@ +#![crate_name = "opaque"] +#![crate_type = "rlib"] + +#[inline(never)] +pub fn foo() {} diff --git a/tests/run-make/pgo-embed-bc-lto/rmake.rs b/tests/run-make/pgo-embed-bc-lto/rmake.rs new file mode 100644 index 00000000000..b7eba0c68e3 --- /dev/null +++ b/tests/run-make/pgo-embed-bc-lto/rmake.rs @@ -0,0 +1,67 @@ +// This test case verifies that we successfully complete an LTO build with PGO +// using the embedded bitcode. +// It also ensures that the generated IR correctly includes the call results. + +//@ needs-profiler-runtime +//@ ignore-cross-compile + +use std::path::Path; + +use run_make_support::{ + has_extension, has_prefix, llvm_filecheck, llvm_profdata, rfs, run, rustc, shallow_find_files, +}; + +fn run_test(cg_units: usize) { + let path_prof_data_dir = Path::new("prof_data_dir"); + if path_prof_data_dir.exists() { + rfs::remove_dir_all(path_prof_data_dir); + } + rfs::create_dir_all(&path_prof_data_dir); + let path_merged_profdata = path_prof_data_dir.join("merged.profdata"); + rustc().input("opaque.rs").codegen_units(1).run(); + rustc() + .input("interesting.rs") + .profile_generate(&path_prof_data_dir) + .opt() + .crate_type("lib,cdylib") + .codegen_units(cg_units) + .run(); + rustc() + .input("main.rs") + .arg("-Clto=thin") + .opt() + .codegen_units(cg_units) + .profile_generate(&path_prof_data_dir) + .opt() + .run(); + run("main"); + llvm_profdata().merge().output(&path_merged_profdata).input(path_prof_data_dir).run(); + rustc() + .input("interesting.rs") + .profile_use(&path_merged_profdata) + .opt() + .crate_type("lib,cdylib") + .codegen_units(cg_units) + .emit("link") + .run(); + rustc() + .input("main.rs") + .arg("-Clto=thin") + .opt() + .codegen_units(cg_units) + .profile_use(&path_merged_profdata) + .emit("llvm-ir,link") + .opt() + .run(); + let files = shallow_find_files(".", |path| { + has_prefix(path, "main.interesting.interesting") && has_extension(path, "ll") + }); + assert_eq!(files.len(), 1); + let llvm_ir = &files[0]; + llvm_filecheck().patterns("interesting.rs").stdin_buf(rfs::read(llvm_ir)).run(); +} + +fn main() { + run_test(1); + run_test(16); +}