rust/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp

#include "LLVMWrapper.h"

#include "llvm-c/Core.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/Lint.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/LTO/LTO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Passes/PassPlugin.h"
#include "llvm/Passes/StandardInstrumentations.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/TimeProfiler.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Host.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
#include "llvm/Transforms/Scalar/AnnotationRemarks.h"
#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
#include "llvm/Transforms/Utils/FunctionImportUtils.h"
#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include <set>
#include <string>
#include <vector>

// Conditional includes prevent clang-format from fully sorting the list,
// so if any are needed, keep them separate down here.

using namespace llvm;

static codegen::RegisterCodeGenFlags CGF;

typedef struct LLVMOpaquePass *LLVMPassRef;
typedef struct LLVMOpaqueTargetMachine *LLVMTargetMachineRef;

DEFINE_STDCXX_CONVERSION_FUNCTIONS(Pass, LLVMPassRef)
DEFINE_STDCXX_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef)

extern "C" void LLVMRustTimeTraceProfilerInitialize() {
  timeTraceProfilerInitialize(
      /* TimeTraceGranularity */ 0,
      /* ProcName */ "rustc");
}

extern "C" void LLVMRustTimeTraceProfilerFinishThread() {
  timeTraceProfilerFinishThread();
}

extern "C" void LLVMRustTimeTraceProfilerFinish(const char *FileName) {
  auto FN = StringRef(FileName);
  std::error_code EC;
  auto OS = raw_fd_ostream(FN, EC, sys::fs::CD_CreateAlways);

  timeTraceProfilerWrite(OS);
  timeTraceProfilerCleanup();
}

#ifdef LLVM_COMPONENT_X86
#define SUBTARGET_X86 SUBTARGET(X86)
#else
#define SUBTARGET_X86
#endif

#ifdef LLVM_COMPONENT_ARM
#define SUBTARGET_ARM SUBTARGET(ARM)
#else
#define SUBTARGET_ARM
#endif

#ifdef LLVM_COMPONENT_AARCH64
#define SUBTARGET_AARCH64 SUBTARGET(AArch64)
#else
#define SUBTARGET_AARCH64
#endif

#ifdef LLVM_COMPONENT_AVR
#define SUBTARGET_AVR SUBTARGET(AVR)
#else
#define SUBTARGET_AVR
#endif

#ifdef LLVM_COMPONENT_M68k
#define SUBTARGET_M68K SUBTARGET(M68k)
#else
#define SUBTARGET_M68K
#endif

#ifdef LLVM_COMPONENT_CSKY
#define SUBTARGET_CSKY SUBTARGET(CSKY)
#else
#define SUBTARGET_CSKY
#endif

#ifdef LLVM_COMPONENT_MIPS
#define SUBTARGET_MIPS SUBTARGET(Mips)
#else
#define SUBTARGET_MIPS
#endif

#ifdef LLVM_COMPONENT_POWERPC
#define SUBTARGET_PPC SUBTARGET(PPC)
#else
#define SUBTARGET_PPC
#endif

#ifdef LLVM_COMPONENT_SYSTEMZ
#define SUBTARGET_SYSTEMZ SUBTARGET(SystemZ)
#else
#define SUBTARGET_SYSTEMZ
#endif

#ifdef LLVM_COMPONENT_MSP430
#define SUBTARGET_MSP430 SUBTARGET(MSP430)
#else
#define SUBTARGET_MSP430
#endif

#ifdef LLVM_COMPONENT_RISCV
#define SUBTARGET_RISCV SUBTARGET(RISCV)
#else
#define SUBTARGET_RISCV
#endif

#ifdef LLVM_COMPONENT_SPARC
#define SUBTARGET_SPARC SUBTARGET(Sparc)
#else
#define SUBTARGET_SPARC
#endif

#ifdef LLVM_COMPONENT_XTENSA
#define SUBTARGET_XTENSA SUBTARGET(XTENSA)
#else
#define SUBTARGET_XTENSA
#endif

#ifdef LLVM_COMPONENT_HEXAGON
#define SUBTARGET_HEXAGON SUBTARGET(Hexagon)
#else
#define SUBTARGET_HEXAGON
#endif

#ifdef LLVM_COMPONENT_LOONGARCH
#define SUBTARGET_LOONGARCH SUBTARGET(LoongArch)
#else
#define SUBTARGET_LOONGARCH
#endif

#define GEN_SUBTARGETS                                                         \
  SUBTARGET_X86                                                                \
  SUBTARGET_ARM                                                                \
  SUBTARGET_AARCH64                                                            \
  SUBTARGET_AVR                                                                \
  SUBTARGET_M68K                                                               \
  SUBTARGET_CSKY                                                               \
  SUBTARGET_MIPS                                                               \
  SUBTARGET_PPC                                                                \
  SUBTARGET_SYSTEMZ                                                            \
  SUBTARGET_MSP430                                                             \
  SUBTARGET_SPARC                                                              \
  SUBTARGET_HEXAGON                                                            \
  SUBTARGET_XTENSA                                                             \
  SUBTARGET_RISCV                                                              \
  SUBTARGET_LOONGARCH

#define SUBTARGET(x)                                                           \
  namespace llvm {                                                             \
  extern const SubtargetFeatureKV x##FeatureKV[];                              \
  extern const SubtargetFeatureKV x##SubTypeKV[];                              \
  }

GEN_SUBTARGETS
#undef SUBTARGET

// This struct and various functions are sort of a hack right now, but the
// problem is that we've got in-memory LLVM modules after we generate and
// optimize all codegen-units for one compilation in rustc. To be compatible
// with the LTO support above we need to serialize the modules plus their
// ThinLTO summary into memory.
//
// This structure is basically an owned version of a serialize module, with
// a ThinLTO summary attached.
struct LLVMRustThinLTOBuffer {
  std::string data;
  std::string thin_link_data;
};

extern "C" bool LLVMRustHasFeature(LLVMTargetMachineRef TM,
                                   const char *Feature) {
  TargetMachine *Target = unwrap(TM);
  const MCSubtargetInfo *MCInfo = Target->getMCSubtargetInfo();
  return MCInfo->checkFeatures(std::string("+") + Feature);
}

enum class LLVMRustCodeModel {
  Tiny,
  Small,
  Kernel,
  Medium,
  Large,
  None,
};

static std::optional<CodeModel::Model> fromRust(LLVMRustCodeModel Model) {
  switch (Model) {
  case LLVMRustCodeModel::Tiny:
    return CodeModel::Tiny;
  case LLVMRustCodeModel::Small:
    return CodeModel::Small;
  case LLVMRustCodeModel::Kernel:
    return CodeModel::Kernel;
  case LLVMRustCodeModel::Medium:
    return CodeModel::Medium;
  case LLVMRustCodeModel::Large:
    return CodeModel::Large;
  case LLVMRustCodeModel::None:
    return std::nullopt;
  default:
    report_fatal_error("Bad CodeModel.");
  }
}

enum class LLVMRustCodeGenOptLevel {
  None,
  Less,
  Default,
  Aggressive,
};

using CodeGenOptLevelEnum = llvm::CodeGenOptLevel;

static CodeGenOptLevelEnum fromRust(LLVMRustCodeGenOptLevel Level) {
  switch (Level) {
  case LLVMRustCodeGenOptLevel::None:
    return CodeGenOptLevelEnum::None;
  case LLVMRustCodeGenOptLevel::Less:
    return CodeGenOptLevelEnum::Less;
  case LLVMRustCodeGenOptLevel::Default:
    return CodeGenOptLevelEnum::Default;
  case LLVMRustCodeGenOptLevel::Aggressive:
    return CodeGenOptLevelEnum::Aggressive;
  default:
    report_fatal_error("Bad CodeGenOptLevel.");
  }
}

enum class LLVMRustPassBuilderOptLevel {
  O0,
  O1,
  O2,
  O3,
  Os,
  Oz,
};

static OptimizationLevel fromRust(LLVMRustPassBuilderOptLevel Level) {
  switch (Level) {
  case LLVMRustPassBuilderOptLevel::O0:
    return OptimizationLevel::O0;
  case LLVMRustPassBuilderOptLevel::O1:
    return OptimizationLevel::O1;
  case LLVMRustPassBuilderOptLevel::O2:
    return OptimizationLevel::O2;
  case LLVMRustPassBuilderOptLevel::O3:
    return OptimizationLevel::O3;
  case LLVMRustPassBuilderOptLevel::Os:
    return OptimizationLevel::Os;
  case LLVMRustPassBuilderOptLevel::Oz:
    return OptimizationLevel::Oz;
  default:
    report_fatal_error("Bad PassBuilderOptLevel.");
  }
}

enum class LLVMRustRelocModel {
  Static,
  PIC,
  DynamicNoPic,
  ROPI,
  RWPI,
  ROPIRWPI,
};

static Reloc::Model fromRust(LLVMRustRelocModel RustReloc) {
  switch (RustReloc) {
  case LLVMRustRelocModel::Static:
    return Reloc::Static;
  case LLVMRustRelocModel::PIC:
    return Reloc::PIC_;
  case LLVMRustRelocModel::DynamicNoPic:
    return Reloc::DynamicNoPIC;
  case LLVMRustRelocModel::ROPI:
    return Reloc::ROPI;
  case LLVMRustRelocModel::RWPI:
    return Reloc::RWPI;
  case LLVMRustRelocModel::ROPIRWPI:
    return Reloc::ROPI_RWPI;
  }
  report_fatal_error("Bad RelocModel.");
}

enum class LLVMRustFloatABI {
  Default,
  Soft,
  Hard,
};

static FloatABI::ABIType fromRust(LLVMRustFloatABI RustFloatAbi) {
  switch (RustFloatAbi) {
  case LLVMRustFloatABI::Default:
    return FloatABI::Default;
  case LLVMRustFloatABI::Soft:
    return FloatABI::Soft;
  case LLVMRustFloatABI::Hard:
    return FloatABI::Hard;
  }
  report_fatal_error("Bad FloatABI.");
}

/// getLongestEntryLength - Return the length of the longest entry in the table.
template <typename KV> static size_t getLongestEntryLength(ArrayRef<KV> Table) {
  size_t MaxLen = 0;
  for (auto &I : Table)
    MaxLen = std::max(MaxLen, std::strlen(I.Key));
  return MaxLen;
}

extern "C" void LLVMRustPrintTargetCPUs(LLVMTargetMachineRef TM,
                                        RustStringRef OutStr) {
  ArrayRef<SubtargetSubTypeKV> CPUTable =
      unwrap(TM)->getMCSubtargetInfo()->getAllProcessorDescriptions();
  auto OS = RawRustStringOstream(OutStr);

  // Just print a bare list of target CPU names, and let Rust-side code handle
  // the full formatting of `--print=target-cpus`.
  for (auto &CPU : CPUTable) {
    OS << CPU.Key << "\n";
  }
}

extern "C" size_t LLVMRustGetTargetFeaturesCount(LLVMTargetMachineRef TM) {
  const TargetMachine *Target = unwrap(TM);
  const MCSubtargetInfo *MCInfo = Target->getMCSubtargetInfo();
  const ArrayRef<SubtargetFeatureKV> FeatTable =
      MCInfo->getAllProcessorFeatures();
  return FeatTable.size();
}

extern "C" void LLVMRustGetTargetFeature(LLVMTargetMachineRef TM, size_t Index,
                                         const char **Feature,
                                         const char **Desc) {
  const TargetMachine *Target = unwrap(TM);
  const MCSubtargetInfo *MCInfo = Target->getMCSubtargetInfo();
  const ArrayRef<SubtargetFeatureKV> FeatTable =
      MCInfo->getAllProcessorFeatures();
  const SubtargetFeatureKV Feat = FeatTable[Index];
  *Feature = Feat.Key;
  *Desc = Feat.Desc;
}

extern "C" const char *LLVMRustGetHostCPUName(size_t *OutLen) {
  StringRef Name = sys::getHostCPUName();
  *OutLen = Name.size();
  return Name.data();
}

extern "C" LLVMTargetMachineRef LLVMRustCreateTargetMachine(
    const char *TripleStr, const char *CPU, const char *Feature,
    const char *ABIStr, LLVMRustCodeModel RustCM, LLVMRustRelocModel RustReloc,
    LLVMRustCodeGenOptLevel RustOptLevel, LLVMRustFloatABI RustFloatABIType,
    bool FunctionSections, bool DataSections, bool UniqueSectionNames,
    bool TrapUnreachable, bool Singlethread, bool VerboseAsm,
    bool EmitStackSizeSection, bool RelaxELFRelocations, bool UseInitArray,
    const char *SplitDwarfFile, const char *OutputObjFile,
    const char *DebugInfoCompression, bool UseEmulatedTls,
    const char *ArgsCstrBuff, size_t ArgsCstrBuffLen) {

  auto OptLevel = fromRust(RustOptLevel);
  auto RM = fromRust(RustReloc);
  auto CM = fromRust(RustCM);
  auto FloatABIType = fromRust(RustFloatABIType);

  std::string Error;
  auto Trip = Triple(Triple::normalize(TripleStr));
  const llvm::Target *TheTarget =
      TargetRegistry::lookupTarget(Trip.getTriple(), Error);
  if (TheTarget == nullptr) {
    LLVMRustSetLastError(Error.c_str());
    return nullptr;
  }

  TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(Trip);

  Options.FloatABIType = FloatABIType;
  Options.DataSections = DataSections;
  Options.FunctionSections = FunctionSections;
  Options.UniqueSectionNames = UniqueSectionNames;
  Options.MCOptions.AsmVerbose = VerboseAsm;
  // Always preserve comments that were written by the user
  Options.MCOptions.PreserveAsmComments = true;
  Options.MCOptions.ABIName = ABIStr;
  if (SplitDwarfFile) {
    Options.MCOptions.SplitDwarfFile = SplitDwarfFile;
  }
  if (OutputObjFile) {
    Options.ObjectFilenameForDebug = OutputObjFile;
  }
  if (!strcmp("zlib", DebugInfoCompression) &&
      llvm::compression::zlib::isAvailable()) {
    Options.MCOptions.CompressDebugSections = DebugCompressionType::Zlib;
  } else if (!strcmp("zstd", DebugInfoCompression) &&
             llvm::compression::zstd::isAvailable()) {
    Options.MCOptions.CompressDebugSections = DebugCompressionType::Zstd;
  } else if (!strcmp("none", DebugInfoCompression)) {
    Options.MCOptions.CompressDebugSections = DebugCompressionType::None;
  }

  Options.MCOptions.X86RelaxRelocations = RelaxELFRelocations;
  Options.UseInitArray = UseInitArray;
  Options.EmulatedTLS = UseEmulatedTls;

  if (TrapUnreachable) {
    // Tell LLVM to codegen `unreachable` into an explicit trap instruction.
    // This limits the extent of possible undefined behavior in some cases, as
    // it prevents control flow from "falling through" into whatever code
    // happens to be laid out next in memory.
    Options.TrapUnreachable = true;
    // But don't emit traps after other traps or no-returns unnecessarily.
    // ...except for when targeting WebAssembly, because the NoTrapAfterNoreturn
    // option causes bugs in the LLVM WebAssembly backend. You should be able to
    // remove this check when Rust's minimum supported LLVM version is >= 18
    // https://github.com/llvm/llvm-project/pull/65876
    if (!Trip.isWasm()) {
      Options.NoTrapAfterNoreturn = true;
    }
  }

  if (Singlethread) {
    Options.ThreadModel = ThreadModel::Single;
  }

  Options.EmitStackSizeSection = EmitStackSizeSection;

  if (ArgsCstrBuff != nullptr) {
#if LLVM_VERSION_GE(20, 0)
    size_t buffer_offset = 0;
    assert(ArgsCstrBuff[ArgsCstrBuffLen - 1] == '\0');
    auto Arg0 = std::string(ArgsCstrBuff);
    buffer_offset = Arg0.size() + 1;

    std::string CommandlineArgs;
    raw_string_ostream OS(CommandlineArgs);
    ListSeparator LS(" ");
    for (StringRef Arg : split(StringRef(ArgsCstrBuff + buffer_offset,
                                         ArgsCstrBuffLen - buffer_offset),
                               '\0')) {
      OS << LS;
      sys::printArg(OS, Arg, /*Quote=*/true);
    }
    OS.flush();
    Options.MCOptions.Argv0 = Arg0;
    Options.MCOptions.CommandlineArgs = CommandlineArgs;
#else
    size_t buffer_offset = 0;
    assert(ArgsCstrBuff[ArgsCstrBuffLen - 1] == '\0');

    const size_t arg0_len = std::strlen(ArgsCstrBuff);
    char *arg0 = new char[arg0_len + 1];
    memcpy(arg0, ArgsCstrBuff, arg0_len);
    arg0[arg0_len] = '\0';
    buffer_offset += arg0_len + 1;

    const size_t num_cmd_arg_strings = std::count(
        &ArgsCstrBuff[buffer_offset], &ArgsCstrBuff[ArgsCstrBuffLen], '\0');

    std::string *cmd_arg_strings = new std::string[num_cmd_arg_strings];
    for (size_t i = 0; i < num_cmd_arg_strings; ++i) {
      assert(buffer_offset < ArgsCstrBuffLen);
      const size_t len = std::strlen(ArgsCstrBuff + buffer_offset);
      cmd_arg_strings[i] = std::string(&ArgsCstrBuff[buffer_offset], len);
      buffer_offset += len + 1;
    }

    assert(buffer_offset == ArgsCstrBuffLen);

    Options.MCOptions.Argv0 = arg0;
    Options.MCOptions.CommandLineArgs =
        llvm::ArrayRef<std::string>(cmd_arg_strings, num_cmd_arg_strings);
#endif
  }

  TargetMachine *TM = TheTarget->createTargetMachine(
      Trip.getTriple(), CPU, Feature, Options, RM, CM, OptLevel);
  return wrap(TM);
}

extern "C" void LLVMRustDisposeTargetMachine(LLVMTargetMachineRef TM) {
#if LLVM_VERSION_LT(20, 0)
  MCTargetOptions &MCOptions = unwrap(TM)->Options.MCOptions;
  delete[] MCOptions.Argv0;
  delete[] MCOptions.CommandLineArgs.data();
#endif

  delete unwrap(TM);
}

// Unfortunately, the LLVM C API doesn't provide a way to create the
// TargetLibraryInfo pass, so we use this method to do so.
extern "C" void LLVMRustAddLibraryInfo(LLVMPassManagerRef PMR, LLVMModuleRef M,
                                       bool DisableSimplifyLibCalls) {
  auto TargetTriple = Triple(unwrap(M)->getTargetTriple());
  auto TLII = TargetLibraryInfoImpl(TargetTriple);
  if (DisableSimplifyLibCalls)
    TLII.disableAllFunctions();
  unwrap(PMR)->add(new TargetLibraryInfoWrapperPass(TLII));
}

extern "C" void LLVMRustSetLLVMOptions(int Argc, char **Argv) {
  // Initializing the command-line options more than once is not allowed. So,
  // check if they've already been initialized. (This could happen if we're
  // being called from rustpkg, for example). If the arguments change, then
  // that's just kinda unfortunate.
  static bool Initialized = false;
  if (Initialized)
    return;
  Initialized = true;
  cl::ParseCommandLineOptions(Argc, Argv);
}

enum class LLVMRustFileType {
  AssemblyFile,
  ObjectFile,
};

static CodeGenFileType fromRust(LLVMRustFileType Type) {
  switch (Type) {
  case LLVMRustFileType::AssemblyFile:
    return CodeGenFileType::AssemblyFile;
  case LLVMRustFileType::ObjectFile:
    return CodeGenFileType::ObjectFile;
  default:
    report_fatal_error("Bad FileType.");
  }
}

extern "C" LLVMRustResult
LLVMRustWriteOutputFile(LLVMTargetMachineRef Target, LLVMPassManagerRef PMR,
                        LLVMModuleRef M, const char *Path, const char *DwoPath,
                        LLVMRustFileType RustFileType, bool VerifyIR) {
  llvm::legacy::PassManager *PM = unwrap<llvm::legacy::PassManager>(PMR);
  auto FileType = fromRust(RustFileType);

  std::string ErrorInfo;
  std::error_code EC;
  auto OS = raw_fd_ostream(Path, EC, sys::fs::OF_None);
  if (EC)
    ErrorInfo = EC.message();
  if (ErrorInfo != "") {
    LLVMRustSetLastError(ErrorInfo.c_str());
    return LLVMRustResult::Failure;
  }

  auto BOS = buffer_ostream(OS);
  if (DwoPath) {
    auto DOS = raw_fd_ostream(DwoPath, EC, sys::fs::OF_None);
    EC.clear();
    if (EC)
      ErrorInfo = EC.message();
    if (ErrorInfo != "") {
      LLVMRustSetLastError(ErrorInfo.c_str());
      return LLVMRustResult::Failure;
    }
    auto DBOS = buffer_ostream(DOS);
    unwrap(Target)->addPassesToEmitFile(*PM, BOS, &DBOS, FileType, !VerifyIR);
    PM->run(*unwrap(M));
  } else {
    unwrap(Target)->addPassesToEmitFile(*PM, BOS, nullptr, FileType, !VerifyIR);
    PM->run(*unwrap(M));
  }

  // Apparently `addPassesToEmitFile` adds a pointer to our on-the-stack output
  // stream (OS), so the only real safe place to delete this is here? Don't we
  // wish this was written in Rust?
  LLVMDisposePassManager(PMR);
  return LLVMRustResult::Success;
}

extern "C" typedef void (*LLVMRustSelfProfileBeforePassCallback)(
    void *,        // LlvmSelfProfiler
    const char *,  // pass name
    const char *); // IR name
extern "C" typedef void (*LLVMRustSelfProfileAfterPassCallback)(
    void *); // LlvmSelfProfiler

std::string LLVMRustwrappedIrGetName(const llvm::Any &WrappedIr) {
  if (const auto *Cast = any_cast<const Module *>(&WrappedIr))
    return (*Cast)->getName().str();
  if (const auto *Cast = any_cast<const Function *>(&WrappedIr))
    return (*Cast)->getName().str();
  if (const auto *Cast = any_cast<const Loop *>(&WrappedIr))
    return (*Cast)->getName().str();
  if (const auto *Cast = any_cast<const LazyCallGraph::SCC *>(&WrappedIr))
    return (*Cast)->getName();
  return "<UNKNOWN>";
}

void LLVMSelfProfileInitializeCallbacks(
    PassInstrumentationCallbacks &PIC, void *LlvmSelfProfiler,
    LLVMRustSelfProfileBeforePassCallback BeforePassCallback,
    LLVMRustSelfProfileAfterPassCallback AfterPassCallback) {
  PIC.registerBeforeNonSkippedPassCallback(
      [LlvmSelfProfiler, BeforePassCallback](StringRef Pass, llvm::Any Ir) {
        std::string PassName = Pass.str();
        std::string IrName = LLVMRustwrappedIrGetName(Ir);
        BeforePassCallback(LlvmSelfProfiler, PassName.c_str(), IrName.c_str());
      });

  PIC.registerAfterPassCallback(
      [LlvmSelfProfiler, AfterPassCallback](
          StringRef Pass, llvm::Any IR, const PreservedAnalyses &Preserved) {
        AfterPassCallback(LlvmSelfProfiler);
      });

  PIC.registerAfterPassInvalidatedCallback(
      [LlvmSelfProfiler,
       AfterPassCallback](StringRef Pass, const PreservedAnalyses &Preserved) {
        AfterPassCallback(LlvmSelfProfiler);
      });

  PIC.registerBeforeAnalysisCallback(
      [LlvmSelfProfiler, BeforePassCallback](StringRef Pass, llvm::Any Ir) {
        std::string PassName = Pass.str();
        std::string IrName = LLVMRustwrappedIrGetName(Ir);
        BeforePassCallback(LlvmSelfProfiler, PassName.c_str(), IrName.c_str());
      });

  PIC.registerAfterAnalysisCallback(
      [LlvmSelfProfiler, AfterPassCallback](StringRef Pass, llvm::Any Ir) {
        AfterPassCallback(LlvmSelfProfiler);
      });
}

enum class LLVMRustOptStage {
  PreLinkNoLTO,
  PreLinkThinLTO,
  PreLinkFatLTO,
  ThinLTO,
  FatLTO,
};

struct LLVMRustSanitizerOptions {
  bool SanitizeAddress;
  bool SanitizeAddressRecover;
  bool SanitizeCFI;
  bool SanitizeDataFlow;
  char **SanitizeDataFlowABIList;
  size_t SanitizeDataFlowABIListLen;
  bool SanitizeKCFI;
  bool SanitizeMemory;
  bool SanitizeMemoryRecover;
  int SanitizeMemoryTrackOrigins;
  bool SanitizeThread;
  bool SanitizeHWAddress;
  bool SanitizeHWAddressRecover;
  bool SanitizeKernelAddress;
  bool SanitizeKernelAddressRecover;
};

// This symbol won't be available or used when Enzyme is not enabled.
// Always set AugmentPassBuilder to true, since it registers optimizations which
// will improve the performance for Enzyme.
#ifdef ENZYME
extern "C" void registerEnzymeAndPassPipeline(llvm::PassBuilder &PB,
                                              /* augmentPassBuilder */ bool);
#endif

extern "C" LLVMRustResult LLVMRustOptimize(
    LLVMModuleRef ModuleRef, LLVMTargetMachineRef TMRef,
    LLVMRustPassBuilderOptLevel OptLevelRust, LLVMRustOptStage OptStage,
    bool IsLinkerPluginLTO, bool NoPrepopulatePasses, bool VerifyIR,
    bool LintIR, LLVMRustThinLTOBuffer **ThinLTOBufferRef, bool EmitThinLTO,
    bool EmitThinLTOSummary, bool MergeFunctions, bool UnrollLoops,
    bool SLPVectorize, bool LoopVectorize, bool DisableSimplifyLibCalls,
    bool EmitLifetimeMarkers, bool RunEnzyme,
    LLVMRustSanitizerOptions *SanitizerOptions, const char *PGOGenPath,
    const char *PGOUsePath, bool InstrumentCoverage,
    const char *InstrProfileOutput, const char *PGOSampleUsePath,
    bool DebugInfoForProfiling, void *LlvmSelfProfiler,
    LLVMRustSelfProfileBeforePassCallback BeforePassCallback,
    LLVMRustSelfProfileAfterPassCallback AfterPassCallback,
    const char *ExtraPasses, size_t ExtraPassesLen, const char *LLVMPlugins,
    size_t LLVMPluginsLen) {
  Module *TheModule = unwrap(ModuleRef);
  TargetMachine *TM = unwrap(TMRef);
  OptimizationLevel OptLevel = fromRust(OptLevelRust);

  PipelineTuningOptions PTO;
  PTO.LoopUnrolling = UnrollLoops;
  PTO.LoopInterleaving = UnrollLoops;
  PTO.LoopVectorization = LoopVectorize;
  PTO.SLPVectorization = SLPVectorize;
  PTO.MergeFunctions = MergeFunctions;

  PassInstrumentationCallbacks PIC;

  if (LlvmSelfProfiler) {
    LLVMSelfProfileInitializeCallbacks(PIC, LlvmSelfProfiler,
                                       BeforePassCallback, AfterPassCallback);
  }

  std::optional<PGOOptions> PGOOpt;
  auto FS = vfs::getRealFileSystem();
  if (PGOGenPath) {
    assert(!PGOUsePath && !PGOSampleUsePath);
    PGOOpt = PGOOptions(
        PGOGenPath, "", "", "", FS, PGOOptions::IRInstr, PGOOptions::NoCSAction,
        PGOOptions::ColdFuncOpt::Default, DebugInfoForProfiling);
  } else if (PGOUsePath) {
    assert(!PGOSampleUsePath);
    PGOOpt = PGOOptions(
        PGOUsePath, "", "", "", FS, PGOOptions::IRUse, PGOOptions::NoCSAction,
        PGOOptions::ColdFuncOpt::Default, DebugInfoForProfiling);
  } else if (PGOSampleUsePath) {
    PGOOpt =
        PGOOptions(PGOSampleUsePath, "", "", "", FS, PGOOptions::SampleUse,
                   PGOOptions::NoCSAction, PGOOptions::ColdFuncOpt::Default,
                   DebugInfoForProfiling);
  } else if (DebugInfoForProfiling) {
    PGOOpt = PGOOptions(
        "", "", "", "", FS, PGOOptions::NoAction, PGOOptions::NoCSAction,
        PGOOptions::ColdFuncOpt::Default, DebugInfoForProfiling);
  }

  auto PB = PassBuilder(TM, PTO, PGOOpt, &PIC);
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  StandardInstrumentations SI(TheModule->getContext(),
                              /*DebugLogging=*/false);
  SI.registerCallbacks(PIC, &MAM);

  if (LLVMPluginsLen) {
    auto PluginsStr = StringRef(LLVMPlugins, LLVMPluginsLen);
    SmallVector<StringRef> Plugins;
    PluginsStr.split(Plugins, ',', -1, false);
    for (auto PluginPath : Plugins) {
      auto Plugin = PassPlugin::Load(PluginPath.str());
      if (!Plugin) {
        auto Err = Plugin.takeError();
        auto ErrMsg = llvm::toString(std::move(Err));
        LLVMRustSetLastError(ErrMsg.c_str());
        return LLVMRustResult::Failure;
      }
      Plugin->registerPassBuilderCallbacks(PB);
    }
  }

  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });

  Triple TargetTriple(TheModule->getTargetTriple());
  std::unique_ptr<TargetLibraryInfoImpl> TLII(
      new TargetLibraryInfoImpl(TargetTriple));
  if (DisableSimplifyLibCalls)
    TLII->disableAllFunctions();
  FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });

  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  // We manually collect pipeline callbacks so we can apply them at O0, where
  // the PassBuilder does not create a pipeline.
  std::vector<std::function<void(ModulePassManager &, OptimizationLevel)>>
      PipelineStartEPCallbacks;
#if LLVM_VERSION_GE(20, 0)
  std::vector<std::function<void(ModulePassManager &, OptimizationLevel,
                                 ThinOrFullLTOPhase)>>
      OptimizerLastEPCallbacks;
#else
  std::vector<std::function<void(ModulePassManager &, OptimizationLevel)>>
      OptimizerLastEPCallbacks;
#endif

  if (!IsLinkerPluginLTO && SanitizerOptions && SanitizerOptions->SanitizeCFI &&
      !NoPrepopulatePasses) {
    PipelineStartEPCallbacks.push_back(
        [](ModulePassManager &MPM, OptimizationLevel Level) {
          MPM.addPass(LowerTypeTestsPass(
              /*ExportSummary=*/nullptr,
              /*ImportSummary=*/nullptr));
        });
  }

  if (VerifyIR) {
    PipelineStartEPCallbacks.push_back(
        [VerifyIR](ModulePassManager &MPM, OptimizationLevel Level) {
          MPM.addPass(VerifierPass());
        });
  }

  if (LintIR) {
    PipelineStartEPCallbacks.push_back([](ModulePassManager &MPM,
                                          OptimizationLevel Level) {
#if LLVM_VERSION_GE(21, 0)
      MPM.addPass(
          createModuleToFunctionPassAdaptor(LintPass(/*AbortOnError=*/true)));
#else
      MPM.addPass(createModuleToFunctionPassAdaptor(LintPass()));
#endif
    });
  }

  if (InstrumentCoverage) {
    PipelineStartEPCallbacks.push_back(
        [InstrProfileOutput](ModulePassManager &MPM, OptimizationLevel Level) {
          InstrProfOptions Options;
          if (InstrProfileOutput) {
            Options.InstrProfileOutput = InstrProfileOutput;
          }
          // cargo run tests in multhreading mode by default
          // so use atomics for coverage counters
          Options.Atomic = true;
          MPM.addPass(InstrProfilingLoweringPass(Options, false));
        });
  }

  if (SanitizerOptions) {
    if (SanitizerOptions->SanitizeDataFlow) {
      std::vector<std::string> ABIListFiles(
          SanitizerOptions->SanitizeDataFlowABIList,
          SanitizerOptions->SanitizeDataFlowABIList +
              SanitizerOptions->SanitizeDataFlowABIListLen);
      OptimizerLastEPCallbacks.push_back(
#if LLVM_VERSION_GE(20, 0)
          [ABIListFiles](ModulePassManager &MPM, OptimizationLevel Level,
                         ThinOrFullLTOPhase phase) {
#else
          [ABIListFiles](ModulePassManager &MPM, OptimizationLevel Level) {
#endif
            MPM.addPass(DataFlowSanitizerPass(ABIListFiles));
          });
    }

    if (SanitizerOptions->SanitizeMemory) {
      MemorySanitizerOptions Options(
          SanitizerOptions->SanitizeMemoryTrackOrigins,
          SanitizerOptions->SanitizeMemoryRecover,
          /*CompileKernel=*/false,
          /*EagerChecks=*/true);
      OptimizerLastEPCallbacks.push_back(
#if LLVM_VERSION_GE(20, 0)
          [Options](ModulePassManager &MPM, OptimizationLevel Level,
                    ThinOrFullLTOPhase phase) {
#else
          [Options](ModulePassManager &MPM, OptimizationLevel Level) {
#endif
            MPM.addPass(MemorySanitizerPass(Options));
          });
    }

    if (SanitizerOptions->SanitizeThread) {
      OptimizerLastEPCallbacks.push_back(
#if LLVM_VERSION_GE(20, 0)
          [](ModulePassManager &MPM, OptimizationLevel Level,
             ThinOrFullLTOPhase phase) {
#else
          [](ModulePassManager &MPM, OptimizationLevel Level) {
#endif
            MPM.addPass(ModuleThreadSanitizerPass());
            MPM.addPass(
                createModuleToFunctionPassAdaptor(ThreadSanitizerPass()));
          });
    }

    if (SanitizerOptions->SanitizeAddress ||
        SanitizerOptions->SanitizeKernelAddress) {
      OptimizerLastEPCallbacks.push_back(
#if LLVM_VERSION_GE(20, 0)
          [SanitizerOptions, TM](ModulePassManager &MPM,
                                 OptimizationLevel Level,
                                 ThinOrFullLTOPhase phase) {
#else
          [SanitizerOptions, TM](ModulePassManager &MPM,
                                 OptimizationLevel Level) {
#endif
            auto CompileKernel = SanitizerOptions->SanitizeKernelAddress;
            AddressSanitizerOptions opts = AddressSanitizerOptions{
                CompileKernel,
                SanitizerOptions->SanitizeAddressRecover ||
                    SanitizerOptions->SanitizeKernelAddressRecover,
                /*UseAfterScope=*/true,
                AsanDetectStackUseAfterReturnMode::Runtime,
            };
            MPM.addPass(AddressSanitizerPass(
                opts,
                /*UseGlobalGC*/ true,
                // UseOdrIndicator should be false on windows machines
                // https://reviews.llvm.org/D137227
                !TM->getTargetTriple().isOSWindows()));
          });
    }
    if (SanitizerOptions->SanitizeHWAddress) {
      OptimizerLastEPCallbacks.push_back(
#if LLVM_VERSION_GE(20, 0)
          [SanitizerOptions](ModulePassManager &MPM, OptimizationLevel Level,
                             ThinOrFullLTOPhase phase) {
#else
          [SanitizerOptions](ModulePassManager &MPM, OptimizationLevel Level) {
#endif
            HWAddressSanitizerOptions opts(
                /*CompileKernel=*/false,
                SanitizerOptions->SanitizeHWAddressRecover,
                /*DisableOptimization=*/false);
            MPM.addPass(HWAddressSanitizerPass(opts));
          });
    }
  }

  ModulePassManager MPM;
  bool NeedThinLTOBufferPasses = EmitThinLTO;
  auto ThinLTOBuffer = std::make_unique<LLVMRustThinLTOBuffer>();
  raw_string_ostream ThinLTODataOS(ThinLTOBuffer->data);
  raw_string_ostream ThinLinkDataOS(ThinLTOBuffer->thin_link_data);
  if (!NoPrepopulatePasses) {
    // The pre-link pipelines don't support O0 and require using
    // buildO0DefaultPipeline() instead. At the same time, the LTO pipelines do
    // support O0 and using them is required.
    bool IsLTO = OptStage == LLVMRustOptStage::ThinLTO ||
                 OptStage == LLVMRustOptStage::FatLTO;
    if (OptLevel == OptimizationLevel::O0 && !IsLTO) {
      for (const auto &C : PipelineStartEPCallbacks)
        PB.registerPipelineStartEPCallback(C);
      for (const auto &C : OptimizerLastEPCallbacks)
        PB.registerOptimizerLastEPCallback(C);

      // We manually schedule ThinLTOBufferPasses below, so don't pass the value
      // to enable it here.
      MPM = PB.buildO0DefaultPipeline(OptLevel);
    } else {
      for (const auto &C : PipelineStartEPCallbacks)
        PB.registerPipelineStartEPCallback(C);
      for (const auto &C : OptimizerLastEPCallbacks)
        PB.registerOptimizerLastEPCallback(C);

      switch (OptStage) {
      case LLVMRustOptStage::PreLinkNoLTO:
        if (ThinLTOBufferRef) {
          // This is similar to LLVM's `buildFatLTODefaultPipeline`, where the
          // bitcode for embedding is obtained after performing
          // `ThinLTOPreLinkDefaultPipeline`.
          MPM.addPass(PB.buildThinLTOPreLinkDefaultPipeline(OptLevel));
          if (EmitThinLTO) {
            MPM.addPass(ThinLTOBitcodeWriterPass(
                ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
          } else {
            MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
          }
          *ThinLTOBufferRef = ThinLTOBuffer.release();
          MPM.addPass(PB.buildModuleOptimizationPipeline(
              OptLevel, ThinOrFullLTOPhase::None));
          MPM.addPass(
              createModuleToFunctionPassAdaptor(AnnotationRemarksPass()));
        } else {
          MPM = PB.buildPerModuleDefaultPipeline(OptLevel);
        }
        break;
      case LLVMRustOptStage::PreLinkThinLTO:
        MPM = PB.buildThinLTOPreLinkDefaultPipeline(OptLevel);
        NeedThinLTOBufferPasses = false;
        break;
      case LLVMRustOptStage::PreLinkFatLTO:
        MPM = PB.buildLTOPreLinkDefaultPipeline(OptLevel);
        NeedThinLTOBufferPasses = false;
        break;
      case LLVMRustOptStage::ThinLTO:
        // FIXME: Does it make sense to pass the ModuleSummaryIndex?
        // It only seems to be needed for C++ specific optimizations.
        MPM = PB.buildThinLTODefaultPipeline(OptLevel, nullptr);
        break;
      case LLVMRustOptStage::FatLTO:
        MPM = PB.buildLTODefaultPipeline(OptLevel, nullptr);
        break;
      }
    }
  } else {
    // We're not building any of the default pipelines but we still want to
    // add the verifier, instrumentation, etc passes if they were requested
    for (const auto &C : PipelineStartEPCallbacks)
      C(MPM, OptLevel);
    for (const auto &C : OptimizerLastEPCallbacks)
#if LLVM_VERSION_GE(20, 0)
      C(MPM, OptLevel, ThinOrFullLTOPhase::None);
#else
      C(MPM, OptLevel);
#endif
  }

  if (ExtraPassesLen) {
    if (auto Err =
            PB.parsePassPipeline(MPM, StringRef(ExtraPasses, ExtraPassesLen))) {
      std::string ErrMsg = toString(std::move(Err));
      LLVMRustSetLastError(ErrMsg.c_str());
      return LLVMRustResult::Failure;
    }
  }

  if (NeedThinLTOBufferPasses) {
    MPM.addPass(CanonicalizeAliasesPass());
    MPM.addPass(NameAnonGlobalPass());
  }
  // For `-Copt-level=0`, ThinLTO, or LTO.
  if (ThinLTOBufferRef && *ThinLTOBufferRef == nullptr) {
    if (EmitThinLTO) {
      MPM.addPass(ThinLTOBitcodeWriterPass(
          ThinLTODataOS, EmitThinLTOSummary ? &ThinLinkDataOS : nullptr));
    } else {
      MPM.addPass(BitcodeWriterPass(ThinLTODataOS));
    }
    *ThinLTOBufferRef = ThinLTOBuffer.release();
  }

  // now load "-enzyme" pass:
#ifdef ENZYME
  if (RunEnzyme) {
    registerEnzymeAndPassPipeline(PB, true);
    if (auto Err = PB.parsePassPipeline(MPM, "enzyme")) {
      std::string ErrMsg = toString(std::move(Err));
      LLVMRustSetLastError(ErrMsg.c_str());
      return LLVMRustResult::Failure;
    }
  }
#endif

  // Upgrade all calls to old intrinsics first.
  for (Module::iterator I = TheModule->begin(), E = TheModule->end(); I != E;)
    UpgradeCallsToIntrinsic(&*I++); // must be post-increment, as we remove

  MPM.run(*TheModule, MAM);
  return LLVMRustResult::Success;
}

// Callback to demangle function name
// Parameters:
// * name to be demangled
// * name len
// * output buffer
// * output buffer len
// Returns len of demangled string, or 0 if demangle failed.
typedef size_t (*DemangleFn)(const char *, size_t, char *, size_t);

namespace {

class RustAssemblyAnnotationWriter : public AssemblyAnnotationWriter {
  DemangleFn Demangle;
  std::vector<char> Buf;

public:
  RustAssemblyAnnotationWriter(DemangleFn Demangle) : Demangle(Demangle) {}

  // Return empty string if demangle failed
  // or if name does not need to be demangled
  StringRef CallDemangle(StringRef name) {
    if (!Demangle) {
      return StringRef();
    }

    if (Buf.size() < name.size() * 2) {
      // Semangled name usually shorter than mangled,
      // but allocate twice as much memory just in case
      Buf.resize(name.size() * 2);
    }

    auto R = Demangle(name.data(), name.size(), Buf.data(), Buf.size());
    if (!R) {
      // Demangle failed.
      return StringRef();
    }

    auto Demangled = StringRef(Buf.data(), R);
    if (Demangled == name) {
      // Do not print anything if demangled name is equal to mangled.
      return StringRef();
    }

    return Demangled;
  }

  void emitFunctionAnnot(const Function *F,
                         formatted_raw_ostream &OS) override {
    StringRef Demangled = CallDemangle(F->getName());
    if (Demangled.empty()) {
      return;
    }

    OS << "; " << Demangled << "\n";
  }

  void emitInstructionAnnot(const Instruction *I,
                            formatted_raw_ostream &OS) override {
    const char *Name;
    const Value *Value;
    if (const CallInst *CI = dyn_cast<CallInst>(I)) {
      Name = "call";
      Value = CI->getCalledOperand();
    } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) {
      Name = "invoke";
      Value = II->getCalledOperand();
    } else {
      // Could demangle more operations, e. g.
      // `store %place, @function`.
      return;
    }

    if (!Value->hasName()) {
      return;
    }

    StringRef Demangled = CallDemangle(Value->getName());
    if (Demangled.empty()) {
      return;
    }

    OS << "; " << Name << " " << Demangled << "\n";
  }
};

} // namespace

extern "C" LLVMRustResult LLVMRustPrintModule(LLVMModuleRef M, const char *Path,
                                              DemangleFn Demangle) {
  std::string ErrorInfo;
  std::error_code EC;
  auto OS = raw_fd_ostream(Path, EC, sys::fs::OF_None);
  if (EC)
    ErrorInfo = EC.message();
  if (ErrorInfo != "") {
    LLVMRustSetLastError(ErrorInfo.c_str());
    return LLVMRustResult::Failure;
  }

  auto AAW = RustAssemblyAnnotationWriter(Demangle);
  auto FOS = formatted_raw_ostream(OS);
  unwrap(M)->print(FOS, &AAW);

  return LLVMRustResult::Success;
}

extern "C" void LLVMRustPrintPasses() {
  PassBuilder PB;
  PB.printPassNames(outs());
}

extern "C" void LLVMRustRunRestrictionPass(LLVMModuleRef M, char **Symbols,
                                           size_t Len) {
  auto PreserveFunctions = [=](const GlobalValue &GV) {
    // Preserve LLVM-injected, ASAN-related symbols.
    // See also https://github.com/rust-lang/rust/issues/113404.
    if (GV.getName() == "___asan_globals_registered") {
      return true;
    }

    // Preserve symbols exported from Rust modules.
    for (size_t I = 0; I < Len; I++) {
      if (GV.getName() == Symbols[I]) {
        return true;
      }
    }
    return false;
  };

  internalizeModule(*unwrap(M), PreserveFunctions);
}

extern "C" void
LLVMRustSetDataLayoutFromTargetMachine(LLVMModuleRef Module,
                                       LLVMTargetMachineRef TMR) {
  TargetMachine *Target = unwrap(TMR);
  unwrap(Module)->setDataLayout(Target->createDataLayout());
}

extern "C" void LLVMRustSetModulePICLevel(LLVMModuleRef M) {
  unwrap(M)->setPICLevel(PICLevel::Level::BigPIC);
}

extern "C" void LLVMRustSetModulePIELevel(LLVMModuleRef M) {
  unwrap(M)->setPIELevel(PIELevel::Level::Large);
}

extern "C" void LLVMRustSetModuleCodeModel(LLVMModuleRef M,
                                           LLVMRustCodeModel Model) {
  auto CM = fromRust(Model);
  if (!CM)
    return;
  unwrap(M)->setCodeModel(*CM);
}

// Here you'll find an implementation of ThinLTO as used by the Rust compiler
// right now. This ThinLTO support is only enabled on "recent ish" versions of
// LLVM, and otherwise it's just blanket rejected from other compilers.
//
// Most of this implementation is straight copied from LLVM. At the time of
// this writing it wasn't *quite* suitable to reuse more code from upstream
// for our purposes, but we should strive to upstream this support once it's
// ready to go! I figure we may want a bit of testing locally first before
// sending this upstream to LLVM. I hear though they're quite eager to receive
// feedback like this!
//
// If you're reading this code and wondering "what in the world" or you're
// working "good lord by LLVM upgrade is *still* failing due to these bindings"
// then fear not! (ok maybe fear a little). All code here is mostly based
// on `lib/LTO/ThinLTOCodeGenerator.cpp` in LLVM.
//
// You'll find that the general layout here roughly corresponds to the `run`
// method in that file as well as `ProcessThinLTOModule`. Functions are
// specifically commented below as well, but if you're updating this code
// or otherwise trying to understand it, the LLVM source will be useful in
// interpreting the mysteries within.
//
// Otherwise I'll apologize in advance, it probably requires a relatively
// significant investment on your part to "truly understand" what's going on
// here. Not saying I do myself, but it took me awhile staring at LLVM's source
// and various online resources about ThinLTO to make heads or tails of all
// this.

// This is a shared data structure which *must* be threadsafe to share
// read-only amongst threads. This also corresponds basically to the arguments
// of the `ProcessThinLTOModule` function in the LLVM source.
struct LLVMRustThinLTOData {
  // The combined index that is the global analysis over all modules we're
  // performing ThinLTO for. This is mostly managed by LLVM.
  ModuleSummaryIndex Index;

  // All modules we may look at, stored as in-memory serialized versions. This
  // is later used when inlining to ensure we can extract any module to inline
  // from.
  StringMap<MemoryBufferRef> ModuleMap;

  // A set that we manage of everything we *don't* want internalized. Note that
  // this includes all transitive references right now as well, but it may not
  // always!
  DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;

  // Not 100% sure what these are, but they impact what's internalized and
  // what's inlined across modules, I believe.
#if LLVM_VERSION_GE(20, 0)
  FunctionImporter::ImportListsTy ImportLists;
#else
  DenseMap<StringRef, FunctionImporter::ImportMapTy> ImportLists;
#endif
  DenseMap<StringRef, FunctionImporter::ExportSetTy> ExportLists;
  DenseMap<StringRef, GVSummaryMapTy> ModuleToDefinedGVSummaries;
  StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;

  LLVMRustThinLTOData() : Index(/* HaveGVs = */ false) {}
};

// Just an argument to the `LLVMRustCreateThinLTOData` function below.
struct LLVMRustThinLTOModule {
  const char *identifier;
  const char *data;
  size_t len;
};

// This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`, not sure what it
// does.
static const GlobalValueSummary *
getFirstDefinitionForLinker(const GlobalValueSummaryList &GVSummaryList) {
  auto StrongDefForLinker = llvm::find_if(
      GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
        auto Linkage = Summary->linkage();
        return !GlobalValue::isAvailableExternallyLinkage(Linkage) &&
               !GlobalValue::isWeakForLinker(Linkage);
      });
  if (StrongDefForLinker != GVSummaryList.end())
    return StrongDefForLinker->get();

  auto FirstDefForLinker = llvm::find_if(
      GVSummaryList, [](const std::unique_ptr<GlobalValueSummary> &Summary) {
        auto Linkage = Summary->linkage();
        return !GlobalValue::isAvailableExternallyLinkage(Linkage);
      });
  if (FirstDefForLinker == GVSummaryList.end())
    return nullptr;
  return FirstDefForLinker->get();
}

// The main entry point for creating the global ThinLTO analysis. The structure
// here is basically the same as before threads are spawned in the `run`
// function of `lib/LTO/ThinLTOCodeGenerator.cpp`.
extern "C" LLVMRustThinLTOData *
LLVMRustCreateThinLTOData(LLVMRustThinLTOModule *modules, size_t num_modules,
                          const char **preserved_symbols, size_t num_symbols) {
  auto Ret = std::make_unique<LLVMRustThinLTOData>();

  // Load each module's summary and merge it into one combined index
  for (size_t i = 0; i < num_modules; i++) {
    auto module = &modules[i];
    auto buffer = StringRef(module->data, module->len);
    auto mem_buffer = MemoryBufferRef(buffer, module->identifier);

    Ret->ModuleMap[module->identifier] = mem_buffer;

    if (Error Err = readModuleSummaryIndex(mem_buffer, Ret->Index)) {
      LLVMRustSetLastError(toString(std::move(Err)).c_str());
      return nullptr;
    }
  }

  // Collect for each module the list of function it defines (GUID -> Summary)
  Ret->Index.collectDefinedGVSummariesPerModule(
      Ret->ModuleToDefinedGVSummaries);

  // Convert the preserved symbols set from string to GUID, this is then needed
  // for internalization.
  for (size_t i = 0; i < num_symbols; i++) {
    auto GUID = GlobalValue::getGUID(preserved_symbols[i]);
    Ret->GUIDPreservedSymbols.insert(GUID);
  }

  // Collect the import/export lists for all modules from the call-graph in the
  // combined index
  //
  // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp`
  auto deadIsPrevailing = [&](GlobalValue::GUID G) {
    return PrevailingType::Unknown;
  };
  // We don't have a complete picture in our use of ThinLTO, just our immediate
  // crate, so we need `ImportEnabled = false` to limit internalization.
  // Otherwise, we sometimes lose `static` values -- see #60184.
  computeDeadSymbolsWithConstProp(Ret->Index, Ret->GUIDPreservedSymbols,
                                  deadIsPrevailing,
                                  /* ImportEnabled = */ false);
  // Resolve LinkOnce/Weak symbols, this has to be computed early be cause it
  // impacts the caching.
  //
  // This is copied from `lib/LTO/ThinLTOCodeGenerator.cpp` with some of this
  // being lifted from `lib/LTO/LTO.cpp` as well
  DenseMap<GlobalValue::GUID, const GlobalValueSummary *> PrevailingCopy;
  for (auto &I : Ret->Index) {
    if (I.second.SummaryList.size() > 1)
      PrevailingCopy[I.first] =
          getFirstDefinitionForLinker(I.second.SummaryList);
  }
  auto isPrevailing = [&](GlobalValue::GUID GUID, const GlobalValueSummary *S) {
    const auto &Prevailing = PrevailingCopy.find(GUID);
    if (Prevailing == PrevailingCopy.end())
      return true;
    return Prevailing->second == S;
  };
  ComputeCrossModuleImport(Ret->Index, Ret->ModuleToDefinedGVSummaries,
                           isPrevailing, Ret->ImportLists, Ret->ExportLists);

  auto recordNewLinkage = [&](StringRef ModuleIdentifier,
                              GlobalValue::GUID GUID,
                              GlobalValue::LinkageTypes NewLinkage) {
    Ret->ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
  };

  // Uses FromPrevailing visibility scheme which works for many binary
  // formats. We probably could and should use ELF visibility scheme for many of
  // our targets, however.
  lto::Config conf;
  thinLTOResolvePrevailingInIndex(conf, Ret->Index, isPrevailing,
                                  recordNewLinkage, Ret->GUIDPreservedSymbols);

  // Here we calculate an `ExportedGUIDs` set for use in the `isExported`
  // callback below. This callback below will dictate the linkage for all
  // summaries in the index, and we basically just only want to ensure that dead
  // symbols are internalized. Otherwise everything that's already external
  // linkage will stay as external, and internal will stay as internal.
  std::set<GlobalValue::GUID> ExportedGUIDs;
  for (auto &List : Ret->Index) {
    for (auto &GVS : List.second.SummaryList) {
      if (GlobalValue::isLocalLinkage(GVS->linkage()))
        continue;
      auto GUID = GVS->getOriginalName();
      if (GVS->flags().Live)
        ExportedGUIDs.insert(GUID);
    }
  }
  auto isExported = [&](StringRef ModuleIdentifier, ValueInfo VI) {
    const auto &ExportList = Ret->ExportLists.find(ModuleIdentifier);
    return (ExportList != Ret->ExportLists.end() &&
            ExportList->second.count(VI)) ||
           ExportedGUIDs.count(VI.getGUID());
  };
  thinLTOInternalizeAndPromoteInIndex(Ret->Index, isExported, isPrevailing);

  return Ret.release();
}

extern "C" void LLVMRustFreeThinLTOData(LLVMRustThinLTOData *Data) {
  delete Data;
}

// Below are the various passes that happen *per module* when doing ThinLTO.
//
// In other words, these are the functions that are all run concurrently
// with one another, one per module. The passes here correspond to the analysis
// passes in `lib/LTO/ThinLTOCodeGenerator.cpp`, currently found in the
// `ProcessThinLTOModule` function. Here they're split up into separate steps
// so rustc can save off the intermediate bytecode between each step.

static bool clearDSOLocalOnDeclarations(Module &Mod, TargetMachine &TM) {
  // When linking an ELF shared object, dso_local should be dropped. We
  // conservatively do this for -fpic.
  bool ClearDSOLocalOnDeclarations = TM.getTargetTriple().isOSBinFormatELF() &&
                                     TM.getRelocationModel() != Reloc::Static &&
                                     Mod.getPIELevel() == PIELevel::Default;
  return ClearDSOLocalOnDeclarations;
}

extern "C" void LLVMRustPrepareThinLTORename(const LLVMRustThinLTOData *Data,
                                             LLVMModuleRef M,
                                             LLVMTargetMachineRef TM) {
  Module &Mod = *unwrap(M);
  TargetMachine &Target = *unwrap(TM);

  bool ClearDSOLocal = clearDSOLocalOnDeclarations(Mod, Target);
  renameModuleForThinLTO(Mod, Data->Index, ClearDSOLocal);
}

extern "C" bool
LLVMRustPrepareThinLTOResolveWeak(const LLVMRustThinLTOData *Data,
                                  LLVMModuleRef M) {
  Module &Mod = *unwrap(M);
  const auto &DefinedGlobals =
      Data->ModuleToDefinedGVSummaries.lookup(Mod.getModuleIdentifier());
  thinLTOFinalizeInModule(Mod, DefinedGlobals, /*PropagateAttrs=*/true);
  return true;
}

extern "C" bool
LLVMRustPrepareThinLTOInternalize(const LLVMRustThinLTOData *Data,
                                  LLVMModuleRef M) {
  Module &Mod = *unwrap(M);
  const auto &DefinedGlobals =
      Data->ModuleToDefinedGVSummaries.lookup(Mod.getModuleIdentifier());
  thinLTOInternalizeModule(Mod, DefinedGlobals);
  return true;
}

extern "C" bool LLVMRustPrepareThinLTOImport(const LLVMRustThinLTOData *Data,
                                             LLVMModuleRef M,
                                             LLVMTargetMachineRef TM) {
  Module &Mod = *unwrap(M);
  TargetMachine &Target = *unwrap(TM);

  const auto &ImportList = Data->ImportLists.lookup(Mod.getModuleIdentifier());
  auto Loader = [&](StringRef Identifier) {
    const auto &Memory = Data->ModuleMap.lookup(Identifier);
    auto &Context = Mod.getContext();
    auto MOrErr = getLazyBitcodeModule(Memory, Context, true, true);

    if (!MOrErr)
      return MOrErr;

    // The rest of this closure is a workaround for
    // https://bugs.llvm.org/show_bug.cgi?id=38184 where during ThinLTO imports
    // we accidentally import wasm custom sections into different modules,
    // duplicating them by in the final output artifact.
    //
    // The issue is worked around here by manually removing the
    // `wasm.custom_sections` named metadata node from any imported module. This
    // we know isn't used by any optimization pass so there's no need for it to
    // be imported.
    //
    // Note that the metadata is currently lazily loaded, so we materialize it
    // here before looking up if there's metadata inside. The `FunctionImporter`
    // will immediately materialize metadata anyway after an import, so this
    // shouldn't be a perf hit.
    if (Error Err = (*MOrErr)->materializeMetadata()) {
      Expected<std::unique_ptr<Module>> Ret(std::move(Err));
      return Ret;
    }

    auto *WasmCustomSections =
        (*MOrErr)->getNamedMetadata("wasm.custom_sections");
    if (WasmCustomSections)
      WasmCustomSections->eraseFromParent();

    // `llvm.ident` named metadata also gets duplicated.
    auto *llvmIdent = (*MOrErr)->getNamedMetadata("llvm.ident");
    if (llvmIdent)
      llvmIdent->eraseFromParent();

    return MOrErr;
  };
  bool ClearDSOLocal = clearDSOLocalOnDeclarations(Mod, Target);
  auto Importer = FunctionImporter(Data->Index, Loader, ClearDSOLocal);
  Expected<bool> Result = Importer.importFunctions(Mod, ImportList);
  if (!Result) {
    LLVMRustSetLastError(toString(Result.takeError()).c_str());
    return false;
  }
  return true;
}

extern "C" LLVMRustThinLTOBuffer *
LLVMRustThinLTOBufferCreate(LLVMModuleRef M, bool is_thin, bool emit_summary) {
  auto Ret = std::make_unique<LLVMRustThinLTOBuffer>();
  {
    auto OS = raw_string_ostream(Ret->data);
    auto ThinLinkOS = raw_string_ostream(Ret->thin_link_data);
    {
      if (is_thin) {
        PassBuilder PB;
        LoopAnalysisManager LAM;
        FunctionAnalysisManager FAM;
        CGSCCAnalysisManager CGAM;
        ModuleAnalysisManager MAM;
        PB.registerModuleAnalyses(MAM);
        PB.registerCGSCCAnalyses(CGAM);
        PB.registerFunctionAnalyses(FAM);
        PB.registerLoopAnalyses(LAM);
        PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
        ModulePassManager MPM;
        // We only pass ThinLinkOS to be filled in if we want the summary,
        // because otherwise LLVM does extra work and may double-emit some
        // errors or warnings.
        MPM.addPass(
            ThinLTOBitcodeWriterPass(OS, emit_summary ? &ThinLinkOS : nullptr));
        MPM.run(*unwrap(M), MAM);
      } else {
        WriteBitcodeToFile(*unwrap(M), OS);
      }
    }
  }
  return Ret.release();
}

extern "C" void LLVMRustThinLTOBufferFree(LLVMRustThinLTOBuffer *Buffer) {
  delete Buffer;
}

extern "C" const void *
LLVMRustThinLTOBufferPtr(const LLVMRustThinLTOBuffer *Buffer) {
  return Buffer->data.data();
}

extern "C" size_t
LLVMRustThinLTOBufferLen(const LLVMRustThinLTOBuffer *Buffer) {
  return Buffer->data.length();
}

extern "C" const void *
LLVMRustThinLTOBufferThinLinkDataPtr(const LLVMRustThinLTOBuffer *Buffer) {
  return Buffer->thin_link_data.data();
}

extern "C" size_t
LLVMRustThinLTOBufferThinLinkDataLen(const LLVMRustThinLTOBuffer *Buffer) {
  return Buffer->thin_link_data.length();
}

// This is what we used to parse upstream bitcode for actual ThinLTO
// processing. We'll call this once per module optimized through ThinLTO, and
// it'll be called concurrently on many threads.
extern "C" LLVMModuleRef LLVMRustParseBitcodeForLTO(LLVMContextRef Context,
                                                    const char *data,
                                                    size_t len,
                                                    const char *identifier) {
  auto Data = StringRef(data, len);
  auto Buffer = MemoryBufferRef(Data, identifier);
  unwrap(Context)->enableDebugTypeODRUniquing();
  Expected<std::unique_ptr<Module>> SrcOrError =
      parseBitcodeFile(Buffer, *unwrap(Context));
  if (!SrcOrError) {
    LLVMRustSetLastError(toString(SrcOrError.takeError()).c_str());
    return nullptr;
  }
  return wrap(std::move(*SrcOrError).release());
}

// Find a section of an object file by name. Fail if the section is missing or
// empty.
extern "C" const char *LLVMRustGetSliceFromObjectDataByName(const char *data,
                                                            size_t len,
                                                            const char *name,
                                                            size_t name_len,
                                                            size_t *out_len) {
  *out_len = 0;
  auto Name = StringRef(name, name_len);
  auto Data = StringRef(data, len);
  auto Buffer = MemoryBufferRef(Data, ""); // The id is unused.
  file_magic Type = identify_magic(Buffer.getBuffer());
  Expected<std::unique_ptr<object::ObjectFile>> ObjFileOrError =
      object::ObjectFile::createObjectFile(Buffer, Type);
  if (!ObjFileOrError) {
    LLVMRustSetLastError(toString(ObjFileOrError.takeError()).c_str());
    return nullptr;
  }
  for (const object::SectionRef &Sec : (*ObjFileOrError)->sections()) {
    Expected<StringRef> SecName = Sec.getName();
    if (SecName && *SecName == Name) {
      Expected<StringRef> SectionOrError = Sec.getContents();
      if (!SectionOrError) {
        LLVMRustSetLastError(toString(SectionOrError.takeError()).c_str());
        return nullptr;
      }
      *out_len = SectionOrError->size();
      return SectionOrError->data();
    }
  }
  LLVMRustSetLastError("could not find requested section");
  return nullptr;
}

// Computes the LTO cache key for the provided 'ModId' in the given 'Data',
// storing the result in 'KeyOut'.
// Currently, this cache key is a SHA-1 hash of anything that could affect
// the result of optimizing this module (e.g. module imports, exports, liveness
// of access globals, etc).
// The precise details are determined by LLVM in `computeLTOCacheKey`, which is
// used during the normal linker-plugin incremental thin-LTO process.
extern "C" void LLVMRustComputeLTOCacheKey(RustStringRef KeyOut,
                                           const char *ModId,
                                           LLVMRustThinLTOData *Data) {
  SmallString<40> Key;
  llvm::lto::Config conf;
  const auto &ImportList = Data->ImportLists.lookup(ModId);
  const auto &ExportList = Data->ExportLists.lookup(ModId);
  const auto &ResolvedODR = Data->ResolvedODR.lookup(ModId);
  const auto &DefinedGlobals = Data->ModuleToDefinedGVSummaries.lookup(ModId);
#if LLVM_VERSION_GE(20, 0)
  DenseSet<GlobalValue::GUID> CfiFunctionDefs;
  DenseSet<GlobalValue::GUID> CfiFunctionDecls;
#else
  std::set<GlobalValue::GUID> CfiFunctionDefs;
  std::set<GlobalValue::GUID> CfiFunctionDecls;
#endif

  // Based on the 'InProcessThinBackend' constructor in LLVM
#if LLVM_VERSION_GE(21, 0)
  for (auto &Name : Data->Index.cfiFunctionDefs().symbols())
    CfiFunctionDefs.insert(
        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
  for (auto &Name : Data->Index.cfiFunctionDecls().symbols())
    CfiFunctionDecls.insert(
        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
#else
  for (auto &Name : Data->Index.cfiFunctionDefs())
    CfiFunctionDefs.insert(
        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
  for (auto &Name : Data->Index.cfiFunctionDecls())
    CfiFunctionDecls.insert(
        GlobalValue::getGUID(GlobalValue::dropLLVMManglingEscape(Name)));
#endif

#if LLVM_VERSION_GE(20, 0)
  Key = llvm::computeLTOCacheKey(conf, Data->Index, ModId, ImportList,
                                 ExportList, ResolvedODR, DefinedGlobals,
                                 CfiFunctionDefs, CfiFunctionDecls);
#else
  llvm::computeLTOCacheKey(Key, conf, Data->Index, ModId, ImportList,
                           ExportList, ResolvedODR, DefinedGlobals,
                           CfiFunctionDefs, CfiFunctionDecls);
#endif

  auto OS = RawRustStringOstream(KeyOut);
  OS << Key.str();
}