rust/compiler/rustc_codegen_llvm/src/back/lto.rs

use crate::back::write::{
    self, save_temp_bitcode, to_llvm_opt_settings, with_llvm_pmb, DiagnosticHandlers,
};
use crate::llvm::archive_ro::ArchiveRO;
use crate::llvm::{self, False, True};
use crate::{LlvmCodegenBackend, ModuleLlvm};
use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
use rustc_codegen_ssa::back::symbol_export;
use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig};
use rustc_codegen_ssa::traits::*;
use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_errors::{FatalError, Handler};
use rustc_hir::def_id::LOCAL_CRATE;
use rustc_middle::bug;
use rustc_middle::dep_graph::WorkProduct;
use rustc_middle::middle::exported_symbols::SymbolExportLevel;
use rustc_session::cgu_reuse_tracker::CguReuse;
use rustc_session::config::{self, CrateType, Lto};
use tracing::{debug, info};

use std::ffi::{CStr, CString};
use std::fs::File;
use std::io;
use std::mem;
use std::path::Path;
use std::ptr;
use std::slice;
use std::sync::Arc;

/// Name of the file, inside the incremental compilation session directory,
/// in which the ThinLTO import maps are stored between sessions.
///
/// We keep track of past LTO imports that were used to produce the current set
/// of compiled object files that we might choose to reuse during this
/// compilation session. When compiling incrementally, `thin_lto` loads the
/// previous import maps from this file (if it exists) and overwrites it with
/// the import maps of the current session.
pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-imports.bin";

/// Reports whether LTO may be run when producing an output of `crate_type`.
///
/// Executables, static libraries and cdylibs qualify; dylibs, rlibs and
/// proc-macro crates do not. Every variant is listed explicitly (there is no
/// wildcard arm), so adding a new `CrateType` forces a decision here.
pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
    match crate_type {
        CrateType::Dylib | CrateType::Rlib | CrateType::ProcMacro => false,
        CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => true,
    }
}

/// Collects the inputs shared by fat and thin LTO before any linking happens.
///
/// On success the returned pair holds:
///
/// * the names of all exported symbols (of the local crate and, unless we are
///   doing crate-local ThinLTO, of every rlib linked for LTO) whose export
///   level passes `is_below_threshold` for the computed threshold, and
/// * the bitcode of every Rust object file found in those rlibs, each paired
///   with the archive member name as its module id.
///
/// An error is reported through `diag_handler` and `Err` is returned when
/// `-C prefer-dynamic` is set, when one of the crate types does not allow
/// LTO, or when bitcode cannot be extracted from an rlib member.
///
/// Panics if LTO was not requested at all, if the exported symbols are
/// missing from `cgcx`, or if an rlib cannot be opened as an archive.
fn prepare_lto(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    diag_handler: &Handler,
) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> {
    let export_threshold = match cgcx.lto {
        Lto::No => panic!("didn't request LTO but we're doing LTO"),

        // LTO is restricted to our one crate.
        Lto::ThinLocal => SymbolExportLevel::Rust,

        // LTO spans the entire crate graph.
        Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
    };

    // Maps an exported symbol to its C-string name if it passes the threshold.
    // Taken by reference so the same closure can be handed to `filter_map`
    // more than once.
    let keep_symbol = &|&(ref name, level): &(String, SymbolExportLevel)| {
        if !level.is_below_threshold(export_threshold) {
            return None;
        }
        Some(CString::new(name.as_str()).unwrap())
    };

    let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
    let mut symbols_below_threshold: Vec<CString> = {
        let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
        exported_symbols[&LOCAL_CRATE].iter().filter_map(keep_symbol).collect()
    };
    info!("{} symbols to preserve in this crate", symbols_below_threshold.len());

    // Crate-local ThinLTO never looks at upstream crates, so we are done.
    if cgcx.lto == Lto::ThinLocal {
        return Ok((symbols_below_threshold, Vec::new()));
    }

    if cgcx.opts.cg.prefer_dynamic {
        diag_handler
            .struct_err("cannot prefer dynamic linking when performing LTO")
            .note(
                "only 'staticlib', 'bin', and 'cdylib' outputs are \
                               supported with LTO",
            )
            .emit();
        return Err(FatalError);
    }

    // Make sure we actually can run LTO for every requested output.
    if cgcx.crate_types.iter().any(|&crate_type| !crate_type_allows_lto(crate_type)) {
        return Err(diag_handler.fatal(
            "lto can only be run for executables, cdylibs and \
                                            static library outputs",
        ));
    }

    // We are performing LTO for the entire crate graph: for each upstream
    // dependency, find the corresponding rlib and load the bitcode from the
    // archive. The bitcode and module ids are saved off for later processing
    // with either fat or thin LTO.
    let mut upstream_modules = Vec::new();
    for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
        {
            let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
            let upstream_symbols = exported_symbols[&cnum].iter().filter_map(keep_symbol);
            symbols_below_threshold.extend(upstream_symbols);
        }

        let archive = ArchiveRO::open(&path).expect("wanted an rlib");
        let rust_objects = archive
            .iter()
            .filter_map(|child| child.ok().and_then(|c| c.name().map(|name| (name, c))))
            .filter(|&(name, _)| looks_like_rust_object_file(name));
        for (name, child) in rust_objects {
            info!("adding bitcode from {}", name);
            let bitcode = get_bitcode_slice_from_object_data(child.data())
                .map_err(|msg| diag_handler.fatal(&msg))?;
            let module = SerializedModule::FromRlib(bitcode.to_vec());
            upstream_modules.push((module, CString::new(name).unwrap()));
        }
    }

    Ok((symbols_below_threshold, upstream_modules))
}

/// Asks LLVM for the bitcode embedded in the raw bytes of an object file.
///
/// On success the returned slice is a sub-slice of `obj` (this is asserted).
/// When the FFI call yields a null pointer, an error message is returned that
/// includes LLVM's last error, if one was recorded.
fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], String> {
    let mut len = 0;
    let data =
        unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };

    // Failure path: a null pointer must come with a zero length.
    if data.is_null() {
        assert!(len == 0);
        let msg = llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
        return Err(format!("failed to get bitcode from object file for LTO ({})", msg));
    }

    assert!(len != 0);
    let bc = unsafe { slice::from_raw_parts(data, len) };

    // `bc` must be a sub-slice of `obj`: it may neither start before `obj`
    // nor end after it.
    assert!(obj.as_ptr() <= bc.as_ptr());
    assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr());

    Ok(bc)
}

/// Entry point for fat LTO: gathers the shared LTO inputs and then merges all
/// modules into a single one, which is returned for further optimization.
pub(crate) fn run_fat(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
    let diag_handler = cgcx.create_diag_handler();
    let (symbols, upstream) = prepare_lto(cgcx, &diag_handler)?;

    // The raw pointers borrow from `symbols`, which stays alive until the end
    // of this function and therefore for the whole `fat_lto` call.
    let symbol_ptrs: Vec<_> = symbols.iter().map(|sym| sym.as_ptr()).collect();

    fat_lto(cgcx, &diag_handler, modules, cached_modules, upstream, &symbol_ptrs)
}

/// Entry point for thin LTO: gathers the shared LTO inputs, runs the global
/// analysis and returns two lists, the modules that still need optimization
/// and the work products of modules that can simply be copied over from the
/// incr. comp. cache.
pub(crate) fn run_thin(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    modules: Vec<(String, ThinBuffer)>,
    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
    let diag_handler = cgcx.create_diag_handler();
    let (symbols, upstream) = prepare_lto(cgcx, &diag_handler)?;

    // The raw pointers borrow from `symbols`, which stays alive until the end
    // of this function and therefore for the whole `thin_lto` call.
    let symbol_ptrs: Vec<_> = symbols.iter().map(|sym| sym.as_ptr()).collect();

    let deferred_to_linker = cgcx.opts.cg.linker_plugin_lto.enabled();
    if deferred_to_linker {
        unreachable!(
            "We should never reach this case if the LTO step \
                      is deferred to the linker"
        );
    }

    thin_lto(cgcx, &diag_handler, modules, upstream, cached_modules, &symbol_ptrs)
}

/// Turns a codegened module into the `(name, thin buffer)` pair consumed by
/// `run_thin`. The buffer is created from the module's LLVM module.
pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
    let thin_buffer = ThinBuffer::new(module.module_llvm.llmod());
    (module.name.clone(), thin_buffer)
}

/// Merges every input module into one monolithic LLVM module for fat LTO.
///
/// Inputs arrive in three forms: `modules` (each either in memory or
/// serialized), `cached_modules` (serialized modules paired with their work
/// products) and `serialized_modules` (already-serialized modules supplied by
/// the caller). One module is chosen as the base and all others are linked
/// into it through `Linker`. Afterwards `LLVMRustRunRestrictionPass` is run
/// with `symbols_below_threshold` and, if `cgcx.no_landing_pads` is set,
/// `LLVMRustMarkAllFunctionsNounwind` is run as well.
///
/// Returns `LtoModuleCodegen::Fat` holding the merged module together with
/// the serialized buffers that were linked into it. Fails if the base module
/// has to be parsed from bitcode and parsing fails, or if any module cannot
/// be linked in.
fn fat_lto(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    diag_handler: &Handler,
    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
    mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
    symbols_below_threshold: &[*const libc::c_char],
) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
    let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
    info!("going for a fat lto");

    // Sort out all our lists of incoming modules into two lists.
    //
    // * `serialized_modules` (also an argument to this function) contains all
    //   modules that are serialized in-memory.
    // * `in_memory` contains modules which are already parsed and in-memory,
    //   such as from multi-CGU builds.
    //
    // All of `cached_modules` (cached from previous incremental builds) can
    // immediately go onto the `serialized_modules` modules list and then we can
    // split the `modules` array into these two lists.
    let mut in_memory = Vec::new();
    serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
        info!("pushing cached module {:?}", wp.cgu_name);
        (buffer, CString::new(wp.cgu_name).unwrap())
    }));
    for module in modules {
        match module {
            FatLTOInput::InMemory(m) => in_memory.push(m),
            FatLTOInput::Serialized { name, buffer } => {
                info!("pushing serialized module {:?}", name);
                let buffer = SerializedModule::Local(buffer);
                serialized_modules.push((buffer, CString::new(name).unwrap()));
            }
        }
    }

    // Find the "costliest" module and merge everything into that codegen unit.
    // All the other modules will be serialized and reparsed into the new
    // context, so this hopefully avoids serializing and parsing the largest
    // codegen unit.
    //
    // Additionally use a regular module as the base here to ensure that various
    // file copy operations in the backend work correctly. The only other kind
    // of module here should be an allocator one, and if your crate is smaller
    // than the allocator module then the size doesn't really matter anyway.
    //
    // The `(cost, index)` tuples compare lexicographically, so among modules of
    // equal cost the one with the largest index wins.
    let costliest_module = in_memory
        .iter()
        .enumerate()
        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
        .map(|(i, module)| {
            let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
            (cost, i)
        })
        .max();

    // If we found a costliest module, we're good to go. Otherwise all our
    // inputs were serialized which could happen in the case, for example, that
    // all our inputs were incrementally reread from the cache and we're just
    // re-executing the LTO passes. If that's the case deserialize the first
    // module and create a linker with it.
    let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
        Some((_cost, i)) => in_memory.remove(i),
        None => {
            assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
            let (buffer, name) = serialized_modules.remove(0);
            info!("no in-memory regular modules to choose from, parsing {:?}", name);
            ModuleCodegen {
                module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
                name: name.into_string().unwrap(),
                kind: ModuleKind::Regular,
            }
        }
    };
    // Buffers of every module linked into the base module; they are handed to
    // the returned `LtoModuleCodegen::Fat` so that they outlive this function.
    let mut serialized_bitcode = Vec::new();
    {
        let (llcx, llmod) = {
            let llvm = &module.module_llvm;
            (&llvm.llcx, llvm.llmod())
        };
        info!("using {:?} as a base module", module.name);

        // The linking steps below may produce errors and diagnostics within LLVM
        // which we'd like to handle and print, so set up our diagnostic handlers
        // (which get unregistered when they go out of scope below).
        let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);

        // For all other modules we codegened we'll need to link them into our own
        // bitcode. All modules were codegened in their own LLVM context, however,
        // and we want to move everything to the same LLVM context. Currently the
        // way we know of to do that is to serialize them to a string and then parse
        // them later. Not great but hey, that's why it's "fat" LTO, right?
        for module in in_memory {
            let buffer = ModuleBuffer::new(module.module_llvm.llmod());
            let llmod_id = CString::new(&module.name[..]).unwrap();
            serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
        }
        // Sort the modules (by name) to ensure we produce deterministic results.
        serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));

        // For all serialized bitcode files we parse them and link them in as we did
        // above, this is all mostly handled in C++. Like above, though, we don't
        // know much about the memory management here so we err on the side of being
        // safe and persist everything with the original module.
        let mut linker = Linker::new(llmod);
        for (bc_decoded, name) in serialized_modules {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_fat_lto_link_module", format!("{:?}", name));
            info!("linking {:?}", name);
            let data = bc_decoded.data();
            linker.add(&data).map_err(|()| {
                let msg = format!("failed to load bc of {:?}", name);
                write::llvm_err(&diag_handler, &msg)
            })?;
            serialized_bitcode.push(bc_decoded);
        }
        // Free the LLVM linker now that every module has been added.
        drop(linker);
        save_temp_bitcode(&cgcx, &module, "lto.input");

        // Internalize everything below threshold to help strip out more modules and such.
        unsafe {
            let ptr = symbols_below_threshold.as_ptr();
            llvm::LLVMRustRunRestrictionPass(
                llmod,
                ptr as *const *const libc::c_char,
                symbols_below_threshold.len() as libc::size_t,
            );
            save_temp_bitcode(&cgcx, &module, "lto.after-restriction");
        }

        if cgcx.no_landing_pads {
            unsafe {
                llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
            }
            save_temp_bitcode(&cgcx, &module, "lto.after-nounwind");
        }
    }

    Ok(LtoModuleCodegen::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode })
}

/// Owning wrapper around an LLVM linker handle obtained from
/// `LLVMRustLinkerNew`; the handle is passed to `LLVMRustLinkerFree` on drop.
struct Linker<'a>(&'a mut llvm::Linker<'a>);

impl Linker<'a> {
    /// Creates a linker for `llmod`.
    fn new(llmod: &'a llvm::Module) -> Self {
        Linker(unsafe { llvm::LLVMRustLinkerNew(llmod) })
    }

    /// Hands the serialized module in `bytecode` to `LLVMRustLinkerAdd`.
    ///
    /// Returns `Err(())` when the call reports failure.
    fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
        let (ptr, len) = (bytecode.as_ptr() as *const libc::c_char, bytecode.len());
        let linked = unsafe { llvm::LLVMRustLinkerAdd(self.0, ptr, len) };
        if linked { Ok(()) } else { Err(()) }
    }
}

impl Drop for Linker<'a> {
    fn drop(&mut self) {
        // The handle is re-borrowed through a raw pointer because only
        // `&mut self` is available here.
        unsafe {
            llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _));
        }
    }
}

/// Prepare "thin" LTO to get run on these modules.
///
/// The general structure of ThinLTO is quite different from the structure of
/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
/// one giant LLVM module, and then we run more optimization passes over this
/// big module after internalizing most symbols. Thin LTO, on the other hand,
/// avoids this large bottleneck through more targeted optimization.
///
/// At a high level Thin LTO looks like:
///
///     1. Prepare a "summary" of each LLVM module in question which describes
///        the values inside, cost of the values, etc.
///     2. Merge the summaries of all modules in question into one "index"
///     3. Perform some global analysis on this index
///     4. For each module, use the index and analysis calculated previously to
///        perform local transformations on the module, for example inlining
///        small functions from other modules.
///     5. Run thin-specific optimization passes over each module, and then code
///        generate everything at the end.
///
/// The summary for each module is intended to be quite cheap, and the global
/// index is relatively quite cheap to create as well. As a result, the goal of
/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
/// situations. For example one cheap optimization is that we can parallelize
/// all codegen modules, easily making use of all the cores on a machine.
///
/// With all that in mind, the function here is designed specifically to
/// calculate just the *index* for ThinLTO. This index will then be shared
/// amongst all of the `LtoModuleCodegen` units returned below and destroyed
/// once they all go out of scope.
///
/// `modules` are the thin buffers of the local modules, `serialized_modules`
/// are already-serialized modules supplied by the caller and `cached_modules`
/// are serialized modules paired with their work products; the latter are the
/// only candidates for reuse.
///
/// Returns the modules that have to be (re-)optimized together with the work
/// products of the modules that can be reused as they are. Fails if the
/// ThinLTO data cannot be created or if the import maps cannot be written to
/// the incremental compilation session directory.
fn thin_lto(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    diag_handler: &Handler,
    modules: Vec<(String, ThinBuffer)>,
    serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
    symbols_below_threshold: &[*const libc::c_char],
) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
    unsafe {
        info!("going for that thin, thin LTO");

        // Cached modules keyed by CGU name; only these can be reused below.
        let green_modules: FxHashMap<_, _> =
            cached_modules.iter().map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone())).collect();

        let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
        let mut thin_buffers = Vec::with_capacity(modules.len());
        let mut module_names = Vec::with_capacity(full_scope_len);
        let mut thin_modules = Vec::with_capacity(full_scope_len);

        // `thin_modules` holds raw pointers into the names and buffers; their
        // owners are kept in `module_names` and `thin_buffers`, which are
        // stored in the `ThinShared` created further down.
        for (i, (name, buffer)) in modules.into_iter().enumerate() {
            info!("local module: {} - {}", i, name);
            let cname = CString::new(name.clone()).unwrap();
            thin_modules.push(llvm::ThinLTOModule {
                identifier: cname.as_ptr(),
                data: buffer.data().as_ptr(),
                len: buffer.data().len(),
            });
            thin_buffers.push(buffer);
            module_names.push(cname);
        }

        // FIXME: All upstream crates are deserialized internally in the
        //        function below to extract their summary and modules. Note that
        //        unlike the loop above we *must* decode and/or read something
        //        here as these are all just serialized files on disk. An
        //        improvement, however, to make here would be to store the
        //        module summary separately from the actual module itself. Right
        //        now this is stored in one large bitcode file, and the entire
        //        file is deflate-compressed. We could try to bypass some of the
        //        decompression by storing the index uncompressed and only
        //        lazily decompressing the bytecode if necessary.
        //
        //        Note that truly taking advantage of this optimization will
        //        likely be further down the road. We'd have to implement
        //        incremental ThinLTO first where we could actually avoid
        //        looking at upstream modules entirely sometimes (the contents,
        //        we must always unconditionally look at the index).
        let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());

        let cached_modules =
            cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));

        // As above, the owners of the pointed-to data are kept in `serialized`
        // and `module_names`.
        for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
            info!("upstream or cached module {:?}", name);
            thin_modules.push(llvm::ThinLTOModule {
                identifier: name.as_ptr(),
                data: module.data().as_ptr(),
                len: module.data().len(),
            });
            serialized.push(module);
            module_names.push(name);
        }

        // Sanity check
        assert_eq!(thin_modules.len(), module_names.len());

        // Delegate to the C++ bindings to create some data here. Once this is a
        // tried-and-true interface we may wish to try to upstream some of this
        // to LLVM itself, right now we reimplement a lot of what they do
        // upstream...
        let data = llvm::LLVMRustCreateThinLTOData(
            thin_modules.as_ptr(),
            thin_modules.len() as u32,
            symbols_below_threshold.as_ptr(),
            symbols_below_threshold.len() as u32,
        )
        .ok_or_else(|| write::llvm_err(&diag_handler, "failed to prepare thin LTO context"))?;

        info!("thin LTO data created");

        let (import_map_path, prev_import_map, curr_import_map) =
            if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
                let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
                // If previous imports have been deleted, or we get an IO error
                // reading the file storing them, then we'll just use `None` as the
                // prev_import_map, which will force the code to be recompiled.
                let prev = if path.exists() {
                    ThinLTOImportMaps::load_from_file(&path).ok()
                } else {
                    None
                };
                let curr = ThinLTOImportMaps::from_thin_lto_data(data);
                (Some(path), prev, curr)
            } else {
                // If we don't compile incrementally, we don't need to load the
                // import data from LLVM.
                assert!(green_modules.is_empty());
                let curr = ThinLTOImportMaps::default();
                (None, None, curr)
            };
        info!("thin LTO import map loaded");

        // From here on `ThinData` owns the LLVM data and frees it on drop.
        let data = ThinData(data);

        // Throw our data in an `Arc` as we'll be sharing it across threads. We
        // also put all memory referenced by the C++ data (buffers, ids, etc)
        // into the arc as well. After this we'll create a thin module
        // codegen per module in this data.
        let shared = Arc::new(ThinShared {
            data,
            thin_buffers,
            serialized_modules: serialized,
            module_names,
        });

        let mut copy_jobs = vec![];
        let mut opt_jobs = vec![];

        info!("checking which modules can be-reused and which have to be re-optimized.");
        for (module_index, module_name) in shared.module_names.iter().enumerate() {
            let module_name = module_name_to_str(module_name);

            // If (1.) the module hasn't changed, and (2.) none of the modules
            // it imports from have changed, *and* (3.) the import and export
            // sets themselves have not changed from the previous compile when
            // it was last ThinLTO'ed, then we can re-use the post-ThinLTO
            // version of the module. Otherwise, freshly perform LTO
            // optimization.
            //
            // (Note that globally, the export set is just the inverse of the
            // import set.)
            //
            // For further justification of why the above is necessary and sufficient,
            // see the LLVM blog post on ThinLTO:
            //
            // http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html
            //
            // which states the following:
            //
            // ```quote
            // any particular ThinLTO backend must be redone iff:
            //
            // 1. The corresponding (primary) module’s bitcode changed
            // 2. The list of imports into or exports from the module changed
            // 3. The bitcode for any module being imported from has changed
            // 4. Any global analysis result affecting either the primary module
            //    or anything it imports has changed.
            // ```
            //
            // This strategy means we can always save the computed imports as
            // canon: when we reuse the post-ThinLTO version, condition (3.)
            // ensures that the current import set is the same as the previous
            // one. (And of course, when we don't reuse the post-ThinLTO
            // version, the current import set *is* the correct one, since we
            // are doing the ThinLTO in this current compilation cycle.)
            //
            // For more discussion, see rust-lang/rust#59535 (where the import
            // issue was discovered) and rust-lang/rust#69798 (where the
            // analogous export issue was discovered).
            if let (Some(prev_import_map), true) =
                (prev_import_map.as_ref(), green_modules.contains_key(module_name))
            {
                assert!(cgcx.incr_comp_session_dir.is_some());

                let prev_imports = prev_import_map.imports_of(module_name);
                let curr_imports = curr_import_map.imports_of(module_name);
                let prev_exports = prev_import_map.exports_of(module_name);
                let curr_exports = curr_import_map.exports_of(module_name);
                let imports_all_green = curr_imports
                    .iter()
                    .all(|imported_module| green_modules.contains_key(imported_module));
                if imports_all_green
                    && equivalent_as_sets(prev_imports, curr_imports)
                    && equivalent_as_sets(prev_exports, curr_exports)
                {
                    let work_product = green_modules[module_name].clone();
                    copy_jobs.push(work_product);
                    info!(" - {}: re-used", module_name);
                    assert!(cgcx.incr_comp_session_dir.is_some());
                    cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto);
                    continue;
                }
            }

            // Not reusable: schedule the module for ThinLTO optimization.
            info!(" - {}: re-compiled", module_name);
            opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
                shared: shared.clone(),
                idx: module_index,
            }));
        }

        // Save the current ThinLTO import information for the next compilation
        // session, overwriting the previous serialized imports (if any).
        if let Some(path) = import_map_path {
            if let Err(err) = curr_import_map.save_to_file(&path) {
                let msg = format!("Error while writing ThinLTO import data: {}", err);
                return Err(write::llvm_err(&diag_handler, &msg));
            }
        }

        Ok((opt_jobs, copy_jobs))
    }
}

/// Decides whether two slices hold the same strings when element order is
/// disregarded, i.e. when both are viewed as sets.
///
/// Both slices are expected to be free of repeated elements.
fn equivalent_as_sets(a: &[String], b: &[String]) -> bool {
    /// Borrows the strings of `items` into a set.
    fn to_set(items: &[String]) -> FxHashSet<&str> {
        items.iter().map(String::as_str).collect()
    }

    // The checks run from cheapest to most expensive: differing lengths rule
    // out equivalence, identical slices confirm it, and only otherwise are
    // the two sets actually built and compared.
    a.len() == b.len() && (a == b || to_set(a) == to_set(b))
}

/// Runs the LTO-specific LLVM optimization passes over `module`.
///
/// `thin` selects between the ThinLTO and the fat LTO flavor of the passes.
/// When `write::should_use_new_llvm_pass_manager(config)` holds, the work is
/// delegated to `write::optimize_with_new_llvm_pass_manager`; otherwise a pass
/// manager is created, populated and run by hand. In both cases an
/// optimization level of "none" is bumped to the next level (see the comment
/// in the body for the reason).
pub(crate) fn run_pass_manager(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    module: &ModuleCodegen<ModuleLlvm>,
    config: &ModuleConfig,
    thin: bool,
) {
    let _timer = cgcx.prof.extra_verbose_generic_activity("LLVM_lto_optimize", &module.name[..]);

    // Now we have one massive module inside of llmod. Time to run the
    // LTO-specific optimization passes that LLVM provides.
    //
    // This code is based off the code found in llvm's LTO code generator:
    //      tools/lto/LTOCodeGenerator.cpp
    debug!("running the pass manager");
    unsafe {
        if write::should_use_new_llvm_pass_manager(config) {
            let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
            let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
            // See comment below for why this is necessary.
            let opt_level = if let config::OptLevel::No = opt_level {
                config::OptLevel::Less
            } else {
                opt_level
            };
            write::optimize_with_new_llvm_pass_manager(cgcx, module, config, opt_level, opt_stage);
            debug!("lto done");
            return;
        }

        let pm = llvm::LLVMCreatePassManager();
        llvm::LLVMAddAnalysisPasses(module.module_llvm.tm, pm);

        // Optionally verify the IR before the LTO passes run.
        if config.verify_llvm_ir {
            let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
            llvm::LLVMRustAddPass(pm, pass.unwrap());
        }

        // When optimizing for LTO we don't actually pass in `-O0`, but we force
        // it to always happen at least with `-O1`.
        //
        // With ThinLTO we mess around a lot with symbol visibility in a way
        // that will actually cause linking failures if we optimize at O0 which
        // notably is lacking in dead code elimination. To ensure we at least
        // get some optimizations and correctly link we forcibly switch to `-O1`
        // to get dead code elimination.
        //
        // Note that in general this shouldn't matter too much as you typically
        // only turn on ThinLTO when you're compiling with optimizations
        // otherwise.
        let opt_level = config
            .opt_level
            .map(|x| to_llvm_opt_settings(x).0)
            .unwrap_or(llvm::CodeGenOptLevel::None);
        let opt_level = match opt_level {
            llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
            level => level,
        };
        with_llvm_pmb(module.module_llvm.llmod(), config, opt_level, false, &mut |b| {
            if thin {
                llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm);
            } else {
                llvm::LLVMPassManagerBuilderPopulateLTOPassManager(
                    b, pm, /* Internalize = */ False, /* RunInliner = */ True,
                );
            }
        });

        // We always generate bitcode through ThinLTOBuffers,
        // which do not support anonymous globals
        if config.bitcode_needed() {
            let pass = llvm::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr().cast());
            llvm::LLVMRustAddPass(pm, pass.unwrap());
        }

        // Optionally verify the IR again after the LTO passes have run.
        if config.verify_llvm_ir {
            let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
            llvm::LLVMRustAddPass(pm, pass.unwrap());
        }

        llvm::LLVMRunPassManager(pm, module.module_llvm.llmod());

        llvm::LLVMDisposePassManager(pm);
    }
    debug!("lto done");
}

pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);

unsafe impl Send for ModuleBuffer {}
unsafe impl Sync for ModuleBuffer {}

impl ModuleBuffer {
    pub fn new(m: &llvm::Module) -> ModuleBuffer {
        ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
    }
}

impl ModuleBufferMethods for ModuleBuffer {
    fn data(&self) -> &[u8] {
        unsafe {
            let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
            let len = llvm::LLVMRustModuleBufferLen(self.0);
            slice::from_raw_parts(ptr, len)
        }
    }
}

impl Drop for ModuleBuffer {
    /// Hands the wrapped buffer back to LLVM via `LLVMRustModuleBufferFree`.
    fn drop(&mut self) {
        // Round-trip through a raw pointer so that a `&'static mut` can be
        // produced from the field while only holding `&mut self`.
        let raw = self.0 as *mut llvm::ModuleBuffer;
        unsafe {
            llvm::LLVMRustModuleBufferFree(&mut *raw);
        }
    }
}

/// Owning handle to LLVM's `ThinLTOData`.
///
/// The wrapped data is passed to `LLVMRustFreeThinLTOData` when the value is
/// dropped.
pub struct ThinData(&'static mut llvm::ThinLTOData);

// NOTE(review): these impls assert that the wrapped data may be moved to and
// shared between threads. The justification is not evident from this code --
// confirm against how the C++ side uses it.
unsafe impl Send for ThinData {}
unsafe impl Sync for ThinData {}

impl Drop for ThinData {
    /// Releases the wrapped data via `LLVMRustFreeThinLTOData`.
    fn drop(&mut self) {
        // Round-trip through a raw pointer so that a `&'static mut` can be
        // produced from the field while only holding `&mut self`.
        let raw = self.0 as *mut llvm::ThinLTOData;
        unsafe {
            llvm::LLVMRustFreeThinLTOData(&mut *raw);
        }
    }
}

/// Owning handle to an LLVM `ThinLTOBuffer`.
///
/// Instances are created through `ThinBuffer::new` (which calls
/// `LLVMRustThinLTOBufferCreate`) and the underlying buffer is handed to
/// `LLVMRustThinLTOBufferFree` when the value is dropped.
pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer);

// NOTE(review): these impls assert that the wrapped LLVM buffer may be moved to
// and shared between threads. The justification is not evident from this code;
// presumably the buffer is immutable once created -- confirm on the C++ side.
unsafe impl Send for ThinBuffer {}
unsafe impl Sync for ThinBuffer {}

impl ThinBuffer {
    /// Builds a new buffer from `m` by calling `LLVMRustThinLTOBufferCreate`.
    pub fn new(m: &llvm::Module) -> ThinBuffer {
        ThinBuffer(unsafe { llvm::LLVMRustThinLTOBufferCreate(m) })
    }
}

impl ThinBufferMethods for ThinBuffer {
    /// Exposes the buffer contents as a byte slice borrowed from `self`.
    fn data(&self) -> &[u8] {
        // Query the start pointer first and the length second.
        let (start, byte_len) = unsafe {
            (
                llvm::LLVMRustThinLTOBufferPtr(self.0) as *const u8,
                llvm::LLVMRustThinLTOBufferLen(self.0),
            )
        };
        // NOTE(review): relies on LLVM reporting a pointer/length pair that stays
        // valid for as long as `self` is alive -- confirm against the C++ wrapper.
        unsafe { slice::from_raw_parts(start, byte_len) }
    }
}

impl Drop for ThinBuffer {
    /// Hands the wrapped buffer back to LLVM via `LLVMRustThinLTOBufferFree`.
    fn drop(&mut self) {
        // Round-trip through a raw pointer so that a `&'static mut` can be
        // produced from the field while only holding `&mut self`.
        let raw = self.0 as *mut llvm::ThinLTOBuffer;
        unsafe {
            llvm::LLVMRustThinLTOBufferFree(&mut *raw);
        }
    }
}

/// Runs the per-module half of ThinLTO on a single serialized module.
///
/// The function creates a target machine and a fresh LLVM context, parses the
/// bitcode of `thin_module` into that context and then, in order:
///
/// 1. looks up the module's "main" `DICompileUnit`,
/// 2. optionally marks every function `nounwind` (when `cgcx.no_landing_pads`),
/// 3. applies the ThinLTO rename, resolve-weak, internalize and import steps,
///    all driven by the shared `thin_module.shared.data`,
/// 4. merges the imported `DICompileUnit`s into the main one, and
/// 5. runs the thin LTO pass manager via `run_pass_manager`.
///
/// `save_temp_bitcode` is invoked after the module is parsed and after each
/// step so that intermediate states can be inspected.
///
/// # Errors
///
/// Returns a `FatalError` (after reporting through the diagnostic handler) when
/// the target machine factory fails, the bitcode cannot be parsed, more than
/// one source `DICompileUnit` is found, or any of the rename / resolve-weak /
/// internalize / import steps reports failure.
///
/// # Safety
///
/// NOTE(review): the body consists almost entirely of raw LLVM FFI calls.
/// Presumably the caller must guarantee that `thin_module.shared.data` and the
/// serialized module bytes are valid for the duration of the call -- confirm
/// the exact contract with the call sites.
pub unsafe fn optimize_thin_module(
    thin_module: &mut ThinModule<LlvmCodegenBackend>,
    cgcx: &CodegenContext<LlvmCodegenBackend>,
) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
    let diag_handler = cgcx.create_diag_handler();
    let tm = (cgcx.tm_factory.0)().map_err(|e| write::llvm_err(&diag_handler, &e))?;

    // Right now the implementation we've got only works over serialized
    // modules, so we create a fresh new LLVM context and parse the module
    // into that context. One day, however, we may do this for upstream
    // crates but for locally codegened modules we may be able to reuse
    // that LLVM Context and Module.
    let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
    // The parsed module reference is tied to the lifetime of the context
    // borrow; casting it to a raw pointer drops that tie so that `llcx` itself
    // can be moved into `ModuleLlvm` below.
    //
    // NOTE(review): if parsing fails, `?` returns before `llcx` (and `tm`) are
    // handed to `ModuleLlvm`; it looks like they are not disposed on that path.
    // Confirm whether that matters given that the error is fatal.
    let llmod_raw = parse_module(
        llcx,
        &thin_module.shared.module_names[thin_module.idx],
        thin_module.data(),
        &diag_handler,
    )? as *const _;
    let module = ModuleCodegen {
        module_llvm: ModuleLlvm { llmod_raw, llcx, tm },
        name: thin_module.name().to_string(),
        kind: ModuleKind::Regular,
    };
    {
        let target = &*module.module_llvm.tm;
        let llmod = module.module_llvm.llmod();
        save_temp_bitcode(&cgcx, &module, "thin-lto-input");

        // Before we do much else find the "main" `DICompileUnit` that we'll be
        // using below. If we find more than one though then rustc has changed
        // in a way we're not ready for, so generate an ICE by returning
        // an error.
        let mut cu1 = ptr::null_mut();
        let mut cu2 = ptr::null_mut();
        llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
        if !cu2.is_null() {
            let msg = "multiple source DICompileUnits found";
            return Err(write::llvm_err(&diag_handler, msg));
        }

        // Like with "fat" LTO, get some better optimizations if landing pads
        // are disabled by removing all landing pads.
        if cgcx.no_landing_pads {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_remove_landing_pads", thin_module.name());
            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
            save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
        }

        // Up next comes the per-module local analyses that we do for Thin LTO.
        // Each of these functions is basically copied from the LLVM
        // implementation and then tailored to suit this implementation. Ideally
        // each of these would be supported by upstream LLVM but that's perhaps
        // a patch for another day!
        //
        // You can find some more comments about these functions in the LLVM
        // bindings we've got (currently `PassWrapper.cpp`)
        {
            let _timer =
                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
        }

        {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
        }

        {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
        }

        {
            let _timer =
                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
        }

        // Ok now this is a bit unfortunate. This is also something you won't
        // find upstream in LLVM's ThinLTO passes! This is a hack for now to
        // work around bugs in LLVM.
        //
        // First discovered in #45511 it was found that as part of ThinLTO
        // importing passes LLVM will import `DICompileUnit` metadata
        // information across modules. This means that we'll be working with one
        // LLVM module that has multiple `DICompileUnit` instances in it (a
        // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
        // bugs in LLVM's backend which generates invalid DWARF in a situation
        // like this:
        //
        //  https://bugs.llvm.org/show_bug.cgi?id=35212
        //  https://bugs.llvm.org/show_bug.cgi?id=35562
        //
        // While the first bug there is fixed the second ended up causing #46346
        // which was basically a resurgence of #45511 after LLVM's bug 35212 was
        // fixed.
        //
        // This function below is a huge hack around this problem. The function
        // below is defined in `PassWrapper.cpp` and will basically "merge"
        // all `DICompileUnit` instances in a module. Basically it'll take all
        // the objects, rewrite all pointers of `DISubprogram` to point to the
        // first `DICompileUnit`, and then delete all the other units.
        //
        // This is probably mangling the debug info slightly (but hopefully
        // not too much) but for now at least gets LLVM to emit valid DWARF (or
        // so it appears). Hopefully we can remove this once upstream bugs are
        // fixed in LLVM.
        {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module.name());
            llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
            save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
        }

        // Alright now that we've done everything related to the ThinLTO
        // analysis it's time to run some optimizations! Here we use the same
        // `run_pass_manager` as the "fat" LTO above except that we tell it to
        // populate a thin-specific pass manager, which presumably LLVM treats a
        // little differently.
        {
            info!("running thin lto passes over {}", module.name);
            let config = cgcx.config(module.kind);
            run_pass_manager(cgcx, &module, config, true);
            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
        }
    }
    Ok(module)
}

/// Summarizes module import/export relationships used by LLVM's ThinLTO pass.
///
/// Note that we tend to have two such instances of `ThinLTOImportMaps` in use:
/// one loaded from a file that represents the relationships used during the
/// compilation associated with the incremental build artifacts we are
/// attempting to reuse, and another constructed via `from_thin_lto_data`, which
/// captures the relationships of ThinLTO in the current compilation.
///
/// The two maps are inverses of each other: whenever module `a` lists `b`
/// among its imports, `b` lists `a` among its exports.
#[derive(Debug, Default)]
pub struct ThinLTOImportMaps {
    /// Key: LLVM name of the importing module; value: the modules it imports from.
    imports: FxHashMap<String, Vec<String>>,
    /// Key: LLVM name of the exporting module; value: the modules it exports to.
    exports: FxHashMap<String, Vec<String>>,
}

impl ThinLTOImportMaps {
    /// Returns modules imported by `llvm_module_name` during some ThinLTO pass.
    ///
    /// Yields an empty slice when nothing is recorded for that module.
    fn imports_of(&self, llvm_module_name: &str) -> &[String] {
        self.imports.get(llvm_module_name).map(|v| v.as_slice()).unwrap_or(&[])
    }

    /// Returns modules exported by `llvm_module_name` during some ThinLTO pass.
    ///
    /// Yields an empty slice when nothing is recorded for that module.
    fn exports_of(&self, llvm_module_name: &str) -> &[String] {
        self.exports.get(llvm_module_name).map(|v| v.as_slice()).unwrap_or(&[])
    }

    /// Writes the import map to `path` in a simple line-oriented text format.
    ///
    /// Each importing module produces one record: a line holding the module
    /// name, one line per imported module prefixed with a single space, and a
    /// terminating empty line. Only `imports` is written; `load_from_file`
    /// rebuilds `exports` from it.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while creating, writing or flushing the
    /// file.
    fn save_to_file(&self, path: &Path) -> io::Result<()> {
        use std::io::Write;
        let file = File::create(path)?;
        let mut writer = io::BufWriter::new(file);
        for (importing_module_name, imported_modules) in &self.imports {
            writeln!(writer, "{}", importing_module_name)?;
            for imported_module in imported_modules {
                writeln!(writer, " {}", imported_module)?;
            }
            writeln!(writer)?;
        }
        // Flush explicitly: dropping a `BufWriter` ignores any error from the
        // final write, which would report a truncated file as a success.
        writer.flush()
    }

    /// Reads back a map previously written by `save_to_file`.
    ///
    /// # Errors
    ///
    /// Returns any I/O error raised while opening or reading the file.
    ///
    /// # Panics
    ///
    /// Panics if the file violates the record format: an empty line or an
    /// imported-module line without a preceding module name, or a module name
    /// that does not follow an empty line. A trailing record that is not
    /// terminated by an empty line is ignored.
    fn load_from_file(path: &Path) -> io::Result<ThinLTOImportMaps> {
        use std::io::BufRead;
        let mut imports = FxHashMap::default();
        let mut exports: FxHashMap<_, Vec<_>> = FxHashMap::default();
        let mut current_module: Option<String> = None;
        let mut current_imports: Vec<String> = vec![];
        let file = File::open(path)?;
        for line in io::BufReader::new(file).lines() {
            let line = line?;
            if line.is_empty() {
                // End of a record: commit it to both maps.
                let importing_module = current_module.take().expect("Importing module not set");
                for imported in &current_imports {
                    exports.entry(imported.clone()).or_default().push(importing_module.clone());
                }
                imports.insert(importing_module, mem::take(&mut current_imports));
            } else if line.starts_with(' ') {
                // Space marks an imported module
                assert_ne!(current_module, None);
                current_imports.push(line.trim().to_string());
            } else {
                // Otherwise, beginning of a new module (must be start or follow empty line)
                assert_eq!(current_module, None);
                current_module = Some(line.trim().to_string());
            }
        }
        Ok(ThinLTOImportMaps { imports, exports })
    }

    /// Loads the ThinLTO import map from ThinLTOData.
    ///
    /// LLVM reports every (importing module, imported module) pair through a
    /// callback, which records the pair in both `imports` and `exports`.
    ///
    /// # Safety
    ///
    /// NOTE(review): `data` is passed straight to
    /// `LLVMRustGetThinLTOModuleImports`; presumably it must point to live,
    /// fully initialized `ThinLTOData` -- confirm with the call sites.
    unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImportMaps {
        unsafe extern "C" fn imported_module_callback(
            payload: *mut libc::c_void,
            importing_module_name: *const libc::c_char,
            imported_module_name: *const libc::c_char,
        ) {
            // `payload` is the `&mut ThinLTOImportMaps` passed to LLVM below.
            let map = &mut *(payload as *mut ThinLTOImportMaps);
            let importing_module_name = module_name_to_str(CStr::from_ptr(importing_module_name));
            let imported_module_name = module_name_to_str(CStr::from_ptr(imported_module_name));

            // One entry lookup per map instead of contains_key/insert/get_mut.
            map.imports
                .entry(importing_module_name.to_owned())
                .or_default()
                .push(imported_module_name.to_owned());

            map.exports
                .entry(imported_module_name.to_owned())
                .or_default()
                .push(importing_module_name.to_owned());
        }

        let mut map = ThinLTOImportMaps::default();
        llvm::LLVMRustGetThinLTOModuleImports(
            data,
            imported_module_callback,
            &mut map as *mut _ as *mut libc::c_void,
        );
        map
    }
}

/// Views an LLVM module name as `&str`.
///
/// Module names are expected to be valid UTF-8; anything else is reported as a
/// compiler bug via `bug!`.
fn module_name_to_str(c_str: &CStr) -> &str {
    match c_str.to_str() {
        Ok(name) => name,
        Err(e) => {
            bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)
        }
    }
}

/// Parses the bitcode in `data` into a module named `name` inside the context `cx`.
///
/// # Errors
///
/// When `LLVMRustParseBitcodeForLTO` yields no module, the failure is reported
/// through `write::llvm_err` and the resulting `FatalError` is returned.
pub fn parse_module<'a>(
    cx: &'a llvm::Context,
    name: &CStr,
    data: &[u8],
    diag_handler: &Handler,
) -> Result<&'a llvm::Module, FatalError> {
    let parsed = unsafe {
        llvm::LLVMRustParseBitcodeForLTO(cx, data.as_ptr(), data.len(), name.as_ptr())
    };
    match parsed {
        Some(module) => Ok(module),
        None => {
            let msg = "failed to parse bitcode for LTO module";
            Err(write::llvm_err(diag_handler, msg))
        }
    }
}
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								use crate::back::write::{
 								    self, save_temp_bitcode, to_llvm_opt_settings, with_llvm_pmb, DiagnosticHandlers,
 								};
-												librustc_codegen_llvm => 2018

											
										
										
											2019-02-17 18:58:58 +00:00
+								use crate::llvm::archive_ro::ArchiveRO;
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								use crate::llvm::{self, False, True};
 								use crate::{LlvmCodegenBackend, ModuleLlvm};
 								use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
 								use rustc_codegen_ssa::back::symbol_export;
 								use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, ModuleConfig};
 								use rustc_codegen_ssa::traits::*;
-												Store LLVM bitcode in object files, not compressed

This commit is an attempted resurrection of #70458 where LLVM bitcode
emitted by rustc into rlibs is stored into object file sections rather
than in a separate file. The main rationale for doing this is that when
rustc emits bitcode it will no longer use a custom compression scheme
which makes it both easier to interoperate with existing tools and also
cuts down on compile time since this compression isn't happening.

The blocker for this in #70458 turned out to be that native linkers
didn't handle the new sections well, causing the sections to either
trigger bugs in the linker or actually end up in the final linked
artifact. This commit attempts to address these issues by ensuring that
native linkers ignore the new sections by inserting custom flags with
module-level inline assembly.

Note that this does not currently change the API of the compiler at all.
The pre-existing `-C bitcode-in-rlib` flag is co-opted to indicate
whether the bitcode should be present in the object file or not.

Finally, note that an important consequence of this commit, which is also
one of its primary purposes, is to enable rustc's `-Clto` bitcode
loading to load rlibs produced with `-Clinker-plugin-lto`. The goal here
is that when you're building with LTO Cargo will tell rustc to skip
codegen of all intermediate crates and only generate LLVM IR. Today
rustc will generate both object code and LLVM IR, but the object code is
later simply thrown away, wastefully.

											
										
										
											2020-04-23 18:45:55 +00:00
+								use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								use rustc_data_structures::fx::{FxHashMap, FxHashSet};
 								use rustc_errors::{FatalError, Handler};
-												Remove rustc_hir reexports in rustc::hir.

											
										
										
											2020-01-05 01:37:57 +00:00
+								use rustc_hir::def_id::LOCAL_CRATE;
-												rustc -> rustc_middle part 3 (rustfmt)

											
										
										
											2020-03-29 15:19:48 +00:00
+								use rustc_middle::bug;
 								use rustc_middle::dep_graph::WorkProduct;
 								use rustc_middle::middle::exported_symbols::SymbolExportLevel;
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								use rustc_session::cgu_reuse_tracker::CguReuse;
-												cleanup: `config::CrateType` -> `CrateType`

											
										
										
											2020-05-01 22:30:23 +00:00
+								use rustc_session::config::{self, CrateType, Lto};
-												Incorporate tracing crate

											
										
										
											2020-08-05 11:35:53 +00:00
+								use tracing::{debug, info};
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								use std::ffi::{CStr, CString};
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								use std::fs::File;
 								use std::io;
 								use std::mem;
 								use std::path::Path;
-												rustc: Work around `DICompileUnit` bugs in LLVM

This commit implements a workaround for #46346 which basically just
avoids triggering the situation that LLVM's bug
https://bugs.llvm.org/show_bug.cgi?id=35562 arises. More details can be
found in the code itself but this commit is also intended to ...

Closes #46346

											
										
										
											2017-12-16 16:20:54 +00:00
+								use std::ptr;
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units-8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler was done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								use std::slice;
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								use std::sync::Arc;
-												Use a versioning scheme for bytecode objects in rlibs.

Before this commit, the LLVM IR of exported items was simply zip-compressed and stored as an object file inside rlib archives. This commit adds a header to this "object" containing a file identifier and a format version number so the compiler can deal with changes in the way bytecode objects are stored within rlibs.

While updating the format of bytecode objects, this commit also works around a problem in LLDB which could not handle odd-sized objects within archives before mid-2014.

											
										
										
											2014-07-31 13:05:08 +00:00
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								/// We keep track of past LTO imports that were used to produce the current set
 								/// of compiled object files that we might choose to reuse during this
 								/// compilation session.
 								pub const THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-imports.bin";
-												cleanup: `config::CrateType` -> `CrateType`

											
										
										
											2020-05-01 22:30:23 +00:00
+								pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
-												Refactor symbol export list generation.

											
										
										
											2016-11-30 15:03:42 +00:00
+								    match crate_type {
-												cleanup: `config::CrateType` -> `CrateType`

											
										
										
											2020-05-01 22:30:23 +00:00
+								        CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => true,
 								        CrateType::Dylib | CrateType::Rlib | CrateType::ProcMacro => false,
-												Refactor symbol export list generation.

											
										
										
											2016-11-30 15:03:42 +00:00
+								    }
 								}
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								fn prepare_lto(
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								    diag_handler: &Handler,
 								) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> {
-												rustc: Add `-C lto=val` option

This commit primarily adds the ability to control what kind of LTO happens when
rustc performs LTO, namely allowing values to be specified to the `-C lto`
option, such as `-C lto=thin` and `-C lto=fat`. (where "fat" is the previous
kind of LTO, throw everything in one giant module)

Along the way this also refactors a number of fields which store information
about whether LTO/ThinLTO are enabled to unify them all into one field through
which everything is dispatched, hopefully removing a number of special cases
throughout.

This is intended to help mitigate #47409 but will require a backport as well,
and this would unfortunately need to be an otherwise insta-stable option.

											
										
										
											2018-01-16 23:02:31 +00:00
+								    let export_threshold = match cgcx.lto {
 								        // We're just doing LTO for our one crate
 								        Lto::ThinLocal => SymbolExportLevel::Rust,
 								        // We're doing LTO for the entire crate graph
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
-												rustc: Add `-C lto=val` option

This commit primarily adds the ability to control what kind of LTO happens when
rustc performs LTO, namely allowing values to be specified to the `-C lto`
option, such as `-C lto=thin` and `-C lto=fat`. (where "fat" is the previous
kind of LTO, throw everything in one giant module)

Along the way this also refactors a number of fields which store information
about whether LTO/ThinLTO are enabled to unify them all into one field through
which everything is dispatched, hopefully removing a number of special cases
throughout.

This is intended to help mitigate #47409 but will require a backport as well,
and this would unfortunately need to be an otherwise insta-stable option.

											
										
										
											2018-01-16 23:02:31 +00:00
 								        Lto::No => panic!("didn't request LTO but we're doing LTO"),
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								    };
-												Refactor symbol export list generation.

											
										
										
											2016-11-30 15:03:42 +00:00
-												Compute symbol names more lazily.

											
										
										
											2018-02-27 16:52:07 +00:00
+								    let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
-												rustc: Mostly remove `ExportedSymbols`

This is a big map that ends up inside of a `CrateContext` during translation for
all codegen units. This means that any change to the map may end up causing an
incremental recompilation of a codegen unit! In order to reduce the amount of
dependencies here between codegen units and the actual input crate this commit
refactors dealing with exported symbols and such into various queries.

The new queries are largely based on existing queries with filled out
implementations for the local crate in addition to external crates, but the main
idea is that while translating codegen units no unit needs the entire set of
exported symbols, instead they only need queries about particular `DefId`
instances every now and then.

The linking stage, however, still generates a full list of all exported symbols
from all crates, but that's going to always happen unconditionally anyway, so no
news there!

											
										
										
											2017-09-13 20:22:20 +00:00
+								        if level.is_below_threshold(export_threshold) {
-												Avoid realloc in `CString::new`

											
										
										
											2019-10-18 07:10:13 +00:00
+								            Some(CString::new(name.as_str()).unwrap())
-												Refactor symbol export list generation.

											
										
										
											2016-11-30 15:03:42 +00:00
+								        } else {
 								            None
 								        }
 								    };
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    let mut symbols_below_threshold = {
 								        let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								    };
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
-												Refactor symbol export list generation.

											
										
										
											2016-11-30 15:03:42 +00:00
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								    // If we're performing LTO for the entire crate graph, then for each of our
 								    // upstream dependencies, find the corresponding rlib and load the bitcode
 								    // from the archive.
 								    //
 								    // We save off all the bytecode and LLVM module ids for later processing
 								    // with either fat or thin LTO
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								    let mut upstream_modules = Vec::new();
-												rustc: Add `-C lto=val` option

This commit primarily adds the ability to control what kind of LTO happens when
rustc performs LTO, namely allowing values to be specified to the `-C lto`
option, such as `-C lto=thin` and `-C lto=fat`. (where "fat" is the previous
kind of LTO, throw everything in one giant module)

Along the way this also refactors a number of fields which store information
about whether LTO/ThinLTO are enabled to unify them all into one field through
which everything is dispatched, hopefully removing a number of special cases
throughout.

This is intended to help mitigate #47409 but will require a backport as well,
and this would unfortunately need to be an otherwise insta-stable option.

											
										
										
											2018-01-16 23:02:31 +00:00
+								    if cgcx.lto != Lto::ThinLocal {
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        if cgcx.opts.cg.prefer_dynamic {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            diag_handler
 								                .struct_err("cannot prefer dynamic linking when performing LTO")
 								                .note(
 								                    "only 'staticlib', 'bin', and 'cdylib' outputs are \
 								                               supported with LTO",
 								                )
 								                .emit();
 								            return Err(FatalError);
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        }
 								        // Make sure we actually can run LTO
 								        for crate_type in cgcx.crate_types.iter() {
 								            if !crate_type_allows_lto(*crate_type) {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                let e = diag_handler.fatal(
 								                    "lto can only be run for executables, cdylibs and \
 								                                            static library outputs",
 								                );
 								                return Err(e);
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								            }
-												support LTO against libraries built with codegen-units > 1

											
										
										
											2014-09-17 23:18:12 +00:00
+								        }
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            let exported_symbols =
 								                cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            {
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								                let _timer =
 								                    cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
 								                symbols_below_threshold
 								                    .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            }
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
 								            let archive = ArchiveRO::open(&path).expect("wanted an rlib");
-												Store LLVM bitcode in object files, not compressed

This commit is an attempted resurrection of #70458 where LLVM bitcode
emitted by rustc into rlibs is stored into object file sections rather
than in a separate file. The main rationale for doing this is that when
rustc emits bitcode it will no longer use a custom compression scheme
which makes it both easier to interoperate with existing tools and also
cuts down on compile time since this compression isn't happening.

The blocker for this in #70458 turned out to be that native linkers
didn't handle the new sections well, causing the sections to either
trigger bugs in the linker or actually end up in the final linked
artifact. This commit attempts to address these issues by ensuring that
native linkers ignore the new sections by inserting custom flags with
module-level inline assembly.

Note that this does not currently change the API of the compiler at all.
The pre-existing `-C bitcode-in-rlib` flag is co-opted to indicate
whether the bitcode should be present in the object file or not.

Finally, note that an important consequence of this commit, which is also
one of its primary purposes, is to enable rustc's `-Clto` bitcode
loading to load rlibs produced with `-Clinker-plugin-lto`. The goal here
is that when you're building with LTO Cargo will tell rustc to skip
codegen of all intermediate crates and only generate LLVM IR. Today
rustc will generate both object code and LLVM IR, but the object code is
later simply thrown away, wastefully.

											
										
										
											2020-04-23 18:45:55 +00:00
+								            let obj_files = archive
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                .iter()
 								                .filter_map(|child| child.ok().and_then(|c| c.name().map(|name| (name, c))))
-												Store LLVM bitcode in object files, not compressed

This commit is an attempted resurrection of #70458 where LLVM bitcode
emitted by rustc into rlibs is stored into object file sections rather
than in a separate file. The main rationale for doing this is that when
rustc emits bitcode it will no longer use a custom compression scheme
which makes it both easier to interoperate with existing tools and also
cuts down on compile time since this compression isn't happening.

The blocker for this in #70458 turned out to be that native linkers
didn't handle the new sections well, causing the sections to either
trigger bugs in the linker or actually end up in the final linked
artifact. This commit attempts to address these issues by ensuring that
native linkers ignore the new sections by inserting custom flags with
module-level inline assembly.

Note that this does not currently change the API of the compiler at all.
The pre-existing `-C bitcode-in-rlib` flag is co-opted to indicate
whether the bitcode should be present in the object file or not.

Finally, note that an important consequence of this commit, which is also
one of its primary purposes, is to enable rustc's `-Clto` bitcode
loading to load rlibs produced with `-Clinker-plugin-lto`. The goal here
is that when you're building with LTO Cargo will tell rustc to skip
codegen of all intermediate crates and only generate LLVM IR. Today
rustc will generate both object code and LLVM IR, but the object code is
later simply thrown away, wastefully.

											
										
										
											2020-04-23 18:45:55 +00:00
+								                .filter(|&(name, _)| looks_like_rust_object_file(name));
 								            for (name, child) in obj_files {
 								                info!("adding bitcode from {}", name);
 								                match get_bitcode_slice_from_object_data(child.data()) {
 								                    Ok(data) => {
 								                        let module = SerializedModule::FromRlib(data.to_vec());
 								                        upstream_modules.push((module, CString::new(name).unwrap()));
 								                    }
 								                    Err(msg) => return Err(diag_handler.fatal(&msg)),
 								                }
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								            }
 								        }
 								    }
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    Ok((symbols_below_threshold, upstream_modules))
-												Separate out methods for running thin and fat LTO

											
										
										
											2018-12-03 19:45:03 +00:00
+								}
-												Store LLVM bitcode in object files, not compressed

This commit is an attempted resurrection of #70458 where LLVM bitcode
emitted by rustc into rlibs is stored into object file sections rather
than in a separate file. The main rationale for doing this is that when
rustc emits bitcode it will no longer use a custom compression scheme
which makes it both easier to interoperate with existing tools and also
cuts down on compile time since this compression isn't happening.

The blocker for this in #70458 turned out to be that native linkers
didn't handle the new sections well, causing the sections to either
trigger bugs in the linker or actually end up in the final linked
artifact. This commit attempts to address these issues by ensuring that
native linkers ignore the new sections by inserting custom flags with
module-level inline assembly.

Note that this does not currently change the API of the compiler at all.
The pre-existing `-C bitcode-in-rlib` flag is co-opted to indicate
whether the bitcode should be present in the object file or not.

Finally, note that an important consequence of this commit, which is also
one of its primary purposes, is to enable rustc's `-Clto` bitcode
loading to load rlibs produced with `-Clinker-plugin-lto`. The goal here
is that when you're building with LTO Cargo will tell rustc to skip
codegen of all intermediate crates and only generate LLVM IR. Today
rustc will generate both object code and LLVM IR, but the object code is
later simply thrown away, wastefully.

											
										
										
											2020-04-23 18:45:55 +00:00
+								fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], String> {
 								    // Extracts the LLVM bitcode embedded in the object-file bytes `obj`, returning
 								    // it as a slice or, on failure, an error string that includes LLVM's last error.
 								    //
 								    // `len` is handed to the FFI call by `&mut`; the asserts below expect it to be
 								    // non-zero exactly when the returned pointer is non-null.
 								    let mut len = 0;
 								    let data =
 								        unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };
 								    if !data.is_null() {
 								        assert!(len != 0);
 								        // NOTE(review): soundness relies on the FFI having returned a pointer to `len`
 								        // readable bytes; the bounds asserts below only run after the slice is formed.
 								        let bc = unsafe { slice::from_raw_parts(data, len) };
 								        // `bc` must be a sub-slice of `obj`.
 								        assert!(obj.as_ptr() <= bc.as_ptr());
 								        // The empty `len..len` slices yield one-past-the-end pointers, so this checks
 								        // that `bc` ends no later than `obj` does.
 								        assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr());
 								        Ok(bc)
 								    } else {
 								        assert!(len == 0);
 								        // Fall back to a generic message when `llvm::last_error()` has nothing recorded.
 								        let msg = llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
 								        Err(format!("failed to get bitcode from object file for LTO ({})", msg))
 								    }
 								}
-												Separate out methods for running thin and fat LTO

											
										
										
											2018-12-03 19:45:03 +00:00
+								/// Performs fat LTO by merging all modules into a single one and returning it
 								/// for further optimization.
 								///
 								/// Collects `symbols_below_threshold` and `upstream_modules` from `prepare_lto`,
 								/// then delegates the merge to `fat_lto`; a `FatalError` from either is returned.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								pub(crate) fn run_fat(
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
 								    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 								) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
-												Separate out methods for running thin and fat LTO

											
										
										
											2018-12-03 19:45:03 +00:00
+								    let diag_handler = cgcx.create_diag_handler();
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
 								    // Re-bind the name to a vector of raw C-string pointers for `fat_lto`. The
 								    // shadowed `Vec<CString>` is only borrowed by `iter()` and is not dropped until
 								    // this function returns, so the pointers stay valid for the call below.
 								    let symbols_below_threshold =
 								        symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
 								    fat_lto(
 								        cgcx,
 								        &diag_handler,
 								        modules,
 								        cached_modules,
 								        upstream_modules,
 								        &symbols_below_threshold,
 								    )
-												Separate out methods for running thin and fat LTO

											
										
										
											2018-12-03 19:45:03 +00:00
+								}
 								/// Performs thin LTO by performing necessary global analysis and returning two
 								/// lists, one of the modules that need optimization and another for modules that
 								/// can simply be copied over from the incr. comp. cache.
 								///
 								/// Collects `symbols_below_threshold` and `upstream_modules` from `prepare_lto`,
 								/// then delegates to `thin_lto`; a `FatalError` from either is returned.
 								///
 								/// # Panics
 								///
 								/// Hits `unreachable!` if `-C linker-plugin-lto` is enabled.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								pub(crate) fn run_thin(
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								    modules: Vec<(String, ThinBuffer)>,
 								    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 								) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
-												Separate out methods for running thin and fat LTO

											
										
										
											2018-12-03 19:45:03 +00:00
+								    let diag_handler = cgcx.create_diag_handler();
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
 								    // Re-bind the name to a vector of raw C-string pointers for `thin_lto`. The
 								    // shadowed `Vec<CString>` is only borrowed by `iter()` and is not dropped until
 								    // this function returns, so the pointers stay valid for the call below.
 								    let symbols_below_threshold =
 								        symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
-												Stabilize linker-plugin based LTO.

											
										
										
											2019-02-01 14:15:43 +00:00
+								    if cgcx.opts.cg.linker_plugin_lto.enabled() {
 								        // With linker-plugin LTO the LTO step is deferred to the linker (see the
 								        // message below), so getting here in that mode indicates a compiler bug.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        unreachable!(
 								            "We should never reach this case if the LTO step \
 								                      is deferred to the linker"
 								        );
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In anther mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								    }
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    thin_lto(
 								        cgcx,
 								        &diag_handler,
 								        modules,
 								        upstream_modules,
 								        cached_modules,
 								        &symbols_below_threshold,
 								    )
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units-8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler was done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								}
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
 								    // Consumes `module` and returns its name paired with a `ThinBuffer` built from
 								    // its LLVM module.
-												Serialize modules into ThinBuffer after initial optimization

Instead of keeping all modules in memory until thin LTO and only
serializing them then, serialize the module immediately after
it finishes optimizing.

											
										
										
											2018-12-04 15:24:20 +00:00
+								    let name = module.name.clone();
 								    // NOTE(review): `ThinBuffer::new` presumably serializes the module here, as the
 								    // commit note above describes; confirm against `ThinBuffer`'s definition.
 								    let buffer = ThinBuffer::new(module.module_llvm.llmod());
 								    (name, buffer)
 								}
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								fn fat_lto(
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								    diag_handler: &Handler,
 								    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
 								    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 								    mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    symbols_below_threshold: &[*const libc::c_char],
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								    let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								    info!("going for a fat lto");
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assume that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there's no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								    // Sort out all our lists of incoming modules into two lists.
 								    //
 								    // * `serialized_modules` (also an argument to this function) contains all
 								    //   modules that are serialized in-memory.
 								    // * `in_memory` contains modules which are already parsed and in-memory,
 								    //   such as from multi-CGU builds.
 								    //
 								    // All of `cached_modules` (cached from previous incremental builds) can
 								    // immediately go onto the `serialized_modules` modules list and then we can
 								    // split the `modules` array into these two lists.
 								    let mut in_memory = Vec::new();
 								    serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
 								        info!("pushing cached module {:?}", wp.cgu_name);
 								        (buffer, CString::new(wp.cgu_name).unwrap())
 								    }));
 								    for module in modules {
 								        match module {
 								            FatLTOInput::InMemory(m) => in_memory.push(m),
 								            FatLTOInput::Serialized { name, buffer } => {
 								                info!("pushing serialized module {:?}", name);
 								                let buffer = SerializedModule::Local(buffer);
 								                serialized_modules.push((buffer, CString::new(name).unwrap()));
 								            }
 								        }
 								    }
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								    // Find the "costliest" module and merge everything into that codegen unit.
 								    // All the other modules will be serialized and reparsed into the new
 								    // context, so this hopefully avoids serializing and parsing the largest
 								    // codegen unit.
 								    //
 								    // Additionally use a regular module as the base here to ensure that various
 								    // file copy operations in the backend work correctly. The only other kind
 								    // of module here should be an allocator one, and if your crate is smaller
 								    // than the allocator module then the size doesn't really matter anyway.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    let costliest_module = in_memory
 								        .iter()
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								        .enumerate()
 								        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
 								        .map(|(i, module)| {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								            (cost, i)
 								        })
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								        .max();
 								    // If we found a costliest module, we're good to go. Otherwise all our
 								    // inputs were serialized which could happen in the case, for example, that
 								    // all our inputs were incrementally reread from the cache and we're just
 								    // re-executing the LTO passes. If that's the case deserialize the first
 								    // module and create a linker with it.
 								    let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assume that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there's no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								        Some((_cost, i)) => in_memory.remove(i),
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								        None => {
-												use is_empty() instead of len() == x  to determine if structs are empty.

											
										
										
											2020-02-28 13:20:33 +00:00
+								            assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assume that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there's no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								            let (buffer, name) = serialized_modules.remove(0);
 								            info!("no in-memory regular modules to choose from, parsing {:?}", name);
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								            ModuleCodegen {
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assume that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there's no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								                module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
 								                name: name.into_string().unwrap(),
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								                kind: ModuleKind::Regular,
 								            }
 								        }
 								    };
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								    let mut serialized_bitcode = Vec::new();
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								    {
-												rustc: Handle linker diagnostic from LLVM

Previously linker diagnostics were being hidden when two modules were linked
together but failed to link. This commit fixes the situation by ensuring that we
have a diagnostic handler installed and also adds support for handling linker
diagnostics.

											
										
										
											2018-07-17 23:20:51 +00:00
+								        let (llcx, llmod) = {
-												Support local ThinLTO with incremental compilation.

											
										
										
											2018-08-20 15:13:01 +00:00
+								            let llvm = &module.module_llvm;
-												rustc: Handle linker diagnostic from LLVM

Previously linker diagnostics were being hidden when two modules were linked
together but failed to link. This commit fixes the situation by ensuring that we
have a diagnostic handler installed and also adds support for handling linker
diagnostics.

											
										
										
											2018-07-17 23:20:51 +00:00
+								            (&llvm.llcx, llvm.llmod())
 								        };
-												Use CGU name as LLVM module name and add some caching to CGU name generation.

											
										
										
											2018-08-14 15:55:22 +00:00
+								        info!("using {:?} as a base module", module.name);
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
-												rustc: Handle linker diagnostic from LLVM

Previously linker diagnostics were being hidden when two modules were linked
together but failed to link. This commit fixes the situation by ensuring that we
have a diagnostic handler installed and also adds support for handling linker
diagnostics.

											
										
										
											2018-07-17 23:20:51 +00:00
+								        // The linking steps below may produce errors and diagnostics within LLVM
 								        // which we'd like to handle and print, so set up our diagnostic handlers
 								        // (which get unregistered when they go out of scope below).
 								        let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								        // For all other modules we codegened we'll need to link them into our own
 								        // bitcode. All modules were codegened in their own LLVM context, however,
 								        // and we want to move everything to the same LLVM context. Currently the
 								        // way we know of to do that is to serialize them to a string and then parse
 								        // them later. Not great but hey, that's why it's "fat" LTO, right?
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assumed that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there are no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								        for module in in_memory {
 								            let buffer = ModuleBuffer::new(module.module_llvm.llmod());
 								            let llmod_id = CString::new(&module.name[..]).unwrap();
 								            serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
 								        }
-												Explain why we're sorting the modules.

											
										
										
											2019-08-08 17:51:52 +00:00
+								        // Sort the modules to ensure we produce deterministic results.
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assumed that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there are no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								        serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								        // For all serialized bitcode files we parse them and link them in as we did
 								        // above, this is all mostly handled in C++. Like above, though, we don't
 								        // know much about the memory management here so we err on the side of being
 								        // safe and persist everything with the original module.
 								        let mut linker = Linker::new(llmod);
 								        for (bc_decoded, name) in serialized_modules {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer = cgcx
 								                .prof
 								                .generic_activity_with_arg("LLVM_fat_lto_link_module", format!("{:?}", name));
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								            info!("linking {:?}", name);
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let data = bc_decoded.data();
 								            linker.add(&data).map_err(|()| {
 								                let msg = format!("failed to load bc of {:?}", name);
 								                write::llvm_err(&diag_handler, &msg)
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								            })?;
 								            serialized_bitcode.push(bc_decoded);
 								        }
 								        drop(linker);
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        save_temp_bitcode(&cgcx, &module, "lto.input");
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								        // Internalize everything below threshold to help strip out more modules and such.
-												Disable all unwinding on -Z no-landing-pads LTO

When performing LTO, the rust compiler has an opportunity to completely strip
all landing pads in all dependent libraries. I've modified the LTO pass to
recognize the -Z no-landing-pads option when also running an LTO pass to flag
everything in LLVM as nothrow. I've verified that this prevents any and all
invoke instructions from being emitted.

I believe that this is one of our best options for moving forward with
accommodating use-cases where unwinding doesn't really make sense. This will
allow libraries to be built with landing pads by default but allow usage of them
in contexts where landing pads aren't necessary.

cc #10780

											
										
										
											2013-12-11 07:27:15 +00:00
+								        unsafe {
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								            let ptr = symbols_below_threshold.as_ptr();
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            llvm::LLVMRustRunRestrictionPass(
 								                llmod,
 								                ptr as *const *const libc::c_char,
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								                symbols_below_threshold.len() as libc::size_t,
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            );
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								            save_temp_bitcode(&cgcx, &module, "lto.after-restriction");
-												Disable all unwinding on -Z no-landing-pads LTO

When performing LTO, the rust compiler has an opportunity to completely strip
all landing pads in all dependent libraries. I've modified the LTO pass to
recognize the -Z no-landing-pads option when also running an LTO pass to flag
everything in LLVM as nothrow. I've verified that this prevents any and all
invoke instructions from being emitted.

I believe that this is one of our best options for moving forward with
accommodating use-cases where unwinding doesn't really make sense. This will
allow libraries to be built with landing pads by default but allow usage of them
in contexts where landing pads aren't necessary.

cc #10780

											
										
										
											2013-12-11 07:27:15 +00:00
+								        }
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
 								        if cgcx.no_landing_pads {
 								            unsafe {
 								                llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
 								            }
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								            save_temp_bitcode(&cgcx, &module, "lto.after-nounwind");
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								        }
-												Disable all unwinding on -Z no-landing-pads LTO

When performing LTO, the rust compiler has an opportunity to completely strip
all landing pads in all dependent libraries. I've modified the LTO pass to
recognize the -Z no-landing-pads option when also running an LTO pass to flag
everything in LLVM as nothrow. I've verified that this prevents any and all
invoke instructions from being emitted.

I believe that this is one of our best options for moving forward with
accommodating use-cases where unwinding doesn't really make sense. This will
allow libraries to be built with landing pads by default but allow usage of them
in contexts where landing pads aren't necessary.

cc #10780

											
										
										
											2013-12-11 07:27:15 +00:00
+								    }
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    Ok(LtoModuleCodegen::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode })
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								}
-												rustc_codegen_llvm: use safe references for Linker.


											
										
										
											2018-07-17 11:26:22 +00:00
+								struct Linker<'a>(&'a mut llvm::Linker<'a>);
-												rustc: Persist LLVM's `Linker` in Fat LTO

This commit updates our Fat LTO logic to tweak our custom wrapper around LLVM's
"link modules" functionality. Previously whenever the
`LLVMRustLinkInExternalBitcode` function was called it would call LLVM's
`Linker::linkModules` wrapper. Internally this would create an instance of a
`Linker` which internally creates an instance of an `IRMover`. Unfortunately for
us the creation of `IRMover` is somewhat O(n) with the input module. This means
that every time we linked a module it was O(n) with respect to the entire module
we had built up!

Now the modules we build up during LTO are quite large, so this quickly started
creating an O(n^2) problem for us! Discovered in #48025 it turns out this has
always been a problem and we just haven't noticed it. It became particularly
worse recently though due to most libraries having 16x more object files than
they previously did (1 -> 16).

This commit fixes this performance issue by preserving the `Linker` instance
across all links into the main LLVM module. This means we only create one
`IRMover` and allows LTO to progress much speedier.

From the `cargo-cache` project in #48025 a **full build** locally went from
5m15s to 2m24s. Looking at the timing logs each object file was linked in in
single-digit milliseconds rather than hundreds, clearly being a nice improvement!

Closes #48025

											
										
										
											2018-02-12 16:38:46 +00:00
-												rustc_codegen_llvm: use safe references for Linker.


											
										
										
											2018-07-17 11:26:22 +00:00
+								impl Linker<'a> {
 								    fn new(llmod: &'a llvm::Module) -> Self {
-												rustc: Persist LLVM's `Linker` in Fat LTO

This commit updates our Fat LTO logic to tweak our custom wrapper around LLVM's
"link modules" functionality. Previously whenever the
`LLVMRustLinkInExternalBitcode` function was called it would call LLVM's
`Linker::linkModules` wrapper. Internally this would create an instance of a
`Linker` which internally creates an instance of an `IRMover`. Unfortunately for
us the creation of `IRMover` is somewhat O(n) with the input module. This means
that every time we linked a module it was O(n) with respect to the entire module
we had built up!

Now the modules we build up during LTO are quite large, so this quickly started
creating an O(n^2) problem for us! Discovered in #48025 it turns out this has
always been a problem and we just haven't noticed it. It became particularly
worse recently though due to most libraries having 16x more object files than
they previously did (1 -> 16).

This commit fixes this performance issue by preserving the `Linker` instance
across all links into the main LLVM module. This means we only create one
`IRMover` and allows LTO to progress much speedier.

From the `cargo-cache` project in #48025 a **full build** locally went from
5m15s to 2m24s. Looking at the timing logs each object file was linked in in
single-digit milliseconds rather than hundreds, clearly being a nice improvement!

Closes #48025

											
										
										
											2018-02-12 16:38:46 +00:00
+								        unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
 								    }
 								    fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
 								        unsafe {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            if llvm::LLVMRustLinkerAdd(
 								                self.0,
 								                bytecode.as_ptr() as *const libc::c_char,
 								                bytecode.len(),
 								            ) {
-												rustc: Persist LLVM's `Linker` in Fat LTO

This commit updates our Fat LTO logic to tweak our custom wrapper around LLVM's
"link modules" functionality. Previously whenever the
`LLVMRustLinkInExternalBitcode` function was called it would call LLVM's
`Linker::linkModules` wrapper. Internally this would create an instance of a
`Linker` which internally creates an instance of an `IRMover`. Unfortunately for
us the creation of `IRMover` is somewhat O(n) with the input module. This means
that every time we linked a module it was O(n) with respect to the entire module
we had built up!

Now the modules we build up during LTO are quite large, so this quickly started
creating an O(n^2) problem for us! Discovered in #48025 it turns out this has
always been a problem and we just haven't noticed it. It became particularly
worse recently though due to most libraries having 16x more object files than
they previously did (1 -> 16).

This commit fixes this performance issue by preserving the `Linker` instance
across all links into the main LLVM module. This means we only create one
`IRMover` and allows LTO to progress much speedier.

From the `cargo-cache` project in #48025 a **full build** locally went from
5m15s to 2m24s. Looking at the timing logs each object file was linked in in
single-digit milliseconds rather than hundreds, clearly being a nice improvement!

Closes #48025

											
										
										
											2018-02-12 16:38:46 +00:00
+								                Ok(())
 								            } else {
 								                Err(())
 								            }
 								        }
 								    }
 								}
-												rustc_codegen_llvm: use safe references for Linker.


											
										
										
											2018-07-17 11:26:22 +00:00
+								impl Drop for Linker<'a> {
-												rustc: Persist LLVM's `Linker` in Fat LTO

This commit updates our Fat LTO logic to tweak our custom wrapper around LLVM's
"link modules" functionality. Previously whenever the
`LLVMRustLinkInExternalBitcode` function was called it would call LLVM's
`Linker::linkModules` wrapper. Internally this would create an instance of a
`Linker` which internally creates an instance of an `IRMover`. Unfortunately for
us the creation of `IRMover` is somewhat O(n) with the input module. This means
that every time we linked a module it was O(n) with respect to the entire module
we had built up!

Now the modules we build up during LTO are quite large, so this quickly started
creating an O(n^2) problem for us! Discovered in #48025 it turns out this has
always been a problem and we just haven't noticed it. It became particularly
worse recently though due to most libraries having 16x more object files than
they previously did (1 -> 16).

This commit fixes this performance issue by preserving the `Linker` instance
across all links into the main LLVM module. This means we only create one
`IRMover` and allows LTO to progress much speedier.

From the `cargo-cache` project in #48025 a **full build** locally went from
5m15s to 2m24s. Looking at the timing logs each object file was linked in in
single-digit milliseconds rather than hundreds, clearly being a nice improvement!

Closes #48025

											
										
										
											2018-02-12 16:38:46 +00:00
+								    fn drop(&mut self) {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        unsafe {
 								            llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _));
 								        }
-												rustc: Persist LLVM's `Linker` in Fat LTO

This commit updates our Fat LTO logic to tweak our custom wrapper around LLVM's
"link modules" functionality. Previously whenever the
`LLVMRustLinkInExternalBitcode` function was called it would call LLVM's
`Linker::linkModules` wrapper. Internally this would create an instance of a
`Linker` which internally creates an instance of an `IRMover`. Unfortunately for
us the creation of `IRMover` is somewhat O(n) with the input module. This means
that every time we linked a module it was O(n) with respect to the entire module
we had built up!

Now the modules we build up during LTO are quite large, so this quickly started
creating an O(n^2) problem for us! Discovered in #48025 it turns out this has
always been a problem and we just haven't noticed it. It became particularly
worse recently though due to most libraries having 16x more object files than
they previously did (1 -> 16).

This commit fixes this performance issue by preserving the `Linker` instance
across all links into the main LLVM module. This means we only create one
`IRMover` and allows LTO to progress much speedier.

From the `cargo-cache` project in #48025 a **full build** locally went from
5m15s to 2m24s. Looking at the timing logs each object file was linked in in
single-digit milliseconds rather than hundreds, clearly being a nice improvement!

Closes #48025

											
										
										
											2018-02-12 16:38:46 +00:00
+								    }
 								}
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								/// Prepare "thin" LTO to get run on these modules.
 								///
 								/// The general structure of ThinLTO is quite different from the structure of
 								/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
 								/// one giant LLVM module, and then we run more optimization passes over this
 								/// big module after internalizing most symbols. Thin LTO, on the other hand,
 								/// avoids this large bottleneck through more targeted optimization.
 								///
 								/// At a high level Thin LTO looks like:
 								///
 								///     1. Prepare a "summary" of each LLVM module in question which describes
 								///        the values inside, cost of the values, etc.
 								///     2. Merge the summaries of all modules in question into one "index"
 								///     3. Perform some global analysis on this index
 								///     4. For each module, use the index and analysis calculated previously to
 								///        perform local transformations on the module, for example inlining
 								///        small functions from other modules.
 								///     5. Run thin-specific optimization passes over each module, and then code
 								///        generate everything at the end.
 								///
 								/// The summary for each module is intended to be quite cheap, and the global
 								/// index is relatively quite cheap to create as well. As a result, the goal of
 								/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
 								/// situations. For example one cheap optimization is that we can parallelize
 								/// all codegen modules, easily making use of all the cores on a machine.
 								///
 								/// With all that in mind, the function here is designed at specifically just
 								/// calculating the *index* for ThinLTO. This index will then be shared amongst
-												Rename trans to codegen everywhere.

											
										
										
											2018-05-08 13:10:16 +00:00
+								/// all of the `LtoModuleCodegen` units returned below and destroyed once
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								/// they all go out of scope.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								fn thin_lto(
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								    diag_handler: &Handler,
 								    modules: Vec<(String, ThinBuffer)>,
 								    serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
 								    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								    symbols_below_threshold: &[*const libc::c_char],
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								    unsafe {
 								        info!("going for that thin, thin LTO");
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        let green_modules: FxHashMap<_, _> =
 								            cached_modules.iter().map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone())).collect();
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
-												codegen_llvm_back: improve allocations

											
										
										
											2018-10-06 09:45:11 +00:00
+								        let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
 								        let mut thin_buffers = Vec::with_capacity(modules.len());
 								        let mut module_names = Vec::with_capacity(full_scope_len);
 								        let mut thin_modules = Vec::with_capacity(full_scope_len);
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
-												Serialize modules into ThinBuffer after initial optimization

Instead of keeping all modules in memory until thin LTO and only
serializing them then, serialize the module immediately after
it finishes optimizing.

											
										
										
											2018-12-04 15:24:20 +00:00
+								        for (i, (name, buffer)) in modules.into_iter().enumerate() {
 								            info!("local module: {} - {}", i, name);
 								            let cname = CString::new(name.clone()).unwrap();
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								            thin_modules.push(llvm::ThinLTOModule {
-												Serialize modules into ThinBuffer after initial optimization

Instead of keeping all modules in memory until thin LTO and only
serializing them then, serialize the module immediately after
it finishes optimizing.

											
										
										
											2018-12-04 15:24:20 +00:00
+								                identifier: cname.as_ptr(),
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								                data: buffer.data().as_ptr(),
 								                len: buffer.data().len(),
 								            });
 								            thin_buffers.push(buffer);
-												Serialize modules into ThinBuffer after initial optimization

Instead of keeping all modules in memory until thin LTO and only
serializing them then, serialize the module immediately after
it finishes optimizing.

											
										
										
											2018-12-04 15:24:20 +00:00
+								            module_names.push(cname);
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        }
 								        // FIXME: All upstream crates are deserialized internally in the
 								        //        function below to extract their summary and modules. Note that
 								        //        unlike the loop above we *must* decode and/or read something
 								        //        here as these are all just serialized files on disk. An
 								        //        improvement, however, to make here would be to store the
 								        //        module summary separately from the actual module itself. Right
 								        //        now this is stored in one large bitcode file, and the entire
 								        //        file is deflate-compressed. We could try to bypass some of the
 								        //        decompression by storing the index uncompressed and only
 								        //        lazily decompressing the bytecode if necessary.
 								        //
 								        //        Note that truly taking advantage of this optimization will
 								        //        likely be further down the road. We'd have to implement
 								        //        incremental ThinLTO first where we could actually avoid
 								        //        looking at upstream modules entirely sometimes (the contents,
 								        //        we must always unconditionally look at the index).
-												codegen_llvm_back: improve allocations

											
										
										
											2018-10-06 09:45:11 +00:00
+								        let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        let cached_modules =
 								            cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
 								        for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
 								            info!("upstream or cached module {:?}", name);
-												Support local ThinLTO with incremental compilation.

											
										
										
											2018-08-20 15:13:01 +00:00
+								            thin_modules.push(llvm::ThinLTOModule {
 								                identifier: name.as_ptr(),
 								                data: module.data().as_ptr(),
 								                len: module.data().len(),
 								            });
 								            serialized.push(module);
 								            module_names.push(name);
 								        }
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								        // Sanity check
 								        assert_eq!(thin_modules.len(), module_names.len());
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        // Delegate to the C++ bindings to create some data here. Once this is a
 								        // tried-and-true interface we may wish to try to upstream some of this
 								        // to LLVM itself, right now we reimplement a lot of what they do
 								        // upstream...
 								        let data = llvm::LLVMRustCreateThinLTOData(
 								            thin_modules.as_ptr(),
 								            thin_modules.len() as u32,
-												Avoid "whitelist"

Other terms are more inclusive and precise.

											
										
										
											2020-07-07 15:12:44 +00:00
+								            symbols_below_threshold.as_ptr(),
 								            symbols_below_threshold.len() as u32,
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        )
 								        .ok_or_else(|| write::llvm_err(&diag_handler, "failed to prepare thin LTO context"))?;
-												rustc_codegen_llvm: use safe references for ThinLTOData.

											
										
										
											2018-07-17 13:43:49 +00:00
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        info!("thin LTO data created");
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								        let (import_map_path, prev_import_map, curr_import_map) =
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir {
 								                let path = incr_comp_session_dir.join(THIN_LTO_IMPORTS_INCR_COMP_FILE_NAME);
 								                // If previous imports have been deleted, or we get an IO error
 								                // reading the file storing them, then we'll just use `None` as the
 								                // prev_import_map, which will force the code to be recompiled.
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								                let prev = if path.exists() {
 								                    ThinLTOImportMaps::load_from_file(&path).ok()
 								                } else {
 								                    None
 								                };
 								                let curr = ThinLTOImportMaps::from_thin_lto_data(data);
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                (Some(path), prev, curr)
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            } else {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                // If we don't compile incrementally, we don't need to load the
 								                // import data from LLVM.
 								                assert!(green_modules.is_empty());
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								                let curr = ThinLTOImportMaps::default();
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                (None, None, curr)
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            };
-												incr.ThinLTO: Do some cleanup and add some logging.

											
										
										
											2018-09-03 10:42:27 +00:00
+								        info!("thin LTO import map loaded");
 								        let data = ThinData(data);
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        // Throw our data in an `Arc` as we'll be sharing it across threads. We
 								        // also put all memory referenced by the C++ data (buffers, ids, etc)
 								        // into the arc as well. After this we'll create a thin module
-												Rename trans to codegen everywhere.

											
										
										
											2018-05-08 13:10:16 +00:00
+								        // codegen per module in this data.
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        let shared = Arc::new(ThinShared {
 								            data,
 								            thin_buffers,
 								            serialized_modules: serialized,
 								            module_names,
 								        });
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
 								        let mut copy_jobs = vec![];
 								        let mut opt_jobs = vec![];
-												incr.ThinLTO: Do some cleanup and add some logging.

											
										
										
											2018-09-03 10:42:27 +00:00
+								        info!("checking which modules can be-reused and which have to be re-optimized.");
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								        for (module_index, module_name) in shared.module_names.iter().enumerate() {
 								            let module_name = module_name_to_str(module_name);
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            // If (1.) the module hasn't changed, and (2.) none of the modules
-												Issue #71248: attempt to recover perf by removing `exports_all_green` flag.

(My hypothesis is that my use of this flag was an overly conservative
generalization of PR 67020.)

											
										
										
											2020-04-17 20:04:59 +00:00
+								            // it imports from have changed, *and* (3.) the import and export
 								            // sets themselves have not changed from the previous compile when
 								            // it was last ThinLTO'ed, then we can re-use the post-ThinLTO
 								            // version of the module. Otherwise, freshly perform LTO
 								            // optimization.
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								            //
 								            // (Note that globally, the export set is just the inverse of the
 								            // import set.)
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            //
-												Expand comment to justify not checking if all the exports are green.

											
										
										
											2020-04-20 14:33:27 +00:00
+								            // For further justification of why the above is necessary and sufficient,
 								            // see the LLVM blog post on ThinLTO:
 								            //
 								            // http://blog.llvm.org/2016/06/thinlto-scalable-and-incremental-lto.html
 								            //
 								            // which states the following:
 								            //
 								            // ```quote
 								            // any particular ThinLTO backend must be redone iff:
 								            //
 								            // 1. The corresponding (primary) module’s bitcode changed
 								            // 2. The list of imports into or exports from the module changed
 								            // 3. The bitcode for any module being imported from has changed
 								            // 4. Any global analysis result affecting either the primary module
 								            //    or anything it imports has changed.
 								            // ```
 								            //
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            // This strategy means we can always save the computed imports as
 								            // canon: when we reuse the post-ThinLTO version, condition (3.)
-												fix various typos

											
										
										
											2020-03-06 11:13:55 +00:00
+								            // ensures that the current import set is the same as the previous
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            // one. (And of course, when we don't reuse the post-ThinLTO
 								            // version, the current import set *is* the correct one, since we
 								            // are doing the ThinLTO in this current compilation cycle.)
 								            //
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								            // For more discussion, see rust-lang/rust#59535 (where the import
 								            // issue was discovered) and rust-lang/rust#69798 (where the
 								            // analogous export issue was discovered).
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								            if let (Some(prev_import_map), true) =
 								                (prev_import_map.as_ref(), green_modules.contains_key(module_name))
 								            {
 								                assert!(cgcx.incr_comp_session_dir.is_some());
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								                let prev_imports = prev_import_map.imports_of(module_name);
 								                let curr_imports = curr_import_map.imports_of(module_name);
 								                let prev_exports = prev_import_map.exports_of(module_name);
 								                let curr_exports = curr_import_map.exports_of(module_name);
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								                let imports_all_green = curr_imports
-												incr.ThinLTO: Do some cleanup and add some logging.

											
										
										
											2018-09-03 10:42:27 +00:00
+								                    .iter()
 								                    .all(|imported_module| green_modules.contains_key(imported_module));
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
+								                if imports_all_green
 								                    && equivalent_as_sets(prev_imports, curr_imports)
 								                    && equivalent_as_sets(prev_exports, curr_exports)
 								                {
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								                    let work_product = green_modules[module_name].clone();
 								                    copy_jobs.push(work_product);
-												incr.ThinLTO: Do some cleanup and add some logging.

											
										
										
											2018-09-03 10:42:27 +00:00
+								                    info!(" - {}: re-used", module_name);
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								                    assert!(cgcx.incr_comp_session_dir.is_some());
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                    cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto);
 								                    continue;
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								                }
 								            }
-												incr.ThinLTO: Do some cleanup and add some logging.

											
										
										
											2018-09-03 10:42:27 +00:00
+								            info!(" - {}: re-compiled", module_name);
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								            opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								                shared: shared.clone(),
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								                idx: module_index,
 								            }));
 								        }
-												fix various typos

											
										
										
											2020-03-06 11:13:55 +00:00
+								        // Save the current ThinLTO import information for the next compilation
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								        // session, overwriting the previous serialized imports (if any).
 								        if let Some(path) = import_map_path {
 								            if let Err(err) = curr_import_map.save_to_file(&path) {
 								                let msg = format!("Error while writing ThinLTO import data: {}", err);
 								                return Err(write::llvm_err(&diag_handler, &msg));
 								            }
 								        }
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								        Ok((opt_jobs, copy_jobs))
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								    }
 								}
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								/// Given two slices, each with no repeat elements. returns true if and only if
 								/// the two slices have the same contents when considered as sets (i.e. when
 								/// element order is disregarded).
 								fn equivalent_as_sets(a: &[String], b: &[String]) -> bool {
 								    // cheap path: unequal lengths means cannot possibly be set equivalent.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    if a.len() != b.len() {
 								        return false;
 								    }
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								    // fast path: before building new things, check if inputs are equivalent as is.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    if a == b {
 								        return true;
 								    }
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								    // slow path: general set comparison.
 								    let a: FxHashSet<&str> = a.iter().map(|s| s.as_str()).collect();
 								    let b: FxHashSet<&str> = b.iter().map(|s| s.as_str()).collect();
 								    a == b
 								}
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								pub(crate) fn run_pass_manager(
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								    module: &ModuleCodegen<ModuleLlvm>,
 								    config: &ModuleConfig,
 								    thin: bool,
 								) {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								    let _timer = cgcx.prof.extra_verbose_generic_activity("LLVM_lto_optimize", &module.name[..]);
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
+								    // Now we have one massive module inside of llmod. Time to run the
 								    // LTO-specific optimization passes that LLVM provides.
 								    //
 								    // This code is based off the code found in llvm's LTO code generator:
 								    //      tools/lto/LTOCodeGenerator.cpp
 								    debug!("running the pass manager");
 								    unsafe {
-												Add support for new pass manager

The new pass manager can be enabled using
-Z new-llvm-pass-manager=on.

											
										
										
											2020-01-05 18:16:58 +00:00
+								        if write::should_use_new_llvm_pass_manager(config) {
 								            let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
 								            let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
 								            // See comment below for why this is necessary.
 								            let opt_level = if let config::OptLevel::No = opt_level {
 								                config::OptLevel::Less
 								            } else {
 								                opt_level
 								            };
-												add selfprofiling for new llvm passmanager

											
										
										
											2020-02-11 21:37:16 +00:00
+								            write::optimize_with_new_llvm_pass_manager(cgcx, module, config, opt_level, opt_stage);
-												Add support for new pass manager

The new pass manager can be enabled using
-Z new-llvm-pass-manager=on.

											
										
										
											2020-01-05 18:16:58 +00:00
+								            debug!("lto done");
 								            return;
 								        }
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
+								        let pm = llvm::LLVMCreatePassManager();
-												Use LLVMAddAnalysisPasses instead of Rust's wrapper

LLVM exposes a C API `LLVMAddAnalysisPasses` and hence Rust's own
wrapper `LLVMRustAddAnalysisPasses` is not needed anymore.

											
										
										
											2019-11-29 03:31:09 +00:00
+								        llvm::LLVMAddAnalysisPasses(module.module_llvm.tm, pm);
-												Respect -Z no-verify during LTO

Currently -Z no-verify only controls IR verification prior to
LLVM codegen, while verification is performed unconditionally
both before and after linking with (Thin)LTO.

											
										
										
											2018-05-30 20:48:20 +00:00
-												Rename -Z no-verify to -Z verify-llvm-ir

This disables IR verification by default.

											
										
										
											2018-06-12 19:05:37 +00:00
+								        if config.verify_llvm_ir {
-												Replaces some instances of `as *[const | mut] _` with `.cast()`

											
										
										
											2019-10-05 07:48:14 +00:00
+								            let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
-												rustc_codegen_llvm: use safe references for Pass.


											
										
										
											2018-07-12 15:00:49 +00:00
+								            llvm::LLVMRustAddPass(pm, pass.unwrap());
-												Respect -Z no-verify during LTO

Currently -Z no-verify only controls IR verification prior to
LLVM codegen, while verification is performed unconditionally
both before and after linking with (Thin)LTO.

											
										
										
											2018-05-30 20:48:20 +00:00
+								        }
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
-												rustc: Fix some ThinLTO internalization

First the `addPreservedGUID` function forgot to take care of "alias" summaries.
I'm not 100% sure what this is but the current code now matches upstream. Next
the `computeDeadSymbols` return value wasn't actually being used, but it needed
to be used! Together these should...

Closes #45195

											
										
										
											2017-10-11 18:19:59 +00:00
+								        // When optimizing for LTO we don't actually pass in `-O0`, but we force
 								        // it to always happen at least with `-O1`.
 								        //
 								        // With ThinLTO we mess around a lot with symbol visibility in a way
 								        // that will actually cause linking failures if we optimize at O0 which
 								        // notable is lacking in dead code elimination. To ensure we at least
 								        // get some optimizations and correctly link we forcibly switch to `-O1`
 								        // to get dead code elimination.
 								        //
 								        // Note that in general this shouldn't matter too much as you typically
 								        // only turn on ThinLTO when you're compiling with optimizations
 								        // otherwise.
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        let opt_level = config
 								            .opt_level
 								            .map(|x| to_llvm_opt_settings(x).0)
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								            .unwrap_or(llvm::CodeGenOptLevel::None);
-												rustc: Fix some ThinLTO internalization

First the `addPreservedGUID` function forgot to take care of "alias" summaries.
I'm not 100% sure what this is but the current code now matches upstream. Next
the `computeDeadSymbols` return value wasn't actually being used, but it needed
to be used! Together these should...

Closes #45195

											
										
										
											2017-10-11 18:19:59 +00:00
+								        let opt_level = match opt_level {
 								            llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
 								            level => level,
 								        };
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        with_llvm_pmb(module.module_llvm.llmod(), config, opt_level, false, &mut |b| {
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								            if thin {
-												Remove support for building against LLVM 4

With emscripten removed in #55626, we no longer need to support
building against LLVM 4.

											
										
										
											2018-11-05 13:52:08 +00:00
+								                llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm);
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								            } else {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                llvm::LLVMPassManagerBuilderPopulateLTOPassManager(
 								                    b, pm, /* Internalize = */ False, /* RunInliner = */ True,
 								                );
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								            }
-												trans: Consolidate creating pass manager builders

The LTO pass in the compiler forgot to call the `LLVMRustAddBuilderLibraryInfo`
function and configure other options such as merge_functions, vectorize_slp,
etc. This ended up causing linker errors on MSVC targets because the optimizer
didn't have the right knowledge that some system functions are missing on these
platforms.

This commit consolidates creation of PassManagerBuilder instances to one
function which is then called when needed. This ensures that the pass manager is
always correctly configured with the various target-specific information that
LLVM needs.

Overall, this fixes `-C lto -C opt-level=3` on 32-bit MSVC targets.

											
										
										
											2015-07-22 23:22:51 +00:00
+								        });
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
-												Run name-anon-globals after LTO passes as well

If we're going to emit bitcode (through ThinLTOBuffer), then we
need to ensure that anon globals are named. This was already done
after optimization passes, but also has to happen after LTO passes,
as we always emit the final result in a ThinLTO-compatible manner.

Fixes #51947.

											
										
										
											2018-11-02 12:22:48 +00:00
+								        // We always generate bitcode through ThinLTOBuffers,
 								        // which do not support anonymous globals
 								        if config.bitcode_needed() {
-												Replaces some instances of `as *[const | mut] _` with `.cast()`

											
										
										
											2019-10-05 07:48:14 +00:00
+								            let pass = llvm::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr().cast());
-												Run name-anon-globals after LTO passes as well

If we're going to emit bitcode (through ThinLTOBuffer), then we
need to ensure that anon globals are named. This was already done
after optimization passes, but also has to happen after LTO passes,
as we always emit the final result in a ThinLTO-compatible manner.

Fixes #51947.

											
										
										
											2018-11-02 12:22:48 +00:00
+								            llvm::LLVMRustAddPass(pm, pass.unwrap());
 								        }
-												Rename -Z no-verify to -Z verify-llvm-ir

This disables IR verification by default.

											
										
										
											2018-06-12 19:05:37 +00:00
+								        if config.verify_llvm_ir {
-												Replaces some instances of `as *[const | mut] _` with `.cast()`

											
										
										
											2019-10-05 07:48:14 +00:00
+								            let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
-												rustc_codegen_llvm: use safe references for Pass.


											
										
										
											2018-07-12 15:00:49 +00:00
+								            llvm::LLVMRustAddPass(pm, pass.unwrap());
-												Respect -Z no-verify during LTO

Currently -Z no-verify only controls IR verification prior to
LLVM codegen, while verification is performed unconditionally
both before and after linking with (Thin)LTO.

											
										
										
											2018-05-30 20:48:20 +00:00
+								        }
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								        llvm::LLVMRunPassManager(pm, module.module_llvm.llmod());
-												Implement LTO

This commit implements LTO for rust leveraging LLVM's passes. What this means
is:

* When compiling an rlib, in addition to inserting foo.o into the archive, also
  insert foo.bc (the LLVM bytecode) of the optimized module.

* When the compiler detects the -Z lto option, it will attempt to perform LTO on
  a staticlib or binary output. The compiler will emit an error if a dylib or
  rlib output is being generated.

* The actual act of performing LTO is as follows:

    1. Force all upstream libraries to have an rlib version available.
    2. Load the bytecode of each upstream library from the rlib.
    3. Link all this bytecode into the current LLVM module (just using llvm
       apis)
    4. Run an internalization pass which internalizes all symbols except those
       found reachable for the local crate of compilation.
    5. Run the LLVM LTO pass manager over this entire module

    6a. If assembling an archive, then add all upstream rlibs into the output
        archive. This ignores all of the object/bitcode/metadata files rust
        generated and placed inside the rlibs.
    6b. If linking a binary, create copies of all upstream rlibs, remove the
        rust-generated object-file, and then link everything as usual.

As I have explained in #10741, this process is excruciatingly slow, so this is
*not* turned on by default, and it is also why I have decided to hide it behind
a -Z flag for now. The good news is that the binary sizes are about as small as
they can be as a result of LTO, so it's definitely working.

Closes #10741
Closes #10740

											
										
										
											2013-12-03 07:19:29 +00:00
 								        llvm::LLVMDisposePassManager(pm);
 								    }
 								    debug!("lto done");
 								}
-												Use a versioning scheme for bytecode objects in rlibs.

Before this commit, the LLVM IR of exported items was simply zip-compressed and stored as an object file inside rlib archives. This commit adds a header to this "object" containing a file identifier and a format version number so the compiler can deal with changes in the way bytecode objects are stored within rlibs.

While updating the format of bytecode objects, this commit also works around a problem in LLDB which could not handle odd-sized objects within archives before mid-2014.

											
										
										
											2014-07-31 13:05:08 +00:00
-												rustc_codegen_llvm: use safe references for ModuleBuffer.


											
										
										
											2018-07-17 13:08:25 +00:00
+								pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
 								unsafe impl Send for ModuleBuffer {} // NOTE(review): asserts that moving a ModuleBuffer to another thread is sound; presumably the LLVM buffer has no thread affinity — confirm.
 								unsafe impl Sync for ModuleBuffer {} // NOTE(review): asserts that sharing `&ModuleBuffer` across threads is sound; the `data` accessor in this file only reads the buffer — confirm.
 								impl ModuleBuffer {
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								    pub fn new(m: &llvm::Module) -> ModuleBuffer {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								    }
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								}
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								impl ModuleBufferMethods for ModuleBuffer {
 								    fn data(&self) -> &[u8] {
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								        unsafe {
 								            let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
 								            let len = llvm::LLVMRustModuleBufferLen(self.0);
 								            slice::from_raw_parts(ptr, len)
 								        }
 								    }
 								}
 								impl Drop for ModuleBuffer {
 								    fn drop(&mut self) {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        unsafe {
 								            llvm::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _));
 								        }
-												rustc: Enable LTO and multiple codegen units

This commit is a refactoring of the LTO backend in Rust to support compilations
with multiple codegen units. The immediate result of this PR is to remove the
artificial error emitted by rustc about `-C lto -C codegen-units=8`, but longer
term this is intended to lay the groundwork for LTO with incremental compilation
and ultimately be the underpinning of ThinLTO support.

The problem here that needed solving is that when rustc is producing multiple
codegen units in one compilation LTO needs to merge them all together.
Previously only upstream dependencies were merged and it was inherently relied
on that there was only one local codegen unit. Supporting this involved
refactoring the optimization backend architecture for rustc, namely splitting
the `optimize_and_codegen` function into `optimize` and `codegen`. After an LLVM
module has been optimized it may be blocked and queued up for LTO, and only
after LTO are modules code generated.

Non-LTO compilations should look the same as they do today backend-wise, we'll
spin up a thread for each codegen unit and optimize/codegen in that thread. LTO
compilations will, however, send the LLVM module back to the coordinator thread
once optimizations have finished. When all LLVM modules have finished optimizing
the coordinator will invoke the LTO backend, producing a further list of LLVM
modules. Currently this is always a list of one LLVM module. The coordinator
then spawns further work to run LTO and code generation passes over each module.

In the course of this refactoring a number of other pieces were refactored:

* Management of the bytecode encoding in rlibs was centralized into one module
  instead of being scattered across LTO and linking.
* Some internal refactorings on the link stage of the compiler were done to work
  directly from `CompiledModule` structures instead of lists of paths.
* The trans time-graph output was tweaked a little to include a name on each
  bar and inflate the size of the bars a little

											
										
										
											2017-07-23 15:14:38 +00:00
+								    }
-												Use a versioning scheme for bytecode objects in rlibs.

Before this commit, the LLVM IR of exported items was simply zip-compressed and stored as an object file inside rlib archives. This commit adds a header to this "object" containing a file identifier and a format version number so the compiler can deal with changes in the way bytecode objects are stored within rlibs.

While updating the format of bytecode objects, this commit also works around a problem in LLDB which could not handle odd-sized objects within archives before mid-2014.

											
										
										
											2014-07-31 13:05:08 +00:00
+								}
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								pub struct ThinData(&'static mut llvm::ThinLTOData); // Owning handle to LLVM-side ThinLTO data; released in the `Drop` impl via `LLVMRustFreeThinLTOData`.
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
 								unsafe impl Send for ThinData {} // NOTE(review): asserts that moving ThinData to another thread is sound; presumably the LLVM ThinLTO data has no thread affinity — confirm.
 								unsafe impl Sync for ThinData {} // NOTE(review): asserts that sharing `&ThinData` across threads is sound; justification is not visible in this file — confirm.
 								impl Drop for ThinData {
 								    fn drop(&mut self) {
 								        unsafe {
-												rustc_codegen_llvm: use safe references for ThinLTOData.

											
										
										
											2018-07-17 13:43:49 +00:00
+								            llvm::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _));
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        }
 								    }
 								}
-												rustc_codegen_llvm: use safe references for ThinLTOBuffer.


											
										
										
											2018-07-17 13:31:09 +00:00
+								pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer); // Owning handle to an LLVM-side ThinLTO buffer: created via `LLVMRustThinLTOBufferCreate` and released in the `Drop` impl via `LLVMRustThinLTOBufferFree`.
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
 								unsafe impl Send for ThinBuffer {} // NOTE(review): asserts that moving a ThinBuffer to another thread is sound; presumably the LLVM buffer has no thread affinity — confirm.
 								unsafe impl Sync for ThinBuffer {} // NOTE(review): asserts that sharing `&ThinBuffer` across threads is sound; the `data` accessor in this file only reads the buffer — confirm.
 								impl ThinBuffer {
-												rustc_codegen_llvm: use safe references for Context and Module.


											
										
										
											2018-06-27 14:57:25 +00:00
+								    pub fn new(m: &llvm::Module) -> ThinBuffer {
-												rustc: Move bytecode compression into codegen

This commit moves compression of the bytecode from the `link` module to the
`write` module, namely allowing it to be (a) cached by incremental compilation
and (b) produced in parallel. The parallelization may show up as some nice wins
during normal compilation and the caching in incremental mode should be
beneficial for incremental compiles! (no more need to recompress the entire
crate's bitcode on all builds)

											
										
										
											2017-10-20 01:44:33 +00:00
+								        unsafe {
 								            let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
 								            ThinBuffer(buffer)
 								        }
 								    }
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								}
-												rustc: Move bytecode compression into codegen

This commit moves compression of the bytecode from the `link` module to the
`write` module, namely allowing it to be (a) cached by incremental compilation
and (b) produced in parallel. The parallelization may show up as some nice wins
during normal compilation and the caching in incremental mode should be
beneficial for incremental compiles! (no more need to recompress the entire
crate's bitcode on all builds)

											
										
										
											2017-10-20 01:44:33 +00:00
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								impl ThinBufferMethods for ThinBuffer {
 								    fn data(&self) -> &[u8] {
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        unsafe {
 								            let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
 								            let len = llvm::LLVMRustThinLTOBufferLen(self.0);
 								            slice::from_raw_parts(ptr, len)
 								        }
 								    }
 								}
 								impl Drop for ThinBuffer {
 								    fn drop(&mut self) {
 								        unsafe {
-												rustc_codegen_llvm: use safe references for ThinLTOBuffer.


											
										
										
											2018-07-17 13:31:09 +00:00
+								            llvm::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _));
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        }
 								    }
 								}
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								pub unsafe fn optimize_thin_module(
 								    thin_module: &mut ThinModule<LlvmCodegenBackend>,
 								    cgcx: &CodegenContext<LlvmCodegenBackend>,
 								) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
 								    let diag_handler = cgcx.create_diag_handler();
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    let tm = (cgcx.tm_factory.0)().map_err(|e| write::llvm_err(&diag_handler, &e))?;
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
 								    // Right now the implementation we've got only works over serialized
 								    // modules, so we create a fresh new LLVM context and parse the module
 								    // into that context. One day, however, we may do this for upstream
 								    // crates but for locally codegened modules we may be able to reuse
 								    // that LLVM Context and Module.
 								    let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO are requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								    let llmod_raw = parse_module(
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        llcx,
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								        &thin_module.shared.module_names[thin_module.idx],
 								        thin_module.data(),
 								        &diag_handler,
 								    )? as *const _;
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								    let module = ModuleCodegen {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        module_llvm: ModuleLlvm { llmod_raw, llcx, tm },
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        name: thin_module.name().to_string(),
 								        kind: ModuleKind::Regular,
 								    };
 								    {
-												Prepare for LLVM 11

											
										
										
											2020-06-26 01:52:41 +00:00
+								        let target = &*module.module_llvm.tm;
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        let llmod = module.module_llvm.llmod();
 								        save_temp_bitcode(&cgcx, &module, "thin-lto-input");
 								        // Before we do much else find the "main" `DICompileUnit` that we'll be
 								        // using below. If we find more than one though then rustc has changed
 								        // in a way we're not ready for, so generate an ICE by returning
 								        // an error.
 								        let mut cu1 = ptr::null_mut();
 								        let mut cu2 = ptr::null_mut();
 								        llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
 								        if !cu2.is_null() {
 								            let msg = "multiple source DICompileUnits found";
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								            return Err(write::llvm_err(&diag_handler, msg));
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        }
-												rustc: Work around `DICompileUnit` bugs in LLVM

This commit implements a workaround for #46346 which basically just
avoids triggering the situation in which LLVM's bug
https://bugs.llvm.org/show_bug.cgi?id=35562 arises. More details can be
found in the code itself but this commit is also intended to ...

Closes #46346

											
										
										
											2017-12-16 16:20:54 +00:00
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        // Like with "fat" LTO, get some better optimizations if landing pads
 								        // are disabled by removing all landing pads.
 								        if cgcx.no_landing_pads {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer = cgcx
 								                .prof
 								                .generic_activity_with_arg("LLVM_thin_lto_remove_landing_pads", thin_module.name());
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								            llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
 								            save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
 								        }
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        // Up next comes the per-module local analyses that we do for Thin LTO.
 								        // Each of these functions is basically copied from the LLVM
 								        // implementation and then tailored to suit this implementation. Ideally
 								        // each of these would be supported by upstream LLVM but that's perhaps
 								        // a patch for another day!
 								        //
 								        // You can find some more comments about these functions in the LLVM
 								        // bindings we've got (currently `PassWrapper.cpp`)
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								        {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer =
 								                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
-												Prepare for LLVM 11

											
										
										
											2020-06-26 01:52:41 +00:00
+								            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								                let msg = "failed to prepare thin LTO module";
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                return Err(write::llvm_err(&diag_handler, msg));
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            }
 								            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								        }
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
 								        {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer = cgcx
 								                .prof
 								                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
 								                let msg = "failed to prepare thin LTO module";
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                return Err(write::llvm_err(&diag_handler, msg));
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            }
 								            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        }
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
 								        {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer = cgcx
 								                .prof
 								                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
 								                let msg = "failed to prepare thin LTO module";
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                return Err(write::llvm_err(&diag_handler, msg));
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            }
 								            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        }
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
 								        {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer =
 								                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
-												Prepare for LLVM 11

											
										
										
											2020-06-26 01:52:41 +00:00
+								            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								                let msg = "failed to prepare thin LTO module";
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                return Err(write::llvm_err(&diag_handler, msg));
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            }
 								            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        }
-												rustc: Set release mode cgus to 16 by default

This commit is the next attempt to enable multiple codegen units by default in
release mode, getting some of those sweet, sweet parallelism wins by running
codegen in parallel. Performance should not be lost due to ThinLTO being on by
default as well.

Closes #45320

											
										
										
											2017-12-21 15:03:16 +00:00
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								        // Ok now this is a bit unfortunate. This is also something you won't
 								        // find upstream in LLVM's ThinLTO passes! This is a hack for now to
 								        // work around bugs in LLVM.
 								        //
 								        // First discovered in #45511 it was found that as part of ThinLTO
 								        // importing passes LLVM will import `DICompileUnit` metadata
 								        // information across modules. This means that we'll be working with one
 								        // LLVM module that has multiple `DICompileUnit` instances in it (a
 								        // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
 								        // bugs in LLVM's backend which generates invalid DWARF in a situation
 								        // like this:
 								        //
 								        //  https://bugs.llvm.org/show_bug.cgi?id=35212
 								        //  https://bugs.llvm.org/show_bug.cgi?id=35562
 								        //
 								        // While the first bug there is fixed the second ended up causing #46346
 								        // which was basically a resurgence of #45511 after LLVM's bug 35212 was
 								        // fixed.
 								        //
 								        // This function below is a huge hack around this problem. The function
 								        // below is defined in `PassWrapper.cpp` and will basically "merge"
 								        // all `DICompileUnit` instances in a module. Basically it'll take all
 								        // the objects, rewrite all pointers of `DISubprogram` to point to the
 								        // first `DICompileUnit`, and then delete all the other units.
 								        //
 								        // This is probably mangling the debug info slightly (but hopefully
 								        // not too much) but for now at least gets LLVM to emit valid DWARF (or
 								        // so it appears). Hopefully we can remove this once upstream bugs are
 								        // fixed in LLVM.
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								        {
-												self-profile: Support arguments for generic_activities.

											
										
										
											2020-02-07 14:01:23 +00:00
+								            let _timer = cgcx
 								                .prof
 								                .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module.name());
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								            llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
 								            save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
 								        }
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
 								        // Alright now that we've done everything related to the ThinLTO
 								        // analysis it's time to run some optimizations! Here we use the same
 								        // `run_pass_manager` as the "fat" LTO above except that we tell it to
 								        // populate a thin-specific pass manager, which presumably LLVM treats a
 								        // little differently.
-												Self-Profiling: Make names of existing events more consistent and use new API.

											
										
										
											2019-09-27 12:04:36 +00:00
+								        {
 								            info!("running thin lto passes over {}", module.name);
 								            let config = cgcx.config(module.kind);
 								            run_pass_manager(cgcx, &module, config, true);
 								            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
 								        }
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								    }
-												Separating the back folder between backend-agnostic and LLVM-specific code

											
										
										
											2018-10-23 15:01:35 +00:00
+								    Ok(module)
-												rustc: Implement ThinLTO

This commit is an implementation of LLVM's ThinLTO for consumption in rustc
itself. Currently today LTO works by merging all relevant LLVM modules into one
and then running optimization passes. "Thin" LTO operates differently by having
more sharded work and allowing parallelism opportunities between optimizing
codegen units. Further down the road Thin LTO also allows *incremental* LTO
which should enable even faster release builds without compromising on the
performance we have today.

This commit uses a `-Z thinlto` flag to gate whether ThinLTO is enabled. It then
also implements two forms of ThinLTO:

* In one mode we'll *only* perform ThinLTO over the codegen units produced in a
  single compilation. That is, we won't load upstream rlibs, but we'll instead
  just perform ThinLTO amongst all codegen units produced by the compiler for
  the local crate. This is intended to emulate a desired end point where we have
  codegen units turned on by default for all crates and ThinLTO allows us to do
  this without performance loss.

* In another mode, like full LTO today, we'll optimize all upstream dependencies
  in "thin" mode. Unlike today, however, this LTO step is fully parallelized so
  should finish much more quickly.

There's a good bit of comments about what the implementation is doing and where
it came from, but the tl;dr; is that currently most of the support here is
copied from upstream LLVM. This code duplication is done for a number of
reasons:

* Controlling parallelism means we can use the existing jobserver support to
  avoid overloading machines.
* We will likely want a slightly different form of incremental caching which
  integrates with our own incremental strategy, but this is yet to be
  determined.
* This buys us some flexibility about when/where we run ThinLTO, as well as
  having it tailored to fit our needs for the time being.
* Finally this allows us to reuse some artifacts such as our `TargetMachine`
  creation, where all our options we used today aren't necessarily supported by
  upstream LLVM yet.

My hope is that we can get some experience with this copy/paste in tree and then
eventually upstream some work to LLVM itself to avoid the duplication while
still ensuring our needs are met. Otherwise I fear that maintaining these
bindings may be quite costly over the years with LLVM updates!

											
										
										
											2017-07-23 15:14:38 +00:00
+								}
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								/// Summarizes module import/export relationships used by LLVM's ThinLTO pass.
 								///
 								/// Note that we tend to have two such instances of `ThinLTOImportMaps` in use:
 								/// one loaded from a file that represents the relationships used during the
 								/// compilation associated with the incremental build artifacts we are
 								/// attempting to reuse, and another constructed via `from_thin_lto_data`, which
 								/// captures the relationships of ThinLTO in the current compilation.
-												Prefer `Default::default` over `FxHash*::default` in struct constructors

											
										
										
											2018-10-16 14:57:53 +00:00
+								#[derive(Debug, Default)]
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								pub struct ThinLTOImportMaps {
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								    // key = llvm name of importing module, value = list of modules it imports from
 								    imports: FxHashMap<String, Vec<String>>,
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
+								    // key = llvm name of exporting module, value = list of modules it exports to
 								    exports: FxHashMap<String, Vec<String>>,
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								}
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								impl ThinLTOImportMaps {
 								    /// Returns modules imported by `llvm_module_name` during some ThinLTO pass.
 								    fn imports_of(&self, llvm_module_name: &str) -> &[String] {
-												Support local ThinLTO with incremental compilation.

											
										
										
											2018-08-20 15:13:01 +00:00
+								        self.imports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
 								    }
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								    /// Returns the modules that `llvm_module_name` exports to during some ThinLTO pass.
 								    fn exports_of(&self, llvm_module_name: &str) -> &[String] {
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
+								        self.exports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
 								    }
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								    /// Serializes the import map to `path` as plain text.
 								    ///
 								    /// Each record is the importing module's name on its own line, followed by
 								    /// one line per imported module prefixed with a single space, and is
 								    /// terminated by an empty line.
 								    ///
 								    /// # Errors
 								    ///
 								    /// Returns any I/O error raised while creating, writing, or flushing the file.
 								    fn save_to_file(&self, path: &Path) -> io::Result<()> {
 								        use std::io::Write;
 								        let file = File::create(path)?;
 								        let mut writer = io::BufWriter::new(file);
 								        for (importing_module_name, imported_modules) in &self.imports {
 								            writeln!(writer, "{}", importing_module_name)?;
 								            for imported_module in imported_modules {
 								                writeln!(writer, " {}", imported_module)?;
 								            }
 								            writeln!(writer)?;
 								        }
 								        // Flush explicitly: dropping a `BufWriter` also flushes, but it discards
 								        // any error, which would let a truncated file be reported as success.
 								        writer.flush()?;
 								        Ok(())
 								    }
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two-character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								    fn load_from_file(path: &Path) -> io::Result<ThinLTOImportMaps> {
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								        use std::io::BufRead;
 								        let mut imports = FxHashMap::default();
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
+								        let mut exports: FxHashMap<_, Vec<_>> = FxHashMap::default();
 								        let mut current_module: Option<String> = None;
 								        let mut current_imports: Vec<String> = vec![];
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								        let file = File::open(path)?;
 								        for line in io::BufReader::new(file).lines() {
 								            let line = line?;
 								            if line.is_empty() {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                let importing_module = current_module.take().expect("Importing module not set");
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
+								                for imported in &current_imports {
 								                    exports.entry(imported.clone()).or_default().push(importing_module.clone());
 								                }
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                imports.insert(importing_module, mem::replace(&mut current_imports, vec![]));
-												use char instead of &str for single char patterns

											
										
										
											2020-02-26 12:03:46 +00:00
+								            } else if line.starts_with(' ') {
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								                // Space marks an imported module
 								                assert_ne!(current_module, None);
 								                current_imports.push(line.trim().to_string());
 								            } else {
 								                // Otherwise, beginning of a new module (must be start or follow empty line)
 								                assert_eq!(current_module, None);
 								                current_module = Some(line.trim().to_string());
 								            }
 								        }
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two-character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								        Ok(ThinLTOImportMaps { imports, exports })
-												save LTO import information and check it when trying to reuse build products.

adopts simple strategy devised with assistance from mw: Instead of accumulating
(and acting upon) LTO import information over an unbounded number of prior
compilations, just see if the current import set matches the previous import set.
if they don't match, then you cannot reuse the PostLTO build product for that
module.

In either case (of a match or a non-match), we can (and must) unconditionally
emit the current import set as the recorded information in the incremental
compilation cache, ready to be loaded during the next compiler run for use in
the same check described above.

resolves issue 59535.

											
										
										
											2019-11-29 15:04:40 +00:00
+								    }
-												rustc: doc comments

											
										
										
											2019-02-08 13:53:55 +00:00
+								    /// Loads the ThinLTO import map from ThinLTOData.
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two-character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								    unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImportMaps {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        unsafe extern "C" fn imported_module_callback(
 								            payload: *mut libc::c_void,
 								            importing_module_name: *const libc::c_char,
 								            imported_module_name: *const libc::c_char,
 								        ) {
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two-character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								            let map = &mut *(payload as *mut ThinLTOImportMaps);
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								            let importing_module_name = CStr::from_ptr(importing_module_name);
 								            let importing_module_name = module_name_to_str(&importing_module_name);
 								            let imported_module_name = CStr::from_ptr(imported_module_name);
 								            let imported_module_name = module_name_to_str(&imported_module_name);
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								            if !map.imports.contains_key(importing_module_name) {
 								                map.imports.insert(importing_module_name.to_owned(), vec![]);
 								            }
-												Support local ThinLTO with incremental compilation.

											
										
										
											2018-08-20 15:13:01 +00:00
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								            map.imports
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								                .get_mut(importing_module_name)
 								                .unwrap()
 								                .push(imported_module_name.to_owned());
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
 								            if !map.exports.contains_key(imported_module_name) {
 								                map.exports.insert(imported_module_name.to_owned(), vec![]);
 								            }
 								            map.exports
 								                .get_mut(imported_module_name)
 								                .unwrap()
 								                .push(importing_module_name.to_owned());
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								        }
-												If an LLVM module's exports change, cannot reuse its post-LTO object file in
incremental compilation.

This is symmetric to PR #67020, which handled the case where the LLVM module's
*imports* changed. This commit builds upon the infrastructure added there; the
export map is just the inverse of the import map, so we can build the export map
at the same time that we load the serialized import map.

Fix #69798

											
										
										
											2020-04-14 13:47:03 +00:00
-												Incorporated review feedback:

Renamed the struct to make it a little clearer that it doesn't just hold one
imports map. (I couldn't bring myself to write it as `ThinLTOImportsExports`
though, mainly since the exports map is literally derived from the imports map
data.) Added some doc to the struct too.

Revised comments to add link to the newer issue that discusses why the exports
are relevant.

Renamed a few of the methods so that the two-character difference is more
apparent (because 1. the method name is shorter and, perhaps more importantly,
2. the changed characters now lie at the beginning of the method name.)

											
										
										
											2020-04-15 16:28:01 +00:00
+								        let mut map = ThinLTOImportMaps::default();
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        llvm::LLVMRustGetThinLTOModuleImports(
 								            data,
 								            imported_module_callback,
 								            &mut map as *mut _ as *mut libc::c_void,
 								        );
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
+								        map
 								    }
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								}
-												Persist ThinLTO import data in incr. comp. session directory.

											
										
										
											2018-08-17 14:07:23 +00:00
-												Always add all modules to the global ThinLTO module analysis when compiling incrementally.

											
										
										
											2018-08-31 13:18:08 +00:00
+								fn module_name_to_str(c_str: &CStr) -> &str {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								    c_str.to_str().unwrap_or_else(|e| {
 								        bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)
 								    })
-												incr.ThinLTO: Do some cleanup and add some logging.

											
										
										
											2018-09-03 10:42:27 +00:00
+								}
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
-												rustc: Handle modules in "fat" LTO more robustly

When performing a "fat" LTO the compiler has a whole mess of codegen
units that it links together. To do this it needs to select one module
as a "base" module and then link everything else into this module.
Previously LTO passes assumed that there's at least one module in-memory
to link into, but nowadays that's not always true! With incremental
compilation modules may actually largely be cached and it may be
possible that there are no in-memory modules to work with.

This commit updates the logic of the LTO backend to handle modules a bit
more uniformly during a fat LTO. This commit immediately splits them
into two lists, one serialized and one in-memory. The in-memory list is
then searched for the largest module and failing that we simply
deserialize the first serialized module and link into that. This
refactoring avoids juggling three lists, two of which are serialized
modules and one of which is half serialized and half in-memory.

Closes #63349

											
										
										
											2019-08-27 19:25:35 +00:00
+								pub fn parse_module<'a>(
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								    cx: &'a llvm::Context,
 								    name: &CStr,
 								    data: &[u8],
 								    diag_handler: &Handler,
 								) -> Result<&'a llvm::Module, FatalError> {
 								    unsafe {
-												Format the world

											
										
										
											2019-12-22 22:42:04 +00:00
+								        llvm::LLVMRustParseBitcodeForLTO(cx, data.as_ptr(), data.len(), name.as_ptr()).ok_or_else(
 								            || {
 								                let msg = "failed to parse bitcode for LTO module";
 								                write::llvm_err(&diag_handler, msg)
 								            },
 								        )
-												rustc: Implement incremental "fat" LTO

Currently the compiler will produce an error if both incremental
compilation and full fat LTO is requested. With recent changes and the
advent of incremental ThinLTO, however, all the hard work is already
done for us and it's actually not too bad to remove this error!

This commit updates the codegen backend to allow incremental full fat
LTO. The semantics are that the input modules to LTO are all produced
incrementally, but the final LTO step is always done unconditionally
regardless of whether the inputs changed or not. The only real
incremental win we could have here is if zero of the input modules
changed, but that's so rare it's unlikely to be worthwhile to implement
such a code path.

cc #57968
cc rust-lang/cargo#6643

											
										
										
											2019-02-11 15:46:04 +00:00
+								    }
 								}