Translate MIR to Clif IR in parallel when using parallel rustc

On dev-desktop, cg_clif has a 15% advantage over cg_llvm when building
simple-raytracer with parallel rustc disabled. With -Zthreads=16, this
change grows that advantage from 5% to 22%.
bjorn3 2024-05-11 18:51:59 +00:00
parent 50b34279c3
commit a167142946
2 changed files with 31 additions and 31 deletions
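
The gist of the change: ConcurrencyLimiter::acquire now takes &self instead of &mut self (with the jobserver HelperThread moved behind a Mutex), so one limiter can be shared by reference across the threads that translate CGUs, and the CGU loop in run_aot switches from a sequential map to par_map. A minimal std-only sketch of the sharing pattern this unlocks; the Limiter type and its token counter below are hypothetical stand-ins, not the cg_clif implementation:

use std::sync::Mutex;
use std::thread;

struct Limiter {
    // Stand-in for the Mutex-wrapped jobserver HelperThread: the Mutex
    // gives &self methods the interior mutability they need.
    tokens_requested: Mutex<usize>,
}

impl Limiter {
    // Taking &self rather than &mut self is what lets many worker threads
    // call this concurrently on one shared limiter.
    fn acquire(&self) {
        *self.tokens_requested.lock().unwrap() += 1;
    }
}

fn main() {
    let limiter = Limiter { tokens_requested: Mutex::new(0) };
    thread::scope(|s| {
        for cgu in 0..4 {
            let limiter = &limiter;
            s.spawn(move || {
                limiter.acquire();
                println!("translating CGU {cgu} to Clif IR");
            });
        }
    });
    assert_eq!(*limiter.tokens_requested.lock().unwrap(), 4);
}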

src/concurrency_limiter.rs

@@ -6,7 +6,7 @@ use rustc_session::Session;
 
 // FIXME don't panic when a worker thread panics
 pub(super) struct ConcurrencyLimiter {
-    helper_thread: Option<HelperThread>,
+    helper_thread: Option<Mutex<HelperThread>>,
     state: Arc<Mutex<state::ConcurrencyLimiterState>>,
     available_token_condvar: Arc<Condvar>,
     finished: bool,
@@ -39,14 +39,14 @@ impl ConcurrencyLimiter {
             })
             .unwrap();
         ConcurrencyLimiter {
-            helper_thread: Some(helper_thread),
+            helper_thread: Some(Mutex::new(helper_thread)),
             state,
             available_token_condvar,
             finished: false,
         }
     }
 
-    pub(super) fn acquire(&mut self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
+    pub(super) fn acquire(&self, dcx: &rustc_errors::DiagCtxt) -> ConcurrencyLimiterToken {
         let mut state = self.state.lock().unwrap();
         loop {
             state.assert_invariants();
@@ -73,7 +73,7 @@ impl ConcurrencyLimiter {
                 }
             }
 
-            self.helper_thread.as_mut().unwrap().request_token();
+            self.helper_thread.as_ref().unwrap().lock().unwrap().request_token();
             state = self.available_token_condvar.wait(state).unwrap();
         }
     }
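
For context, the acquire method whose signature changes above follows a standard Mutex + Condvar pattern: take the state lock, and while no jobserver token is available, ask the helper thread for one and block on the condvar until it arrives. A simplified, self-contained sketch of that shape; the plain token counter and the release method are illustrative assumptions, not the actual ConcurrencyLimiterState:

use std::sync::{Condvar, Mutex};

struct Limiter {
    tokens: Mutex<usize>,
    available_token_condvar: Condvar,
}

impl Limiter {
    fn acquire(&self) {
        let mut tokens = self.tokens.lock().unwrap();
        while *tokens == 0 {
            // This is the point where cg_clif asks the (now Mutex-wrapped)
            // helper thread for another jobserver token before waiting.
            tokens = self.available_token_condvar.wait(tokens).unwrap();
        }
        *tokens -= 1;
    }

    fn release(&self) {
        *self.tokens.lock().unwrap() += 1;
        self.available_token_condvar.notify_one();
    }
}

fn main() {
    let limiter = Limiter { tokens: Mutex::new(1), available_token_condvar: Condvar::new() };
    limiter.acquire();
    limiter.release();
}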

src/driver/aot.rs

@@ -15,6 +15,7 @@ use rustc_codegen_ssa::errors as ssa_errors;
 use rustc_codegen_ssa::{CodegenResults, CompiledModule, CrateInfo, ModuleKind};
 use rustc_data_structures::profiling::SelfProfilerRef;
 use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
+use rustc_data_structures::sync::{par_map, IntoDynSyncSend};
 use rustc_metadata::fs::copy_to_stdout;
 use rustc_metadata::EncodedMetadata;
 use rustc_middle::dep_graph::{WorkProduct, WorkProductId};
@@ -611,34 +612,33 @@ pub(crate) fn run_aot(
         CguReuse::PreLto | CguReuse::PostLto => false,
     });
 
-    let mut concurrency_limiter = ConcurrencyLimiter::new(tcx.sess, todo_cgus.len());
+    let concurrency_limiter = IntoDynSyncSend(ConcurrencyLimiter::new(tcx.sess, todo_cgus.len()));
 
-    let modules =
-        tcx.sess.time("codegen mono items", || {
-            todo_cgus
-                .into_iter()
-                .map(|(_, cgu)| {
-                    let dep_node = cgu.codegen_dep_node(tcx);
-                    tcx.dep_graph
-                        .with_task(
-                            dep_node,
-                            tcx,
-                            (
-                                backend_config.clone(),
-                                global_asm_config.clone(),
-                                cgu.name(),
-                                concurrency_limiter.acquire(tcx.dcx()),
-                            ),
-                            module_codegen,
-                            Some(rustc_middle::dep_graph::hash_result),
-                        )
-                        .0
-                })
-                .chain(done_cgus.into_iter().map(|(_, cgu)| {
-                    OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu))
-                }))
-                .collect::<Vec<_>>()
+    let modules = tcx.sess.time("codegen mono items", || {
+        let mut modules: Vec<_> = par_map(todo_cgus, |(_, cgu)| {
+            let dep_node = cgu.codegen_dep_node(tcx);
+            tcx.dep_graph
+                .with_task(
+                    dep_node,
+                    tcx,
+                    (
+                        backend_config.clone(),
+                        global_asm_config.clone(),
+                        cgu.name(),
+                        concurrency_limiter.acquire(tcx.dcx()),
+                    ),
+                    module_codegen,
+                    Some(rustc_middle::dep_graph::hash_result),
+                )
+                .0
+        });
+        modules.extend(
+            done_cgus
+                .into_iter()
+                .map(|(_, cgu)| OngoingModuleCodegen::Sync(reuse_workproduct_for_cgu(tcx, cgu))),
+        );
+        modules
     });
 
     let mut allocator_module = make_module(tcx.sess, &backend_config, "allocator_shim".to_string());
     let mut allocator_unwind_context = UnwindContext::new(allocator_module.isa(), true);
@@ -706,6 +706,6 @@ pub(crate) fn run_aot(
         metadata_module,
         metadata,
         crate_info: CrateInfo::new(tcx, target_cpu),
-        concurrency_limiter,
+        concurrency_limiter: concurrency_limiter.0,
     })
 }
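
The driver change itself is mostly a restructuring: CGUs that need fresh codegen are mapped in parallel, and reused work products are appended sequentially afterwards. A std-only sketch of that split, using scoped threads in place of rustc_data_structures::sync::par_map and hypothetical codegen_cgu/reuse_cgu helpers in place of module_codegen and reuse_workproduct_for_cgu:

use std::thread;

fn codegen_cgu(cgu: usize) -> String {
    format!("codegened CGU {cgu}")
}

fn reuse_cgu(cgu: usize) -> String {
    format!("reused CGU {cgu}")
}

fn main() {
    let todo_cgus = vec![0, 1, 2];
    let done_cgus = vec![3, 4];

    // Parallel part: stand-in for par_map over the CGUs needing fresh codegen.
    let mut modules: Vec<String> = thread::scope(|s| {
        let handles: Vec<_> =
            todo_cgus.into_iter().map(|cgu| s.spawn(move || codegen_cgu(cgu))).collect();
        handles.into_iter().map(|h| h.join().unwrap()).collect()
    });

    // Sequential part: reused work products are appended afterwards.
    modules.extend(done_cgus.into_iter().map(reuse_cgu));

    println!("{modules:?}");
}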