From 2c2a3bc31cca598c5471bf75da746d8993ee6210 Mon Sep 17 00:00:00 2001 From: Eduard-Mihai Burtescu Date: Mon, 24 Jul 2023 17:31:27 +0300 Subject: [PATCH] Improve custom debuginfo with aggressive deduplication. --- .../src/builder/builder_methods.rs | 17 ++ .../rustc_codegen_spirv/src/codegen_cx/mod.rs | 15 +- .../src/linker/duplicates.rs | 276 +++++++++++++++++- .../rustc_codegen_spirv/src/linker/inline.rs | 90 +++--- crates/rustc_codegen_spirv/src/linker/mod.rs | 43 ++- docs/src/codegen-args.md | 18 +- 6 files changed, 394 insertions(+), 65 deletions(-) diff --git a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs index 21ee0d8075..a828d39d55 100644 --- a/crates/rustc_codegen_spirv/src/builder/builder_methods.rs +++ b/crates/rustc_codegen_spirv/src/builder/builder_methods.rs @@ -708,6 +708,23 @@ impl<'a, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'tcx> { }, ); } + + // HACK(eddyb) remove the previous instruction if made irrelevant. + let mut builder = self.emit(); + if let (Some(func_idx), Some(block_idx)) = + (builder.selected_function(), builder.selected_block()) + { + let block = &mut builder.module_mut().functions[func_idx].blocks[block_idx]; + match &block.instructions[..] { + [.., a, b] + if a.class.opcode == b.class.opcode + && a.operands[..2] == b.operands[..2] => + { + block.instructions.remove(block.instructions.len() - 2); + } + _ => {} + } + } } else { // We may not always have valid spans. // FIXME(eddyb) reduce the sources of this as much as possible. diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs index 04e3942a42..362f89b209 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs @@ -378,8 +378,13 @@ impl CodegenArgs { ); opts.optflag( "", - "spirt-keep-custom-debuginfo-in-dumps", - "keep custom debuginfo when dumping SPIR-T (instead of lossily prettifying it)", + "spirt-strip-custom-debuginfo-from-dumps", + "strip custom debuginfo instructions when dumping SPIR-T", + ); + opts.optflag( + "", + "spirt-keep-debug-sources-in-dumps", + "keep file contents debuginfo when dumping SPIR-T", ); opts.optflag( "", @@ -548,8 +553,10 @@ impl CodegenArgs { dump_post_merge: matches_opt_dump_dir_path("dump-post-merge"), dump_post_split: matches_opt_dump_dir_path("dump-post-split"), dump_spirt_passes: matches_opt_dump_dir_path("dump-spirt-passes"), - spirt_keep_custom_debuginfo_in_dumps: matches - .opt_present("spirt-keep-custom-debuginfo-in-dumps"), + spirt_strip_custom_debuginfo_from_dumps: matches + .opt_present("spirt-strip-custom-debuginfo-from-dumps"), + spirt_keep_debug_sources_in_dumps: matches + .opt_present("spirt-keep-debug-sources-in-dumps"), specializer_debug: matches.opt_present("specializer-debug"), specializer_dump_instances: matches_opt_path("specializer-dump-instances"), print_all_zombie: matches.opt_present("print-all-zombie"), diff --git a/crates/rustc_codegen_spirv/src/linker/duplicates.rs b/crates/rustc_codegen_spirv/src/linker/duplicates.rs index 24ccfe8802..3ca05022f0 100644 --- a/crates/rustc_codegen_spirv/src/linker/duplicates.rs +++ b/crates/rustc_codegen_spirv/src/linker/duplicates.rs @@ -1,9 +1,12 @@ +use crate::custom_insts::{self, CustomOp}; use rspirv::binary::Assemble; use rspirv::dr::{Instruction, Module, Operand}; use rspirv::spirv::{Op, Word}; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_middle::bug; +use smallvec::SmallVec; use std::collections::hash_map; +use std::mem; // FIXME(eddyb) consider deduplicating the `OpString` and `OpSource` created for // file-level debuginfo (but using SPIR-T for linking might be better?). @@ -269,19 +272,272 @@ pub fn remove_duplicate_types(module: &mut Module) { }); } -pub fn remove_duplicate_lines(module: &mut Module) { +pub fn remove_duplicate_debuginfo(module: &mut Module) { + // FIXME(eddyb) avoid repeating this across different passes/helpers. + let custom_ext_inst_set_import = module + .ext_inst_imports + .iter() + .find(|inst| { + assert_eq!(inst.class.opcode, Op::ExtInstImport); + inst.operands[0].unwrap_literal_string() == &custom_insts::CUSTOM_EXT_INST_SET[..] + }) + .map(|inst| inst.result_id.unwrap()); + for func in &mut module.functions { for block in &mut func.blocks { - block.instructions.dedup_by(|a, b| { - if a.class.opcode == Op::Line && b.class.opcode == Op::Line { - // dedup_by removes the *second* element in a pair of duplicates. We want to - // remove the *first* (so the last OpLine is kept). So, swap them! :D - std::mem::swap(a, b); - true - } else { - false + // Ignore the terminator, it's effectively "outside" debuginfo. + let (_, insts) = block.instructions.split_last_mut().unwrap(); + + // HACK(eddyb) to make random access easier, we first replace unused + // instructions with `OpNop`, and then remove all the `OpNop`s. + + #[derive(Clone)] + struct DbgLocInst { + inst_idx: usize, + used: bool, + } + + fn nop() -> Instruction { + Instruction::new(Op::Nop, None, None, vec![]) + } + impl DbgLocInst { + fn nop_if_unused(&self, insts: &mut [Instruction]) { + if !self.used { + insts[self.inst_idx] = nop(); + } } - }); + } + + #[derive(Clone, Default)] + struct DbgState { + loc: Option, + has_semantic_insts: bool, + } + let mut dbg = DbgState::default(); + + struct Frame { + call_dbg: DbgState, + push_inst_idx: usize, + } + let mut inlined_frames = SmallVec::<[Frame; 8]>::new(); + + // HACK(eddyb) `PopInlinedCallFrame` moves `inlined_frames.last()` + // `fusable_freshly_popped_inlined_frames.last()`, so a sequence of + // N pops will reverse the N last entries of `inlined_frames` into + // this vector (and go from outside-in, to inside-out), which allows + // *fusing* a pop with a push (of an identical inlined frame), when + // no interverning instructions prevent it (such instructions will + // clear this vector to indicate the pops are "fully committed"). + struct PoppedFrame { + frame: Frame, + callee_has_semantic_insts: bool, + pop_inst_idx: usize, + } + let mut fusable_freshly_popped_inlined_frames = SmallVec::<[PoppedFrame; 8]>::new(); + + for inst_idx in 0..insts.len() { + let inst = &insts[inst_idx]; + let custom_op = match inst.class.opcode { + Op::ExtInst + if Some(inst.operands[0].unwrap_id_ref()) == custom_ext_inst_set_import => + { + Some(CustomOp::decode_from_ext_inst(inst)) + } + _ => None, + }; + + fn inst_eq_key(inst: &Instruction) -> impl PartialEq + '_ { + (inst.class.opcode, &inst.operands) + } + + // NOTE(eddyb) `fusable_freshly_popped_inlined_frames`-preserving + // cases must all use `if can_continue { continue; }` to skip the + // draining logic (`can_continue` is only `false` at the very end). + let can_continue = inst_idx < insts.len() - 1; + let prev_dbg_loc_snapshot = dbg.loc.clone(); + match (inst.class.opcode, custom_op) { + (Op::Line | Op::NoLine, _) + | (_, Some(CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc)) => { + // HACK(eddyb) prefer keeping older active `DbgLocInst`s, + // if all the details are the same (it helps with fusion). + if dbg.loc.as_ref().is_some_and(|old_dbg_loc| { + inst_eq_key(inst) == inst_eq_key(&insts[old_dbg_loc.inst_idx]) + }) { + insts[inst_idx] = nop(); + if can_continue { + continue; + } + } else { + dbg.loc = Some(DbgLocInst { + inst_idx, + used: false, + }); + } + } + (_, Some(CustomOp::PushInlinedCallFrame)) => { + // HACK(eddyb) attempt fusing this push with the last pop. + let fuse_with_last_pop = fusable_freshly_popped_inlined_frames + .last() + .is_some_and(|last_popped| { + // HACK(eddyb) updating `dbg.loc` deduplicates eagerly, + // so here it suffices to check the (deduped) indices. + let dbg_loc_inst_idx = + |dbg: &DbgState| dbg.loc.as_ref().map(|d| d.inst_idx); + dbg_loc_inst_idx(&last_popped.frame.call_dbg) + == dbg_loc_inst_idx(&dbg) + && inst_eq_key(inst) + == inst_eq_key(&insts[last_popped.frame.push_inst_idx]) + }); + if fuse_with_last_pop { + let PoppedFrame { + frame, + callee_has_semantic_insts, + pop_inst_idx, + } = fusable_freshly_popped_inlined_frames.pop().unwrap(); + + insts[pop_inst_idx] = nop(); + + // Can't make entering an inlined function a nop, + // as it needs to reset callee-side `DbgLocInst`, + // but we can replace it in-place and hope later + // it get nop'd out by some real `DbgLocInst`. + insts[inst_idx].operands.splice( + 1.., + [Operand::LiteralExtInstInteger( + CustomOp::ClearDebugSrcLoc as u32, + )], + ); + dbg = DbgState { + loc: Some(DbgLocInst { + inst_idx, + used: false, + }), + has_semantic_insts: callee_has_semantic_insts, + }; + + inlined_frames.push(frame); + + // Allow further fusing to occur. + if can_continue { + continue; + } + } else { + // HACK(eddyb) the actual push to `inlined_frames` is + // done at the very end of the loop body, to be able + // to process any pending updates on the previous state. + } + } + (_, Some(CustomOp::PopInlinedCallFrame)) => { + // Leaving an inlined function doesn't use `DbgLocInst`. + if let Some(dbg_loc) = dbg.loc.take() { + // HACK(eddyb) only treat as "definitely unused" + // instructions that are too "recent" to have been + // used by a `PushInlinedCallFrame` with a still + // uncommitted `PopInlinedCallFrame`. + let min_safe_inst_idx_to_nop = fusable_freshly_popped_inlined_frames + .last() + .map_or(0, |last_popped| last_popped.pop_inst_idx); + if dbg_loc.inst_idx > min_safe_inst_idx_to_nop { + dbg_loc.nop_if_unused(insts); + } + } + if let Some(frame) = inlined_frames.pop() { + let callee_has_semantic_insts = dbg.has_semantic_insts; + dbg = frame.call_dbg.clone(); + dbg.has_semantic_insts |= callee_has_semantic_insts; + + // HACK(eddyb) inform future `PushInlinedCallFrame`s + // of potential fusion, by saving a copy of the frame. + fusable_freshly_popped_inlined_frames.push(PoppedFrame { + frame, + callee_has_semantic_insts, + pop_inst_idx: inst_idx, + }); + } else { + // FIXME(eddyb) this may indicate a bug elsewhere. + insts[inst_idx] = nop(); + } + if can_continue { + continue; + } + } + _ => { + if let Some(dbg_loc) = &mut dbg.loc { + dbg_loc.used = true; + } + dbg.has_semantic_insts = true; + } + } + + // NOTE(eddyb) mutable so that it may be marked as used below. + let mut freshly_replaced_dbg_loc = prev_dbg_loc_snapshot.filter(|prev_dbg_loc| { + dbg.loc.as_ref().map(|d| d.inst_idx) != Some(prev_dbg_loc.inst_idx) + }); + + // NOTE(eddyb) the iteration order doesn't matter, as this is + // effectively a set of `PopInlinedCallFrame`s which have had + // all their other side-effects processed, and didn't get a + // chance to be fused away, so they're getting committed. + for popped in fusable_freshly_popped_inlined_frames.drain(..) { + let PoppedFrame { + mut frame, + callee_has_semantic_insts, + pop_inst_idx, + } = popped; + + // HACK(eddyb) this popped frame's `call_dbg.loc` may still + // be used elsewhere, in which case that use takes precedence, + // and is effectively the new "owner" of the `DbgLocInst`. + let call_dbg_loc_used_elsewhere = + frame.call_dbg.loc.as_ref().and_then(|call_dbg_loc| { + [dbg.loc.as_mut(), freshly_replaced_dbg_loc.as_mut()] + .into_iter() + .flatten() + .find(|dbg_loc| dbg_loc.inst_idx == call_dbg_loc.inst_idx) + }); + if call_dbg_loc_used_elsewhere.is_some() { + frame.call_dbg.loc = None; + } + + if callee_has_semantic_insts { + // The `PushInlinedCallFrame` being kept requires its + // original `DbgLocInst` to also be kept around. + if let Some(call_dbg_loc) = call_dbg_loc_used_elsewhere { + call_dbg_loc.used = true; + } + } else { + // If the entire inlined call is all `OpNop`s now, + // entering/leaving it can also become `OpNop`s. + if let Some(call_dbg_loc) = &mut frame.call_dbg.loc { + call_dbg_loc.nop_if_unused(insts); + } + insts[frame.push_inst_idx] = nop(); + insts[pop_inst_idx] = nop(); + } + } + + // Only remove a replaced `DbgLocInst` after it had a chance to + // be marked as used above (for e.g. a `PushInlinedCallFrame`). + if let Some(old_dbg_loc) = freshly_replaced_dbg_loc { + old_dbg_loc.nop_if_unused(insts); + } + + // HACK(eddyb) the actual push to `inlined_frames` is + // done at the very end of the loop body, to be able + // to process any pending updates on the previous state. + if custom_op == Some(CustomOp::PushInlinedCallFrame) { + inlined_frames.push(Frame { + call_dbg: mem::take(&mut dbg), + push_inst_idx: inst_idx, + }); + } + } + + assert!(fusable_freshly_popped_inlined_frames.is_empty()); + + block + .instructions + .retain(|inst| inst.class.opcode != Op::Nop); } } } diff --git a/crates/rustc_codegen_spirv/src/linker/inline.rs b/crates/rustc_codegen_spirv/src/linker/inline.rs index 736e92c259..c036464ade 100644 --- a/crates/rustc_codegen_spirv/src/linker/inline.rs +++ b/crates/rustc_codegen_spirv/src/linker/inline.rs @@ -919,55 +919,67 @@ impl Inliner<'_, '_> { let mut blocks = callee.blocks.clone(); for block in &mut blocks { - let last = block.instructions.last().unwrap(); - if let Op::Return | Op::ReturnValue = last.class.opcode { - if Op::ReturnValue == last.class.opcode { - let return_value = last.operands[0].id_ref_any().unwrap(); - block.instructions.insert( - block.instructions.len() - 1, - Instruction::new( - Op::Store, - None, - None, - vec![ - Operand::IdRef(return_variable.unwrap()), - Operand::IdRef(return_value), - ], - ), - ); + let mut terminator = block.instructions.pop().unwrap(); + + // HACK(eddyb) strip trailing debuginfo (as it can't impact terminators). + while let Some(last) = block.instructions.last() { + let can_remove = match last.class.opcode { + Op::Line | Op::NoLine => true, + Op::ExtInst => { + last.operands[0].unwrap_id_ref() == custom_ext_inst_set_import + && matches!( + CustomOp::decode_from_ext_inst(last), + CustomOp::SetDebugSrcLoc | CustomOp::ClearDebugSrcLoc + ) + } + _ => false, + }; + if can_remove { + block.instructions.pop(); + } else { + break; + } + } + + if let Op::Return | Op::ReturnValue = terminator.class.opcode { + if Op::ReturnValue == terminator.class.opcode { + let return_value = terminator.operands[0].id_ref_any().unwrap(); + block.instructions.push(Instruction::new( + Op::Store, + None, + None, + vec![ + Operand::IdRef(return_variable.unwrap()), + Operand::IdRef(return_value), + ], + )); } else { assert!(return_variable.is_none()); } - *block.instructions.last_mut().unwrap() = + terminator = Instruction::new(Op::Branch, None, None, vec![Operand::IdRef(return_jump)]); } - let (debuginfo_prefix, debuginfo_suffix) = mk_debuginfo_prefix_and_suffix(true); - block.instructions.splice( + let num_phis = block + .instructions + .iter() + .take_while(|inst| inst.class.opcode == Op::Phi) + .count(); + + // HACK(eddyb) avoid adding debuginfo to otherwise-empty blocks. + if block.instructions.len() > num_phis { + let (debuginfo_prefix, debuginfo_suffix) = mk_debuginfo_prefix_and_suffix(true); // Insert the prefix debuginfo instructions after `OpPhi`s, // which sadly can't be covered by them. - { - let i = block - .instructions - .iter() - .position(|inst| inst.class.opcode != Op::Phi) - .unwrap(); - i..i - }, - debuginfo_prefix, - ); - block.instructions.splice( + block + .instructions + .splice(num_phis..num_phis, debuginfo_prefix); // Insert the suffix debuginfo instructions before the terminator, // which sadly can't be covered by them. - { - let last_non_terminator = block.instructions.iter().rposition(|inst| { - !rspirv::grammar::reflect::is_block_terminator(inst.class.opcode) - }); - let i = last_non_terminator.map_or(0, |x| x + 1); - i..i - }, - debuginfo_suffix, - ); + block.instructions.extend(debuginfo_suffix); + } + + block.instructions.push(terminator); } let (caller_restore_debuginfo_after_call, calleer_reset_debuginfo_before_call) = diff --git a/crates/rustc_codegen_spirv/src/linker/mod.rs b/crates/rustc_codegen_spirv/src/linker/mod.rs index dc540a3686..19a488043c 100644 --- a/crates/rustc_codegen_spirv/src/linker/mod.rs +++ b/crates/rustc_codegen_spirv/src/linker/mod.rs @@ -60,7 +60,8 @@ pub struct Options { pub dump_post_merge: Option, pub dump_post_split: Option, pub dump_spirt_passes: Option, - pub spirt_keep_custom_debuginfo_in_dumps: bool, + pub spirt_strip_custom_debuginfo_from_dumps: bool, + pub spirt_keep_debug_sources_in_dumps: bool, pub specializer_debug: bool, pub specializer_dump_instances: Option, pub print_all_zombie: bool, @@ -327,6 +328,13 @@ pub fn link( } } + if opts.dce { + let _timer = + sess.timer("link_dce-and-remove_duplicate_debuginfo-after-mem2reg-before-inlining"); + dce::dce(&mut output); + duplicates::remove_duplicate_debuginfo(&mut output); + } + { let _timer = sess.timer("link_inline"); inline::inline(sess, &mut output)?; @@ -376,6 +384,13 @@ pub fn link( } } + if opts.dce { + let _timer = + sess.timer("link_dce-and-remove_duplicate_debuginfo-after-mem2reg-after-inlining"); + dce::dce(&mut output); + duplicates::remove_duplicate_debuginfo(&mut output); + } + // NOTE(eddyb) SPIR-T pipeline is entirely limited to this block. { let mut per_pass_module_for_dumping = vec![]; @@ -474,14 +489,28 @@ pub fn link( // NOTE(eddyb) this should be *before* `lift_to_spv` below, // so if that fails, the dump could be used to debug it. if let Some(dump_spirt_file_path) = &dump_spirt_file_path { - // HACK(eddyb) unless requested otherwise, clean up the pretty-printed - // SPIR-T output by converting our custom extended instructions, to - // standard SPIR-V debuginfo (which SPIR-T knows how to pretty-print). - if !opts.spirt_keep_custom_debuginfo_in_dumps { + if opts.spirt_strip_custom_debuginfo_from_dumps { for (_, module) in &mut per_pass_module_for_dumping { spirt_passes::debuginfo::convert_custom_debuginfo_to_spv(module); } } + if !opts.spirt_keep_debug_sources_in_dumps { + for (_, module) in &mut per_pass_module_for_dumping { + let spirt::ModuleDebugInfo::Spv(debuginfo) = &mut module.debug_info; + for sources in debuginfo.source_languages.values_mut() { + const DOTS: &str = "⋯"; + for file in sources.file_contents.values_mut() { + *file = DOTS.into(); + } + sources.file_contents.insert( + cx.intern(DOTS), + "sources hidden, to show them use \ + `RUSTGPU_CODEGEN_ARGS=--spirt-keep-debug-sources-in-dumps`" + .into(), + ); + } + } + } let plan = spirt::print::Plan::for_versions( &cx, @@ -670,8 +699,8 @@ pub fn link( } { - let _timer = sess.timer("link_remove_duplicate_lines"); - duplicates::remove_duplicate_lines(output); + let _timer = sess.timer("link_remove_duplicate_debuginfo"); + duplicates::remove_duplicate_debuginfo(output); } if opts.compact_ids { diff --git a/docs/src/codegen-args.md b/docs/src/codegen-args.md index 060bbbcb6e..2dda3864b9 100644 --- a/docs/src/codegen-args.md +++ b/docs/src/codegen-args.md @@ -192,11 +192,19 @@ _Note: passes that are not already enabled by default are considered experimenta Dump the `SPIR-🇹` module across passes (i.e. all of the versions before/after each pass), as a combined report, to a pair of files (`.spirt` and `.spirt.html`) in `DIR`. (the `.spirt.html` version of the report is the recommended form for viewing, as it uses tabling for versions, syntax-highlighting-like styling, and use->def linking) -### `--spirt-keep-custom-debuginfo-in-dumps` +### `--spirt-strip-custom-debuginfo-from-dumps` + +When dumping (pretty-printed) `SPIR-🇹` (e.g. with `--dump-spirt-passes`), strip +all the custom (Rust-GPU-specific) debuginfo instructions, by converting them +to the standard SPIR-V debuginfo (which `SPIR-🇹` understands more directly). + +The default (keeping the custom instructions) is more verbose, but also lossless, +if you want to see all instructions exactly as e.g. `--spirt-passes` see them. + +### `--spirt-keep-debug-sources-in-dumps` When dumping (pretty-printed) `SPIR-🇹` (e.g. with `--dump-spirt-passes`), preserve -all the custom (Rust-GPU-specific) debuginfo instructions, instead of converting -them to the standard SPIR-V debuginfo (which `SPIR-🇹` pretty-prints specially). +all the "file contents debuginfo" (i.e. from SPIR-V `OpSource` instructions), +which will end up being included, in full, at the start of the dump. -The default (of performing that conversion) has prettier results, but is lossier -if you want to see all instructions exactly as e.g. `--spirt-passes` see them. +The default (of hiding the file contents) is less verbose, but (arguably) lossier.