diff --git a/crates/rustc_codegen_spirv/src/builder_spirv.rs b/crates/rustc_codegen_spirv/src/builder_spirv.rs index 6e91a7897b..f513f3f160 100644 --- a/crates/rustc_codegen_spirv/src/builder_spirv.rs +++ b/crates/rustc_codegen_spirv/src/builder_spirv.rs @@ -393,7 +393,8 @@ pub struct BuilderCursor { } pub struct BuilderSpirv<'tcx> { - source_map: &'tcx SourceMap, + // HACK(eddyb) public only for `decorations`. + pub(crate) source_map: &'tcx SourceMap, builder: RefCell, @@ -696,9 +697,6 @@ impl<'tcx> BuilderSpirv<'tcx> { .or_insert_with_key(|DebugFileKey(sf)| { let mut builder = self.builder(Default::default()); - // FIXME(eddyb) remapping might be really bad for being able to - // load the sources the later, maybe it should be done at the - // very end? (just before linking outputting the final SPIR-V) let file_name_op_string_id = builder.string(sf.name.prefer_remapped().to_string()); let file_contents = self diff --git a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs index 87bc233abf..6c5943eb99 100644 --- a/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs +++ b/crates/rustc_codegen_spirv/src/codegen_cx/mod.rs @@ -189,7 +189,7 @@ impl<'tcx> CodegenCx<'tcx> { word, ZombieDecoration { reason: reason.to_string(), - span: SerializedSpan::from_rustc(span, self.tcx.sess.source_map()), + span: SerializedSpan::from_rustc(span, &self.builder), }, ); } diff --git a/crates/rustc_codegen_spirv/src/decorations.rs b/crates/rustc_codegen_spirv/src/decorations.rs index b4a937bab7..2a317ecc7a 100644 --- a/crates/rustc_codegen_spirv/src/decorations.rs +++ b/crates/rustc_codegen_spirv/src/decorations.rs @@ -1,10 +1,16 @@ //! SPIR-V decorations specific to `rustc_codegen_spirv`, produced during //! the original codegen of a crate, and consumed by the `linker`. +use crate::builder_spirv::BuilderSpirv; use rspirv::dr::{Instruction, Module, Operand}; use rspirv::spirv::{Decoration, Op, Word}; -use rustc_span::{source_map::SourceMap, FileName, Pos, Span}; +use rustc_data_structures::fx::{FxHashMap, FxIndexMap}; +use rustc_data_structures::sync::Lrc; +use rustc_span::{source_map::SourceMap, Pos, Span}; +use rustc_span::{FileName, SourceFile}; use serde::{Deserialize, Serialize}; +use smallvec::SmallVec; +use std::borrow::Cow; use std::marker::PhantomData; use std::path::PathBuf; use std::{iter, slice}; @@ -56,7 +62,7 @@ pub trait CustomDecoration: for<'de> Deserialize<'de> + Serialize { Some(( id, LazilyDeserialized { - json, + json: json.into(), _marker: PhantomData, }, )) @@ -87,15 +93,32 @@ type DecodeAllIter<'a, D> = iter::FilterMap< >; /// Helper allowing full deserialization to be avoided where possible. -#[derive(Copy, Clone)] pub struct LazilyDeserialized<'a, D> { - json: &'a str, + json: Cow<'a, str>, _marker: PhantomData, } -impl<'a, D: Deserialize<'a>> LazilyDeserialized<'a, D> { - pub fn deserialize(self) -> D { - serde_json::from_str(self.json).unwrap() +impl Clone for LazilyDeserialized<'_, D> { + fn clone(&self) -> Self { + let Self { ref json, _marker } = *self; + Self { + json: json.clone(), + _marker, + } + } +} + +impl Deserialize<'a>> LazilyDeserialized<'_, D> { + pub fn deserialize(&self) -> D { + serde_json::from_str(&self.json).unwrap() + } + + pub fn into_owned(self) -> LazilyDeserialized<'static, D> { + let Self { json, _marker } = self; + LazilyDeserialized { + json: json.into_owned().into(), + _marker, + } } } @@ -112,59 +135,16 @@ impl CustomDecoration for ZombieDecoration { } /// Representation of a `rustc` `Span` that can be turned into a `Span` again -/// in another compilation, by reloading the file. However, note that this will -/// fail if the file changed since, which is detected using the serialized `hash`. +/// in another compilation, by regenerating the `rustc` `SourceFile`. #[derive(Deserialize, Serialize)] pub struct SerializedSpan { - file: PathBuf, - hash: serde_adapters::SourceFileHash, + file_name: String, lo: u32, hi: u32, } -// HACK(eddyb) `rustc_span` types implement only `rustc_serialize` traits, but -// not `serde` traits, and the easiest workaround is to have our own types. -mod serde_adapters { - use serde::{Deserialize, Serialize}; - - #[derive(Copy, Clone, PartialEq, Eq, Deserialize, Serialize)] - pub enum SourceFileHashAlgorithm { - Md5, - Sha1, - Sha256, - } - - impl From for SourceFileHashAlgorithm { - fn from(kind: rustc_span::SourceFileHashAlgorithm) -> Self { - match kind { - rustc_span::SourceFileHashAlgorithm::Md5 => Self::Md5, - rustc_span::SourceFileHashAlgorithm::Sha1 => Self::Sha1, - rustc_span::SourceFileHashAlgorithm::Sha256 => Self::Sha256, - } - } - } - - #[derive(Copy, Clone, PartialEq, Eq, Deserialize, Serialize)] - pub struct SourceFileHash { - kind: SourceFileHashAlgorithm, - value: [u8; 32], - } - - impl From for SourceFileHash { - fn from(hash: rustc_span::SourceFileHash) -> Self { - let bytes = hash.hash_bytes(); - let mut hash = Self { - kind: hash.kind.into(), - value: Default::default(), - }; - hash.value[..bytes.len()].copy_from_slice(bytes); - hash - } - } -} - impl SerializedSpan { - pub fn from_rustc(span: Span, source_map: &SourceMap) -> Option { + pub fn from_rustc(span: Span, builder: &BuilderSpirv<'_>) -> Option { // Decorations may not always have valid spans. // FIXME(eddyb) reduce the sources of this as much as possible. if span.is_dummy() { @@ -177,42 +157,150 @@ impl SerializedSpan { return None; } - let file = source_map.lookup_source_file(lo); + let file = builder.source_map.lookup_source_file(lo); if !(file.start_pos <= lo && hi <= file.end_pos) { // FIXME(eddyb) broken `Span` - potentially turn this into an assert? return None; } + // NOTE(eddyb) this emits necessary `OpString`/`OpSource` instructions. + builder.def_debug_file(file.clone()); + Some(Self { - file: match &file.name { - // We can only support real files, not "synthetic" ones (which - // are almost never exposed to the compiler backend anyway). - FileName::Real(real_name) => real_name.local_path()?.to_path_buf(), - _ => return None, - }, - hash: file.src_hash.into(), + file_name: file.name.prefer_remapped().to_string(), lo: (lo - file.start_pos).to_u32(), hi: (hi - file.start_pos).to_u32(), }) } +} - pub fn to_rustc(&self, source_map: &SourceMap) -> Option { - let file = source_map.load_file(&self.file).ok()?; +/// Helper type to delay most of the work necessary to turn a `SerializedSpan` +/// back into an usable `Span`, until it's actually needed (i.e. for an error). +pub struct SpanRegenerator<'a> { + source_map: &'a SourceMap, + module: &'a Module, - // If the file has changed since serializing, there's not much we can do, - // other than avoid creating invalid/confusing `Span`s. - // FIXME(eddyb) we could still indicate some of this to the user. - if self.hash != file.src_hash.into() { - return None; + // HACK(eddyb) this is mostly replicating SPIR-T's module-level debuginfo. + spv_debug_files: Option>>, +} + +// HACK(eddyb) this is mostly replicating SPIR-T's module-level debuginfo. +#[derive(Default)] +struct SpvDebugFile<'a> { + /// Source strings from one `OpSource`, and any number of `OpSourceContinued`. + op_source_parts: SmallVec<[&'a str; 1]>, + + regenerated_rustc_source_file: Option>, +} + +impl<'a> SpanRegenerator<'a> { + pub fn new(source_map: &'a SourceMap, module: &'a Module) -> Self { + Self { + source_map, + module, + spv_debug_files: None, } + } + + fn regenerate_rustc_source_file(&mut self, file_name: &str) -> Option<&SourceFile> { + let spv_debug_files = self.spv_debug_files.get_or_insert_with(|| { + let mut op_string_by_id = FxHashMap::default(); + let mut spv_debug_files = FxIndexMap::default(); + let mut insts = self.module.debug_string_source.iter().peekable(); + while let Some(inst) = insts.next() { + match inst.class.opcode { + Op::String => { + op_string_by_id.insert( + inst.result_id.unwrap(), + inst.operands[0].unwrap_literal_string(), + ); + } + Op::Source if inst.operands.len() == 4 => { + let file_name_id = inst.operands[2].unwrap_id_ref(); + if let Some(&file_name) = op_string_by_id.get(&file_name_id) { + let mut file = SpvDebugFile::default(); + file.op_source_parts + .push(inst.operands[3].unwrap_literal_string()); + while let Some(&next_inst) = insts.peek() { + if next_inst.class.opcode != Op::SourceContinued { + break; + } + insts.next(); + + file.op_source_parts + .push(next_inst.operands[0].unwrap_literal_string()); + } + + // FIXME(eddyb) what if the file is already present, + // should it be considered ambiguous overall? + spv_debug_files.insert(file_name, file); + } + } + _ => {} + } + } + spv_debug_files + }); + let spv_debug_file = spv_debug_files.get_mut(file_name)?; + + let file = &mut spv_debug_file.regenerated_rustc_source_file; + if file.is_none() { + // FIXME(eddyb) reduce allocations here by checking if the file is + // already loaded, and not allocating just to compare the source, + // but at least it's cheap when `OpSourceContinued` isn't used. + let src = match &spv_debug_file.op_source_parts[..] { + &[part] => Cow::Borrowed(part), + parts => parts.concat().into(), + }; + + // HACK(eddyb) in case the file has changed, and because `SourceMap` + // is strictly monotonic, we need to come up with some other name. + let mut sm_file_name_candidates = [PathBuf::from(file_name).into()] + .into_iter() + .chain((0..).map(|i| FileName::Custom(format!("outdated({i}) {file_name}")))); + + *file = sm_file_name_candidates.find_map(|sm_file_name_candidate| { + let sf = self + .source_map + .new_source_file(sm_file_name_candidate, src.clone().into_owned()); + + // Only use this `FileName` candidate if we either: + // 1. reused a `SourceFile` with the right `src`/`external_src` + // 2. allocated a new `SourceFile` with our choice of `src` + self.source_map + .ensure_source_file_source_present(sf.clone()); + let sf_src_matches = sf + .src + .as_ref() + .map(|sf_src| sf_src[..] == src[..]) + .or_else(|| { + sf.external_src + .borrow() + .get_source() + .map(|sf_src| sf_src[..] == src[..]) + }) + .unwrap_or(false); + + if sf_src_matches { + Some(sf) + } else { + None + } + }); + } + file.as_deref() + } + + pub fn serialized_span_to_rustc(&mut self, span: &SerializedSpan) -> Option { + let file = self.regenerate_rustc_source_file(&span.file_name[..])?; // Sanity check - assuming `SerializedSpan` isn't corrupted, this assert - // could only ever fail because of a hash collision. - assert!(self.lo <= self.hi && self.hi <= (file.end_pos.0 - file.start_pos.0)); + // could only ever fail because of the file name being ambiguous. + assert!(span.lo <= span.hi && span.hi <= (file.end_pos.0 - file.start_pos.0)); Some(Span::with_root_ctxt( - file.start_pos + Pos::from_u32(self.lo), - file.start_pos + Pos::from_u32(self.hi), + file.start_pos + Pos::from_u32(span.lo), + file.start_pos + Pos::from_u32(span.hi), )) } } diff --git a/crates/rustc_codegen_spirv/src/linker/zombies.rs b/crates/rustc_codegen_spirv/src/linker/zombies.rs index d2e4c14dad..0a90ffd56c 100644 --- a/crates/rustc_codegen_spirv/src/linker/zombies.rs +++ b/crates/rustc_codegen_spirv/src/linker/zombies.rs @@ -1,33 +1,25 @@ //! See documentation on `CodegenCx::zombie` for a description of the zombie system. use super::{get_name, get_names}; -use crate::decorations::{CustomDecoration, ZombieDecoration}; +use crate::decorations::{CustomDecoration, LazilyDeserialized, SpanRegenerator, ZombieDecoration}; use rspirv::dr::{Instruction, Module}; use rspirv::spirv::{Op, Word}; use rustc_data_structures::fx::FxHashMap; use rustc_session::Session; -use rustc_span::{Span, DUMMY_SP}; +use rustc_span::DUMMY_SP; use std::iter::once; +// FIXME(eddyb) change this to chain through IDs instead of wasting allocations. #[derive(Clone)] struct ZombieInfo<'a> { - reason: &'a str, - span: Span, + serialized: &'a LazilyDeserialized<'static, ZombieDecoration>, stack: Vec, } impl<'a> ZombieInfo<'a> { - fn new(reason: &'a str, span: Span) -> Self { - Self { - reason, - span, - stack: Vec::new(), - } - } fn push_stack(&self, word: Word) -> Self { Self { - reason: self.reason, - span: self.span, + serialized: self.serialized, stack: self.stack.iter().cloned().chain(once(word)).collect(), } } @@ -66,7 +58,7 @@ fn is_or_contains_zombie<'h, 'a>( result_zombie.or_else(|| contains_zombie(inst, zombie)) } -fn spread_zombie(module: &mut Module, zombie: &mut FxHashMap>) -> bool { +fn spread_zombie(module: &Module, zombie: &mut FxHashMap>) -> bool { let mut any = false; // globals are easy for inst in module.global_inst_iter() { @@ -123,12 +115,18 @@ fn report_error_zombies( module: &Module, zombie: &FxHashMap>, ) -> super::Result<()> { + let mut span_regen = SpanRegenerator::new(sess.source_map(), module); + let mut result = Ok(()); let mut names = None; for root in super::dce::collect_roots(module) { - if let Some(reason) = zombie.get(&root) { + if let Some(zombie_info) = zombie.get(&root) { + let ZombieDecoration { reason, span } = zombie_info.serialized.deserialize(); + let span = span + .and_then(|span| span_regen.serialized_span_to_rustc(&span)) + .unwrap_or(DUMMY_SP); let names = names.get_or_insert_with(|| get_names(module)); - let stack = reason + let stack = zombie_info .stack .iter() .map(|&s| get_name(names, s).into_owned()); @@ -136,10 +134,7 @@ fn report_error_zombies( .chain(stack) .collect::>() .join("\n"); - result = Err(sess - .struct_span_err(reason.span, reason.reason) - .note(&stack_note) - .emit()); + result = Err(sess.struct_span_err(span, reason).note(&stack_note).emit()); } } result @@ -150,20 +145,25 @@ pub fn remove_zombies( opts: &super::Options, module: &mut Module, ) -> super::Result<()> { + // FIXME(eddyb) combine these two steps to take the original strings, + // instead of effectively cloning them (via `.into_owned()`). let zombies_owned = ZombieDecoration::decode_all(module) - .map(|(id, zombie)| { - let ZombieDecoration { reason, span } = zombie.deserialize(); - let span = span - .and_then(|span| span.to_rustc(sess.source_map())) - .unwrap_or(DUMMY_SP); - (id, (reason, span)) - }) + .map(|(id, zombie)| (id, zombie.into_owned())) .collect::>(); + ZombieDecoration::remove_all(module); + let mut zombies = zombies_owned .iter() - .map(|(id, (reason, span))| (*id, ZombieInfo::new(reason, *span))) + .map(|(id, serialized)| { + ( + *id, + ZombieInfo { + serialized, + stack: vec![], + }, + ) + }) .collect(); - ZombieDecoration::remove_all(module); // Note: This is O(n^2). while spread_zombie(module, &mut zombies) {} @@ -171,22 +171,31 @@ pub fn remove_zombies( // FIXME(eddyb) use `log`/`tracing` instead. if opts.print_all_zombie { - for (&zomb, reason) in &zombies { - let orig = if zombies_owned.iter().any(|&(z, _)| z == zomb) { + for (&zombie_id, zombie_info) in &zombies { + let orig = if zombies_owned.iter().any(|&(z, _)| z == zombie_id) { "original" } else { "infected" }; - println!("zombie'd {} because {} ({})", zomb, reason.reason, orig); + println!( + "zombie'd {} because {} ({})", + zombie_id, + zombie_info.serialized.deserialize().reason, + orig + ); } } if opts.print_zombie { let names = get_names(module); for f in &module.functions { - if let Some(reason) = is_zombie(f.def.as_ref().unwrap(), &zombies) { + if let Some(zombie_info) = is_zombie(f.def.as_ref().unwrap(), &zombies) { let name = get_name(&names, f.def_id().unwrap()); - println!("Function removed {:?} because {:?}", name, reason.reason); + println!( + "Function removed {:?} because {:?}", + name, + zombie_info.serialized.deserialize().reason + ); } } }