From 45ef92731b637a60cbad7cecf5382361bbf40531 Mon Sep 17 00:00:00 2001
From: Kyle Huey <khuey@kylehuey.com>
Date: Fri, 17 Jan 2025 09:44:09 -0800
Subject: [PATCH] When LLVM's location discriminator value limit is exceeded,
 emit locations with dummy spans instead of dropping them entirely

Revert most of #133194 (except the test and the comment fixes). Then fix the
case where the correct location discriminator value exceeds LLVM's capacity
by emitting a location with a dummy span instead of no location at all.
---
 compiler/rustc_codegen_gcc/src/debuginfo.rs   | 18 ++---
 .../src/debuginfo/create_scope_map.rs         | 59 +++++++----------
 .../rustc_codegen_llvm/src/debuginfo/mod.rs   |  4 +-
 .../rustc_codegen_ssa/src/mir/debuginfo.rs    |  6 +-
 .../main.rs                                   |  3 +
 .../other.rs                                  |  1 +
 .../proc.rs                                   |  7 ++
 .../rmake.rs                                  | 65 +++++++++++++++++++
 8 files changed, 111 insertions(+), 52 deletions(-)
 create mode 100644 tests/run-make/llvm-location-discriminator-limit-dummy-span/main.rs
 create mode 100644 tests/run-make/llvm-location-discriminator-limit-dummy-span/other.rs
 create mode 100644 tests/run-make/llvm-location-discriminator-limit-dummy-span/proc.rs
 create mode 100644 tests/run-make/llvm-location-discriminator-limit-dummy-span/rmake.rs

diff --git a/compiler/rustc_codegen_gcc/src/debuginfo.rs b/compiler/rustc_codegen_gcc/src/debuginfo.rs
index d3aeb7f3bde..4b84b1dbfd3 100644
--- a/compiler/rustc_codegen_gcc/src/debuginfo.rs
+++ b/compiler/rustc_codegen_gcc/src/debuginfo.rs
@@ -113,15 +113,15 @@ fn make_mir_scope<'gcc, 'tcx>(
     let scope_data = &mir.source_scopes[scope];
     let parent_scope = if let Some(parent) = scope_data.parent_scope {
         make_mir_scope(cx, _instance, mir, variables, debug_context, instantiated, parent);
-        debug_context.scopes[parent].unwrap()
+        debug_context.scopes[parent]
     } else {
         // The root is the function itself.
         let file = cx.sess().source_map().lookup_source_file(mir.span.lo());
-        debug_context.scopes[scope] = Some(DebugScope {
+        debug_context.scopes[scope] = DebugScope {
             file_start_pos: file.start_pos,
             file_end_pos: file.end_position(),
-            ..debug_context.scopes[scope].unwrap()
-        });
+            ..debug_context.scopes[scope]
+        };
         instantiated.insert(scope);
         return;
     };
@@ -130,7 +130,7 @@ fn make_mir_scope<'gcc, 'tcx>(
         if !vars.contains(scope) && scope_data.inlined.is_none() {
             // Do not create a DIScope if there are no variables defined in this
             // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat.
-            debug_context.scopes[scope] = Some(parent_scope);
+            debug_context.scopes[scope] = parent_scope;
             instantiated.insert(scope);
             return;
         }
@@ -157,12 +157,12 @@ fn make_mir_scope<'gcc, 'tcx>(
     // TODO(tempdragon): dbg_scope: Add support for scope extension here.
     inlined_at.or(p_inlined_at);
 
-    debug_context.scopes[scope] = Some(DebugScope {
+    debug_context.scopes[scope] = DebugScope {
         dbg_scope,
         inlined_at,
         file_start_pos: loc.file.start_pos,
         file_end_pos: loc.file.end_position(),
-    });
+    };
     instantiated.insert(scope);
 }
 
@@ -232,12 +232,12 @@ impl<'gcc, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'gcc, 'tcx> {
         }
 
         // Initialize fn debug context (including scopes).
-        let empty_scope = Some(DebugScope {
+        let empty_scope = DebugScope {
             dbg_scope: self.dbg_scope_fn(instance, fn_abi, Some(llfn)),
             inlined_at: None,
             file_start_pos: BytePos(0),
             file_end_pos: BytePos(0),
-        });
+        };
         let mut fn_debug_context = FunctionDebugContext {
             scopes: IndexVec::from_elem(empty_scope, mir.source_scopes.as_slice()),
             inlined_function_scopes: Default::default(),
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
index e545ce386ed..11eb9651af6 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/create_scope_map.rs
@@ -9,7 +9,7 @@ use rustc_middle::mir::{Body, SourceScope};
 use rustc_middle::ty::layout::{FnAbiOf, HasTypingEnv};
 use rustc_middle::ty::{self, Instance};
 use rustc_session::config::DebugInfo;
-use rustc_span::{BytePos, hygiene};
+use rustc_span::{BytePos, DUMMY_SP, hygiene};
 
 use super::metadata::file_metadata;
 use super::utils::DIB;
@@ -85,23 +85,15 @@ fn make_mir_scope<'ll, 'tcx>(
             discriminators,
             parent,
         );
-        if let Some(parent_scope) = debug_context.scopes[parent] {
-            parent_scope
-        } else {
-            // If the parent scope could not be represented then no children
-            // can be either.
-            debug_context.scopes[scope] = None;
-            instantiated.insert(scope);
-            return;
-        }
+        debug_context.scopes[parent]
     } else {
         // The root is the function itself.
         let file = cx.sess().source_map().lookup_source_file(mir.span.lo());
-        debug_context.scopes[scope] = Some(DebugScope {
+        debug_context.scopes[scope] = DebugScope {
             file_start_pos: file.start_pos,
             file_end_pos: file.end_position(),
-            ..debug_context.scopes[scope].unwrap()
-        });
+            ..debug_context.scopes[scope]
+        };
         instantiated.insert(scope);
         return;
     };
@@ -112,7 +104,7 @@ fn make_mir_scope<'ll, 'tcx>(
     {
         // Do not create a DIScope if there are no variables defined in this
         // MIR `SourceScope`, and it's not `inlined`, to avoid debuginfo bloat.
-        debug_context.scopes[scope] = Some(parent_scope);
+        debug_context.scopes[scope] = parent_scope;
         instantiated.insert(scope);
         return;
     }
@@ -145,14 +137,7 @@ fn make_mir_scope<'ll, 'tcx>(
         },
     };
 
-    let mut debug_scope = Some(DebugScope {
-        dbg_scope,
-        inlined_at: parent_scope.inlined_at,
-        file_start_pos: loc.file.start_pos,
-        file_end_pos: loc.file.end_position(),
-    });
-
-    if let Some((_, callsite_span)) = scope_data.inlined {
+    let inlined_at = scope_data.inlined.map(|(_, callsite_span)| {
         let callsite_span = hygiene::walk_chain_collapsed(callsite_span, mir.span);
         let callsite_scope = parent_scope.adjust_dbg_scope_for_span(cx, callsite_span);
         let loc = cx.dbg_loc(callsite_scope, parent_scope.inlined_at, callsite_span);
@@ -175,29 +160,29 @@ fn make_mir_scope<'ll, 'tcx>(
         // Note further that we can't key this hashtable on the span itself,
         // because these spans could have distinct SyntaxContexts. We have
         // to key on exactly what we're giving to LLVM.
-        let inlined_at = match discriminators.entry(callsite_span.lo()) {
+        match discriminators.entry(callsite_span.lo()) {
             Entry::Occupied(mut o) => {
                 *o.get_mut() += 1;
+                // NB: We have to emit *something* here or we'll fail LLVM IR verification
+                // in at least some circumstances (see issue #135332), so if the required
+                // discriminator cannot be encoded, fall back to the dummy location.
                 unsafe { llvm::LLVMRustDILocationCloneWithBaseDiscriminator(loc, *o.get()) }
+                    .unwrap_or_else(|| {
+                        cx.dbg_loc(callsite_scope, parent_scope.inlined_at, DUMMY_SP)
+                    })
             }
             Entry::Vacant(v) => {
                 v.insert(0);
-                Some(loc)
-            }
-        };
-        match inlined_at {
-            Some(inlined_at) => {
-                debug_scope.as_mut().unwrap().inlined_at = Some(inlined_at);
-            }
-            None => {
-                // LLVM has a maximum discriminator that it can encode (currently
-                // it uses 12 bits for 4096 possible values). If we exceed that
-                // there is little we can do but drop the debug info.
-                debug_scope = None;
+                loc
             }
         }
-    }
+    });
 
-    debug_context.scopes[scope] = debug_scope;
+    debug_context.scopes[scope] = DebugScope {
+        dbg_scope,
+        inlined_at: inlined_at.or(parent_scope.inlined_at),
+        file_start_pos: loc.file.start_pos,
+        file_end_pos: loc.file.end_position(),
+    };
     instantiated.insert(scope);
 }
diff --git a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
index 755f4816acf..e6778411365 100644
--- a/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
+++ b/compiler/rustc_codegen_llvm/src/debuginfo/mod.rs
@@ -295,12 +295,12 @@ impl<'ll, 'tcx> DebugInfoCodegenMethods<'tcx> for CodegenCx<'ll, 'tcx> {
         }
 
         // Initialize fn debug context (including scopes).
-        let empty_scope = Some(DebugScope {
+        let empty_scope = DebugScope {
             dbg_scope: self.dbg_scope_fn(instance, fn_abi, Some(llfn)),
             inlined_at: None,
             file_start_pos: BytePos(0),
             file_end_pos: BytePos(0),
-        });
+        };
         let mut fn_debug_context = FunctionDebugContext {
             scopes: IndexVec::from_elem(empty_scope, &mir.source_scopes),
             inlined_function_scopes: Default::default(),
diff --git a/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs b/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
index 843a996d2bf..5924c8991ad 100644
--- a/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/debuginfo.rs
@@ -19,9 +19,7 @@ use crate::traits::*;
 
 pub struct FunctionDebugContext<'tcx, S, L> {
     /// Maps from source code to the corresponding debug info scope.
-    /// May be None if the backend is not capable of representing the scope for
-    /// some reason.
-    pub scopes: IndexVec<mir::SourceScope, Option<DebugScope<S, L>>>,
+    pub scopes: IndexVec<mir::SourceScope, DebugScope<S, L>>,
 
     /// Maps from an inlined function to its debug info declaration.
     pub inlined_function_scopes: FxHashMap<Instance<'tcx>, S>,
@@ -232,7 +230,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
         &self,
         source_info: mir::SourceInfo,
     ) -> Option<(Bx::DIScope, Option<Bx::DILocation>, Span)> {
-        let scope = &self.debug_context.as_ref()?.scopes[source_info.scope]?;
+        let scope = &self.debug_context.as_ref()?.scopes[source_info.scope];
         let span = hygiene::walk_chain_collapsed(source_info.span, self.mir.span);
         Some((scope.adjust_dbg_scope_for_span(self.cx, span), scope.inlined_at, span))
     }
diff --git a/tests/run-make/llvm-location-discriminator-limit-dummy-span/main.rs b/tests/run-make/llvm-location-discriminator-limit-dummy-span/main.rs
new file mode 100644
index 00000000000..421eb4331b3
--- /dev/null
+++ b/tests/run-make/llvm-location-discriminator-limit-dummy-span/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+    other::big_function();
+}
diff --git a/tests/run-make/llvm-location-discriminator-limit-dummy-span/other.rs b/tests/run-make/llvm-location-discriminator-limit-dummy-span/other.rs
new file mode 100644
index 00000000000..a3ff578ebe4
--- /dev/null
+++ b/tests/run-make/llvm-location-discriminator-limit-dummy-span/other.rs
@@ -0,0 +1 @@
+proc::declare_big_function!();
diff --git a/tests/run-make/llvm-location-discriminator-limit-dummy-span/proc.rs b/tests/run-make/llvm-location-discriminator-limit-dummy-span/proc.rs
new file mode 100644
index 00000000000..59d17a9be59
--- /dev/null
+++ b/tests/run-make/llvm-location-discriminator-limit-dummy-span/proc.rs
@@ -0,0 +1,7 @@
+extern crate proc_macro;
+use proc_macro::TokenStream;
+
+#[proc_macro]
+pub fn declare_big_function(_input: TokenStream) -> TokenStream {
+    include_str!("./generated.rs").parse().unwrap()
+}
diff --git a/tests/run-make/llvm-location-discriminator-limit-dummy-span/rmake.rs b/tests/run-make/llvm-location-discriminator-limit-dummy-span/rmake.rs
new file mode 100644
index 00000000000..2727effe818
--- /dev/null
+++ b/tests/run-make/llvm-location-discriminator-limit-dummy-span/rmake.rs
@@ -0,0 +1,65 @@
+//! Regression test for <https://github.com/rust-lang/rust/issues/135332>.
+//!
+//! We can't simply drop debuginfo location spans when LLVM's location discriminator value limit is
+//! reached. Otherwise, with `-Z verify-llvm-ir` and fat LTO, LLVM will report a broken module for
+//!
+//! ```text
+//! inlinable function call in a function with debug info must have a !dbg location
+//! ```
+
+//@ ignore-cross-compile
+//@ needs-dynamic-linking
+//@ only-nightly (requires unstable rustc flag)
+
+#![deny(warnings)]
+
+use run_make_support::{dynamic_lib_name, rfs, rust_lib_name, rustc};
+
+// Synthesize a function that will have a large (`n`) number of functions
+// MIR-inlined into it. When combined with a proc-macro, all of these inline
+// callsites will have the same span, forcing rustc to use the DWARF
+// discriminator to distinguish between them. LLVM's capacity to store that
+// discriminator is not infinite (currently it allocates 12 bits, for 4096
+// possible values), so if this function gets big enough rustc's error
+// handling path will be exercised.
+fn generate_program(n: u32) -> String {
+    let mut program = String::from("pub type BigType = Vec<Vec<String>>;\n\n");
+    program.push_str("pub fn big_function() -> BigType {\n");
+    program.push_str("    vec![\n");
+    for i in 1..=n {
+        program.push_str(&format!("vec![\"string{}\".to_owned()],\n", i));
+    }
+    program.push_str("    ]\n");
+    program.push_str("}\n");
+    program
+}
+
+fn main() {
+    // The reported threshold is around 1366 (4096/3), but let's bump it to
+    // around 1500 to be less sensitive.
+    rfs::write("generated.rs", generate_program(1500));
+
+    rustc()
+        .input("proc.rs")
+        .crate_type("proc-macro")
+        .edition("2021")
+        .arg("-Cdebuginfo=line-tables-only")
+        .run();
+    rustc()
+        .extern_("proc", dynamic_lib_name("proc"))
+        .input("other.rs")
+        .crate_type("rlib")
+        .edition("2021")
+        .opt_level("3")
+        .arg("-Cdebuginfo=line-tables-only")
+        .run();
+    rustc()
+        .extern_("other", rust_lib_name("other"))
+        .input("main.rs")
+        .edition("2021")
+        .opt_level("3")
+        .arg("-Cdebuginfo=line-tables-only")
+        .arg("-Clto=fat")
+        .arg("-Zverify-llvm-ir")
+        .run();
+}