From 41b98da42d0500e38b9dd85c82110ba177ac4de1 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Tue, 2 Jul 2024 21:05:22 +0200 Subject: [PATCH] Miri function identity hack: account for possible inlining --- .../rustc_codegen_cranelift/src/constant.rs | 8 +- compiler/rustc_codegen_gcc/src/common.rs | 2 +- compiler/rustc_codegen_llvm/src/common.rs | 4 +- .../rustc_const_eval/src/interpret/memory.rs | 14 ++-- .../src/interpret/validity.rs | 2 +- .../rustc_middle/src/mir/interpret/mod.rs | 73 +++++++++++++------ compiler/rustc_middle/src/mir/pretty.rs | 2 +- compiler/rustc_middle/src/ty/print/pretty.rs | 4 +- compiler/rustc_monomorphize/src/collector.rs | 8 +- compiler/rustc_passes/src/reachable.rs | 2 +- .../rustc_smir/src/rustc_smir/convert/mir.rs | 2 +- src/tools/miri/src/shims/backtrace.rs | 2 +- .../miri/tests/pass/function_pointers.rs | 3 +- .../miri/tests/pass/issues/issue-91636.rs | 1 + 14 files changed, 80 insertions(+), 47 deletions(-) diff --git a/compiler/rustc_codegen_cranelift/src/constant.rs b/compiler/rustc_codegen_cranelift/src/constant.rs index 87c5da3b7c3..9f7b95261d5 100644 --- a/compiler/rustc_codegen_cranelift/src/constant.rs +++ b/compiler/rustc_codegen_cranelift/src/constant.rs @@ -155,7 +155,7 @@ pub(crate) fn codegen_const_value<'tcx>( fx.bcx.ins().global_value(fx.pointer_type, local_data_id) } } - GlobalAlloc::Function(instance) => { + GlobalAlloc::Function { instance, .. } => { let func_id = crate::abi::import_function(fx.tcx, fx.module, instance); let local_func_id = fx.module.declare_func_in_func(func_id, &mut fx.bcx.func); @@ -351,7 +351,9 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant TodoItem::Alloc(alloc_id) => { let alloc = match tcx.global_alloc(alloc_id) { GlobalAlloc::Memory(alloc) => alloc, - GlobalAlloc::Function(_) | GlobalAlloc::Static(_) | GlobalAlloc::VTable(..) => { + GlobalAlloc::Function { .. } + | GlobalAlloc::Static(_) + | GlobalAlloc::VTable(..) => { unreachable!() } }; @@ -415,7 +417,7 @@ fn define_all_allocs(tcx: TyCtxt<'_>, module: &mut dyn Module, cx: &mut Constant let reloc_target_alloc = tcx.global_alloc(alloc_id); let data_id = match reloc_target_alloc { - GlobalAlloc::Function(instance) => { + GlobalAlloc::Function { instance, .. } => { assert_eq!(addend, 0); let func_id = crate::abi::import_function(tcx, module, instance.polymorphize(tcx)); diff --git a/compiler/rustc_codegen_gcc/src/common.rs b/compiler/rustc_codegen_gcc/src/common.rs index 230fe4f5871..fa8a1ec037c 100644 --- a/compiler/rustc_codegen_gcc/src/common.rs +++ b/compiler/rustc_codegen_gcc/src/common.rs @@ -220,7 +220,7 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { } value } - GlobalAlloc::Function(fn_instance) => self.get_fn_addr(fn_instance), + GlobalAlloc::Function { instance, .. } => self.get_fn_addr(instance), GlobalAlloc::VTable(ty, trait_ref) => { let alloc = self .tcx diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index d42c6ed827a..fe64649cf70 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -289,8 +289,8 @@ impl<'ll, 'tcx> ConstMethods<'tcx> for CodegenCx<'ll, 'tcx> { (value, AddressSpace::DATA) } } - GlobalAlloc::Function(fn_instance) => ( - self.get_fn_addr(fn_instance.polymorphize(self.tcx)), + GlobalAlloc::Function { instance, .. } => ( + self.get_fn_addr(instance.polymorphize(self.tcx)), self.data_layout().instruction_address_space, ), GlobalAlloc::VTable(ty, trait_ref) => { diff --git a/compiler/rustc_const_eval/src/interpret/memory.rs b/compiler/rustc_const_eval/src/interpret/memory.rs index 9d0c4908225..36fe8dfdd29 100644 --- a/compiler/rustc_const_eval/src/interpret/memory.rs +++ b/compiler/rustc_const_eval/src/interpret/memory.rs @@ -308,7 +308,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let Some((alloc_kind, mut alloc)) = self.memory.alloc_map.remove(&alloc_id) else { // Deallocating global memory -- always an error return Err(match self.tcx.try_get_global_alloc(alloc_id) { - Some(GlobalAlloc::Function(..)) => { + Some(GlobalAlloc::Function { .. }) => { err_ub_custom!( fluent::const_eval_invalid_dealloc, alloc_id = alloc_id, @@ -555,7 +555,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { // Memory of a constant or promoted or anonymous memory referenced by a static. (mem, None) } - Some(GlobalAlloc::Function(..)) => throw_ub!(DerefFunctionPointer(id)), + Some(GlobalAlloc::Function { .. }) => throw_ub!(DerefFunctionPointer(id)), Some(GlobalAlloc::VTable(..)) => throw_ub!(DerefVTablePointer(id)), None => throw_ub!(PointerUseAfterFree(id, CheckInAllocMsg::MemoryAccessTest)), Some(GlobalAlloc::Static(def_id)) => { @@ -828,7 +828,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { let alloc = alloc.inner(); (alloc.size(), alloc.align, AllocKind::LiveData) } - Some(GlobalAlloc::Function(_)) => bug!("We already checked function pointers above"), + Some(GlobalAlloc::Function { .. }) => { + bug!("We already checked function pointers above") + } Some(GlobalAlloc::VTable(..)) => { // No data to be accessed here. But vtables are pointer-aligned. return (Size::ZERO, self.tcx.data_layout.pointer_align.abi, AllocKind::VTable); @@ -865,7 +867,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> { Some(FnVal::Other(*extra)) } else { match self.tcx.try_get_global_alloc(id) { - Some(GlobalAlloc::Function(instance)) => Some(FnVal::Instance(instance)), + Some(GlobalAlloc::Function { instance, .. }) => Some(FnVal::Instance(instance)), _ => None, } } @@ -1056,8 +1058,8 @@ impl<'a, 'tcx, M: Machine<'tcx>> std::fmt::Debug for DumpAllocs<'a, 'tcx, M> { alloc.inner(), )?; } - Some(GlobalAlloc::Function(func)) => { - write!(fmt, " (fn: {func})")?; + Some(GlobalAlloc::Function { instance, .. }) => { + write!(fmt, " (fn: {instance})")?; } Some(GlobalAlloc::VTable(ty, Some(trait_ref))) => { write!(fmt, " (vtable: impl {trait_ref} for {ty})")?; diff --git a/compiler/rustc_const_eval/src/interpret/validity.rs b/compiler/rustc_const_eval/src/interpret/validity.rs index add48e1b186..7fea0617666 100644 --- a/compiler/rustc_const_eval/src/interpret/validity.rs +++ b/compiler/rustc_const_eval/src/interpret/validity.rs @@ -745,7 +745,7 @@ fn mutability<'tcx>(ecx: &InterpCx<'tcx, impl Machine<'tcx>>, alloc_id: AllocId) } } GlobalAlloc::Memory(alloc) => alloc.inner().mutability, - GlobalAlloc::Function(..) | GlobalAlloc::VTable(..) => { + GlobalAlloc::Function { .. } | GlobalAlloc::VTable(..) => { // These are immutable, we better don't allow mutable pointers here. Mutability::Not } diff --git a/compiler/rustc_middle/src/mir/interpret/mod.rs b/compiler/rustc_middle/src/mir/interpret/mod.rs index 16093cfca6a..4e95e600b5a 100644 --- a/compiler/rustc_middle/src/mir/interpret/mod.rs +++ b/compiler/rustc_middle/src/mir/interpret/mod.rs @@ -18,6 +18,7 @@ use smallvec::{smallvec, SmallVec}; use tracing::{debug, trace}; use rustc_ast::LitKind; +use rustc_attr::InlineAttr; use rustc_data_structures::fx::FxHashMap; use rustc_data_structures::sync::{HashMapExt, Lock}; use rustc_errors::ErrorGuaranteed; @@ -134,10 +135,11 @@ pub fn specialized_encode_alloc_id<'tcx, E: TyEncoder>>( AllocDiscriminant::Alloc.encode(encoder); alloc.encode(encoder); } - GlobalAlloc::Function(fn_instance) => { - trace!("encoding {:?} with {:#?}", alloc_id, fn_instance); + GlobalAlloc::Function { instance, unique } => { + trace!("encoding {:?} with {:#?}", alloc_id, instance); AllocDiscriminant::Fn.encode(encoder); - fn_instance.encode(encoder); + instance.encode(encoder); + unique.encode(encoder); } GlobalAlloc::VTable(ty, poly_trait_ref) => { trace!("encoding {:?} with {ty:#?}, {poly_trait_ref:#?}", alloc_id); @@ -285,7 +287,12 @@ impl<'s> AllocDecodingSession<'s> { trace!("creating fn alloc ID"); let instance = ty::Instance::decode(decoder); trace!("decoded fn alloc instance: {:?}", instance); - let alloc_id = decoder.interner().reserve_and_set_fn_alloc(instance); + let unique = bool::decode(decoder); + // Here we cannot call `reserve_and_set_fn_alloc` as that would use a query, which + // is not possible in this context. That's why the allocation stores + // whether it is unique or not. + let alloc_id = + decoder.interner().reserve_and_set_fn_alloc_internal(instance, unique); alloc_id } AllocDiscriminant::VTable => { @@ -323,7 +330,12 @@ impl<'s> AllocDecodingSession<'s> { #[derive(Debug, Clone, Eq, PartialEq, Hash, TyDecodable, TyEncodable, HashStable)] pub enum GlobalAlloc<'tcx> { /// The alloc ID is used as a function pointer. - Function(Instance<'tcx>), + Function { + instance: Instance<'tcx>, + /// Stores whether this instance is unique, i.e. all pointers to this function use the same + /// alloc ID. + unique: bool, + }, /// This alloc ID points to a symbolic (not-reified) vtable. VTable(Ty<'tcx>, Option>), /// The alloc ID points to a "lazy" static variable that did not get computed (yet). @@ -349,7 +361,7 @@ impl<'tcx> GlobalAlloc<'tcx> { #[inline] pub fn unwrap_fn(&self) -> Instance<'tcx> { match *self { - GlobalAlloc::Function(instance) => instance, + GlobalAlloc::Function { instance, .. } => instance, _ => bug!("expected function, got {:?}", self), } } @@ -368,7 +380,7 @@ impl<'tcx> GlobalAlloc<'tcx> { #[inline] pub fn address_space(&self, cx: &impl HasDataLayout) -> AddressSpace { match self { - GlobalAlloc::Function(..) => cx.data_layout().instruction_address_space, + GlobalAlloc::Function { .. } => cx.data_layout().instruction_address_space, GlobalAlloc::Static(..) | GlobalAlloc::Memory(..) | GlobalAlloc::VTable(..) => { AddressSpace::DATA } @@ -426,7 +438,7 @@ impl<'tcx> TyCtxt<'tcx> { fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>) -> AllocId { let mut alloc_map = self.alloc_map.lock(); match alloc { - GlobalAlloc::Function(..) | GlobalAlloc::Static(..) | GlobalAlloc::VTable(..) => {} + GlobalAlloc::Function { .. } | GlobalAlloc::Static(..) | GlobalAlloc::VTable(..) => {} GlobalAlloc::Memory(..) => bug!("Trying to dedup-reserve memory with real data!"), } if let Some(&alloc_id) = alloc_map.dedup.get(&alloc) { @@ -445,30 +457,45 @@ impl<'tcx> TyCtxt<'tcx> { self.reserve_and_set_dedup(GlobalAlloc::Static(static_id)) } + /// Generates an `AllocId` for a function. The caller must already have decided whether this + /// function obtains a unique AllocId or gets de-duplicated via the cache. + fn reserve_and_set_fn_alloc_internal(self, instance: Instance<'tcx>, unique: bool) -> AllocId { + let alloc = GlobalAlloc::Function { instance, unique }; + if unique { + // Deduplicate. + self.reserve_and_set_dedup(alloc) + } else { + // Get a fresh ID. + let mut alloc_map = self.alloc_map.lock(); + let id = alloc_map.reserve(); + alloc_map.alloc_map.insert(id, alloc); + id + } + } + /// Generates an `AllocId` for a function. Depending on the function type, /// this might get deduplicated or assigned a new ID each time. pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>) -> AllocId { // Functions cannot be identified by pointers, as asm-equal functions can get deduplicated // by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be - // duplicated across crates. - // We thus generate a new `AllocId` for every mention of a function. This means that - // `main as fn() == main as fn()` is false, while `let x = main as fn(); x == x` is true. - // However, formatting code relies on function identity (see #58320), so we only do - // this for generic functions. Lifetime parameters are ignored. + // duplicated across crates. We thus generate a new `AllocId` for every mention of a + // function. This means that `main as fn() == main as fn()` is false, while `let x = main as + // fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify + // certain functions uniquely, e.g. for backtraces. So we identify whether codegen will + // actually emit duplicate functions. It does that when they have non-lifetime generics, or + // when they can be inlined. All other functions are given a unique address. + // This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied + // upon for anything. But if we don't do this, backtraces look terrible. let is_generic = instance .args .into_iter() .any(|kind| !matches!(kind.unpack(), GenericArgKind::Lifetime(_))); - if is_generic { - // Get a fresh ID. - let mut alloc_map = self.alloc_map.lock(); - let id = alloc_map.reserve(); - alloc_map.alloc_map.insert(id, GlobalAlloc::Function(instance)); - id - } else { - // Deduplicate. - self.reserve_and_set_dedup(GlobalAlloc::Function(instance)) - } + let can_be_inlined = match self.codegen_fn_attrs(instance.def_id()).inline { + InlineAttr::Never => false, + _ => true, + }; + let unique = !is_generic && !can_be_inlined; + self.reserve_and_set_fn_alloc_internal(instance, unique) } /// Generates an `AllocId` for a (symbolic, not-reified) vtable. Will get deduplicated. diff --git a/compiler/rustc_middle/src/mir/pretty.rs b/compiler/rustc_middle/src/mir/pretty.rs index 4657f4dcf81..4453ce44b03 100644 --- a/compiler/rustc_middle/src/mir/pretty.rs +++ b/compiler/rustc_middle/src/mir/pretty.rs @@ -1449,7 +1449,7 @@ pub fn write_allocations<'tcx>( // This can't really happen unless there are bugs, but it doesn't cost us anything to // gracefully handle it and allow buggy rustc to be debugged via allocation printing. None => write!(w, " (deallocated)")?, - Some(GlobalAlloc::Function(inst)) => write!(w, " (fn: {inst})")?, + Some(GlobalAlloc::Function { instance, .. }) => write!(w, " (fn: {instance})")?, Some(GlobalAlloc::VTable(ty, Some(trait_ref))) => { write!(w, " (vtable: impl {trait_ref} for {ty})")? } diff --git a/compiler/rustc_middle/src/ty/print/pretty.rs b/compiler/rustc_middle/src/ty/print/pretty.rs index 19700353f59..3f08ab33e10 100644 --- a/compiler/rustc_middle/src/ty/print/pretty.rs +++ b/compiler/rustc_middle/src/ty/print/pretty.rs @@ -1667,7 +1667,7 @@ pub trait PrettyPrinter<'tcx>: Printer<'tcx> + fmt::Write { Some(GlobalAlloc::Static(def_id)) => { p!(write("", def_id)) } - Some(GlobalAlloc::Function(_)) => p!(""), + Some(GlobalAlloc::Function { .. }) => p!(""), Some(GlobalAlloc::VTable(..)) => p!(""), None => p!(""), } @@ -1679,7 +1679,7 @@ pub trait PrettyPrinter<'tcx>: Printer<'tcx> + fmt::Write { ty::FnPtr(_) => { // FIXME: We should probably have a helper method to share code with the "Byte strings" // printing above (which also has to handle pointers to all sorts of things). - if let Some(GlobalAlloc::Function(instance)) = + if let Some(GlobalAlloc::Function { instance, .. }) = self.tcx().try_get_global_alloc(prov.alloc_id()) { self.typed_value( diff --git a/compiler/rustc_monomorphize/src/collector.rs b/compiler/rustc_monomorphize/src/collector.rs index 235743fccc8..ec16b716c10 100644 --- a/compiler/rustc_monomorphize/src/collector.rs +++ b/compiler/rustc_monomorphize/src/collector.rs @@ -1223,10 +1223,10 @@ fn collect_alloc<'tcx>(tcx: TyCtxt<'tcx>, alloc_id: AllocId, output: &mut MonoIt }); } } - GlobalAlloc::Function(fn_instance) => { - if should_codegen_locally(tcx, fn_instance) { - trace!("collecting {:?} with {:#?}", alloc_id, fn_instance); - output.push(create_fn_mono_item(tcx, fn_instance, DUMMY_SP)); + GlobalAlloc::Function { instance, .. } => { + if should_codegen_locally(tcx, instance) { + trace!("collecting {:?} with {:#?}", alloc_id, instance); + output.push(create_fn_mono_item(tcx, instance, DUMMY_SP)); } } GlobalAlloc::VTable(ty, trait_ref) => { diff --git a/compiler/rustc_passes/src/reachable.rs b/compiler/rustc_passes/src/reachable.rs index 6dd8eaf7e67..dee8ba7e87d 100644 --- a/compiler/rustc_passes/src/reachable.rs +++ b/compiler/rustc_passes/src/reachable.rs @@ -310,7 +310,7 @@ impl<'tcx> ReachableContext<'tcx> { GlobalAlloc::Static(def_id) => { self.propagate_item(Res::Def(self.tcx.def_kind(def_id), def_id)) } - GlobalAlloc::Function(instance) => { + GlobalAlloc::Function { instance, .. } => { // Manually visit to actually see the instance's `DefId`. Type visitors won't see it self.propagate_item(Res::Def( self.tcx.def_kind(instance.def_id()), diff --git a/compiler/rustc_smir/src/rustc_smir/convert/mir.rs b/compiler/rustc_smir/src/rustc_smir/convert/mir.rs index f15b82d0c03..9d5a14b5145 100644 --- a/compiler/rustc_smir/src/rustc_smir/convert/mir.rs +++ b/compiler/rustc_smir/src/rustc_smir/convert/mir.rs @@ -709,7 +709,7 @@ impl<'tcx> Stable<'tcx> for mir::interpret::GlobalAlloc<'tcx> { fn stable(&self, tables: &mut Tables<'_>) -> Self::T { match self { - mir::interpret::GlobalAlloc::Function(instance) => { + mir::interpret::GlobalAlloc::Function { instance, .. } => { GlobalAlloc::Function(instance.stable(tables)) } mir::interpret::GlobalAlloc::VTable(ty, trait_ref) => { diff --git a/src/tools/miri/src/shims/backtrace.rs b/src/tools/miri/src/shims/backtrace.rs index 06be9c1e63e..24a4b5f26a9 100644 --- a/src/tools/miri/src/shims/backtrace.rs +++ b/src/tools/miri/src/shims/backtrace.rs @@ -119,7 +119,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let (alloc_id, offset, _prov) = this.ptr_get_alloc_id(ptr)?; // This has to be an actual global fn ptr, not a dlsym function. - let fn_instance = if let Some(GlobalAlloc::Function(instance)) = + let fn_instance = if let Some(GlobalAlloc::Function { instance, .. }) = this.tcx.try_get_global_alloc(alloc_id) { instance diff --git a/src/tools/miri/tests/pass/function_pointers.rs b/src/tools/miri/tests/pass/function_pointers.rs index 36679b7180a..2aa3ebf2dd0 100644 --- a/src/tools/miri/tests/pass/function_pointers.rs +++ b/src/tools/miri/tests/pass/function_pointers.rs @@ -23,6 +23,7 @@ fn h(i: i32, j: i32) -> i32 { j * i * 7 } +#[inline(never)] fn i() -> i32 { 73 } @@ -77,7 +78,7 @@ fn main() { assert_eq!(indirect_mut3(h), 210); assert_eq!(indirect_once3(h), 210); // Check that `i` always has the same address. This is not guaranteed - // but Miri currently uses a fixed address for monomorphic functions. + // but Miri currently uses a fixed address for non-inlineable monomorphic functions. assert!(return_fn_ptr(i) == i); assert!(return_fn_ptr(i) as unsafe fn() -> i32 == i as fn() -> i32 as unsafe fn() -> i32); // Miri gives different addresses to different reifications of a generic function. diff --git a/src/tools/miri/tests/pass/issues/issue-91636.rs b/src/tools/miri/tests/pass/issues/issue-91636.rs index 21000bb68d2..00370165812 100644 --- a/src/tools/miri/tests/pass/issues/issue-91636.rs +++ b/src/tools/miri/tests/pass/issues/issue-91636.rs @@ -10,6 +10,7 @@ impl Function { } } +#[inline(never)] fn dummy(_: &str) {} fn main() {