miri: make vtable addresses not globally unique

This commit is contained in:
Ralf Jung 2024-08-06 19:02:01 +02:00
parent 60d146580c
commit 5cab8ae4a4
13 changed files with 148 additions and 112 deletions

View File

@ -19,7 +19,7 @@ use rustc_target::spec::abi::Abi as CallAbi;
use super::{
throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation,
ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT,
};
/// Data returned by [`Machine::after_stack_pop`], and consumed by
@ -575,6 +575,14 @@ pub trait Machine<'tcx>: Sized {
{
eval(ecx, val, span, layout)
}
/// Returns the salt to be used for a deduplicated global alloation.
/// If the allocation is for a function, the instance is provided as well
/// (this lets Miri ensure unique addresses for some functions).
fn get_global_alloc_salt(
ecx: &InterpCx<'tcx, Self>,
instance: Option<ty::Instance<'tcx>>,
) -> usize;
}
/// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines
@ -677,4 +685,12 @@ pub macro compile_time_machine(<$tcx: lifetime>) {
let (prov, offset) = ptr.into_parts();
Some((prov.alloc_id(), offset, prov.immutable()))
}
#[inline(always)]
fn get_global_alloc_salt(
_ecx: &InterpCx<$tcx, Self>,
_instance: Option<ty::Instance<$tcx>>,
) -> usize {
CTFE_ALLOC_SALT
}
}

View File

@ -195,7 +195,10 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
pub fn fn_ptr(&mut self, fn_val: FnVal<'tcx, M::ExtraFnVal>) -> Pointer<M::Provenance> {
let id = match fn_val {
FnVal::Instance(instance) => self.tcx.reserve_and_set_fn_alloc(instance),
FnVal::Instance(instance) => {
let salt = M::get_global_alloc_salt(self, Some(instance));
self.tcx.reserve_and_set_fn_alloc(instance, salt)
}
FnVal::Other(extra) => {
// FIXME(RalfJung): Should we have a cache here?
let id = self.tcx.reserve_alloc_id();

View File

@ -1008,7 +1008,8 @@ where
// Use cache for immutable strings.
let ptr = if mutbl.is_not() {
// Use dedup'd allocation function.
let id = tcx.allocate_bytes_dedup(str.as_bytes());
let salt = M::get_global_alloc_salt(self, None);
let id = tcx.allocate_bytes_dedup(str.as_bytes(), salt);
// Turn untagged "global" pointers (obtained via `tcx`) into the machine pointer to the allocation.
M::adjust_alloc_root_pointer(&self, Pointer::from(id), Some(kind))?

View File

@ -28,7 +28,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
ensure_monomorphic_enough(*self.tcx, ty)?;
ensure_monomorphic_enough(*self.tcx, poly_trait_ref)?;
let vtable_symbolic_allocation = self.tcx.reserve_and_set_vtable_alloc(ty, poly_trait_ref);
let salt = M::get_global_alloc_salt(self, None);
let vtable_symbolic_allocation =
self.tcx.reserve_and_set_vtable_alloc(ty, poly_trait_ref, salt);
let vtable_ptr = self.global_root_pointer(Pointer::from(vtable_symbolic_allocation))?;
Ok(vtable_ptr.into())
}

View File

@ -13,7 +13,6 @@ use std::num::NonZero;
use std::{fmt, io};
use rustc_ast::LitKind;
use rustc_attr::InlineAttr;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::Lock;
use rustc_errors::ErrorGuaranteed;
@ -46,7 +45,7 @@ pub use self::pointer::{CtfeProvenance, Pointer, PointerArithmetic, Provenance};
pub use self::value::Scalar;
use crate::mir;
use crate::ty::codec::{TyDecoder, TyEncoder};
use crate::ty::{self, GenericArgKind, Instance, Ty, TyCtxt};
use crate::ty::{self, Instance, Ty, TyCtxt};
/// Uniquely identifies one of the following:
/// - A constant
@ -126,11 +125,10 @@ pub fn specialized_encode_alloc_id<'tcx, E: TyEncoder<I = TyCtxt<'tcx>>>(
AllocDiscriminant::Alloc.encode(encoder);
alloc.encode(encoder);
}
GlobalAlloc::Function { instance, unique } => {
GlobalAlloc::Function { instance } => {
trace!("encoding {:?} with {:#?}", alloc_id, instance);
AllocDiscriminant::Fn.encode(encoder);
instance.encode(encoder);
unique.encode(encoder);
}
GlobalAlloc::VTable(ty, poly_trait_ref) => {
trace!("encoding {:?} with {ty:#?}, {poly_trait_ref:#?}", alloc_id);
@ -219,8 +217,7 @@ impl<'s> AllocDecodingSession<'s> {
}
// Now decode the actual data.
let alloc_id = decoder.with_position(pos, |decoder| {
match alloc_kind {
let alloc_id = decoder.with_position(pos, |decoder| match alloc_kind {
AllocDiscriminant::Alloc => {
trace!("creating memory alloc ID");
let alloc = <ConstAllocation<'tcx> as Decodable<_>>::decode(decoder);
@ -231,11 +228,7 @@ impl<'s> AllocDecodingSession<'s> {
trace!("creating fn alloc ID");
let instance = ty::Instance::decode(decoder);
trace!("decoded fn alloc instance: {:?}", instance);
let unique = bool::decode(decoder);
// Here we cannot call `reserve_and_set_fn_alloc` as that would use a query, which
// is not possible in this context. That's why the allocation stores
// whether it is unique or not.
decoder.interner().reserve_and_set_fn_alloc_internal(instance, unique)
decoder.interner().reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT)
}
AllocDiscriminant::VTable => {
trace!("creating vtable alloc ID");
@ -243,7 +236,7 @@ impl<'s> AllocDecodingSession<'s> {
let poly_trait_ref =
<Option<ty::PolyExistentialTraitRef<'_>> as Decodable<D>>::decode(decoder);
trace!("decoded vtable alloc instance: {ty:?}, {poly_trait_ref:?}");
decoder.interner().reserve_and_set_vtable_alloc(ty, poly_trait_ref)
decoder.interner().reserve_and_set_vtable_alloc(ty, poly_trait_ref, CTFE_ALLOC_SALT)
}
AllocDiscriminant::Static => {
trace!("creating extern static alloc ID");
@ -251,7 +244,6 @@ impl<'s> AllocDecodingSession<'s> {
trace!("decoded static def-ID: {:?}", did);
decoder.interner().reserve_and_set_static_alloc(did)
}
}
});
*entry = State::Done(alloc_id);
@ -265,12 +257,7 @@ impl<'s> AllocDecodingSession<'s> {
#[derive(Debug, Clone, Eq, PartialEq, Hash, TyDecodable, TyEncodable, HashStable)]
pub enum GlobalAlloc<'tcx> {
/// The alloc ID is used as a function pointer.
Function {
instance: Instance<'tcx>,
/// Stores whether this instance is unique, i.e. all pointers to this function use the same
/// alloc ID.
unique: bool,
},
Function { instance: Instance<'tcx> },
/// This alloc ID points to a symbolic (not-reified) vtable.
VTable(Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>),
/// The alloc ID points to a "lazy" static variable that did not get computed (yet).
@ -323,14 +310,17 @@ impl<'tcx> GlobalAlloc<'tcx> {
}
}
pub const CTFE_ALLOC_SALT: usize = 0;
pub(crate) struct AllocMap<'tcx> {
/// Maps `AllocId`s to their corresponding allocations.
alloc_map: FxHashMap<AllocId, GlobalAlloc<'tcx>>,
/// Used to ensure that statics and functions only get one associated `AllocId`.
//
// FIXME: Should we just have two separate dedup maps for statics and functions each?
dedup: FxHashMap<GlobalAlloc<'tcx>, AllocId>,
/// Used to deduplicate global allocations: functions, vtables, string literals, ...
///
/// The `usize` is a "salt" used by Miri to make deduplication imperfect, thus better emulating
/// the actual guarantees.
dedup: FxHashMap<(GlobalAlloc<'tcx>, usize), AllocId>,
/// The `AllocId` to assign to the next requested ID.
/// Always incremented; never gets smaller.
@ -368,74 +358,40 @@ impl<'tcx> TyCtxt<'tcx> {
/// Reserves a new ID *if* this allocation has not been dedup-reserved before.
/// Should not be used for mutable memory.
fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>) -> AllocId {
fn reserve_and_set_dedup(self, alloc: GlobalAlloc<'tcx>, salt: usize) -> AllocId {
let mut alloc_map = self.alloc_map.lock();
if let GlobalAlloc::Memory(mem) = alloc {
if mem.inner().mutability.is_mut() {
bug!("trying to dedup-reserve mutable memory");
}
}
if let Some(&alloc_id) = alloc_map.dedup.get(&alloc) {
let alloc_salt = (alloc, salt);
if let Some(&alloc_id) = alloc_map.dedup.get(&alloc_salt) {
return alloc_id;
}
let id = alloc_map.reserve();
debug!("creating alloc {alloc:?} with id {id:?}");
alloc_map.alloc_map.insert(id, alloc.clone());
alloc_map.dedup.insert(alloc, id);
debug!("creating alloc {:?} with id {id:?}", alloc_salt.0);
alloc_map.alloc_map.insert(id, alloc_salt.0.clone());
alloc_map.dedup.insert(alloc_salt, id);
id
}
/// Generates an `AllocId` for a memory allocation. If the exact same memory has been
/// allocated before, this will return the same `AllocId`.
pub fn reserve_and_set_memory_dedup(self, mem: ConstAllocation<'tcx>) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Memory(mem))
pub fn reserve_and_set_memory_dedup(self, mem: ConstAllocation<'tcx>, salt: usize) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Memory(mem), salt)
}
/// Generates an `AllocId` for a static or return a cached one in case this function has been
/// called on the same static before.
pub fn reserve_and_set_static_alloc(self, static_id: DefId) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Static(static_id))
let salt = 0; // Statics have a guaranteed unique address, no salt added.
self.reserve_and_set_dedup(GlobalAlloc::Static(static_id), salt)
}
/// Generates an `AllocId` for a function. The caller must already have decided whether this
/// function obtains a unique AllocId or gets de-duplicated via the cache.
fn reserve_and_set_fn_alloc_internal(self, instance: Instance<'tcx>, unique: bool) -> AllocId {
let alloc = GlobalAlloc::Function { instance, unique };
if unique {
// Deduplicate.
self.reserve_and_set_dedup(alloc)
} else {
// Get a fresh ID.
let mut alloc_map = self.alloc_map.lock();
let id = alloc_map.reserve();
alloc_map.alloc_map.insert(id, alloc);
id
}
}
/// Generates an `AllocId` for a function. Depending on the function type,
/// this might get deduplicated or assigned a new ID each time.
pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>) -> AllocId {
// Functions cannot be identified by pointers, as asm-equal functions can get deduplicated
// by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be
// duplicated across crates. We thus generate a new `AllocId` for every mention of a
// function. This means that `main as fn() == main as fn()` is false, while `let x = main as
// fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify
// certain functions uniquely, e.g. for backtraces. So we identify whether codegen will
// actually emit duplicate functions. It does that when they have non-lifetime generics, or
// when they can be inlined. All other functions are given a unique address.
// This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied
// upon for anything. But if we don't do this, backtraces look terrible.
let is_generic = instance
.args
.into_iter()
.any(|kind| !matches!(kind.unpack(), GenericArgKind::Lifetime(_)));
let can_be_inlined = match self.codegen_fn_attrs(instance.def_id()).inline {
InlineAttr::Never => false,
_ => true,
};
let unique = !is_generic && !can_be_inlined;
self.reserve_and_set_fn_alloc_internal(instance, unique)
/// Generates an `AllocId` for a function. Will get deduplicated.
pub fn reserve_and_set_fn_alloc(self, instance: Instance<'tcx>, salt: usize) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::Function { instance }, salt)
}
/// Generates an `AllocId` for a (symbolic, not-reified) vtable. Will get deduplicated.
@ -443,8 +399,9 @@ impl<'tcx> TyCtxt<'tcx> {
self,
ty: Ty<'tcx>,
poly_trait_ref: Option<ty::PolyExistentialTraitRef<'tcx>>,
salt: usize,
) -> AllocId {
self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref))
self.reserve_and_set_dedup(GlobalAlloc::VTable(ty, poly_trait_ref), salt)
}
/// Interns the `Allocation` and return a new `AllocId`, even if there's already an identical

View File

@ -1438,11 +1438,11 @@ impl<'tcx> TyCtxt<'tcx> {
/// Allocates a read-only byte or string literal for `mir::interpret`.
/// Returns the same `AllocId` if called again with the same bytes.
pub fn allocate_bytes_dedup(self, bytes: &[u8]) -> interpret::AllocId {
pub fn allocate_bytes_dedup(self, bytes: &[u8], salt: usize) -> interpret::AllocId {
// Create an allocation that just contains these bytes.
let alloc = interpret::Allocation::from_bytes_byte_aligned_immutable(bytes);
let alloc = self.mk_const_alloc(alloc);
self.reserve_and_set_memory_dedup(alloc)
self.reserve_and_set_memory_dedup(alloc, salt)
}
/// Returns a range of the start/end indices specified with the

View File

@ -3,7 +3,7 @@ use std::fmt;
use rustc_ast::Mutability;
use rustc_macros::HashStable;
use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar};
use crate::mir::interpret::{alloc_range, AllocId, Allocation, Pointer, Scalar, CTFE_ALLOC_SALT};
use crate::ty::{self, Instance, PolyTraitRef, Ty, TyCtxt};
#[derive(Clone, Copy, PartialEq, HashStable)]
@ -73,6 +73,11 @@ pub(crate) fn vtable_min_entries<'tcx>(
/// Retrieves an allocation that represents the contents of a vtable.
/// Since this is a query, allocations are cached and not duplicated.
///
/// This is an "internal" `AllocId` that should never be used as a value in the interpreted program.
/// The interpreter should use `AllocId` that refer to a `GlobalAlloc::VTable` instead.
/// (This is similar to statics, which also have a similar "internal" `AllocId` storing their
/// initial contents.)
pub(super) fn vtable_allocation_provider<'tcx>(
tcx: TyCtxt<'tcx>,
key: (Ty<'tcx>, Option<ty::PolyExistentialTraitRef<'tcx>>),
@ -114,7 +119,7 @@ pub(super) fn vtable_allocation_provider<'tcx>(
VtblEntry::MetadataDropInPlace => {
if ty.needs_drop(tcx, ty::ParamEnv::reveal_all()) {
let instance = ty::Instance::resolve_drop_in_place(tcx, ty);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT);
let fn_ptr = Pointer::from(fn_alloc_id);
Scalar::from_pointer(fn_ptr, &tcx)
} else {
@ -127,7 +132,7 @@ pub(super) fn vtable_allocation_provider<'tcx>(
VtblEntry::Method(instance) => {
// Prepare the fn ptr we write into the vtable.
let instance = instance.polymorphize(tcx);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance);
let fn_alloc_id = tcx.reserve_and_set_fn_alloc(instance, CTFE_ALLOC_SALT);
let fn_ptr = Pointer::from(fn_alloc_id);
Scalar::from_pointer(fn_ptr, &tcx)
}

View File

@ -2,7 +2,9 @@
use rustc_ast as ast;
use rustc_hir::LangItem;
use rustc_middle::mir::interpret::{Allocation, LitToConstError, LitToConstInput, Scalar};
use rustc_middle::mir::interpret::{
Allocation, LitToConstError, LitToConstInput, Scalar, CTFE_ALLOC_SALT,
};
use rustc_middle::mir::*;
use rustc_middle::thir::*;
use rustc_middle::ty::{
@ -140,7 +142,7 @@ fn lit_to_mir_constant<'tcx>(
ConstValue::Slice { data: allocation, meta: allocation.inner().size().bytes() }
}
(ast::LitKind::ByteStr(data, _), ty::Ref(_, inner_ty, _)) if inner_ty.is_array() => {
let id = tcx.allocate_bytes_dedup(data);
let id = tcx.allocate_bytes_dedup(data, CTFE_ALLOC_SALT);
ConstValue::Scalar(Scalar::from_pointer(id.into(), &tcx))
}
(ast::LitKind::CStr(data, _), ty::Ref(_, inner_ty, _)) if matches!(inner_ty.kind(), ty::Adt(def, _) if tcx.is_lang_item(def.did(), LangItem::CStr)) =>

View File

@ -56,6 +56,7 @@ extern crate either;
extern crate tracing;
// The rustc crates we need
extern crate rustc_attr;
extern crate rustc_apfloat;
extern crate rustc_ast;
extern crate rustc_const_eval;

View File

@ -11,6 +11,7 @@ use rand::rngs::StdRng;
use rand::Rng;
use rand::SeedableRng;
use rustc_attr::InlineAttr;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
#[allow(unused)]
use rustc_data_structures::static_assert_size;
@ -47,10 +48,10 @@ pub const SIGRTMIN: i32 = 34;
/// `SIGRTMAX` - `SIGRTMIN` >= 8 (which is the value of `_POSIX_RTSIG_MAX`)
pub const SIGRTMAX: i32 = 42;
/// Each const has multiple addresses, but only this many. Since const allocations are never
/// deallocated, choosing a new [`AllocId`] and thus base address for each evaluation would
/// produce unbounded memory usage.
const ADDRS_PER_CONST: usize = 16;
/// Each anonymous global (constant, vtable, function pointer, ...) has multiple addresses, but only
/// this many. Since const allocations are never deallocated, choosing a new [`AllocId`] and thus
/// base address for each evaluation would produce unbounded memory usage.
const ADDRS_PER_ANON_GLOBAL: usize = 32;
/// Extra data stored with each stack frame
pub struct FrameExtra<'tcx> {
@ -1372,7 +1373,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
catch_unwind: None,
timing,
is_user_relevant: ecx.machine.is_user_relevant(&frame),
salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_CONST,
salt: ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL,
};
Ok(frame.with_extra(extra))
@ -1518,4 +1519,40 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
Entry::Occupied(oe) => Ok(oe.get().clone()),
}
}
fn get_global_alloc_salt(
ecx: &InterpCx<'tcx, Self>,
instance: Option<ty::Instance<'tcx>>,
) -> usize {
let unique = if let Some(instance) = instance {
// Functions cannot be identified by pointers, as asm-equal functions can get deduplicated
// by the linker (we set the "unnamed_addr" attribute for LLVM) and functions can be
// duplicated across crates. We thus generate a new `AllocId` for every mention of a
// function. This means that `main as fn() == main as fn()` is false, while `let x = main as
// fn(); x == x` is true. However, as a quality-of-life feature it can be useful to identify
// certain functions uniquely, e.g. for backtraces. So we identify whether codegen will
// actually emit duplicate functions. It does that when they have non-lifetime generics, or
// when they can be inlined. All other functions are given a unique address.
// This is not a stable guarantee! The `inline` attribute is a hint and cannot be relied
// upon for anything. But if we don't do this, backtraces look terrible.
let is_generic = instance
.args
.into_iter()
.any(|kind| !matches!(kind.unpack(), ty::GenericArgKind::Lifetime(_)));
let can_be_inlined = match ecx.tcx.codegen_fn_attrs(instance.def_id()).inline {
InlineAttr::Never => false,
_ => true,
};
!is_generic && !can_be_inlined
} else {
// Non-functions are never unique.
false
};
// Always use the same salt if the allocation is unique.
if unique {
CTFE_ALLOC_SALT
} else {
ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL
}
}
}

View File

@ -141,7 +141,17 @@ fn unsized_dyn_autoderef() {
}
*/
fn vtable_ptr_eq() {
use std::{fmt, ptr};
// We don't always get the same vtable when casting this to a wide pointer.
let x = &2;
let x_wide = x as &dyn fmt::Display;
assert!((0..256).any(|_| !ptr::eq(x as &dyn fmt::Display, x_wide)));
}
fn main() {
ref_box_dyn();
box_box_trait();
vtable_ptr_eq();
}

View File

@ -82,7 +82,8 @@ fn main() {
assert!(return_fn_ptr(i) == i);
assert!(return_fn_ptr(i) as unsafe fn() -> i32 == i as fn() -> i32 as unsafe fn() -> i32);
// Miri gives different addresses to different reifications of a generic function.
assert!(return_fn_ptr(f) != f);
// at least if we try often enough.
assert!((0..256).any(|_| return_fn_ptr(f) != f));
// However, if we only turn `f` into a function pointer and use that pointer,
// it is equal to itself.
let f2 = f as fn() -> i32;

View File

@ -75,7 +75,8 @@ fn rc_fat_ptr_eq() {
let p = Rc::new(1) as Rc<dyn Debug>;
let a: *const dyn Debug = &*p;
let r = Rc::into_raw(p);
assert!(a == r);
// Only compare the pointer parts, as the vtable might differ.
assert!(a as *const () == r as *const ());
drop(unsafe { Rc::from_raw(r) });
}