Rollup merge of #122233 - RalfJung:custom-alloc-box, r=oli-obk

miri: do not apply aliasing restrictions to Box with custom allocator

This is the Miri side of https://github.com/rust-lang/rust/pull/122018. The "intrinsics with body" made this much more pleasant. :)

Fixes https://github.com/rust-lang/miri/issues/3341.
r? `@oli-obk`
This commit is contained in:
Matthias Krüger 2024-03-09 16:21:20 +01:00 committed by GitHub
commit b14e8054af
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 250 additions and 35 deletions

View File

@ -17,8 +17,8 @@ use rustc_middle::mir::interpret::{
ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance,
ValidationErrorInfo, ValidationErrorKind, ValidationErrorKind::*,
};
use rustc_middle::ty;
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, Ty};
use rustc_span::symbol::{sym, Symbol};
use rustc_target::abi::{
Abi, FieldIdx, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange,
@ -783,7 +783,11 @@ impl<'rt, 'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>> ValueVisitor<'mir, 'tcx, M>
}
#[inline]
fn visit_box(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
fn visit_box(
&mut self,
_box_ty: Ty<'tcx>,
op: &OpTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.check_safe_pointer(op, PointerKind::Box)?;
Ok(())
}

View File

@ -3,7 +3,7 @@
use rustc_index::IndexVec;
use rustc_middle::mir::interpret::InterpResult;
use rustc_middle::ty;
use rustc_middle::ty::{self, Ty};
use rustc_target::abi::FieldIdx;
use rustc_target::abi::{FieldsShape, VariantIdx, Variants};
@ -47,10 +47,10 @@ pub trait ValueVisitor<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>>: Sized {
Ok(())
}
/// Visits the given value as the pointer of a `Box`. There is nothing to recurse into.
/// The type of `v` will be a raw pointer, but this is a field of `Box<T>` and the
/// pointee type is the actual `T`.
/// The type of `v` will be a raw pointer to `T`, but this is a field of `Box<T>` and the
/// pointee type is the actual `T`. `box_ty` provides the full type of the `Box` itself.
#[inline(always)]
fn visit_box(&mut self, _v: &Self::V) -> InterpResult<'tcx> {
fn visit_box(&mut self, _box_ty: Ty<'tcx>, _v: &Self::V) -> InterpResult<'tcx> {
Ok(())
}
@ -144,7 +144,7 @@ pub trait ValueVisitor<'mir, 'tcx: 'mir, M: Machine<'mir, 'tcx>>: Sized {
assert_eq!(nonnull_ptr.layout().fields.count(), 1);
let raw_ptr = self.ecx().project_field(&nonnull_ptr, 0)?; // the actual raw ptr
// ... whose only field finally is a raw ptr we can dereference.
self.visit_box(&raw_ptr)?;
self.visit_box(ty, &raw_ptr)?;
// The second `Box` field is the allocator, which we recursively check for validity
// like in regular structs.

View File

@ -657,6 +657,10 @@ pub fn check_intrinsic_type(
sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)),
sym::simd_shuffle_generic => (2, 1, vec![param(0), param(0)], param(1)),
sym::retag_box_to_raw => {
(2, 0, vec![Ty::new_mut_ptr(tcx, param(0))], Ty::new_mut_ptr(tcx, param(0)))
}
other => {
tcx.dcx().emit_err(UnrecognizedIntrinsicFunction { span, name: other });
return;

View File

@ -2008,13 +2008,10 @@ impl<'tcx> Ty<'tcx> {
// Single-argument Box is always global. (for "minicore" tests)
return true;
};
if let Some(alloc_adt) = alloc.expect_ty().ty_adt_def() {
alloc.expect_ty().ty_adt_def().is_some_and(|alloc_adt| {
let global_alloc = tcx.require_lang_item(LangItem::GlobalAlloc, None);
alloc_adt.did() == global_alloc
} else {
// Allocator is not an ADT...
false
}
})
}
_ => false,
}

View File

@ -1450,6 +1450,7 @@ symbols! {
residual,
result,
resume,
retag_box_to_raw,
return_position_impl_trait_in_trait,
return_type_notation,
rhs,

View File

@ -155,6 +155,7 @@ use core::error::Error;
use core::fmt;
use core::future::Future;
use core::hash::{Hash, Hasher};
use core::intrinsics::retag_box_to_raw;
use core::iter::FusedIterator;
use core::marker::Tuple;
use core::marker::Unsize;
@ -1110,8 +1111,16 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
#[unstable(feature = "allocator_api", issue = "32838")]
#[inline]
pub fn into_raw_with_allocator(b: Self) -> (*mut T, A) {
let (leaked, alloc) = Box::into_unique(b);
(leaked.as_ptr(), alloc)
// This is the transition point from `Box` to raw pointers. For Stacked Borrows, these casts
// are relevant -- if this is a global allocator Box and we just get the pointer from `b.0`,
// it will have `Unique` permission, which is not what we want from a raw pointer. We could
// fix that by going through `&mut`, but then if this is *not* a global allocator Box, we'd
// be adding uniqueness assertions that we do not want. So for Miri's sake we pass this
// pointer through an intrinsic for box-to-raw casts, which can do the right thing wrt the
// aliasing model.
let b = mem::ManuallyDrop::new(b);
let alloc = unsafe { ptr::read(&b.1) };
(unsafe { retag_box_to_raw::<T, A>(b.0.as_ptr()) }, alloc)
}
#[unstable(
@ -1122,13 +1131,8 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
#[inline]
#[doc(hidden)]
pub fn into_unique(b: Self) -> (Unique<T>, A) {
// Box is recognized as a "unique pointer" by Stacked Borrows, but internally it is a
// raw pointer for the type system. Turning it directly into a raw pointer would not be
// recognized as "releasing" the unique pointer to permit aliased raw accesses,
// so all raw pointer methods have to go through `Box::leak`. Turning *that* to a raw pointer
// behaves correctly.
let alloc = unsafe { ptr::read(&b.1) };
(Unique::from(Box::leak(b)), alloc)
let (ptr, alloc) = Box::into_raw_with_allocator(b);
unsafe { (Unique::from(&mut *ptr), alloc) }
}
/// Returns a reference to the underlying allocator.
@ -1184,7 +1188,7 @@ impl<T: ?Sized, A: Allocator> Box<T, A> {
where
A: 'a,
{
unsafe { &mut *mem::ManuallyDrop::new(b).0.as_ptr() }
unsafe { &mut *Box::into_raw(b) }
}
/// Converts a `Box<T>` into a `Pin<Box<T>>`. If `T` does not implement [`Unpin`], then

View File

@ -2695,6 +2695,19 @@ pub unsafe fn vtable_size(_ptr: *const ()) -> usize {
unreachable!()
}
/// Retag a box pointer as part of casting it to a raw pointer. This is the `Box` equivalent of
/// `(x: &mut T) as *mut T`. The input pointer must be the pointer of a `Box` (passed as raw pointer
/// to avoid all questions around move semantics and custom allocators), and `A` must be the `Box`'s
/// allocator.
#[unstable(feature = "core_intrinsics", issue = "none")]
#[rustc_nounwind]
#[cfg_attr(not(bootstrap), rustc_intrinsic)]
#[cfg_attr(bootstrap, inline)]
pub unsafe fn retag_box_to_raw<T: ?Sized, A>(ptr: *mut T) -> *mut T {
// Miri needs to adjust the provenance, but for regular codegen this is not needed
ptr
}
// Some functions are defined here because they accidentally got made
// available in this module on stable. See <https://github.com/rust-lang/rust/issues/15702>.
// (`transmute` also falls into this category, but it cannot be wrapped due to the

View File

@ -5,7 +5,7 @@ use std::num::NonZero;
use smallvec::SmallVec;
use rustc_data_structures::fx::{FxHashMap, FxHashSet};
use rustc_middle::mir::RetagKind;
use rustc_middle::{mir::RetagKind, ty::Ty};
use rustc_target::abi::Size;
use crate::*;
@ -291,6 +291,19 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
}
fn retag_box_to_raw(
&mut self,
val: &ImmTy<'tcx, Provenance>,
alloc_ty: Ty<'tcx>,
) -> InterpResult<'tcx, ImmTy<'tcx, Provenance>> {
let this = self.eval_context_mut();
let method = this.machine.borrow_tracker.as_ref().unwrap().borrow().borrow_tracker_method;
match method {
BorrowTrackerMethod::StackedBorrows => this.sb_retag_box_to_raw(val, alloc_ty),
BorrowTrackerMethod::TreeBorrows => this.tb_retag_box_to_raw(val, alloc_ty),
}
}
fn retag_place_contents(
&mut self,
kind: RetagKind,

View File

@ -865,6 +865,24 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
this.sb_retag_reference(val, new_perm, RetagInfo { cause, in_field: false })
}
fn sb_retag_box_to_raw(
&mut self,
val: &ImmTy<'tcx, Provenance>,
alloc_ty: Ty<'tcx>,
) -> InterpResult<'tcx, ImmTy<'tcx, Provenance>> {
let this = self.eval_context_mut();
let is_global_alloc = alloc_ty.ty_adt_def().is_some_and(|adt| {
let global_alloc = this.tcx.require_lang_item(rustc_hir::LangItem::GlobalAlloc, None);
adt.did() == global_alloc
});
if is_global_alloc {
// Retag this as-if it was a mutable reference.
this.sb_retag_ptr_value(RetagKind::Raw, val)
} else {
Ok(val.clone())
}
}
fn sb_retag_place_contents(
&mut self,
kind: RetagKind,
@ -916,10 +934,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
self.ecx
}
fn visit_box(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> {
// Boxes get a weak protectors, since they may be deallocated.
let new_perm = NewPermission::from_box_ty(place.layout.ty, self.kind, self.ecx);
self.retag_ptr_inplace(place, new_perm)
fn visit_box(
&mut self,
box_ty: Ty<'tcx>,
place: &PlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx> {
// Only boxes for the global allocator get any special treatment.
if box_ty.is_box_global(*self.ecx.tcx) {
// Boxes get a weak protectors, since they may be deallocated.
let new_perm = NewPermission::from_box_ty(place.layout.ty, self.kind, self.ecx);
self.retag_ptr_inplace(place, new_perm)?;
}
Ok(())
}
fn visit_value(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> {

View File

@ -392,6 +392,15 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
}
}
fn tb_retag_box_to_raw(
&mut self,
val: &ImmTy<'tcx, Provenance>,
_alloc_ty: Ty<'tcx>,
) -> InterpResult<'tcx, ImmTy<'tcx, Provenance>> {
// Casts to raw pointers are NOPs in Tree Borrows.
Ok(val.clone())
}
/// Retag all pointers that are stored in this place.
fn tb_retag_place_contents(
&mut self,
@ -441,14 +450,22 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
/// Regardless of how `Unique` is handled, Boxes are always reborrowed.
/// When `Unique` is also reborrowed, then it behaves exactly like `Box`
/// except for the fact that `Box` has a non-zero-sized reborrow.
fn visit_box(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> {
let new_perm = NewPermission::from_unique_ty(
place.layout.ty,
self.kind,
self.ecx,
/* zero_size */ false,
);
self.retag_ptr_inplace(place, new_perm)
fn visit_box(
&mut self,
box_ty: Ty<'tcx>,
place: &PlaceTy<'tcx, Provenance>,
) -> InterpResult<'tcx> {
// Only boxes for the global allocator get any special treatment.
if box_ty.is_box_global(*self.ecx.tcx) {
let new_perm = NewPermission::from_unique_ty(
place.layout.ty,
self.kind,
self.ecx,
/* zero_size */ false,
);
self.retag_ptr_inplace(place, new_perm)?;
}
Ok(())
}
fn visit_value(&mut self, place: &PlaceTy<'tcx, Provenance>) -> InterpResult<'tcx> {

View File

@ -129,6 +129,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
this.write_bytes_ptr(ptr, iter::repeat(val_byte).take(byte_count.bytes_usize()))?;
}
// Memory model / provenance manipulation
"ptr_mask" => {
let [ptr, mask] = check_arg_count(args)?;
@ -139,6 +140,18 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriInterpCxExt<'mir, 'tcx> {
this.write_pointer(Pointer::new(ptr.provenance, masked_addr), dest)?;
}
"retag_box_to_raw" => {
let [ptr] = check_arg_count(args)?;
let alloc_ty = generic_args[1].expect_ty();
let val = this.read_immediate(ptr)?;
let new_val = if this.machine.borrow_tracker.is_some() {
this.retag_box_to_raw(&val, alloc_ty)?
} else {
val
};
this.write_immediate(*new_val, dest)?;
}
// We want to return either `true` or `false` at random, or else something like
// ```

View File

@ -0,0 +1,123 @@
//! Regression test for <https://github.com/rust-lang/miri/issues/3341>:
//! If `Box` has a local allocator, then it can't be `noalias` as the allocator
//! may want to access allocator state based on the data pointer.
//@revisions: stack tree
//@[tree]compile-flags: -Zmiri-tree-borrows
#![feature(allocator_api)]
#![feature(strict_provenance)]
use std::{
alloc::{AllocError, Allocator, Layout},
cell::{Cell, UnsafeCell},
ptr::{self, addr_of, NonNull},
thread::{self, ThreadId},
mem,
};
const BIN_SIZE: usize = 8;
// A bin represents a collection of blocks of a specific layout.
#[repr(align(128))]
struct MyBin {
top: Cell<usize>,
thread_id: ThreadId,
memory: UnsafeCell<[usize; BIN_SIZE]>,
}
impl MyBin {
fn pop(&self) -> Option<NonNull<u8>> {
let top = self.top.get();
if top == BIN_SIZE {
return None;
}
// Cast the *entire* thing to a raw pointer to not restrict its provenance.
let bin = self as *const MyBin;
let base_ptr = UnsafeCell::raw_get(unsafe{ addr_of!((*bin).memory )}).cast::<usize>();
let ptr = unsafe { NonNull::new_unchecked(base_ptr.add(top)) };
self.top.set(top + 1);
Some(ptr.cast())
}
// Pretends to not be a throwaway allocation method like this. A more realistic
// substitute is using intrusive linked lists, which requires access to the
// metadata of this bin as well.
unsafe fn push(&self, ptr: NonNull<u8>) {
// For now just check that this really is in this bin.
let start = self.memory.get().addr();
let end = start + BIN_SIZE * mem::size_of::<usize>();
let addr = ptr.addr().get();
assert!((start..end).contains(&addr));
}
}
// A collection of bins.
struct MyAllocator {
thread_id: ThreadId,
// Pretends to be some complex collection of bins, such as an array of linked lists.
bins: Box<[MyBin; 1]>,
}
impl MyAllocator {
fn new() -> Self {
let thread_id = thread::current().id();
MyAllocator {
thread_id,
bins: Box::new(
[MyBin {
top: Cell::new(0),
thread_id,
memory: UnsafeCell::default(),
}; 1],
),
}
}
// Pretends to be expensive finding a suitable bin for the layout.
fn find_bin(&self, layout: Layout) -> Option<&MyBin> {
if layout == Layout::new::<usize>() {
Some(&self.bins[0])
} else {
None
}
}
}
unsafe impl Allocator for MyAllocator {
fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
// Expensive bin search.
let bin = self.find_bin(layout).ok_or(AllocError)?;
let ptr = bin.pop().ok_or(AllocError)?;
Ok(NonNull::slice_from_raw_parts(ptr, layout.size()))
}
unsafe fn deallocate(&self, ptr: NonNull<u8>, _layout: Layout) {
// Since manually finding the corresponding bin of `ptr` is very expensive,
// doing pointer arithmetics is preferred.
// But this means we access `top` via `ptr` rather than `self`!
// That is fundamentally the source of the aliasing trouble in this example.
let their_bin = ptr.as_ptr().map_addr(|addr| addr & !127).cast::<MyBin>();
let thread_id = ptr::read(ptr::addr_of!((*their_bin).thread_id));
if self.thread_id == thread_id {
unsafe { (*their_bin).push(ptr) };
} else {
todo!("Deallocating from another thread")
}
}
}
// Make sure to involve `Box` in allocating these,
// as that's where `noalias` may come from.
fn v<T, A: Allocator>(t: T, a: A) -> Vec<T, A> {
(Box::new_in([t], a) as Box<[T], A>).into_vec()
}
fn main() {
assert!(mem::size_of::<MyBin>() <= 128); // if it grows bigger, the trick to access the "header" no longer works
let my_alloc = MyAllocator::new();
let a = v(1usize, &my_alloc);
let b = v(2usize, &my_alloc);
assert_eq!(a[0] + 1, b[0]);
assert_eq!(addr_of!(a[0]).wrapping_add(1), addr_of!(b[0]));
drop((a, b));
}