Auto merge of #129778 - RalfJung:interp-lossy-typed-copy, r=saethlin

interpret: make typed copies lossy wrt provenance and padding

A "typed copy" in Rust can be a lossy process: when copying at type `usize` (or any other non-pointer type), if the original memory had any provenance, that provenance is lost. When copying at pointer type, if the original memory had partial provenance (i.e., not the same provenance for all bytes), that provenance is lost. When copying any type with padding, the contents of padding are lost.

This PR equips our validity-checking pass with the ability to reset provenance and padding according to those rules. Can be reviewed commit-by-commit. The first three commits are just preparation without any functional change.
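As a rough conceptual model of that reset (editor's sketch, not the PR's actual code, which works on the interpreter's own allocation and value representation), the non-pointer and padding rules boil down to something like the following; the partial-provenance rule for pointer-typed copies is omitted for brevity:

```rust
#[derive(Clone, Copy, PartialEq, Debug)]
enum AbstractByte {
    Uninit,
    Data(u8),             // initialized byte without provenance
    PtrFragment(u8, u32), // initialized byte carrying provenance (simplified to a u32 tag)
}

/// Apply the lossy part of a typed copy to one value's bytes:
/// padding bytes become uninit, and for non-pointer types provenance is stripped.
fn reset_for_typed_copy(bytes: &mut [AbstractByte], padding: &[bool], is_pointer_type: bool) {
    for (byte, &is_padding) in bytes.iter_mut().zip(padding) {
        if is_padding {
            *byte = AbstractByte::Uninit;
        } else if !is_pointer_type {
            if let AbstractByte::PtrFragment(v, _) = *byte {
                *byte = AbstractByte::Data(v);
            }
        }
    }
}

fn main() {
    // A 4-byte value whose second byte is padding; the other bytes carry provenance.
    let mut bytes = [
        AbstractByte::PtrFragment(0xAA, 7),
        AbstractByte::Data(0xBB),
        AbstractByte::PtrFragment(0xCC, 7),
        AbstractByte::PtrFragment(0xDD, 7),
    ];
    let padding = [false, true, false, false];
    reset_for_typed_copy(&mut bytes, &padding, /*is_pointer_type*/ false);
    assert_eq!(bytes[1], AbstractByte::Uninit);     // padding reset
    assert_eq!(bytes[0], AbstractByte::Data(0xAA)); // provenance stripped
}
```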

Fixes https://github.com/rust-lang/miri/issues/845
Fixes https://github.com/rust-lang/miri/issues/2182
bors 2024-09-10 02:18:51 +00:00
commit 304b7f801b
49 changed files with 1246 additions and 310 deletions

View File

@ -94,7 +94,7 @@ fn eval_body_using_ecx<'tcx, R: InterpretationResult<'tcx>>(
let intern_result = intern_const_alloc_recursive(ecx, intern_kind, &ret);
// Since evaluation had no errors, validate the resulting constant.
const_validate_mplace(&ecx, &ret, cid)?;
const_validate_mplace(ecx, &ret, cid)?;
// Only report this after validation, as validation produces much better diagnostics.
// FIXME: ensure validation always reports this and stop making interning care about it.
@ -391,7 +391,7 @@ fn eval_in_interpreter<'tcx, R: InterpretationResult<'tcx>>(
#[inline(always)]
fn const_validate_mplace<'tcx>(
ecx: &InterpCx<'tcx, CompileTimeMachine<'tcx>>,
ecx: &mut InterpCx<'tcx, CompileTimeMachine<'tcx>>,
mplace: &MPlaceTy<'tcx>,
cid: GlobalId<'tcx>,
) -> Result<(), ErrorHandled> {

View File

@ -1,16 +1,16 @@
use std::borrow::Borrow;
use std::borrow::{Borrow, Cow};
use std::fmt;
use std::hash::Hash;
use std::ops::ControlFlow;
use rustc_ast::Mutability;
use rustc_data_structures::fx::{FxIndexMap, IndexEntry};
use rustc_data_structures::fx::{FxHashMap, FxIndexMap, IndexEntry};
use rustc_hir::def_id::{DefId, LocalDefId};
use rustc_hir::{self as hir, LangItem, CRATE_HIR_ID};
use rustc_middle::mir::AssertMessage;
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty::layout::{FnAbiOf, TyAndLayout};
use rustc_middle::ty::{self, TyCtxt};
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_middle::{bug, mir};
use rustc_span::symbol::{sym, Symbol};
use rustc_span::Span;
@ -24,8 +24,8 @@ use crate::fluent_generated as fluent;
use crate::interpret::{
self, compile_time_machine, err_ub, throw_exhaust, throw_inval, throw_ub_custom, throw_unsup,
throw_unsup_format, AllocId, AllocRange, ConstAllocation, CtfeProvenance, FnArg, Frame,
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic, Scalar,
StackPopCleanup,
GlobalAlloc, ImmTy, InterpCx, InterpResult, MPlaceTy, OpTy, Pointer, PointerArithmetic,
RangeSet, Scalar, StackPopCleanup,
};
/// When hitting this many interpreted terminators we emit a deny by default lint
@ -65,6 +65,9 @@ pub struct CompileTimeMachine<'tcx> {
/// storing the result in the given `AllocId`.
/// Used to prevent reads from a static's base allocation, as that may allow for self-initialization loops.
pub(crate) static_root_ids: Option<(AllocId, LocalDefId)>,
/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
}
#[derive(Copy, Clone)]
@ -99,6 +102,7 @@ impl<'tcx> CompileTimeMachine<'tcx> {
can_access_mut_global,
check_alignment,
static_root_ids: None,
union_data_ranges: FxHashMap::default(),
}
}
}
@ -766,6 +770,19 @@ impl<'tcx> interpret::Machine<'tcx> for CompileTimeMachine<'tcx> {
}
Ok(())
}
fn cached_union_data_range<'e>(
ecx: &'e mut InterpCx<'tcx, Self>,
ty: Ty<'tcx>,
compute_range: impl FnOnce() -> RangeSet,
) -> Cow<'e, RangeSet> {
if ecx.tcx.sess.opts.unstable_opts.extra_const_ub_checks {
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
} else {
// Don't bother caching, we're only doing one validation at the end anyway.
Cow::Owned(compute_range())
}
}
}
// Please do not add any code below the above `Machine` trait impl. I (oli-obk) plan more cleanups

View File

@ -7,7 +7,7 @@ use rustc_target::abi::{self, TagEncoding, VariantIdx, Variants};
use tracing::{instrument, trace};
use super::{
err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Readable, Scalar, Writeable,
err_ub, throw_ub, ImmTy, InterpCx, InterpResult, Machine, Projectable, Scalar, Writeable,
};
impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
@ -60,7 +60,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
#[instrument(skip(self), level = "trace")]
pub fn read_discriminant(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, VariantIdx> {
let ty = op.layout().ty;
trace!("read_discriminant_value {:#?}", op.layout());

View File

@ -10,6 +10,7 @@ use rustc_apfloat::{Float, FloatConvert};
use rustc_ast::{InlineAsmOptions, InlineAsmTemplatePiece};
use rustc_middle::query::TyCtxtAt;
use rustc_middle::ty::layout::TyAndLayout;
use rustc_middle::ty::Ty;
use rustc_middle::{mir, ty};
use rustc_span::def_id::DefId;
use rustc_span::Span;
@ -19,7 +20,7 @@ use rustc_target::spec::abi::Abi as CallAbi;
use super::{
throw_unsup, throw_unsup_format, AllocBytes, AllocId, AllocKind, AllocRange, Allocation,
ConstAllocation, CtfeProvenance, FnArg, Frame, ImmTy, InterpCx, InterpResult, MPlaceTy,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, CTFE_ALLOC_SALT,
MemoryKind, Misalignment, OpTy, PlaceTy, Pointer, Provenance, RangeSet, CTFE_ALLOC_SALT,
};
/// Data returned by [`Machine::after_stack_pop`], and consumed by
@ -578,6 +579,15 @@ pub trait Machine<'tcx>: Sized {
ecx: &InterpCx<'tcx, Self>,
instance: Option<ty::Instance<'tcx>>,
) -> usize;
fn cached_union_data_range<'e>(
_ecx: &'e mut InterpCx<'tcx, Self>,
_ty: Ty<'tcx>,
compute_range: impl FnOnce() -> RangeSet,
) -> Cow<'e, RangeSet> {
// Default to no caching.
Cow::Owned(compute_range())
}
}
/// A lot of the flexibility above is just needed for `Miri`, but all "compile-time" machines

View File

@ -8,9 +8,8 @@
use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::cell::Cell;
use std::collections::VecDeque;
use std::{fmt, ptr};
use std::{fmt, mem, ptr};
use rustc_ast::Mutability;
use rustc_data_structures::fx::{FxHashSet, FxIndexMap};
@ -118,7 +117,7 @@ pub struct Memory<'tcx, M: Machine<'tcx>> {
/// This stores whether we are currently doing reads purely for the purpose of validation.
/// Those reads do not trigger the machine's hooks for memory reads.
/// Needless to say, this must only be set with great care!
validation_in_progress: Cell<bool>,
validation_in_progress: bool,
}
/// A reference to some allocation that was already bounds-checked for the given region
@ -145,7 +144,7 @@ impl<'tcx, M: Machine<'tcx>> Memory<'tcx, M> {
alloc_map: M::MemoryMap::default(),
extra_fn_ptr_map: FxIndexMap::default(),
dead_alloc_map: FxIndexMap::default(),
validation_in_progress: Cell::new(false),
validation_in_progress: false,
}
}
@ -682,7 +681,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
// We want to call the hook on *all* accesses that involve an AllocId, including zero-sized
// accesses. That means we cannot rely on the closure above or the `Some` branch below. We
// do this after `check_and_deref_ptr` to ensure some basic sanity has already been checked.
if !self.memory.validation_in_progress.get() {
if !self.memory.validation_in_progress {
if let Ok((alloc_id, ..)) = self.ptr_try_get_alloc_id(ptr, size_i64) {
M::before_alloc_read(self, alloc_id)?;
}
@ -690,7 +689,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
if let Some((alloc_id, offset, prov, alloc)) = ptr_and_alloc {
let range = alloc_range(offset, size);
if !self.memory.validation_in_progress.get() {
if !self.memory.validation_in_progress {
M::before_memory_read(
self.tcx,
&self.machine,
@ -766,11 +765,14 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
let parts = self.get_ptr_access(ptr, size)?;
if let Some((alloc_id, offset, prov)) = parts {
let tcx = self.tcx;
let validation_in_progress = self.memory.validation_in_progress;
// FIXME: can we somehow avoid looking up the allocation twice here?
// We cannot call `get_raw_mut` inside `check_and_deref_ptr` as that would duplicate `&mut self`.
let (alloc, machine) = self.get_alloc_raw_mut(alloc_id)?;
let range = alloc_range(offset, size);
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
if !validation_in_progress {
M::before_memory_write(tcx, machine, &mut alloc.extra, (alloc_id, prov), range)?;
}
Ok(Some(AllocRefMut { alloc, range, tcx: *tcx, alloc_id }))
} else {
Ok(None)
@ -1014,16 +1016,16 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
///
/// We do this so Miri's allocation access tracking does not show the validation
/// reads as spurious accesses.
pub fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
pub fn run_for_validation<R>(&mut self, f: impl FnOnce(&mut Self) -> R) -> R {
// This deliberately uses `==` on `bool` to follow the pattern
// `assert!(val.replace(new) == old)`.
assert!(
self.memory.validation_in_progress.replace(true) == false,
mem::replace(&mut self.memory.validation_in_progress, true) == false,
"`validation_in_progress` was already set"
);
let res = f();
let res = f(self);
assert!(
self.memory.validation_in_progress.replace(false) == true,
mem::replace(&mut self.memory.validation_in_progress, false) == true,
"`validation_in_progress` was unset by someone else"
);
res
@ -1115,6 +1117,10 @@ impl<'a, 'tcx, M: Machine<'tcx>> std::fmt::Debug for DumpAllocs<'a, 'tcx, M> {
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
AllocRefMut<'a, 'tcx, Prov, Extra, Bytes>
{
pub fn as_ref<'b>(&'b self) -> AllocRef<'b, 'tcx, Prov, Extra, Bytes> {
AllocRef { alloc: self.alloc, range: self.range, tcx: self.tcx, alloc_id: self.alloc_id }
}
/// `range` is relative to this allocation reference, not the base of the allocation.
pub fn write_scalar(&mut self, range: AllocRange, val: Scalar<Prov>) -> InterpResult<'tcx> {
let range = self.range.subrange(range);
@ -1130,13 +1136,30 @@ impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes>
self.write_scalar(alloc_range(offset, self.tcx.data_layout().pointer_size), val)
}
/// Mark the given sub-range (relative to this allocation reference) as uninitialized.
pub fn write_uninit(&mut self, range: AllocRange) -> InterpResult<'tcx> {
let range = self.range.subrange(range);
Ok(self
.alloc
.write_uninit(&self.tcx, range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}
/// Mark the entire referenced range as uninitialized
pub fn write_uninit(&mut self) -> InterpResult<'tcx> {
pub fn write_uninit_full(&mut self) -> InterpResult<'tcx> {
Ok(self
.alloc
.write_uninit(&self.tcx, self.range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}
/// Remove all provenance in the reference range.
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
Ok(self
.alloc
.clear_provenance(&self.tcx, self.range)
.map_err(|e| e.to_interp_error(self.alloc_id))?)
}
}
impl<'tcx, 'a, Prov: Provenance, Extra, Bytes: AllocBytes> AllocRef<'a, 'tcx, Prov, Extra, Bytes> {
@ -1278,7 +1301,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
};
let src_alloc = self.get_alloc_raw(src_alloc_id)?;
let src_range = alloc_range(src_offset, size);
assert!(!self.memory.validation_in_progress.get(), "we can't be copying during validation");
assert!(!self.memory.validation_in_progress, "we can't be copying during validation");
M::before_memory_read(
tcx,
&self.machine,

View File

@ -33,11 +33,11 @@ pub(crate) use self::intrinsics::eval_nullary_intrinsic;
pub use self::machine::{compile_time_machine, AllocMap, Machine, MayLeak, ReturnAction};
pub use self::memory::{AllocKind, AllocRef, AllocRefMut, FnVal, Memory, MemoryKind};
use self::operand::Operand;
pub use self::operand::{ImmTy, Immediate, OpTy, Readable};
pub use self::operand::{ImmTy, Immediate, OpTy};
pub use self::place::{MPlaceTy, MemPlaceMeta, PlaceTy, Writeable};
use self::place::{MemPlace, Place};
pub use self::projection::{OffsetMode, Projectable};
pub use self::stack::{Frame, FrameInfo, LocalState, StackPopCleanup, StackPopInfo};
pub(crate) use self::util::create_static_alloc;
pub use self::validity::{CtfeValidationMode, RefTracking};
pub use self::validity::{CtfeValidationMode, RangeSet, RefTracking};
pub use self::visitor::ValueVisitor;

View File

@ -111,6 +111,46 @@ impl<Prov: Provenance> Immediate<Prov> {
Immediate::Uninit => bug!("Got uninit where a scalar or scalar pair was expected"),
}
}
/// Assert that this immediate is a valid value for the given ABI.
pub fn assert_matches_abi(self, abi: Abi, cx: &impl HasDataLayout) {
match (self, abi) {
(Immediate::Scalar(scalar), Abi::Scalar(s)) => {
assert_eq!(scalar.size(), s.size(cx));
if !matches!(s.primitive(), abi::Pointer(..)) {
assert!(matches!(scalar, Scalar::Int(..)));
}
}
(Immediate::ScalarPair(a_val, b_val), Abi::ScalarPair(a, b)) => {
assert_eq!(a_val.size(), a.size(cx));
if !matches!(a.primitive(), abi::Pointer(..)) {
assert!(matches!(a_val, Scalar::Int(..)));
}
assert_eq!(b_val.size(), b.size(cx));
if !matches!(b.primitive(), abi::Pointer(..)) {
assert!(matches!(b_val, Scalar::Int(..)));
}
}
(Immediate::Uninit, _) => {}
_ => {
bug!("value {self:?} does not match ABI {abi:?})",)
}
}
}
pub fn clear_provenance<'tcx>(&mut self) -> InterpResult<'tcx> {
match self {
Immediate::Scalar(s) => {
s.clear_provenance()?;
}
Immediate::ScalarPair(a, b) => {
a.clear_provenance()?;
b.clear_provenance()?;
}
Immediate::Uninit => {}
}
Ok(())
}
}
// ScalarPair needs a type to interpret, so we often have an immediate and a type together
@ -490,32 +530,6 @@ impl<'tcx, Prov: Provenance> Projectable<'tcx, Prov> for OpTy<'tcx, Prov> {
}
}
/// The `Readable` trait describes interpreter values that one can read from.
pub trait Readable<'tcx, Prov: Provenance>: Projectable<'tcx, Prov> {
fn as_mplace_or_imm(&self) -> Either<MPlaceTy<'tcx, Prov>, ImmTy<'tcx, Prov>>;
}
impl<'tcx, Prov: Provenance> Readable<'tcx, Prov> for OpTy<'tcx, Prov> {
#[inline(always)]
fn as_mplace_or_imm(&self) -> Either<MPlaceTy<'tcx, Prov>, ImmTy<'tcx, Prov>> {
self.as_mplace_or_imm()
}
}
impl<'tcx, Prov: Provenance> Readable<'tcx, Prov> for MPlaceTy<'tcx, Prov> {
#[inline(always)]
fn as_mplace_or_imm(&self) -> Either<MPlaceTy<'tcx, Prov>, ImmTy<'tcx, Prov>> {
Left(self.clone())
}
}
impl<'tcx, Prov: Provenance> Readable<'tcx, Prov> for ImmTy<'tcx, Prov> {
#[inline(always)]
fn as_mplace_or_imm(&self) -> Either<MPlaceTy<'tcx, Prov>, ImmTy<'tcx, Prov>> {
Right(self.clone())
}
}
impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// Try reading an immediate in memory; this is interesting particularly for `ScalarPair`.
/// Returns `None` if the layout does not permit loading this as a value.
@ -588,9 +602,9 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// ConstProp needs it, though.
pub fn read_immediate_raw(
&self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, Either<MPlaceTy<'tcx, M::Provenance>, ImmTy<'tcx, M::Provenance>>> {
Ok(match src.as_mplace_or_imm() {
Ok(match src.to_op(self)?.as_mplace_or_imm() {
Left(ref mplace) => {
if let Some(val) = self.read_immediate_from_mplace_raw(mplace)? {
Right(val)
@ -608,7 +622,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
#[inline(always)]
pub fn read_immediate(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> {
if !matches!(
op.layout().abi,
@ -627,7 +641,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// Read a scalar from a place
pub fn read_scalar(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, Scalar<M::Provenance>> {
Ok(self.read_immediate(op)?.to_scalar())
}
@ -638,21 +652,21 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// Read a pointer from a place.
pub fn read_pointer(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, Pointer<Option<M::Provenance>>> {
self.read_scalar(op)?.to_pointer(self)
}
/// Read a pointer-sized unsigned integer from a place.
pub fn read_target_usize(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, u64> {
self.read_scalar(op)?.to_target_usize(self)
}
/// Read a pointer-sized signed integer from a place.
pub fn read_target_isize(
&self,
op: &impl Readable<'tcx, M::Provenance>,
op: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, i64> {
self.read_scalar(op)?.to_target_isize(self)
}
@ -717,7 +731,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
) -> InterpResult<'tcx, OpTy<'tcx, M::Provenance>> {
match place.as_mplace_or_local() {
Left(mplace) => Ok(mplace.into()),
Right((local, offset, locals_addr)) => {
Right((local, offset, locals_addr, _)) => {
debug_assert!(place.layout.is_sized()); // only sized locals can ever be `Place::Local`.
debug_assert_eq!(locals_addr, self.frame().locals_addr());
let base = self.local_to_op(local, None)?;

View File

@ -15,7 +15,7 @@ use tracing::{instrument, trace};
use super::{
alloc_range, mir_assign_valid_types, AllocRef, AllocRefMut, CheckAlignMsg, CtfeProvenance,
ImmTy, Immediate, InterpCx, InterpResult, Machine, MemoryKind, Misalignment, OffsetMode, OpTy,
Operand, Pointer, Projectable, Provenance, Readable, Scalar,
Operand, Pointer, Projectable, Provenance, Scalar,
};
#[derive(Copy, Clone, Hash, PartialEq, Eq, Debug)]
@ -180,7 +180,8 @@ pub(super) enum Place<Prov: Provenance = CtfeProvenance> {
Ptr(MemPlace<Prov>),
/// To support alloc-free locals, we are able to write directly to a local. The offset indicates
/// where in the local this place is located; if it is `None`, no projection has been applied.
/// where in the local this place is located; if it is `None`, no projection has been applied
/// and the type of the place is exactly the type of the local.
/// Such projections are meaningful even if the offset is 0, since they can change layouts.
/// (Without that optimization, we'd just always be a `MemPlace`.)
/// `Local` places always refer to the current stack frame, so they are unstable under
@ -231,10 +232,12 @@ impl<'tcx, Prov: Provenance> PlaceTy<'tcx, Prov> {
#[inline(always)]
pub fn as_mplace_or_local(
&self,
) -> Either<MPlaceTy<'tcx, Prov>, (mir::Local, Option<Size>, usize)> {
) -> Either<MPlaceTy<'tcx, Prov>, (mir::Local, Option<Size>, usize, TyAndLayout<'tcx>)> {
match self.place {
Place::Ptr(mplace) => Left(MPlaceTy { mplace, layout: self.layout }),
Place::Local { local, offset, locals_addr } => Right((local, offset, locals_addr)),
Place::Local { local, offset, locals_addr } => {
Right((local, offset, locals_addr, self.layout))
}
}
}
@ -277,7 +280,7 @@ impl<'tcx, Prov: Provenance> Projectable<'tcx, Prov> for PlaceTy<'tcx, Prov> {
) -> InterpResult<'tcx, Self> {
Ok(match self.as_mplace_or_local() {
Left(mplace) => mplace.offset_with_meta(offset, mode, meta, layout, ecx)?.into(),
Right((local, old_offset, locals_addr)) => {
Right((local, old_offset, locals_addr, _)) => {
debug_assert!(layout.is_sized(), "unsized locals should live in memory");
assert_matches!(meta, MemPlaceMeta::None); // we couldn't store it anyway...
// `Place::Local` are always in-bounds of their surrounding local, so we can just
@ -328,9 +331,7 @@ impl<'tcx, Prov: Provenance> OpTy<'tcx, Prov> {
/// The `Writeable` trait describes interpreter values that can be written to.
pub trait Writeable<'tcx, Prov: Provenance>: Projectable<'tcx, Prov> {
fn as_mplace_or_local(
&self,
) -> Either<MPlaceTy<'tcx, Prov>, (mir::Local, Option<Size>, usize, TyAndLayout<'tcx>)>;
fn to_place(&self) -> PlaceTy<'tcx, Prov>;
fn force_mplace<M: Machine<'tcx, Provenance = Prov>>(
&self,
@ -340,11 +341,8 @@ pub trait Writeable<'tcx, Prov: Provenance>: Projectable<'tcx, Prov> {
impl<'tcx, Prov: Provenance> Writeable<'tcx, Prov> for PlaceTy<'tcx, Prov> {
#[inline(always)]
fn as_mplace_or_local(
&self,
) -> Either<MPlaceTy<'tcx, Prov>, (mir::Local, Option<Size>, usize, TyAndLayout<'tcx>)> {
self.as_mplace_or_local()
.map_right(|(local, offset, locals_addr)| (local, offset, locals_addr, self.layout))
fn to_place(&self) -> PlaceTy<'tcx, Prov> {
self.clone()
}
#[inline(always)]
@ -358,10 +356,8 @@ impl<'tcx, Prov: Provenance> Writeable<'tcx, Prov> for PlaceTy<'tcx, Prov> {
impl<'tcx, Prov: Provenance> Writeable<'tcx, Prov> for MPlaceTy<'tcx, Prov> {
#[inline(always)]
fn as_mplace_or_local(
&self,
) -> Either<MPlaceTy<'tcx, Prov>, (mir::Local, Option<Size>, usize, TyAndLayout<'tcx>)> {
Left(self.clone())
fn to_place(&self) -> PlaceTy<'tcx, Prov> {
self.clone().into()
}
#[inline(always)]
@ -436,7 +432,7 @@ where
#[instrument(skip(self), level = "trace")]
pub fn deref_pointer(
&self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> {
if src.layout().ty.is_box() {
// Derefer should have removed all Box derefs.
@ -562,6 +558,40 @@ where
Ok(place)
}
/// Given a place, returns either the underlying mplace or a reference to where the value of
/// this place is stored.
fn as_mplace_or_mutable_local(
&mut self,
place: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<
'tcx,
Either<MPlaceTy<'tcx, M::Provenance>, (&mut Immediate<M::Provenance>, TyAndLayout<'tcx>)>,
> {
Ok(match place.to_place().as_mplace_or_local() {
Left(mplace) => Left(mplace),
Right((local, offset, locals_addr, layout)) => {
if offset.is_some() {
// This has been projected to a part of this local, or had the type changed.
// FIXME: there are cases where we could still avoid allocating an mplace.
Left(place.force_mplace(self)?)
} else {
debug_assert_eq!(locals_addr, self.frame().locals_addr());
debug_assert_eq!(self.layout_of_local(self.frame(), local, None)?, layout);
match self.frame_mut().locals[local].access_mut()? {
Operand::Indirect(mplace) => {
// The local is in memory.
Left(MPlaceTy { mplace: *mplace, layout })
}
Operand::Immediate(local_val) => {
// The local still has the optimized representation.
Right((local_val, layout))
}
}
}
}
})
}
/// Write an immediate to a place
#[inline(always)]
#[instrument(skip(self), level = "trace")]
@ -574,9 +604,11 @@ where
if M::enforce_validity(self, dest.layout()) {
// Data got changed, better make sure it matches the type!
// Also needed to reset padding.
self.validate_operand(
&dest.to_op(self)?,
&dest.to_place(),
M::enforce_validity_recursively(self, dest.layout()),
/*reset_provenance_and_padding*/ true,
)?;
}
@ -606,67 +638,27 @@ where
/// Write an immediate to a place.
/// If you use this you are responsible for validating that things got copied at the
/// right type.
fn write_immediate_no_validate(
pub(super) fn write_immediate_no_validate(
&mut self,
src: Immediate<M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
assert!(dest.layout().is_sized(), "Cannot write unsized immediate data");
// See if we can avoid an allocation. This is the counterpart to `read_immediate_raw`,
// but not factored as a separate function.
let mplace = match dest.as_mplace_or_local() {
Right((local, offset, locals_addr, layout)) => {
if offset.is_some() {
// This has been projected to a part of this local. We could have complicated
// logic to still keep this local as an `Operand`... but it's much easier to
// just fall back to the indirect path.
dest.force_mplace(self)?
} else {
debug_assert_eq!(locals_addr, self.frame().locals_addr());
match self.frame_mut().locals[local].access_mut()? {
Operand::Immediate(local_val) => {
// Local can be updated in-place.
*local_val = src;
// Double-check that the value we are storing and the local fit to each other.
// (*After* doing the update for borrow checker reasons.)
if cfg!(debug_assertions) {
let local_layout =
self.layout_of_local(&self.frame(), local, None)?;
match (src, local_layout.abi) {
(Immediate::Scalar(scalar), Abi::Scalar(s)) => {
assert_eq!(scalar.size(), s.size(self))
}
(
Immediate::ScalarPair(a_val, b_val),
Abi::ScalarPair(a, b),
) => {
assert_eq!(a_val.size(), a.size(self));
assert_eq!(b_val.size(), b.size(self));
}
(Immediate::Uninit, _) => {}
(src, abi) => {
bug!(
"value {src:?} cannot be written into local with type {} (ABI {abi:?})",
local_layout.ty
)
}
};
}
return Ok(());
}
Operand::Indirect(mplace) => {
// The local is in memory, go on below.
MPlaceTy { mplace: *mplace, layout }
}
}
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, local_layout)) => {
// Local can be updated in-place.
*local_val = src;
// Double-check that the value we are storing and the local fit to each other.
if cfg!(debug_assertions) {
src.assert_matches_abi(local_layout.abi, self);
}
}
Left(mplace) => mplace, // already referring to memory
};
// This is already in memory, write there.
self.write_immediate_to_mplace_no_validate(src, mplace.layout, mplace.mplace)
Left(mplace) => {
self.write_immediate_to_mplace_no_validate(src, mplace.layout, mplace.mplace)?;
}
}
Ok(())
}
/// Write an immediate to memory.
@ -678,6 +670,9 @@ where
layout: TyAndLayout<'tcx>,
dest: MemPlace<M::Provenance>,
) -> InterpResult<'tcx> {
if cfg!(debug_assertions) {
value.assert_matches_abi(layout.abi, self);
}
// Note that it is really important that the type here is the right one, and matches the
// type things are read at. In case `value` is a `ScalarPair`, we don't do any magic here
// to handle padding properly, which is only correct if we never look at this data with the
@ -691,15 +686,7 @@ where
match value {
Immediate::Scalar(scalar) => {
let Abi::Scalar(s) = layout.abi else {
span_bug!(
self.cur_span(),
"write_immediate_to_mplace: invalid Scalar layout: {layout:#?}",
)
};
let size = s.size(&tcx);
assert_eq!(size, layout.size, "abi::Scalar size does not match layout size");
alloc.write_scalar(alloc_range(Size::ZERO, size), scalar)
alloc.write_scalar(alloc_range(Size::ZERO, scalar.size()), scalar)
}
Immediate::ScalarPair(a_val, b_val) => {
let Abi::ScalarPair(a, b) = layout.abi else {
@ -709,18 +696,19 @@ where
layout
)
};
let (a_size, b_size) = (a.size(&tcx), b.size(&tcx));
let b_offset = a_size.align_to(b.align(&tcx).abi);
let b_offset = a.size(&tcx).align_to(b.align(&tcx).abi);
assert!(b_offset.bytes() > 0); // in `operand_field` we use the offset to tell apart the fields
// It is tempting to verify `b_offset` against `layout.fields.offset(1)`,
// but that does not work: We could be a newtype around a pair, then the
// fields do not match the `ScalarPair` components.
alloc.write_scalar(alloc_range(Size::ZERO, a_size), a_val)?;
alloc.write_scalar(alloc_range(b_offset, b_size), b_val)
alloc.write_scalar(alloc_range(Size::ZERO, a_val.size()), a_val)?;
alloc.write_scalar(alloc_range(b_offset, b_val.size()), b_val)?;
// We don't have to reset padding here, `write_immediate` will anyway do a validation run.
Ok(())
}
Immediate::Uninit => alloc.write_uninit(),
Immediate::Uninit => alloc.write_uninit_full(),
}
}
@ -728,35 +716,38 @@ where
&mut self,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
let mplace = match dest.as_mplace_or_local() {
Left(mplace) => mplace,
Right((local, offset, locals_addr, layout)) => {
if offset.is_some() {
// This has been projected to a part of this local. We could have complicated
// logic to still keep this local as an `Operand`... but it's much easier to
// just fall back to the indirect path.
// FIXME: share the logic with `write_immediate_no_validate`.
dest.force_mplace(self)?
} else {
debug_assert_eq!(locals_addr, self.frame().locals_addr());
match self.frame_mut().locals[local].access_mut()? {
Operand::Immediate(local) => {
*local = Immediate::Uninit;
return Ok(());
}
Operand::Indirect(mplace) => {
// The local is in memory, go on below.
MPlaceTy { mplace: *mplace, layout }
}
}
}
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, _local_layout)) => {
*local_val = Immediate::Uninit;
}
};
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
// Zero-sized access
return Ok(());
};
alloc.write_uninit()?;
Left(mplace) => {
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
// Zero-sized access
return Ok(());
};
alloc.write_uninit_full()?;
}
}
Ok(())
}
/// Remove all provenance in the given place.
pub fn clear_provenance(
&mut self,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
match self.as_mplace_or_mutable_local(&dest.to_place())? {
Right((local_val, _local_layout)) => {
local_val.clear_provenance()?;
}
Left(mplace) => {
let Some(mut alloc) = self.get_place_alloc_mut(&mplace)? else {
// Zero-sized access
return Ok(());
};
alloc.clear_provenance()?;
}
}
Ok(())
}
@ -768,7 +759,7 @@ where
#[inline(always)]
pub(super) fn copy_op_no_dest_validation(
&mut self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.copy_op_inner(
@ -781,7 +772,7 @@ where
#[inline(always)]
pub fn copy_op_allow_transmute(
&mut self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.copy_op_inner(
@ -794,7 +785,7 @@ where
#[inline(always)]
pub fn copy_op(
&mut self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.copy_op_inner(
@ -808,28 +799,35 @@ where
#[instrument(skip(self), level = "trace")]
fn copy_op_inner(
&mut self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
allow_transmute: bool,
validate_dest: bool,
) -> InterpResult<'tcx> {
// Generally for transmutation, data must be valid both at the old and new type.
// But if the types are the same, the 2nd validation below suffices.
if src.layout().ty != dest.layout().ty && M::enforce_validity(self, src.layout()) {
self.validate_operand(
&src.to_op(self)?,
M::enforce_validity_recursively(self, src.layout()),
)?;
}
// These are technically *two* typed copies: `src` is a not-yet-loaded value,
// so we're doing a typed copy at `src` type from there to some intermediate storage.
// And then we're doing a second typed copy from that intermediate storage to `dest`.
// But as an optimization, we only make a single direct copy here.
// Do the actual copy.
self.copy_op_no_validate(src, dest, allow_transmute)?;
if validate_dest && M::enforce_validity(self, dest.layout()) {
// Data got changed, better make sure it matches the type!
let dest = dest.to_place();
// Given that there were two typed copies, we have to ensure this is valid at both types,
// and we have to ensure this loses provenance and padding according to both types.
// But if the types are identical, we only do one pass.
if allow_transmute && src.layout().ty != dest.layout().ty {
self.validate_operand(
&dest.transmute(src.layout(), self)?,
M::enforce_validity_recursively(self, src.layout()),
/*reset_provenance_and_padding*/ true,
)?;
}
self.validate_operand(
&dest.to_op(self)?,
&dest,
M::enforce_validity_recursively(self, dest.layout()),
/*reset_provenance_and_padding*/ true,
)?;
}
@ -843,7 +841,7 @@ where
#[instrument(skip(self), level = "trace")]
fn copy_op_no_validate(
&mut self,
src: &impl Readable<'tcx, M::Provenance>,
src: &impl Projectable<'tcx, M::Provenance>,
dest: &impl Writeable<'tcx, M::Provenance>,
allow_transmute: bool,
) -> InterpResult<'tcx> {

View File

@ -4,6 +4,7 @@
//! That's useful because it means other passes (e.g. promotion) can rely on `const`s
//! to be const-safe.
use std::borrow::Cow;
use std::fmt::Write;
use std::hash::Hash;
use std::num::NonZero;
@ -16,22 +17,22 @@ use rustc_hir as hir;
use rustc_middle::bug;
use rustc_middle::mir::interpret::ValidationErrorKind::{self, *};
use rustc_middle::mir::interpret::{
ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance,
alloc_range, ExpectedKind, InterpError, InvalidMetaKind, Misalignment, PointerKind, Provenance,
UnsupportedOpInfo, ValidationErrorInfo,
};
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, Ty};
use rustc_middle::ty::layout::{LayoutCx, LayoutOf, TyAndLayout};
use rustc_middle::ty::{self, Ty, TyCtxt};
use rustc_span::symbol::{sym, Symbol};
use rustc_target::abi::{
Abi, FieldIdx, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange,
Abi, FieldIdx, FieldsShape, Scalar as ScalarAbi, Size, VariantIdx, Variants, WrappingRange,
};
use tracing::trace;
use super::machine::AllocMap;
use super::{
err_ub, format_interp_error, throw_ub, AllocId, AllocKind, CheckInAllocMsg, GlobalAlloc, ImmTy,
Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, OpTy, Pointer, Projectable,
Scalar, ValueVisitor,
Immediate, InterpCx, InterpResult, MPlaceTy, Machine, MemPlaceMeta, PlaceTy, Pointer,
Projectable, Scalar, ValueVisitor,
};
// for the validation errors
@ -125,6 +126,7 @@ pub enum PathElem {
EnumTag,
CoroutineTag,
DynDowncast,
Vtable,
}
/// Extra things to check for during validation of CTFE results.
@ -163,22 +165,22 @@ impl<T: Clone + Eq + Hash + std::fmt::Debug, PATH: Default> RefTracking<T, PATH>
pub fn empty() -> Self {
RefTracking { seen: FxHashSet::default(), todo: vec![] }
}
pub fn new(op: T) -> Self {
pub fn new(val: T) -> Self {
let mut ref_tracking_for_consts =
RefTracking { seen: FxHashSet::default(), todo: vec![(op.clone(), PATH::default())] };
ref_tracking_for_consts.seen.insert(op);
RefTracking { seen: FxHashSet::default(), todo: vec![(val.clone(), PATH::default())] };
ref_tracking_for_consts.seen.insert(val);
ref_tracking_for_consts
}
pub fn next(&mut self) -> Option<(T, PATH)> {
self.todo.pop()
}
fn track(&mut self, op: T, path: impl FnOnce() -> PATH) {
if self.seen.insert(op.clone()) {
trace!("Recursing below ptr {:#?}", op);
fn track(&mut self, val: T, path: impl FnOnce() -> PATH) {
if self.seen.insert(val.clone()) {
trace!("Recursing below ptr {:#?}", val);
let path = path();
// Remember to come back to this later.
self.todo.push((op, path));
self.todo.push((val, path));
}
}
}
@ -204,11 +206,62 @@ fn write_path(out: &mut String, path: &[PathElem]) {
// not the root.
Deref => write!(out, ".<deref>"),
DynDowncast => write!(out, ".<dyn-downcast>"),
Vtable => write!(out, ".<vtable>"),
}
.unwrap()
}
}
/// Represents a set of `Size` values as a sorted list of ranges.
// These are (offset, length) pairs, and they are sorted and mutually disjoint,
// and never adjacent (i.e. there's always a gap between two of them).
#[derive(Debug, Clone)]
pub struct RangeSet(Vec<(Size, Size)>);
impl RangeSet {
fn add_range(&mut self, offset: Size, size: Size) {
if size.bytes() == 0 {
// No need to track empty ranges.
return;
}
let v = &mut self.0;
// We scan for a partition point where the left partition is all the elements that end
// strictly before we start. Those are elements that are too "low" to merge with us.
let idx =
v.partition_point(|&(other_offset, other_size)| other_offset + other_size < offset);
// Now we want to either merge with the first element of the second partition, or insert ourselves before that.
if let Some(&(other_offset, other_size)) = v.get(idx)
&& offset + size >= other_offset
{
// Their end is >= our start (otherwise it would not be in the 2nd partition) and
// our end is >= their start. This means we can merge the ranges.
let new_start = other_offset.min(offset);
let mut new_end = (other_offset + other_size).max(offset + size);
// We grew to the right, so merge with overlapping/adjacent elements.
// (We also may have grown to the left, but that can never make us adjacent with
// anything there since we selected the first such candidate via `partition_point`.)
let mut scan_right = 1;
while let Some(&(next_offset, next_size)) = v.get(idx + scan_right)
&& new_end >= next_offset
{
// Increase our size to absorb the next element.
new_end = new_end.max(next_offset + next_size);
// Look at the next element.
scan_right += 1;
}
// Update the element we grew.
v[idx] = (new_start, new_end - new_start);
// Remove the elements we absorbed (if any).
if scan_right > 1 {
drop(v.drain((idx + 1)..(idx + scan_right)));
}
} else {
// Insert new element.
v.insert(idx, (offset, size));
}
}
}
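// Illustration (editor's sketch, not part of this change): how `add_range`
// keeps the list sorted, disjoint, and non-adjacent.
//     let mut s = RangeSet(Vec::new());
//     s.add_range(Size::from_bytes(0), Size::from_bytes(4)); // {[0..4)}
//     s.add_range(Size::from_bytes(8), Size::from_bytes(4)); // {[0..4), [8..12)}
//     s.add_range(Size::from_bytes(4), Size::from_bytes(4)); // touches both, so all three merge into {[0..12)}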
struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> {
/// The `path` may be pushed to, but the part that is present when a function
/// starts must not be changed! `visit_fields` and `visit_array` rely on
@ -217,7 +270,17 @@ struct ValidityVisitor<'rt, 'tcx, M: Machine<'tcx>> {
ref_tracking: Option<&'rt mut RefTracking<MPlaceTy<'tcx, M::Provenance>, Vec<PathElem>>>,
/// `None` indicates this is not validating for CTFE (but for runtime).
ctfe_mode: Option<CtfeValidationMode>,
ecx: &'rt InterpCx<'tcx, M>,
ecx: &'rt mut InterpCx<'tcx, M>,
/// Whether provenance should be reset outside of pointers (emulating the effect of a typed
/// copy).
reset_provenance_and_padding: bool,
/// This tracks which byte ranges in this value contain data; the remaining bytes are padding.
/// The ideal representation here would be pointer-length pairs, but to keep things more compact
/// we only store a (range) set of offsets -- the base pointer is the same throughout the entire
/// visit, after all.
/// If this is `Some`, then `reset_provenance_and_padding` must be true (but not vice versa:
/// we might not track data vs padding bytes if the operand isn't stored in memory anyway).
data_bytes: Option<RangeSet>,
}
impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
@ -287,8 +350,14 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
// arrays/slices
ty::Array(..) | ty::Slice(..) => PathElem::ArrayElem(field),
// dyn* vtables
ty::Dynamic(_, _, ty::DynKind::DynStar) if field == 1 => PathElem::Vtable,
// dyn traits
ty::Dynamic(..) => PathElem::DynDowncast,
ty::Dynamic(..) => {
assert_eq!(field, 0);
PathElem::DynDowncast
}
// nothing else has an aggregate layout
_ => bug!("aggregate_field_path_elem: got non-aggregate type {:?}", layout.ty),
@ -314,11 +383,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
fn read_immediate(
&self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
expected: ExpectedKind,
) -> InterpResult<'tcx, ImmTy<'tcx, M::Provenance>> {
Ok(try_validation!(
self.ecx.read_immediate(op),
self.ecx.read_immediate(val),
self.path,
Ub(InvalidUninitBytes(None)) =>
Uninit { expected },
@ -332,10 +401,40 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
fn read_scalar(
&self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
expected: ExpectedKind,
) -> InterpResult<'tcx, Scalar<M::Provenance>> {
Ok(self.read_immediate(op, expected)?.to_scalar())
Ok(self.read_immediate(val, expected)?.to_scalar())
}
fn deref_pointer(
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
expected: ExpectedKind,
) -> InterpResult<'tcx, MPlaceTy<'tcx, M::Provenance>> {
// Not using `ecx.deref_pointer` since we want to use our `read_immediate` wrapper.
let imm = self.read_immediate(val, expected)?;
// Reset provenance: ensure slice tail metadata does not preserve provenance,
// and ensure all pointers do not preserve partial provenance.
if self.reset_provenance_and_padding {
if matches!(imm.layout.abi, Abi::Scalar(..)) {
// A thin pointer. If it has provenance, we don't have to do anything.
// If it does not, ensure we clear the provenance in memory.
if matches!(imm.to_scalar(), Scalar::Int(..)) {
self.ecx.clear_provenance(val)?;
}
} else {
// A wide pointer. This means we have to worry both about the pointer itself and the
// metadata. We do the lazy thing and just write back the value we got. Just
// clearing provenance in a targeted manner would be more efficient, but unless this
// is a perf hotspot it's just not worth the effort.
self.ecx.write_immediate_no_validate(*imm, val)?;
}
// The entire thing is data, not padding.
self.add_data_range_place(val);
}
// Now turn it into a place.
self.ecx.ref_to_mplace(&imm)
}
fn check_wide_ptr_meta(
@ -376,11 +475,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
/// Check a reference or `Box`.
fn check_safe_pointer(
&mut self,
value: &OpTy<'tcx, M::Provenance>,
value: &PlaceTy<'tcx, M::Provenance>,
ptr_kind: PointerKind,
) -> InterpResult<'tcx> {
// Not using `deref_pointer` since we want to use our `read_immediate` wrapper.
let place = self.ecx.ref_to_mplace(&self.read_immediate(value, ptr_kind.into())?)?;
let place = self.deref_pointer(value, ptr_kind.into())?;
// Handle wide pointers.
// Check metadata early, for better diagnostics
if place.layout.is_unsized() {
@ -564,31 +662,39 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
/// Note that not all of these have `FieldsShape::Primitive`, e.g. wide references.
fn try_visit_primitive(
&mut self,
value: &OpTy<'tcx, M::Provenance>,
value: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, bool> {
// Go over all the primitive types
let ty = value.layout.ty;
match ty.kind() {
ty::Bool => {
let value = self.read_scalar(value, ExpectedKind::Bool)?;
let scalar = self.read_scalar(value, ExpectedKind::Bool)?;
try_validation!(
value.to_bool(),
scalar.to_bool(),
self.path,
Ub(InvalidBool(..)) => ValidationErrorKind::InvalidBool {
value: format!("{value:x}"),
value: format!("{scalar:x}"),
}
);
if self.reset_provenance_and_padding {
self.ecx.clear_provenance(value)?;
self.add_data_range_place(value);
}
Ok(true)
}
ty::Char => {
let value = self.read_scalar(value, ExpectedKind::Char)?;
let scalar = self.read_scalar(value, ExpectedKind::Char)?;
try_validation!(
value.to_char(),
scalar.to_char(),
self.path,
Ub(InvalidChar(..)) => ValidationErrorKind::InvalidChar {
value: format!("{value:x}"),
value: format!("{scalar:x}"),
}
);
if self.reset_provenance_and_padding {
self.ecx.clear_provenance(value)?;
self.add_data_range_place(value);
}
Ok(true)
}
ty::Float(_) | ty::Int(_) | ty::Uint(_) => {
@ -602,11 +708,14 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
ExpectedKind::Int
},
)?;
if self.reset_provenance_and_padding {
self.ecx.clear_provenance(value)?;
self.add_data_range_place(value);
}
Ok(true)
}
ty::RawPtr(..) => {
let place =
self.ecx.ref_to_mplace(&self.read_immediate(value, ExpectedKind::RawPtr)?)?;
let place = self.deref_pointer(value, ExpectedKind::RawPtr)?;
if place.layout.is_unsized() {
self.check_wide_ptr_meta(place.meta(), place.layout)?;
}
@ -617,11 +726,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
Ok(true)
}
ty::FnPtr(..) => {
let value = self.read_scalar(value, ExpectedKind::FnPtr)?;
let scalar = self.read_scalar(value, ExpectedKind::FnPtr)?;
// If we check references recursively, also check that this points to a function.
if let Some(_) = self.ref_tracking {
let ptr = value.to_pointer(self.ecx)?;
let ptr = scalar.to_pointer(self.ecx)?;
let _fn = try_validation!(
self.ecx.get_ptr_fn(ptr),
self.path,
@ -631,10 +740,18 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
// FIXME: Check if the signature matches
} else {
// Otherwise (for standalone Miri), we have to still check it to be non-null.
if self.ecx.scalar_may_be_null(value)? {
if self.ecx.scalar_may_be_null(scalar)? {
throw_validation_failure!(self.path, NullFnPtr);
}
}
if self.reset_provenance_and_padding {
// Make sure we do not preserve partial provenance. This matches the thin
// pointer handling in `deref_pointer`.
if matches!(scalar, Scalar::Int(..)) {
self.ecx.clear_provenance(value)?;
}
self.add_data_range_place(value);
}
Ok(true)
}
ty::Never => throw_validation_failure!(self.path, NeverVal),
@ -716,13 +833,178 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValidityVisitor<'rt, 'tcx, M> {
}
}
fn in_mutable_memory(&self, op: &OpTy<'tcx, M::Provenance>) -> bool {
if let Some(mplace) = op.as_mplace_or_imm().left() {
fn in_mutable_memory(&self, val: &PlaceTy<'tcx, M::Provenance>) -> bool {
if let Some(mplace) = val.as_mplace_or_local().left() {
if let Some(alloc_id) = mplace.ptr().provenance.and_then(|p| p.get_alloc_id()) {
return mutability(self.ecx, alloc_id).is_mut();
mutability(self.ecx, alloc_id).is_mut()
} else {
// No memory at all.
false
}
} else {
// A local variable -- definitely mutable.
true
}
}
/// Add the given pointer-length pair to the "data" range of this visit.
fn add_data_range(&mut self, ptr: Pointer<Option<M::Provenance>>, size: Size) {
if let Some(data_bytes) = self.data_bytes.as_mut() {
// We only have to store the offset, the rest is the same for all pointers here.
let (_prov, offset) = ptr.into_parts();
// Add this.
data_bytes.add_range(offset, size);
};
}
/// Add the entire given place to the "data" range of this visit.
fn add_data_range_place(&mut self, place: &PlaceTy<'tcx, M::Provenance>) {
// Only sized places can be added this way.
debug_assert!(place.layout.abi.is_sized());
if let Some(data_bytes) = self.data_bytes.as_mut() {
let offset = Self::data_range_offset(self.ecx, place);
data_bytes.add_range(offset, place.layout.size);
}
}
/// Convert a place into the offset it starts at, for the purpose of data_range tracking.
/// Must only be called if `data_bytes` is `Some(_)`.
fn data_range_offset(ecx: &InterpCx<'tcx, M>, place: &PlaceTy<'tcx, M::Provenance>) -> Size {
// The presence of `data_bytes` implies that our place is in memory.
let ptr = ecx
.place_to_op(place)
.expect("place must be in memory")
.as_mplace_or_imm()
.expect_left("place must be in memory")
.ptr();
let (_prov, offset) = ptr.into_parts();
offset
}
fn reset_padding(&mut self, place: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
let Some(data_bytes) = self.data_bytes.as_mut() else { return Ok(()) };
// Our value must be in memory, otherwise we would not have set up `data_bytes`.
let mplace = self.ecx.force_allocation(place)?;
// Determine starting offset and size.
let (_prov, start_offset) = mplace.ptr().into_parts();
let (size, _align) = self
.ecx
.size_and_align_of_mplace(&mplace)?
.unwrap_or((mplace.layout.size, mplace.layout.align.abi));
// If there is no padding at all, we can skip the rest: check for
// a single data range covering the entire value.
if data_bytes.0 == &[(start_offset, size)] {
return Ok(());
}
// Get a handle for the allocation. Do this only once, to avoid looking up the same
// allocation over and over again. (Though to be fair, iterating the value already does
// exactly that.)
let Some(mut alloc) = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)? else {
// A ZST, no padding to clear.
return Ok(());
};
// Add a "finalizer" data range at the end, so that the iteration below finds all gaps
// between ranges.
data_bytes.0.push((start_offset + size, Size::ZERO));
// Iterate, and reset gaps.
let mut padding_cleared_until = start_offset;
for &(offset, size) in data_bytes.0.iter() {
assert!(
offset >= padding_cleared_until,
"reset_padding on {}: previous field ended at offset {}, next field starts at {} (and has a size of {} bytes)",
mplace.layout.ty,
(padding_cleared_until - start_offset).bytes(),
(offset - start_offset).bytes(),
size.bytes(),
);
if offset > padding_cleared_until {
// We found padding. Adjust the range to be relative to `alloc`, and make it uninit.
let padding_start = padding_cleared_until - start_offset;
let padding_size = offset - padding_cleared_until;
let range = alloc_range(padding_start, padding_size);
trace!("reset_padding on {}: resetting padding range {range:?}", mplace.layout.ty);
alloc.write_uninit(range)?;
}
padding_cleared_until = offset + size;
}
assert!(padding_cleared_until == start_offset + size);
Ok(())
}
/// Computes the data range of this union type:
/// which bytes are inside a field (i.e., not padding.)
fn union_data_range<'e>(
ecx: &'e mut InterpCx<'tcx, M>,
layout: TyAndLayout<'tcx>,
) -> Cow<'e, RangeSet> {
assert!(layout.ty.is_union());
assert!(layout.abi.is_sized(), "there are no unsized unions");
let layout_cx = LayoutCx { tcx: *ecx.tcx, param_env: ecx.param_env };
return M::cached_union_data_range(ecx, layout.ty, || {
let mut out = RangeSet(Vec::new());
union_data_range_uncached(&layout_cx, layout, Size::ZERO, &mut out);
out
});
/// Helper for recursive traversal: add data ranges of the given type to `out`.
fn union_data_range_uncached<'tcx>(
cx: &LayoutCx<'tcx, TyCtxt<'tcx>>,
layout: TyAndLayout<'tcx>,
base_offset: Size,
out: &mut RangeSet,
) {
// If this is a ZST, we don't contain any data. In particular, this helps us to quickly
// skip over huge arrays of ZST.
if layout.is_zst() {
return;
}
// Just recursively add all the fields of everything to the output.
match &layout.fields {
FieldsShape::Primitive => {
out.add_range(base_offset, layout.size);
}
&FieldsShape::Union(fields) => {
// Currently, all fields start at offset 0 (relative to `base_offset`).
for field in 0..fields.get() {
let field = layout.field(cx, field);
union_data_range_uncached(cx, field, base_offset, out);
}
}
&FieldsShape::Array { stride, count } => {
let elem = layout.field(cx, 0);
// Fast-path for large arrays of simple types that do not contain any padding.
if elem.abi.is_scalar() {
out.add_range(base_offset, elem.size * count);
} else {
for idx in 0..count {
// This repeats the same computation for every array element... but the alternative
// is to allocate temporary storage for a dedicated `out` set for the array element,
// and replicating that N times. Is that better?
union_data_range_uncached(cx, elem, base_offset + idx * stride, out);
}
}
}
FieldsShape::Arbitrary { offsets, .. } => {
for (field, &offset) in offsets.iter_enumerated() {
let field = layout.field(cx, field.as_usize());
union_data_range_uncached(cx, field, base_offset + offset, out);
}
}
}
// Don't forget potential other variants.
match &layout.variants {
Variants::Single { .. } => {
// Fully handled above.
}
Variants::Multiple { variants, .. } => {
for variant in variants.indices() {
let variant = layout.for_variant(cx, variant);
union_data_range_uncached(cx, variant, base_offset, out);
}
}
}
}
false
}
}
@ -774,7 +1056,7 @@ fn mutability<'tcx>(ecx: &InterpCx<'tcx, impl Machine<'tcx>>, alloc_id: AllocId)
}
impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt, 'tcx, M> {
type V = OpTy<'tcx, M::Provenance>;
type V = PlaceTy<'tcx, M::Provenance>;
#[inline(always)]
fn ecx(&self) -> &InterpCx<'tcx, M> {
@ -783,11 +1065,11 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
fn read_discriminant(
&mut self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx, VariantIdx> {
self.with_elem(PathElem::EnumTag, move |this| {
Ok(try_validation!(
this.ecx.read_discriminant(op),
this.ecx.read_discriminant(val),
this.path,
Ub(InvalidTag(val)) => InvalidEnumTag {
value: format!("{val:x}"),
@ -802,44 +1084,54 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
#[inline]
fn visit_field(
&mut self,
old_op: &OpTy<'tcx, M::Provenance>,
old_val: &PlaceTy<'tcx, M::Provenance>,
field: usize,
new_op: &OpTy<'tcx, M::Provenance>,
new_val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
let elem = self.aggregate_field_path_elem(old_op.layout, field);
self.with_elem(elem, move |this| this.visit_value(new_op))
let elem = self.aggregate_field_path_elem(old_val.layout, field);
self.with_elem(elem, move |this| this.visit_value(new_val))
}
#[inline]
fn visit_variant(
&mut self,
old_op: &OpTy<'tcx, M::Provenance>,
old_val: &PlaceTy<'tcx, M::Provenance>,
variant_id: VariantIdx,
new_op: &OpTy<'tcx, M::Provenance>,
new_val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
let name = match old_op.layout.ty.kind() {
let name = match old_val.layout.ty.kind() {
ty::Adt(adt, _) => PathElem::Variant(adt.variant(variant_id).name),
// Coroutines also have variants
ty::Coroutine(..) => PathElem::CoroutineState(variant_id),
_ => bug!("Unexpected type with variant: {:?}", old_op.layout.ty),
_ => bug!("Unexpected type with variant: {:?}", old_val.layout.ty),
};
self.with_elem(name, move |this| this.visit_value(new_op))
self.with_elem(name, move |this| this.visit_value(new_val))
}
#[inline(always)]
fn visit_union(
&mut self,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
_fields: NonZero<usize>,
) -> InterpResult<'tcx> {
// Special check for CTFE validation, preventing `UnsafeCell` inside unions in immutable memory.
if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) {
if !op.layout.is_zst() && !op.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.param_env) {
if !self.in_mutable_memory(op) {
if !val.layout.is_zst() && !val.layout.ty.is_freeze(*self.ecx.tcx, self.ecx.param_env) {
if !self.in_mutable_memory(val) {
throw_validation_failure!(self.path, UnsafeCellInImmutable);
}
}
}
if self.reset_provenance_and_padding
&& let Some(data_bytes) = self.data_bytes.as_mut()
{
let base_offset = Self::data_range_offset(self.ecx, val);
// Determine and add data range for this union.
let union_data_range = Self::union_data_range(self.ecx, val.layout);
for &(offset, size) in union_data_range.0.iter() {
data_bytes.add_range(base_offset + offset, size);
}
}
Ok(())
}
@ -847,39 +1139,41 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
fn visit_box(
&mut self,
_box_ty: Ty<'tcx>,
op: &OpTy<'tcx, M::Provenance>,
val: &PlaceTy<'tcx, M::Provenance>,
) -> InterpResult<'tcx> {
self.check_safe_pointer(op, PointerKind::Box)?;
self.check_safe_pointer(val, PointerKind::Box)?;
Ok(())
}
#[inline]
fn visit_value(&mut self, op: &OpTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
trace!("visit_value: {:?}, {:?}", *op, op.layout);
fn visit_value(&mut self, val: &PlaceTy<'tcx, M::Provenance>) -> InterpResult<'tcx> {
trace!("visit_value: {:?}, {:?}", *val, val.layout);
// Check primitive types -- the leaves of our recursive descent.
// This is called even for enum discriminants (which are "fields" of their enum),
// so for integer-typed discriminants the provenance reset will happen here.
// We assume that the Scalar validity range does not restrict these values
// any further than `try_visit_primitive` does!
if self.try_visit_primitive(op)? {
if self.try_visit_primitive(val)? {
return Ok(());
}
// Special check preventing `UnsafeCell` in the inner part of constants
if self.ctfe_mode.is_some_and(|c| !c.allow_immutable_unsafe_cell()) {
if !op.layout.is_zst()
&& let Some(def) = op.layout.ty.ty_adt_def()
if !val.layout.is_zst()
&& let Some(def) = val.layout.ty.ty_adt_def()
&& def.is_unsafe_cell()
{
if !self.in_mutable_memory(op) {
if !self.in_mutable_memory(val) {
throw_validation_failure!(self.path, UnsafeCellInImmutable);
}
}
}
// Recursively walk the value at its type. Apply optimizations for some large types.
match op.layout.ty.kind() {
match val.layout.ty.kind() {
ty::Str => {
let mplace = op.assert_mem_place(); // strings are unsized and hence never immediate
let mplace = val.assert_mem_place(); // strings are unsized and hence never immediate
let len = mplace.len(self.ecx)?;
try_validation!(
self.ecx.read_bytes_ptr_strip_provenance(mplace.ptr(), Size::from_bytes(len)),
@ -889,11 +1183,10 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
);
}
ty::Array(tys, ..) | ty::Slice(tys)
// This optimization applies for types that can hold arbitrary bytes (such as
// integer and floating point types) or for structs or tuples with no fields.
// FIXME(wesleywiser) This logic could be extended further to arbitrary structs
// or tuples made up of integer/floating point types or inhabited ZSTs with no
// padding.
// This optimization applies for types that can hold arbitrary non-provenance bytes (such as
// integer and floating point types).
// FIXME(wesleywiser) This logic could be extended further to arbitrary structs or
// tuples made up of integer/floating point types or inhabited ZSTs with no padding.
if matches!(tys.kind(), ty::Int(..) | ty::Uint(..) | ty::Float(..))
=>
{
@ -901,18 +1194,19 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
// Optimized handling for arrays of integer/float type.
// This is the length of the array/slice.
let len = op.len(self.ecx)?;
let len = val.len(self.ecx)?;
// This is the element type size.
let layout = self.ecx.layout_of(*tys)?;
// This is the size in bytes of the whole array. (This checks for overflow.)
let size = layout.size * len;
// If the size is 0, there is nothing to check.
// (`size` can only be 0 of `len` is 0, and empty arrays are always valid.)
// (`size` can only be 0 if `len` is 0, and empty arrays are always valid.)
if size == Size::ZERO {
return Ok(());
}
// Now that we definitely have a non-ZST array, we know it lives in memory.
let mplace = match op.as_mplace_or_imm() {
// Now that we definitely have a non-ZST array, we know it lives in memory -- except it may
// be an uninitialized local variable, those are also "immediate".
let mplace = match val.to_op(self.ecx)?.as_mplace_or_imm() {
Left(mplace) => mplace,
Right(imm) => match *imm {
Immediate::Uninit =>
@ -958,20 +1252,30 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
}
}
}
// Don't forget that these are all non-pointer types, and thus do not preserve
// provenance.
if self.reset_provenance_and_padding {
// We can't reuse the allocation handle from above: there we might be looking at read-only memory.
let mut alloc = self.ecx.get_ptr_alloc_mut(mplace.ptr(), size)?.expect("we already excluded size 0");
alloc.clear_provenance()?;
// Also, mark this as containing data, not padding.
self.add_data_range(mplace.ptr(), size);
}
}
// Fast path for arrays and slices of ZSTs. We only need to check a single ZST element
// of an array and not all of them, because there's only a single value of a specific
// ZST type, so either validation fails for all elements or none.
ty::Array(tys, ..) | ty::Slice(tys) if self.ecx.layout_of(*tys)?.is_zst() => {
// Validate just the first element (if any).
if op.len(self.ecx)? > 0 {
self.visit_field(op, 0, &self.ecx.project_index(op, 0)?)?;
if val.len(self.ecx)? > 0 {
self.visit_field(val, 0, &self.ecx.project_index(val, 0)?)?;
}
}
_ => {
// default handler
try_validation!(
self.walk_value(op),
self.walk_value(val),
self.path,
// It's not great to catch errors here, since we can't give a very good path,
// but it's better than ICEing.
@ -992,15 +1296,15 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
// FIXME: We could avoid some redundant checks here. For newtypes wrapping
// scalars, we do the same check on every "level" (e.g., first we check
// MyNewtype and then the scalar in there).
match op.layout.abi {
match val.layout.abi {
Abi::Uninhabited => {
let ty = op.layout.ty;
let ty = val.layout.ty;
throw_validation_failure!(self.path, UninhabitedVal { ty });
}
Abi::Scalar(scalar_layout) => {
if !scalar_layout.is_uninit_valid() {
// There is something to check here.
let scalar = self.read_scalar(op, ExpectedKind::InitScalar)?;
let scalar = self.read_scalar(val, ExpectedKind::InitScalar)?;
self.visit_scalar(scalar, scalar_layout)?;
}
}
@ -1010,7 +1314,7 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
// the other must be init.
if !a_layout.is_uninit_valid() && !b_layout.is_uninit_valid() {
let (a, b) =
self.read_immediate(op, ExpectedKind::InitScalar)?.to_scalar_pair();
self.read_immediate(val, ExpectedKind::InitScalar)?.to_scalar_pair();
self.visit_scalar(a, a_layout)?;
self.visit_scalar(b, b_layout)?;
}
@ -1031,19 +1335,34 @@ impl<'rt, 'tcx, M: Machine<'tcx>> ValueVisitor<'tcx, M> for ValidityVisitor<'rt,
impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
fn validate_operand_internal(
&self,
op: &OpTy<'tcx, M::Provenance>,
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
path: Vec<PathElem>,
ref_tracking: Option<&mut RefTracking<MPlaceTy<'tcx, M::Provenance>, Vec<PathElem>>>,
ctfe_mode: Option<CtfeValidationMode>,
reset_provenance_and_padding: bool,
) -> InterpResult<'tcx> {
trace!("validate_operand_internal: {:?}, {:?}", *op, op.layout.ty);
trace!("validate_operand_internal: {:?}, {:?}", *val, val.layout.ty);
// Construct a visitor
let mut visitor = ValidityVisitor { path, ref_tracking, ctfe_mode, ecx: self };
// Run it.
match self.run_for_validation(|| visitor.visit_value(op)) {
// Run the visitor.
match self.run_for_validation(|ecx| {
let reset_padding = reset_provenance_and_padding && {
// Check if `val` is actually stored in memory. If not, padding is not even
// represented and we need not reset it.
ecx.place_to_op(val)?.as_mplace_or_imm().is_left()
};
let mut v = ValidityVisitor {
path,
ref_tracking,
ctfe_mode,
ecx,
reset_provenance_and_padding,
data_bytes: reset_padding.then_some(RangeSet(Vec::new())),
};
v.visit_value(val)?;
v.reset_padding(val)?;
InterpResult::Ok(())
}) {
Ok(()) => Ok(()),
// Pass through validation failures and "invalid program" issues.
Err(err)
@ -1079,13 +1398,19 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// - no `UnsafeCell` or non-ZST `&mut`.
#[inline(always)]
pub(crate) fn const_validate_operand(
&self,
op: &OpTy<'tcx, M::Provenance>,
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
path: Vec<PathElem>,
ref_tracking: &mut RefTracking<MPlaceTy<'tcx, M::Provenance>, Vec<PathElem>>,
ctfe_mode: CtfeValidationMode,
) -> InterpResult<'tcx> {
self.validate_operand_internal(op, path, Some(ref_tracking), Some(ctfe_mode))
self.validate_operand_internal(
val,
path,
Some(ref_tracking),
Some(ctfe_mode),
/*reset_provenance*/ false,
)
}
/// This function checks the data at `op` to be runtime-valid.
@ -1093,21 +1418,41 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
/// It will error if the bits at the destination do not match the ones described by the layout.
#[inline(always)]
pub fn validate_operand(
&self,
op: &OpTy<'tcx, M::Provenance>,
&mut self,
val: &PlaceTy<'tcx, M::Provenance>,
recursive: bool,
reset_provenance_and_padding: bool,
) -> InterpResult<'tcx> {
// Note that we *could* actually be in CTFE here with `-Zextra-const-ub-checks`, but it's
// still correct to not use `ctfe_mode`: that mode is for validation of the final constant
// value; it rules out things like `UnsafeCell` in awkward places.
if !recursive {
return self.validate_operand_internal(op, vec![], None, None);
return self.validate_operand_internal(
val,
vec![],
None,
None,
reset_provenance_and_padding,
);
}
// Do a recursive check.
let mut ref_tracking = RefTracking::empty();
self.validate_operand_internal(op, vec![], Some(&mut ref_tracking), None)?;
self.validate_operand_internal(
val,
vec![],
Some(&mut ref_tracking),
None,
reset_provenance_and_padding,
)?;
while let Some((mplace, path)) = ref_tracking.todo.pop() {
self.validate_operand_internal(&mplace.into(), path, Some(&mut ref_tracking), None)?;
// Things behind a reference do *not* have their provenance reset.
self.validate_operand_internal(
&mplace.into(),
path,
Some(&mut ref_tracking),
None,
/*reset_provenance_and_padding*/ false,
)?;
}
Ok(())
}
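The padding-reset machinery above records, in `data_bytes`, the ranges of bytes that carry actual data and then treats everything else as padding. A minimal standalone sketch of that complement computation (a hypothetical helper, not the interpreter's actual `RangeSet` API):

// Given the total size of a value and the sorted, disjoint ranges of data bytes
// recorded during validation, return the complementary ranges -- the padding
// that a typed copy resets to uninitialized.
fn padding_ranges(total_size: u64, data: &[(u64, u64)]) -> Vec<(u64, u64)> {
    let mut padding = Vec::new();
    let mut cursor = 0;
    for &(offset, size) in data {
        if offset > cursor {
            padding.push((cursor, offset - cursor)); // gap before this data range
        }
        cursor = offset + size;
    }
    if cursor < total_size {
        padding.push((cursor, total_size - cursor)); // trailing padding
    }
    padding
}

fn main() {
    // e.g. `#[repr(C)] struct S { a: u16, b: u32 }`: data at 0..2 and 4..8, padding at 2..4.
    assert_eq!(padding_ranges(8, &[(0, 2), (4, 4)]), vec![(2u64, 2u64)]);
}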

View File

@ -5,6 +5,7 @@ use std::num::NonZero;
use rustc_index::IndexVec;
use rustc_middle::mir::interpret::InterpResult;
use rustc_middle::ty::layout::LayoutOf;
use rustc_middle::ty::{self, Ty};
use rustc_target::abi::{FieldIdx, FieldsShape, VariantIdx, Variants};
use tracing::trace;
@ -82,6 +83,7 @@ pub trait ValueVisitor<'tcx, M: Machine<'tcx>>: Sized {
self.visit_value(new_val)
}
/// Traversal logic; should not be overloaded.
fn walk_value(&mut self, v: &Self::V) -> InterpResult<'tcx> {
let ty = v.layout().ty;
trace!("walk_value: type: {ty}");
@ -104,6 +106,17 @@ pub trait ValueVisitor<'tcx, M: Machine<'tcx>>: Sized {
// DynStar types. Very different from a dyn type (but strangely part of the
// same variant in `TyKind`): These are pairs where the 2nd component is the
// vtable, and the first component is the data (which must be ptr-sized).
// First make sure the vtable can be read at its type.
// The type of this vtable is fake: it claims to be a reference to some actual memory, but that isn't true.
// So we transmute it to a raw pointer.
let raw_ptr_ty = Ty::new_mut_ptr(*self.ecx().tcx, self.ecx().tcx.types.unit);
let raw_ptr_ty = self.ecx().layout_of(raw_ptr_ty)?;
let vtable_field =
self.ecx().project_field(v, 1)?.transmute(raw_ptr_ty, self.ecx())?;
self.visit_field(v, 1, &vtable_field)?;
// Then unpack the first field, and continue.
let data = self.ecx().unpack_dyn_star(v, data)?;
return self.visit_field(v, 0, &data);
}

View File

@ -4,7 +4,7 @@ use rustc_middle::ty::{ParamEnvAnd, Ty, TyCtxt};
use rustc_target::abi::{Abi, FieldsShape, Scalar, Variants};
use crate::const_eval::{CanAccessMutGlobal, CheckAlignment, CompileTimeMachine};
use crate::interpret::{InterpCx, MemoryKind, OpTy};
use crate::interpret::{InterpCx, MemoryKind};
/// Determines if this type permits "raw" initialization by just transmuting some memory into an
/// instance of `T`.
@ -32,15 +32,15 @@ pub fn check_validity_requirement<'tcx>(
let layout_cx = LayoutCx { tcx, param_env: param_env_and_ty.param_env };
if kind == ValidityRequirement::Uninit || tcx.sess.opts.unstable_opts.strict_init_checks {
might_permit_raw_init_strict(layout, &layout_cx, kind)
check_validity_requirement_strict(layout, &layout_cx, kind)
} else {
might_permit_raw_init_lax(layout, &layout_cx, kind)
check_validity_requirement_lax(layout, &layout_cx, kind)
}
}
/// Implements the 'strict' version of the `might_permit_raw_init` checks; see that function for
/// details.
fn might_permit_raw_init_strict<'tcx>(
/// Implements the 'strict' version of the [`check_validity_requirement`] checks; see that function
/// for details.
fn check_validity_requirement_strict<'tcx>(
ty: TyAndLayout<'tcx>,
cx: &LayoutCx<'tcx, TyCtxt<'tcx>>,
kind: ValidityRequirement,
@ -61,18 +61,24 @@ fn might_permit_raw_init_strict<'tcx>(
.expect("failed to write bytes for zero valid check");
}
let ot: OpTy<'_, _> = allocated.into();
// Assume that if it failed, it's a validation failure.
// This does *not* actually check that references are dereferenceable, but since all types that
// require dereferenceability also require non-null, we don't actually get any false negatives
// due to this.
Ok(cx.validate_operand(&ot, /*recursive*/ false).is_ok())
// The value we are validating is temporary and discarded at the end of this function, so
// there is no point in resetting provenance and padding.
Ok(cx
.validate_operand(
&allocated.into(),
/*recursive*/ false,
/*reset_provenance_and_padding*/ false,
)
.is_ok())
}
/// Implements the 'lax' (default) version of the `might_permit_raw_init` checks; see that function for
/// details.
fn might_permit_raw_init_lax<'tcx>(
/// Implements the 'lax' (default) version of the [`check_validity_requirement`] checks; see that
/// function for details.
fn check_validity_requirement_lax<'tcx>(
this: TyAndLayout<'tcx>,
cx: &LayoutCx<'tcx, TyCtxt<'tcx>>,
init_kind: ValidityRequirement,
@ -137,7 +143,7 @@ fn might_permit_raw_init_lax<'tcx>(
}
FieldsShape::Arbitrary { offsets, .. } => {
for idx in 0..offsets.len() {
if !might_permit_raw_init_lax(this.field(cx, idx), cx, init_kind)? {
if !check_validity_requirement_lax(this.field(cx, idx), cx, init_kind)? {
// We found a field that is unhappy with this kind of initialization.
return Ok(false);
}
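As a user-level illustration of the property these functions compute, namely whether all-zero (or uninit) bytes form a valid value of the type, here is a small standalone example; it does not use the compiler-internal API:

use std::mem::MaybeUninit;

fn main() {
    // `Option<&u8>` uses the null niche, so the all-zero bit pattern is the valid value `None`.
    // A bare `&u8` must be non-null, so zero-initializing it would be rejected by these checks.
    let zeroed: MaybeUninit<Option<&'static u8>> = MaybeUninit::zeroed();
    // SAFETY: all-zero bytes are a valid `Option<&u8>` (namely `None`).
    let val: Option<&'static u8> = unsafe { zeroed.assume_init() };
    assert!(val.is_none());
}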

View File

@ -17,7 +17,7 @@ mod tests;
/// first value of the following element.
#[derive(Debug, Clone)]
pub struct IntervalSet<I> {
// Start, end
// Start, end (both inclusive)
map: SmallVec<[(u32, u32); 2]>,
domain: usize,
_data: PhantomData<I>,
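For context, a minimal standalone sketch of the "both endpoints inclusive" convention documented above (this is not the actual `IntervalSet` implementation, just an illustration):

// Sorted, disjoint intervals; both `start` and `end` are part of the set.
struct Intervals {
    map: Vec<(u32, u32)>,
}

impl Intervals {
    fn contains(&self, x: u32) -> bool {
        self.map.iter().any(|&(start, end)| start <= x && x <= end)
    }
}

fn main() {
    let set = Intervals { map: vec![(0, 3), (10, 10)] };
    assert!(set.contains(3)); // `end` itself is included
    assert!(set.contains(10)); // a single element is stored as (x, x)
    assert!(!set.contains(4));
}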

View File

@ -644,6 +644,12 @@ impl<Prov: Provenance, Extra, Bytes: AllocBytes> Allocation<Prov, Extra, Bytes>
return Ok(());
}
/// Remove all provenance in the given memory range.
pub fn clear_provenance(&mut self, cx: &impl HasDataLayout, range: AllocRange) -> AllocResult {
self.provenance.clear(range, cx)?;
return Ok(());
}
/// Applies a previously prepared provenance copy.
/// The affected range, as defined in the parameters to `provenance().prepare_copy` is expected
/// to be clear of provenance.
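A rough sketch of what the new `clear_provenance` helper does conceptually, using a hypothetical per-byte map rather than the real provenance-map type:

use std::collections::BTreeMap;

#[allow(dead_code)]
#[derive(Clone, Copy)]
struct AllocId(u64);

// Drop every per-byte provenance entry whose offset falls inside `start..start + len`.
fn clear_provenance(provenance: &mut BTreeMap<u64, AllocId>, start: u64, len: u64) {
    provenance.retain(|&offset, _| offset < start || offset >= start + len);
}

fn main() {
    let mut prov = BTreeMap::from([(0, AllocId(1)), (8, AllocId(2))]);
    clear_provenance(&mut prov, 8, 8); // wipe provenance for bytes 8..16
    assert_eq!(prov.len(), 1);
    assert!(prov.contains_key(&0));
}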

View File

@ -307,6 +307,13 @@ impl<'tcx, Prov: Provenance> Scalar<Prov> {
}
}
pub fn clear_provenance(&mut self) -> InterpResult<'tcx> {
if matches!(self, Scalar::Ptr(..)) {
*self = self.to_scalar_int()?.into();
}
Ok(())
}
#[inline(always)]
pub fn to_scalar_int(self) -> InterpResult<'tcx, ScalarInt> {
self.try_to_scalar_int().map_err(|_| err_unsup!(ReadPointerAsInt(None)).into())

View File

@ -1136,6 +1136,7 @@ impl<'tcx> Ty<'tcx> {
}
/// Tests if this is any kind of primitive pointer type (reference, raw pointer, fn pointer).
/// `Box` is *not* considered a pointer here!
#[inline]
pub fn is_any_ptr(self) -> bool {
self.is_ref() || self.is_unsafe_ptr() || self.is_fn_ptr()

View File

@ -773,15 +773,20 @@ fn offset_of_addr() {
#[test]
fn const_maybe_uninit_zeroed() {
// Sanity check for `MaybeUninit::zeroed` in a realistic const situation (plugin array term)
// It is crucial that this type has no padding!
#[repr(C)]
struct Foo {
a: Option<&'static str>,
a: Option<&'static u8>,
b: Bar,
c: f32,
_pad: u32,
d: *const u8,
}
#[repr(C)]
struct Bar(usize);
struct FooPtr(*const Foo);
unsafe impl Sync for FooPtr {}

View File

@ -637,7 +637,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
// The program didn't actually do a read, so suppress the memory access hooks.
// This is also a very special exception where we just ignore an error -- if this read
// was UB e.g. because the memory is uninitialized, we don't want to know!
let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
let old_val = this.run_for_validation(|this| this.read_scalar(dest)).ok();
this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
this.validate_atomic_store(dest, atomic)?;
this.buffered_atomic_write(val, dest, atomic, old_val)

View File

@ -869,7 +869,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Dereference a pointer operand to a place using `layout` instead of the pointer's declared type
fn deref_pointer_as(
&self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
layout: TyAndLayout<'tcx>,
) -> InterpResult<'tcx, MPlaceTy<'tcx>> {
let this = self.eval_context_ref();
@ -880,7 +880,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Calculates the MPlaceTy given the offset and layout of an access on an operand
fn deref_pointer_and_offset(
&self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
offset: u64,
base_layout: TyAndLayout<'tcx>,
value_layout: TyAndLayout<'tcx>,
@ -897,7 +897,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn deref_pointer_and_read(
&self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
offset: u64,
base_layout: TyAndLayout<'tcx>,
value_layout: TyAndLayout<'tcx>,
@ -909,7 +909,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
fn deref_pointer_and_write(
&mut self,
op: &impl Readable<'tcx, Provenance>,
op: &impl Projectable<'tcx, Provenance>,
offset: u64,
value: impl Into<Scalar>,
base_layout: TyAndLayout<'tcx>,

View File

@ -152,8 +152,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// ```
// Would not be considered UB, or the other way around (`is_val_statically_known(0)`).
"is_val_statically_known" => {
let [arg] = check_arg_count(args)?;
this.validate_operand(arg, /*recursive*/ false)?;
let [_arg] = check_arg_count(args)?;
// FIXME: should we check for validity here? It's tricky because we do not have a
// place. Codegen does not seem to set any attributes like `noundef` for intrinsic
// calls, so we don't *have* to do anything.
let branch: bool = this.machine.rng.get_mut().gen();
this.write_scalar(Scalar::from_bool(branch), dest)?;
}

View File

@ -572,6 +572,9 @@ pub struct MiriMachine<'tcx> {
/// Invariant: the promised alignment will never be less than the native alignment of the
/// allocation.
pub(crate) symbolic_alignment: RefCell<FxHashMap<AllocId, (Size, Align)>>,
/// A cache of "data range" computations for unions (i.e., the offsets of non-padding bytes).
union_data_ranges: FxHashMap<Ty<'tcx>, RangeSet>,
}
impl<'tcx> MiriMachine<'tcx> {
@ -714,6 +717,7 @@ impl<'tcx> MiriMachine<'tcx> {
allocation_spans: RefCell::new(FxHashMap::default()),
const_cache: RefCell::new(FxHashMap::default()),
symbolic_alignment: RefCell::new(FxHashMap::default()),
union_data_ranges: FxHashMap::default(),
}
}
@ -826,6 +830,7 @@ impl VisitProvenance for MiriMachine<'_> {
allocation_spans: _,
const_cache: _,
symbolic_alignment: _,
union_data_ranges: _,
} = self;
threads.visit_provenance(visit);
@ -1627,4 +1632,12 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> {
ecx.machine.rng.borrow_mut().gen::<usize>() % ADDRS_PER_ANON_GLOBAL
}
}
fn cached_union_data_range<'e>(
ecx: &'e mut InterpCx<'tcx, Self>,
ty: Ty<'tcx>,
compute_range: impl FnOnce() -> RangeSet,
) -> Cow<'e, RangeSet> {
Cow::Borrowed(ecx.machine.union_data_ranges.entry(ty).or_insert_with(compute_range))
}
}
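The `cached_union_data_range` hook above follows a simple memoization pattern: look the entry up, compute it only on a miss, and hand out a borrow. A standalone sketch with hypothetical key and value types (the real code keys on `Ty<'tcx>` and stores a `RangeSet`):

use std::borrow::Cow;
use std::collections::HashMap;

fn cached<'a>(
    cache: &'a mut HashMap<String, Vec<(u64, u64)>>,
    key: &str,
    compute: impl FnOnce() -> Vec<(u64, u64)>,
) -> Cow<'a, Vec<(u64, u64)>> {
    // Compute and insert only if the key is missing, then borrow the cached value.
    Cow::Borrowed(cache.entry(key.to_string()).or_insert_with(compute))
}

fn main() {
    let mut cache = HashMap::new();
    let first = cached(&mut cache, "union Foo", || vec![(0, 2), (4, 4)]);
    assert_eq!(first.len(), 2);
    // A second lookup for the same key reuses the cached ranges instead of recomputing them.
    let second = cached(&mut cache, "union Foo", || unreachable!());
    assert_eq!(second.len(), 2);
}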

View File

@ -0,0 +1,10 @@
use std::mem;
// Doing a copy at integer type should lose provenance.
// This tests the unoptimized base case.
fn main() {
let ptrs = [(&42, true)];
let ints: [(usize, bool); 1] = unsafe { mem::transmute(ptrs) };
let ptr = (&raw const ints[0].0).cast::<&i32>();
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance0.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance0.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,10 @@
use std::mem;
// Doing a copy at integer type should lose provenance.
// This tests the optimized-array case of integer copies.
fn main() {
let ptrs = [&42];
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
let ptr = (&raw const ints[0]).cast::<&i32>();
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance1.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance1.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,12 @@
use std::mem;
// Doing a copy at integer type should lose provenance.
// This tests the case where provenance is hiding in the metadata of a pointer.
fn main() {
let ptrs = [(&42, &42)];
// Typed copy at wide pointer type (with integer-typed metadata).
let ints: [*const [usize]; 1] = unsafe { mem::transmute(ptrs) };
// Get a pointer to the metadata field.
let ptr = (&raw const ints[0]).wrapping_byte_add(mem::size_of::<*const ()>()).cast::<&i32>();
let _val = unsafe { *ptr.read() }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
--> $DIR/int_copy_looses_provenance2.rs:LL:CC
|
LL | let _val = unsafe { *ptr.read() };
| ^^^^^^^^^^ constructing invalid value: encountered a dangling reference ($HEX[noalloc] has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance2.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,29 @@
#![feature(strict_provenance)]
use std::mem;
#[repr(C, usize)]
#[allow(unused)]
enum E {
Var1(usize),
Var2(usize),
}
// Doing a copy at integer type should lose provenance.
// This tests the case where provenance is hiding in the discriminant of an enum.
fn main() {
assert_eq!(mem::size_of::<E>(), 2*mem::size_of::<usize>());
// We want to store provenance in the enum discriminant, but the value still needs to
// be valid for the type. So we split provenance and data.
let ptr = &42;
let ptr = ptr as *const i32;
let ptrs = [(ptr.with_addr(0), ptr)];
// Typed copy at the enum type.
let ints: [E; 1] = unsafe { mem::transmute(ptrs) };
// Read the discriminant.
let discr = unsafe { (&raw const ints[0]).cast::<*const i32>().read() };
// Take the provenance from there, together with the original address.
let ptr = discr.with_addr(ptr.addr());
// There should be no provenance in `discr`, so this should be UB.
let _val = unsafe { *ptr }; //~ERROR: dangling
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/int_copy_looses_provenance3.rs:LL:CC
|
LL | let _val = unsafe { *ptr };
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/int_copy_looses_provenance3.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,18 @@
fn main() {
let half_ptr = std::mem::size_of::<*const ()>() / 2;
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();
unsafe {
// Put a pointer in the middle.
bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42);
// Typed copy of the entire thing as two pointers, but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[*const (); 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned();
// Dereferencing this should fail as the copy has removed the provenance.
let _val = *ptr; //~ERROR: dangling
}
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
|
LL | let _val = *ptr;
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance0.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,18 @@
fn main() {
let half_ptr = std::mem::size_of::<*const ()>() / 2;
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();
unsafe {
// Put a pointer in the middle.
bytes.add(half_ptr).cast::<&i32>().write_unaligned(&42);
// Typed copy of the entire thing as two *function* pointers, but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[fn(); 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(half_ptr).cast::<*const i32>().read_unaligned();
// Dereferencing this should fail as the copy has removed the provenance.
let _val = *ptr; //~ERROR: dangling
}
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
|
LL | let _val = *ptr;
| ^^^^ memory access failed: expected a pointer to 4 bytes of memory, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/ptr_copy_loses_partial_provenance1.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,23 @@
use std::mem;
// We have three fields to avoid the ScalarPair optimization.
#[allow(unused)]
enum E {
None,
Some(&'static (), &'static (), usize),
}
fn main() { unsafe {
let mut p: mem::MaybeUninit<E> = mem::MaybeUninit::zeroed();
// The copy when `E` is returned from `transmute` should destroy padding
// (even when we use `write_unaligned`, which under the hood uses an untyped copy).
p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize, 0usize)));
// This is a `None`, so everything but the discriminant is padding.
assert!(matches!(*p.as_ptr(), E::None));
// Turns out the discriminant is (currently) stored
// in the 2nd pointer, so the first half is padding.
let c = &p as *const _ as *const u8;
let _val = *c.add(0); // Get a padding byte.
//~^ERROR: uninitialized
} }

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/padding-enum.rs:LL:CC
|
LL | let _val = *c.add(0); // Get a padding byte.
| ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-enum.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,25 @@
#![feature(core_intrinsics)]
use std::mem::{self, MaybeUninit};
fn main() {
// This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not.
// Ensure that these 9 bytes are indeed initialized, and the rest is indeed not.
// This should be the case even if we write into previously initialized storage.
let mut x: MaybeUninit<Box<[u8]>> = MaybeUninit::zeroed();
let z = std::intrinsics::add_with_overflow(0usize, 0usize);
unsafe { x.as_mut_ptr().cast::<(usize, bool)>().write(z) };
// Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by
// (`ptr_size - 1`) undef bytes (the padding after the bool) in there.
let z: *const u8 = &x as *const _ as *const _;
let first_undef = mem::size_of::<usize>() as isize + 1;
for i in 0..first_undef {
let byte = unsafe { *z.offset(i) };
assert_eq!(byte, 0);
}
let v = unsafe { *z.offset(first_undef) };
//~^ ERROR: uninitialized
if v == 0 {
println!("it is zero");
}
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/padding-pair.rs:LL:CC
|
LL | let v = unsafe { *z.offset(first_undef) };
| ^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-pair.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,32 @@
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct Foo {
val16: u16,
// Padding bytes go here!
val32: u32,
}
#[repr(C)]
#[derive(Debug, Copy, Clone)]
struct Bar {
bytes: [u8; 8],
}
#[repr(C)]
union FooBar {
foo: Foo,
bar: Bar,
}
pub fn main() {
// Initialize as u8 to ensure padding bytes are zeroed.
let mut foobar = FooBar { bar: Bar { bytes: [0u8; 8] } };
// Reading either field is ok.
let _val = unsafe { (foobar.foo, foobar.bar) };
// Does this assignment copy the uninitialized padding bytes
// over the initialized padding bytes? miri doesn't seem to think so.
foobar.foo = Foo { val16: 1, val32: 2 };
// This resets the padding to uninit.
let _val = unsafe { (foobar.foo, foobar.bar) };
//~^ ERROR: uninitialized
}

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value at .bytes[2]: encountered uninitialized memory, but expected an integer
--> $DIR/padding-struct-in-union.rs:LL:CC
|
LL | let _val = unsafe { (foobar.foo, foobar.bar) };
| ^^^^^^^^^^ constructing invalid value at .bytes[2]: encountered uninitialized memory, but expected an integer
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-struct-in-union.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,11 @@
use std::mem;
#[repr(C)]
struct Pair(u8, u16);
fn main() { unsafe {
let p: Pair = mem::transmute(0u32); // The copy when `Pair` is returned from `transmute` should destroy padding.
let c = &p as *const _ as *const u8;
let _val = *c.add(1); // Get the padding byte.
//~^ERROR: uninitialized
} }

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/padding-struct.rs:LL:CC
|
LL | let _val = *c.add(1); // Get the padding byte.
| ^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-struct.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,14 @@
use std::mem;
#[allow(unused)]
#[repr(C)]
union U {
field: (u8, u16),
}
fn main() { unsafe {
let p: U = mem::transmute(0u32); // The copy when `U` is returned from `transmute` should destroy padding.
let c = &p as *const _ as *const [u8; 4];
let _val = *c; // Read the entire thing, definitely contains the padding byte.
//~^ERROR: uninitialized
} }

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: constructing invalid value at [1]: encountered uninitialized memory, but expected an integer
--> $DIR/padding-union.rs:LL:CC
|
LL | let _val = *c; // Read the entire thing, definitely contains the padding byte.
| ^^ constructing invalid value at [1]: encountered uninitialized memory, but expected an integer
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-union.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -0,0 +1,18 @@
use std::mem;
// If this is `None`, the metadata becomes padding.
type T = Option<&'static str>;
fn main() { unsafe {
let mut p: mem::MaybeUninit<T> = mem::MaybeUninit::zeroed();
// The copy when `T` is returned from `transmute` should destroy padding
// (even when we use `write_unaligned`, which under the hood uses an untyped copy).
p.as_mut_ptr().write_unaligned(mem::transmute((0usize, 0usize)));
// Null represents `None`.
assert!(matches!(*p.as_ptr(), None));
// The second part, with the length, becomes padding.
let c = &p as *const _ as *const u8;
let _val = *c.add(mem::size_of::<*const u8>()); // Get a padding byte.
//~^ERROR: uninitialized
} }

View File

@ -0,0 +1,15 @@
error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> $DIR/padding-wide-ptr.rs:LL:CC
|
LL | let _val = *c.add(mem::size_of::<*const u8>()); // Get a padding byte.
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `main` at $DIR/padding-wide-ptr.rs:LL:CC
note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
error: aborting due to 1 previous error

View File

@ -1,16 +1,17 @@
#![feature(core_intrinsics)]
use std::mem;
use std::mem::{self, MaybeUninit};
fn main() {
let x: Option<Box<[u8]>> = unsafe {
// This constructs a `(usize, bool)` pair: 9 bytes initialized, the rest not.
// Ensure that these 9 bytes are indeed initialized, and the rest is indeed not.
let x: MaybeUninit<Box<[u8]>> = unsafe {
let z = std::intrinsics::add_with_overflow(0usize, 0usize);
std::mem::transmute::<(usize, bool), Option<Box<[u8]>>>(z)
std::mem::transmute::<(usize, bool), MaybeUninit<Box<[u8]>>>(z)
};
let y = &x;
// Now read this bytewise. There should be (`ptr_size + 1`) def bytes followed by
// (`ptr_size - 1`) undef bytes (the padding after the bool) in there.
let z: *const u8 = y as *const _ as *const _;
let z: *const u8 = &x as *const _ as *const _;
let first_undef = mem::size_of::<usize>() as isize + 1;
for i in 0..first_undef {
let byte = unsafe { *z.offset(i) };

View File

@ -61,6 +61,20 @@ fn debug() {
println!("{:?}", array);
}
fn huge_zst() {
fn id<T>(x: T) -> T { x }
// A "huge" zero-sized array. Make sure we don't loop over it in any part of Miri.
let val = [(); usize::MAX];
id(val); // make a copy
let val = [val; 2];
id(val);
// Also wrap it in a union (which, in particular, hits the logic for computing union padding).
let _copy = std::mem::MaybeUninit::new(val);
}
fn main() {
assert_eq!(empty_array(), []);
assert_eq!(index_unsafe(), 20);
@ -73,4 +87,5 @@ fn main() {
from();
eq();
debug();
huge_zst();
}

View File

@ -132,6 +132,43 @@ fn overaligned_casts() {
assert_eq!(aligned as u8, 0);
}
// This hits a corner case in the logic for clearing padding on typed copies.
fn padding_clear_corner_case() {
#[allow(unused)]
#[derive(Copy, Clone)]
#[repr(C)]
pub struct Decoded {
/// The scaled mantissa.
pub mant: u64,
/// The lower error range.
pub minus: u64,
/// The upper error range.
pub plus: u64,
/// The shared exponent in base 2.
pub exp: i16,
/// True when the error range is inclusive.
///
/// In IEEE 754, this is true when the original mantissa was even.
pub inclusive: bool,
}
#[allow(unused)]
#[derive(Copy, Clone)]
pub enum FullDecoded {
/// Not-a-number.
Nan,
/// Infinities, either positive or negative.
Infinite,
/// Zero, either positive or negative.
Zero,
/// Finite numbers with further decoded fields.
Finite(Decoded),
}
let val = FullDecoded::Finite(Decoded { mant: 0, minus: 0, plus: 0, exp: 0, inclusive: false });
let _val2 = val; // trigger typed copy
}
fn main() {
test(MyEnum::MyEmptyVariant);
test(MyEnum::MyNewtypeVariant(42));
@ -141,4 +178,5 @@ fn main() {
discriminant_overflow();
more_discriminant_overflow();
overaligned_casts();
padding_clear_corner_case();
}

View File

@ -12,6 +12,7 @@ fn main() {
bytewise_custom_memcpy();
bytewise_custom_memcpy_chunked();
int_load_strip_provenance();
maybe_uninit_preserves_partial_provenance();
}
/// Some basic smoke tests for provenance.
@ -145,3 +146,24 @@ fn int_load_strip_provenance() {
let ints: [usize; 1] = unsafe { mem::transmute(ptrs) };
assert_eq!(ptrs[0] as *const _ as usize, ints[0]);
}
fn maybe_uninit_preserves_partial_provenance() {
// This is the same test as ptr_copy_loses_partial_provenance.rs, but using MaybeUninit and thus
// properly preserving partial provenance.
unsafe {
let mut bytes = [1u8; 16];
let bytes = bytes.as_mut_ptr();
// Put a pointer in the middle.
bytes.add(4).cast::<&i32>().write_unaligned(&42);
// Copy the entire thing as two pointers but not perfectly
// overlapping with the pointer we have in there.
let copy = bytes.cast::<[mem::MaybeUninit<*const ()>; 2]>().read_unaligned();
let copy_bytes = copy.as_ptr().cast::<u8>();
// Now go to the middle of the copy and get the pointer back out.
let ptr = copy_bytes.add(4).cast::<*const i32>().read_unaligned();
// And deref this to ensure we get the right value.
let val = *ptr;
assert_eq!(val, 42);
}
}