Rollup merge of #128942 - RalfJung:interpret-weak-memory, r=saethlin

miri weak memory emulation: put previous value into initial store buffer Fixes https://github.com/rust-lang/miri/issues/2164 by doing a read before each atomic write so that we can initialize the store buffer. The read suppresses memory access hooks and UB exceptions, to avoid otherwise influencing the program behavior. If the read fails, we store that as `None` in the store buffer, so that when an atomic read races with the first atomic write to some memory and previously the memory was uninitialized, we can report UB due to reading uninit memory. ``@cbeuw`` this changes a bit the way we initialize the store buffers. Not sure if you still remember all this code, but if you could have a look to make sure this still makes sense, that would be great. :) r? ``@saethlin``
2025-04-28 02:57:37 +00:00 · 2024-08-27 01:46:51 -05:00 · 2024-08-27 01:46:51 -05:00 · 427019e37f
commit 427019e37f
parent e209b05037 8b18c6bdd3
6 changed files with 153 additions and 109 deletions
--- a/compiler/rustc_const_eval/src/interpret/memory.rs
+++ b/compiler/rustc_const_eval/src/interpret/memory.rs
@ -1014,7 +1014,7 @@ impl<'tcx, M: Machine<'tcx>> InterpCx<'tcx, M> {
    ///
    /// We do this so Miri's allocation access tracking does not show the validation
    /// reads as spurious accesses.
-    pub(super) fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
+    pub fn run_for_validation<R>(&self, f: impl FnOnce() -> R) -> R {
        // This deliberately uses `==` on `bool` to follow the pattern
        // `assert!(val.replace(new) == old)`.
        assert!(
--- a/src/tools/miri/src/concurrency/data_race.rs
+++ b/src/tools/miri/src/concurrency/data_race.rs
@ -617,9 +617,10 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
        // the *value* (including the associated provenance if this is an AtomicPtr) at this location.
        // Only metadata on the location itself is used.
        let scalar = this.allow_data_races_ref(move |this| this.read_scalar(place))?;
-        this.buffered_atomic_read(place, atomic, scalar, || {
+        let buffered_scalar = this.buffered_atomic_read(place, atomic, scalar, || {
            this.validate_atomic_load(place, atomic)
-        })
+        })?;
+        Ok(buffered_scalar.ok_or_else(|| err_ub!(InvalidUninitBytes(None)))?)
    }

    /// Perform an atomic write operation at the memory location.
@ -632,14 +633,14 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
        let this = self.eval_context_mut();
        this.atomic_access_check(dest, AtomicAccessType::Store)?;

+        // Read the previous value so we can put it in the store buffer later.
+        // The program didn't actually do a read, so suppress the memory access hooks.
+        // This is also a very special exception where we just ignore an error -- if this read
+        // was UB e.g. because the memory is uninitialized, we don't want to know!
+        let old_val = this.run_for_validation(|| this.read_scalar(dest)).ok();
        this.allow_data_races_mut(move |this| this.write_scalar(val, dest))?;
        this.validate_atomic_store(dest, atomic)?;
-        // FIXME: it's not possible to get the value before write_scalar. A read_scalar will cause
-        // side effects from a read the program did not perform. So we have to initialise
-        // the store buffer with the value currently being written
-        // ONCE this is fixed please remove the hack in buffered_atomic_write() in weak_memory.rs
-        // https://github.com/rust-lang/miri/issues/2164
-        this.buffered_atomic_write(val, dest, atomic, val)
+        this.buffered_atomic_write(val, dest, atomic, old_val)
    }

    /// Perform an atomic RMW operation on a memory location.
@ -768,7 +769,7 @@ pub trait EvalContextExt<'tcx>: MiriInterpCxExt<'tcx> {
            // in the modification order.
            // Since `old` is only a value and not the store element, we need to separately
            // find it in our store buffer and perform load_impl on it.
-            this.perform_read_on_buffered_latest(place, fail, old.to_scalar())?;
+            this.perform_read_on_buffered_latest(place, fail)?;
        }

        // Return the old value.
--- a/src/tools/miri/src/concurrency/weak_memory.rs
+++ b/src/tools/miri/src/concurrency/weak_memory.rs
@ -39,11 +39,10 @@
 //! to attach store buffers to atomic objects. However, Rust follows LLVM in that it only has
 //! 'atomic accesses'. Therefore Miri cannot know when and where atomic 'objects' are being
 //! created or destroyed, to manage its store buffers. Instead, we hence lazily create an
-//! atomic object on the first atomic access to a given region, and we destroy that object
-//! on the next non-atomic or imperfectly overlapping atomic access to that region.
+//! atomic object on the first atomic write to a given region, and we destroy that object
+//! on the next non-atomic or imperfectly overlapping atomic write to that region.
 //! These lazy (de)allocations happen in memory_accessed() on non-atomic accesses, and
-//! get_or_create_store_buffer() on atomic accesses. This mostly works well, but it does
-//! lead to some issues (<https://github.com/rust-lang/miri/issues/2164>).
+//! get_or_create_store_buffer_mut() on atomic writes.
 //!
 //! One consequence of this difference is that safe/sound Rust allows for more operations on atomic locations
 //! than the C++20 atomic API was intended to allow, such as non-atomically accessing
@ -144,11 +143,9 @@ struct StoreElement {

    /// The timestamp of the storing thread when it performed the store
    timestamp: VTimestamp,
-    /// The value of this store
-    // FIXME: this means the store must be fully initialized;
-    // we will have to change this if we want to support atomics on
-    // (partially) uninitialized data.
-    val: Scalar,
+    /// The value of this store. `None` means uninitialized.
+    // FIXME: Currently, we cannot represent partial initialization.
+    val: Option<Scalar>,

    /// Metadata about loads from this store element,
    /// behind a RefCell to keep load op take &self
@ -170,7 +167,7 @@ impl StoreBufferAlloc {

    /// When a non-atomic access happens on a location that has been atomically accessed
    /// before without data race, we can determine that the non-atomic access fully happens
-    /// after all the prior atomic accesses so the location no longer needs to exhibit
+    /// after all the prior atomic writes so the location no longer needs to exhibit
    /// any weak memory behaviours until further atomic accesses.
    pub fn memory_accessed(&self, range: AllocRange, global: &DataRaceState) {
        if !global.ongoing_action_data_race_free() {
@ -192,37 +189,29 @@ impl StoreBufferAlloc {
        }
    }

-    /// Gets a store buffer associated with an atomic object in this allocation,
-    /// or creates one with the specified initial value if no atomic object exists yet.
-    fn get_or_create_store_buffer<'tcx>(
+    /// Gets a store buffer associated with an atomic object in this allocation.
+    /// Returns `None` if there is no store buffer.
+    fn get_store_buffer<'tcx>(
        &self,
        range: AllocRange,
-        init: Scalar,
-    ) -> InterpResult<'tcx, Ref<'_, StoreBuffer>> {
+    ) -> InterpResult<'tcx, Option<Ref<'_, StoreBuffer>>> {
        let access_type = self.store_buffers.borrow().access_type(range);
        let pos = match access_type {
            AccessType::PerfectlyOverlapping(pos) => pos,
-            AccessType::Empty(pos) => {
-                let mut buffers = self.store_buffers.borrow_mut();
-                buffers.insert_at_pos(pos, range, StoreBuffer::new(init));
-                pos
-            }
-            AccessType::ImperfectlyOverlapping(pos_range) => {
-                // Once we reach here we would've already checked that this access is not racy.
-                let mut buffers = self.store_buffers.borrow_mut();
-                buffers.remove_pos_range(pos_range.clone());
-                buffers.insert_at_pos(pos_range.start, range, StoreBuffer::new(init));
-                pos_range.start
-            }
+            // If there is nothing here yet, that means there wasn't an atomic write yet so
+            // we can't return anything outdated.
+            _ => return Ok(None),
        };
-        Ok(Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]))
+        let store_buffer = Ref::map(self.store_buffers.borrow(), |buffer| &buffer[pos]);
+        Ok(Some(store_buffer))
    }

-    /// Gets a mutable store buffer associated with an atomic object in this allocation
+    /// Gets a mutable store buffer associated with an atomic object in this allocation,
+    /// or creates one with the specified initial value if no atomic object exists yet.
    fn get_or_create_store_buffer_mut<'tcx>(
        &mut self,
        range: AllocRange,
-        init: Scalar,
+        init: Option<Scalar>,
    ) -> InterpResult<'tcx, &mut StoreBuffer> {
        let buffers = self.store_buffers.get_mut();
        let access_type = buffers.access_type(range);
@ -244,10 +233,8 @@ impl StoreBufferAlloc {
 }

 impl<'tcx> StoreBuffer {
-    fn new(init: Scalar) -> Self {
+    fn new(init: Option<Scalar>) -> Self {
        let mut buffer = VecDeque::new();
-        buffer.reserve(STORE_BUFFER_LIMIT);
-        let mut ret = Self { buffer };
        let store_elem = StoreElement {
            // The thread index and timestamp of the initialisation write
            // are never meaningfully used, so it's fine to leave them as 0
@ -257,11 +244,11 @@ impl<'tcx> StoreBuffer {
            is_seqcst: false,
            load_info: RefCell::new(LoadInfo::default()),
        };
-        ret.buffer.push_back(store_elem);
-        ret
+        buffer.push_back(store_elem);
+        Self { buffer }
    }

-    /// Reads from the last store in modification order
+    /// Reads from the last store in modification order, if any.
    fn read_from_last_store(
        &self,
        global: &DataRaceState,
@ -282,7 +269,7 @@ impl<'tcx> StoreBuffer {
        is_seqcst: bool,
        rng: &mut (impl rand::Rng + ?Sized),
        validate: impl FnOnce() -> InterpResult<'tcx>,
-    ) -> InterpResult<'tcx, (Scalar, LoadRecency)> {
+    ) -> InterpResult<'tcx, (Option<Scalar>, LoadRecency)> {
        // Having a live borrow to store_buffer while calling validate_atomic_load is fine
        // because the race detector doesn't touch store_buffer

@ -419,15 +406,15 @@ impl<'tcx> StoreBuffer {
            // In the language provided in the paper, an atomic store takes the value from a
            // non-atomic memory location.
            // But we already have the immediate value here so we don't need to do the memory
-            // access
-            val,
+            // access.
+            val: Some(val),
            is_seqcst,
            load_info: RefCell::new(LoadInfo::default()),
        };
-        self.buffer.push_back(store_elem);
-        if self.buffer.len() > STORE_BUFFER_LIMIT {
+        if self.buffer.len() >= STORE_BUFFER_LIMIT {
            self.buffer.pop_front();
        }
+        self.buffer.push_back(store_elem);
        if is_seqcst {
            // Every store that happens before this needs to be marked as SC
            // so that in a later SC load, only the last SC store (i.e. this one) or stores that
@ -450,7 +437,12 @@ impl StoreElement {
    /// buffer regardless of subsequent loads by the same thread; if the earliest load of another
    /// thread doesn't happen before the current one, then no subsequent load by the other thread
    /// can happen before the current one.
-    fn load_impl(&self, index: VectorIdx, clocks: &ThreadClockSet, is_seqcst: bool) -> Scalar {
+    fn load_impl(
+        &self,
+        index: VectorIdx,
+        clocks: &ThreadClockSet,
+        is_seqcst: bool,
+    ) -> Option<Scalar> {
        let mut load_info = self.load_info.borrow_mut();
        load_info.sc_loaded |= is_seqcst;
        let _ = load_info.timestamps.try_insert(index, clocks.clock[index]);
@ -479,7 +471,7 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                global.sc_write(threads);
            }
            let range = alloc_range(base_offset, place.layout.size);
-            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, init)?;
+            let buffer = alloc_buffers.get_or_create_store_buffer_mut(range, Some(init))?;
            buffer.read_from_last_store(global, threads, atomic == AtomicRwOrd::SeqCst);
            buffer.buffered_write(new_val, global, threads, atomic == AtomicRwOrd::SeqCst)?;
        }
@ -492,47 +484,55 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
        atomic: AtomicReadOrd,
        latest_in_mo: Scalar,
        validate: impl FnOnce() -> InterpResult<'tcx>,
-    ) -> InterpResult<'tcx, Scalar> {
+    ) -> InterpResult<'tcx, Option<Scalar>> {
        let this = self.eval_context_ref();
-        if let Some(global) = &this.machine.data_race {
-            let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
-            if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
-                if atomic == AtomicReadOrd::SeqCst {
-                    global.sc_read(&this.machine.threads);
-                }
-                let mut rng = this.machine.rng.borrow_mut();
-                let buffer = alloc_buffers.get_or_create_store_buffer(
-                    alloc_range(base_offset, place.layout.size),
-                    latest_in_mo,
-                )?;
-                let (loaded, recency) = buffer.buffered_read(
-                    global,
-                    &this.machine.threads,
-                    atomic == AtomicReadOrd::SeqCst,
-                    &mut *rng,
-                    validate,
-                )?;
-                if global.track_outdated_loads && recency == LoadRecency::Outdated {
-                    this.emit_diagnostic(NonHaltingDiagnostic::WeakMemoryOutdatedLoad {
-                        ptr: place.ptr(),
-                    });
-                }
+        'fallback: {
+            if let Some(global) = &this.machine.data_race {
+                let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
+                if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
+                    if atomic == AtomicReadOrd::SeqCst {
+                        global.sc_read(&this.machine.threads);
+                    }
+                    let mut rng = this.machine.rng.borrow_mut();
+                    let Some(buffer) = alloc_buffers
+                        .get_store_buffer(alloc_range(base_offset, place.layout.size))?
+                    else {
+                        // No old writes available, fall back to base case.
+                        break 'fallback;
+                    };
+                    let (loaded, recency) = buffer.buffered_read(
+                        global,
+                        &this.machine.threads,
+                        atomic == AtomicReadOrd::SeqCst,
+                        &mut *rng,
+                        validate,
+                    )?;
+                    if global.track_outdated_loads && recency == LoadRecency::Outdated {
+                        this.emit_diagnostic(NonHaltingDiagnostic::WeakMemoryOutdatedLoad {
+                            ptr: place.ptr(),
+                        });
+                    }

-                return Ok(loaded);
+                    return Ok(loaded);
+                }
            }
        }

        // Race detector or weak memory disabled, simply read the latest value
        validate()?;
-        Ok(latest_in_mo)
+        Ok(Some(latest_in_mo))
    }

+    /// Add the given write to the store buffer. (Does not change machine memory.)
+    ///
+    /// `init` says with which value to initialize the store buffer in case there wasn't a store
+    /// buffer for this memory range before.
    fn buffered_atomic_write(
        &mut self,
        val: Scalar,
        dest: &MPlaceTy<'tcx>,
        atomic: AtomicWriteOrd,
-        init: Scalar,
+        init: Option<Scalar>,
    ) -> InterpResult<'tcx> {
        let this = self.eval_context_mut();
        let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(dest.ptr(), 0)?;
@ -545,23 +545,8 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                global.sc_write(threads);
            }

-            // UGLY HACK: in write_scalar_atomic() we don't know the value before our write,
-            // so init == val always. If the buffer is fresh then we would've duplicated an entry,
-            // so we need to remove it.
-            // See https://github.com/rust-lang/miri/issues/2164
-            let was_empty = matches!(
-                alloc_buffers
-                    .store_buffers
-                    .borrow()
-                    .access_type(alloc_range(base_offset, dest.layout.size)),
-                AccessType::Empty(_)
-            );
            let buffer = alloc_buffers
                .get_or_create_store_buffer_mut(alloc_range(base_offset, dest.layout.size), init)?;
-            if was_empty {
-                buffer.buffer.pop_front();
-            }
-
            buffer.buffered_write(val, global, threads, atomic == AtomicWriteOrd::SeqCst)?;
        }

@ -576,7 +561,6 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
        &self,
        place: &MPlaceTy<'tcx>,
        atomic: AtomicReadOrd,
-        init: Scalar,
    ) -> InterpResult<'tcx> {
        let this = self.eval_context_ref();

@ -587,8 +571,12 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
            let size = place.layout.size;
            let (alloc_id, base_offset, ..) = this.ptr_get_alloc_id(place.ptr(), 0)?;
            if let Some(alloc_buffers) = this.get_alloc_extra(alloc_id)?.weak_memory.as_ref() {
-                let buffer = alloc_buffers
-                    .get_or_create_store_buffer(alloc_range(base_offset, size), init)?;
+                let Some(buffer) =
+                    alloc_buffers.get_store_buffer(alloc_range(base_offset, size))?
+                else {
+                    // No store buffer, nothing to do.
+                    return Ok(());
+                };
                buffer.read_from_last_store(
                    global,
                    &this.machine.threads,
--- a/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs
+++ b/src/tools/miri/tests/fail/weak_memory/weak_uninit.rs
@ -0,0 +1,43 @@
+//@compile-flags: -Zmiri-ignore-leaks -Zmiri-preemption-rate=0
+
+// Tests showing weak memory behaviours are exhibited. All tests
+// return true when the desired behaviour is seen.
+// This is scheduler and pseudo-RNG dependent, so each test is
+// run multiple times until one try returns true.
+// Spurious failure is possible, if you are really unlucky with
+// the RNG and always read the latest value from the store buffer.
+#![feature(new_uninit)]
+
+use std::sync::atomic::*;
+use std::thread::spawn;
+
+#[allow(dead_code)]
+#[derive(Copy, Clone)]
+struct EvilSend<T>(pub T);
+
+unsafe impl<T> Send for EvilSend<T> {}
+unsafe impl<T> Sync for EvilSend<T> {}
+
+// We can't create static items because we need to run each test multiple times.
+fn static_uninit_atomic() -> &'static AtomicUsize {
+    unsafe { Box::leak(Box::new_uninit()).assume_init_ref() }
+}
+
+fn relaxed() {
+    let x = static_uninit_atomic();
+    let j1 = spawn(move || {
+        x.store(1, Ordering::Relaxed);
+    });
+
+    let j2 = spawn(move || x.load(Ordering::Relaxed)); //~ERROR: using uninitialized data
+
+    j1.join().unwrap();
+    j2.join().unwrap();
+}
+
+pub fn main() {
+    // If we try often enough, we should hit UB.
+    for _ in 0..100 {
+        relaxed();
+    }
+}
--- a/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr
+++ b/src/tools/miri/tests/fail/weak_memory/weak_uninit.stderr
@ -0,0 +1,15 @@
+error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
+  --> $DIR/weak_uninit.rs:LL:CC
+   |
+LL |     let j2 = spawn(move || x.load(Ordering::Relaxed));
+   |                            ^^^^^^^^^^^^^^^^^^^^^^^^^ using uninitialized data, but this operation requires initialized memory
+   |
+   = help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
+   = help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
+   = note: BACKTRACE on thread `unnamed-ID`:
+   = note: inside closure at $DIR/weak_uninit.rs:LL:CC
+
+note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace
+
+error: aborting due to 1 previous error
+
--- a/src/tools/miri/tests/pass/weak_memory/weak.rs
+++ b/src/tools/miri/tests/pass/weak_memory/weak.rs
@ -18,11 +18,9 @@ struct EvilSend<T>(pub T);
 unsafe impl<T> Send for EvilSend<T> {}
 unsafe impl<T> Sync for EvilSend<T> {}

-// We can't create static items because we need to run each test
-// multiple times
+// We can't create static items because we need to run each test multiple times.
 fn static_atomic(val: usize) -> &'static AtomicUsize {
-    let ret = Box::leak(Box::new(AtomicUsize::new(val)));
-    ret
+    Box::leak(Box::new(AtomicUsize::new(val)))
 }

 // Spins until it reads the given value
@ -33,7 +31,7 @@ fn reads_value(loc: &AtomicUsize, val: usize) -> usize {
    val
 }

-fn relaxed() -> bool {
+fn relaxed(initial_read: bool) -> bool {
    let x = static_atomic(0);
    let j1 = spawn(move || {
        x.store(1, Relaxed);
@ -47,7 +45,9 @@ fn relaxed() -> bool {
    j1.join().unwrap();
    let r2 = j2.join().unwrap();

-    r2 == 1
+    // There are three possible values here: 0 (from the initial read), 1 (from the first relaxed
+    // read), and 2 (the last read). The last case is boring and we cover the other two.
+    r2 == if initial_read { 0 } else { 1 }
 }

 // https://www.doc.ic.ac.uk/~afd/homepages/papers/pdfs/2017/POPL.pdf Figure 8
@ -74,7 +74,6 @@ fn seq_cst() -> bool {

 fn initialization_write(add_fence: bool) -> bool {
    let x = static_atomic(11);
-    assert_eq!(x.load(Relaxed), 11); // work around https://github.com/rust-lang/miri/issues/2164

    let wait = static_atomic(0);

@ -112,11 +111,8 @@ fn faa_replaced_by_load() -> bool {
    }

    let x = static_atomic(0);
-    assert_eq!(x.load(Relaxed), 0); // work around https://github.com/rust-lang/miri/issues/2164
    let y = static_atomic(0);
-    assert_eq!(y.load(Relaxed), 0); // work around https://github.com/rust-lang/miri/issues/2164
    let z = static_atomic(0);
-    assert_eq!(z.load(Relaxed), 0); // work around https://github.com/rust-lang/miri/issues/2164

    // Since each thread is so short, we need to make sure that they truely run at the same time
    // Otherwise t1 will finish before t2 even starts
@ -146,7 +142,8 @@ fn assert_once(f: fn() -> bool) {
 }

 pub fn main() {
-    assert_once(relaxed);
+    assert_once(|| relaxed(false));
+    assert_once(|| relaxed(true));
    assert_once(seq_cst);
    assert_once(|| initialization_write(false));
    assert_once(|| initialization_write(true));