From bbdbafdf8a947b563b46f632a778632b906d9eb4 Mon Sep 17 00:00:00 2001
From: Jim Blandy
Date: Mon, 22 Apr 2024 10:59:21 -0700
Subject: [PATCH] [core] Add `lock::observing` module, for analyzing lock acquisition.

Add a new module `lock::observing`, enabled by the `observe_locks`
feature, that records all nested lock acquisitions in trace files.

Add a new utility to the workspace, `lock-analyzer`, that reads the
files written by the `observe_locks` feature and writes out a new
`define_lock_ranks!` macro invocation that covers all observed lock
usage, along with comments giving the held and acquired source
locations.
---
 Cargo.lock                       |   9 +
 Cargo.toml                       |   2 +
 lock-analyzer/Cargo.toml         |  18 ++
 lock-analyzer/src/main.rs        | 254 +++++++++++++++++
 wgpu-core/Cargo.toml             |   3 +
 wgpu-core/src/device/resource.rs |   1 +
 wgpu-core/src/lock/mod.rs        |  25 +-
 wgpu-core/src/lock/observing.rs  | 475 +++++++++++++++++++++++++++++++
 wgpu-core/src/lock/rank.rs       |  10 +
 wgpu-core/src/lock/ranked.rs     |  20 +-
 10 files changed, 796 insertions(+), 21 deletions(-)
 create mode 100644 lock-analyzer/Cargo.toml
 create mode 100644 lock-analyzer/src/main.rs
 create mode 100644 wgpu-core/src/lock/observing.rs

diff --git a/Cargo.lock b/Cargo.lock
index 0153bf693..90b535010 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1766,6 +1766,15 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4ce301924b7887e9d637144fdade93f9dfff9b60981d4ac161db09720d39aa5"
 
+[[package]]
+name = "lock-analyzer"
+version = "22.0.0"
+dependencies = [
+ "anyhow",
+ "ron",
+ "serde",
+]
+
 [[package]]
 name = "lock_api"
 version = "0.4.12"
diff --git a/Cargo.toml b/Cargo.toml
index 60f338f59..ff5557fae 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ members = [
     # default members
     "benches",
     "examples",
+    "lock-analyzer",
    "naga-cli",
     "naga",
     "naga/fuzz",
@@ -24,6 +25,7 @@ exclude = []
 default-members = [
     "benches",
     "examples",
+    "lock-analyzer",
     "naga-cli",
     "naga",
     "naga/fuzz",
diff --git a/lock-analyzer/Cargo.toml b/lock-analyzer/Cargo.toml
new file mode 100644
index 000000000..20eaee0f8
--- /dev/null
+++ b/lock-analyzer/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "lock-analyzer"
+edition.workspace = true
+rust-version.workspace = true
+keywords.workspace = true
+license.workspace = true
+homepage.workspace = true
+repository.workspace = true
+version.workspace = true
+authors.workspace = true
+
+[dependencies]
+ron.workspace = true
+anyhow.workspace = true
+
+[dependencies.serde]
+workspace = true
+features = ["serde_derive"]
diff --git a/lock-analyzer/src/main.rs b/lock-analyzer/src/main.rs
new file mode 100644
index 000000000..18f131ab8
--- /dev/null
+++ b/lock-analyzer/src/main.rs
@@ -0,0 +1,254 @@
+//! Analyzer for data produced by `wgpu-core`'s `observe_locks` feature.
+//!
+//! When `wgpu-core`'s `observe_locks` feature is enabled, if the
+//! `WGPU_CORE_LOCK_OBSERVE_DIR` environment variable is set to the
+//! path of an existing directory, then every thread that acquires a
+//! lock in `wgpu-core` will write its own log file to that directory.
+//! You can then run this program to read those files and summarize
+//! the results.
+//!
+//! This program also consults the `WGPU_CORE_LOCK_OBSERVE_DIR`
+//! environment variable to find the log files written by `wgpu-core`.
+//!
+//! See `wgpu_core/src/lock/observing.rs` for a general explanation of
+//! this analysis.
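+//!
+//! For orientation, the output is a series of `rank` clauses shaped
+//! like the ones in `wgpu_core/src/lock/rank.rs`. The sketch below is
+//! illustrative only: the rank constants, member names, and source
+//! locations are invented, not taken from a real trace.
+//!
+//! ```text
+//!     rank DEVICE_FOO "Device::foo" followed by {
+//!         // holding device/resource.rs:100 while locking device/life.rs:50
+//!         DEVICE_BAR,
+//!
+//!         // leaf lock acquisitions:
+//!         DEVICE_BAZ,
+//!     };
+//!
+//!     rank DEVICE_BAZ "Device::baz" followed by { };
+//! ```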
+
+use std::sync::Arc;
+use std::{
+    collections::{btree_map::Entry, BTreeMap, BTreeSet, HashMap},
+    fmt,
+    path::PathBuf,
+};
+
+use anyhow::{Context, Result};
+
+fn main() -> Result<()> {
+    let mut ranks: BTreeMap<u32, Rank> = BTreeMap::default();
+
+    let Ok(dir) = std::env::var("WGPU_CORE_LOCK_OBSERVE_DIR") else {
+        eprintln!(concat!(
+            "Please set the `WGPU_CORE_LOCK_OBSERVE_DIR` environment variable\n",
+            "to the path of the directory containing the files written by\n",
+            "`wgpu-core`'s `observe_locks` feature."
+        ));
+        anyhow::bail!("`WGPU_CORE_LOCK_OBSERVE_DIR` environment variable is not set");
+    };
+    let entries =
+        std::fs::read_dir(&dir).with_context(|| format!("failed to read directory {dir}"))?;
+    for entry in entries {
+        let entry = entry.with_context(|| format!("failed to read directory entry from {dir}"))?;
+        let name = PathBuf::from(&entry.file_name());
+        let Some(extension) = name.extension() else {
+            eprintln!("Ignoring {}", name.display());
+            continue;
+        };
+        if extension != "ron" {
+            eprintln!("Ignoring {}", name.display());
+            continue;
+        }
+
+        let contents = std::fs::read(entry.path())
+            .with_context(|| format!("failed to read lock observations from {}", name.display()))?;
+        // The addresses of `&'static Location<'static>` values could
+        // vary from run to run.
+        let mut locations: HashMap<LocationAddress, Arc<Location>> = HashMap::default();
+        for line in contents.split(|&b| b == b'\n') {
+            if line.is_empty() {
+                continue;
+            }
+            let action = ron::de::from_bytes::<Action>(line)
+                .with_context(|| format!("Error parsing action from {}", name.display()))?;
+            match action {
+                Action::Location {
+                    address,
+                    file,
+                    line,
+                    column,
+                } => {
+                    let file = match file.split_once("src/") {
+                        Some((_, after)) => after.to_string(),
+                        None => file,
+                    };
+                    assert!(locations
+                        .insert(address, Arc::new(Location { file, line, column }))
+                        .is_none());
+                }
+                Action::Rank {
+                    bit,
+                    member_name,
+                    const_name,
+                } => match ranks.entry(bit) {
+                    Entry::Occupied(occupied) => {
+                        let rank = occupied.get();
+                        assert_eq!(rank.member_name, member_name);
+                        assert_eq!(rank.const_name, const_name);
+                    }
+                    Entry::Vacant(vacant) => {
+                        vacant.insert(Rank {
+                            member_name,
+                            const_name,
+                            acquisitions: BTreeMap::default(),
+                        });
+                    }
+                },
+                Action::Acquisition {
+                    older_rank,
+                    older_location,
+                    newer_rank,
+                    newer_location,
+                } => {
+                    let older_location = locations[&older_location].clone();
+                    let newer_location = locations[&newer_location].clone();
+                    ranks
+                        .get_mut(&older_rank)
+                        .unwrap()
+                        .acquisitions
+                        .entry(newer_rank)
+                        .or_default()
+                        .entry(older_location)
+                        .or_default()
+                        .insert(newer_location);
+                }
+            }
+        }
+    }
+
+    for older_rank in ranks.values() {
+        if older_rank.is_leaf() {
+            // We'll print leaf locks separately, below.
+            continue;
+        }
+        println!(
+            "    rank {} {:?} followed by {{",
+            older_rank.const_name, older_rank.member_name
+        );
+        let mut acquired_any_leaf_locks = false;
+        let mut first_newer = true;
+        for (newer_rank, locations) in &older_rank.acquisitions {
+            // List acquisitions of leaf locks at the end.
+            if ranks[newer_rank].is_leaf() {
+                acquired_any_leaf_locks = true;
+                continue;
+            }
+            if !first_newer {
+                println!();
+            }
+            for (older_location, newer_locations) in locations {
+                if newer_locations.len() == 1 {
+                    for newer_loc in newer_locations {
+                        println!("        // holding {older_location} while locking {newer_loc}");
+                    }
+                } else {
+                    println!("        // holding {older_location} while locking:");
+                    for newer_loc in newer_locations {
+                        println!("        //     {newer_loc}");
+                    }
+                }
+            }
+            println!("        {},", ranks[newer_rank].const_name);
+            first_newer = false;
+        }
+
+        if acquired_any_leaf_locks {
+            // We checked that older_rank isn't a leaf lock, so we
+            // must have printed something above.
+            if !first_newer {
+                println!();
+            }
+            println!("        // leaf lock acquisitions:");
+            for newer_rank in older_rank.acquisitions.keys() {
+                if !ranks[newer_rank].is_leaf() {
+                    continue;
+                }
+                println!("        {},", ranks[newer_rank].const_name);
+            }
+        }
+        println!("    }};");
+        println!();
+    }
+
+    for older_rank in ranks.values() {
+        if !older_rank.is_leaf() {
+            continue;
+        }
+
+        println!(
+            "    rank {} {:?} followed by {{ }};",
+            older_rank.const_name, older_rank.member_name
+        );
+    }
+
+    Ok(())
+}
+
+#[derive(Debug, serde::Deserialize)]
+#[serde(deny_unknown_fields)]
+enum Action {
+    /// A location that we will refer to in later actions.
+    Location {
+        address: LocationAddress,
+        file: String,
+        line: u32,
+        column: u32,
+    },
+
+    /// A lock rank that we will refer to in later actions.
+    Rank {
+        bit: u32,
+        member_name: String,
+        const_name: String,
+    },
+
+    /// An attempt to acquire a lock while holding another lock.
+    Acquisition {
+        /// The number of the already acquired lock's rank.
+        older_rank: u32,
+
+        /// The source position at which we acquired it. Specifically,
+        /// its `Location`'s address, as an integer.
+        older_location: LocationAddress,
+
+        /// The number of the rank of the lock we are acquiring.
+        newer_rank: u32,
+
+        /// The source position at which we are acquiring it.
+        /// Specifically, its `Location`'s address, as an integer.
+        newer_location: LocationAddress,
+    },
+}
+
+/// The memory address at which the `Location` was stored in the
+/// observed process.
+///
+/// This is not `usize` because it does not represent an address in
+/// this `lock-analyzer` process. We might generate logs on a 64-bit
+/// machine and analyze them on a 32-bit machine. The `u64` type is a
+/// reasonable universal type for addresses on any machine.
+type LocationAddress = u64;
+
+struct Rank {
+    member_name: String,
+    const_name: String,
+    acquisitions: BTreeMap<u32, LocationSet>,
+}
+
+impl Rank {
+    fn is_leaf(&self) -> bool {
+        self.acquisitions.is_empty()
+    }
+}
+
+type LocationSet = BTreeMap<Arc<Location>, BTreeSet<Arc<Location>>>;
+
+#[derive(Eq, Ord, PartialEq, PartialOrd)]
+struct Location {
+    file: String,
+    line: u32,
+    column: u32,
+}
+
+impl fmt::Display for Location {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}:{}", self.file, self.line)
+    }
+}
diff --git a/wgpu-core/Cargo.toml b/wgpu-core/Cargo.toml
index 22d813c4c..1b9ce9848 100644
--- a/wgpu-core/Cargo.toml
+++ b/wgpu-core/Cargo.toml
@@ -57,6 +57,9 @@ serde = ["dep:serde", "wgt/serde", "arrayvec/serde"]
 ## Enable API tracing.
 trace = ["dep:ron", "serde", "naga/serialize"]
 
+## Enable lock order observation.
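+## When this feature is enabled, set the `WGPU_CORE_LOCK_OBSERVE_DIR`
+## environment variable to an existing directory and each thread will
+## write a `.ron` trace file there; see `src/lock/observing.rs`.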
+observe_locks = ["dep:ron", "serde/serde_derive"]
+
 ## Enable API replaying
 replay = ["serde", "naga/deserialize"]
 
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index 5f50d38c8..45a5a58f4 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -341,6 +341,7 @@ impl Device {
         assert!(self.queue_to_drop.set(queue).is_ok());
     }
 
+    #[track_caller]
     pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
         self.life_tracker.lock()
     }
diff --git a/wgpu-core/src/lock/mod.rs b/wgpu-core/src/lock/mod.rs
index a6593a062..2927bf3aa 100644
--- a/wgpu-core/src/lock/mod.rs
+++ b/wgpu-core/src/lock/mod.rs
@@ -9,17 +9,22 @@
 //! checks to ensure that each thread acquires locks only in a
 //! specific order, to prevent deadlocks.
 //!
+//! - The [`observing`] module defines lock types that record
+//!   `wgpu-core`'s lock acquisition activity to disk, for later
+//!   analysis by the `lock-analyzer` binary.
+//!
 //! - The [`vanilla`] module defines lock types that are
 //!   uninstrumented, no-overhead wrappers around the standard lock
 //!   types.
 //!
-//! (We plan to add more wrappers in the future.)
-//!
 //! If the `wgpu_validate_locks` config is set (for example, with
 //! `RUSTFLAGS='--cfg wgpu_validate_locks'`), `wgpu-core` uses the
 //! [`ranked`] module's locks. We hope to make this the default for
 //! debug builds soon.
 //!
+//! If the `observe_locks` feature is enabled, `wgpu-core` uses the
+//! [`observing`] module's locks.
+//!
 //! Otherwise, `wgpu-core` uses the [`vanilla`] module's locks.
 //!
 //! [`Mutex`]: parking_lot::Mutex
@@ -31,11 +36,19 @@ pub mod rank;
 #[cfg_attr(not(wgpu_validate_locks), allow(dead_code))]
 mod ranked;
 
-#[cfg_attr(wgpu_validate_locks, allow(dead_code))]
+#[cfg(feature = "observe_locks")]
+mod observing;
+
+#[cfg_attr(any(wgpu_validate_locks, feature = "observe_locks"), allow(dead_code))]
 mod vanilla;
 
 #[cfg(wgpu_validate_locks)]
-pub use ranked::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};
+use ranked as chosen;
 
-#[cfg(not(wgpu_validate_locks))]
-pub use vanilla::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};
+#[cfg(feature = "observe_locks")]
+use observing as chosen;
+
+#[cfg(not(any(wgpu_validate_locks, feature = "observe_locks")))]
+use vanilla as chosen;
+
+pub use chosen::{Mutex, MutexGuard, RwLock, RwLockReadGuard, RwLockWriteGuard};
diff --git a/wgpu-core/src/lock/observing.rs b/wgpu-core/src/lock/observing.rs
new file mode 100644
index 000000000..a5e02c54b
--- /dev/null
+++ b/wgpu-core/src/lock/observing.rs
@@ -0,0 +1,475 @@
+//! Lock types that observe lock acquisition order.
+//!
+//! This module's [`Mutex`] type is instrumented to observe the
+//! nesting of `wgpu-core` lock acquisitions. Whenever `wgpu-core`
+//! acquires one lock while it is already holding another, we note
+//! that nesting pair. This tells us what the [`LockRank::followers`]
+//! set for each lock would need to include to accommodate
+//! `wgpu-core`'s observed behavior.
+//!
+//! When `wgpu-core`'s `observe_locks` feature is enabled, if the
+//! `WGPU_CORE_LOCK_OBSERVE_DIR` environment variable is set to the
+//! path of an existing directory, then every thread that acquires a
+//! lock in `wgpu-core` will write its own log file to that directory.
+//! You can then run the `wgpu` workspace's `lock-analyzer` binary to
+//! read those files and summarize the results. The output from
+//! `lock-analyzer` has the same form as the lock ranks given in
+//! [`lock/rank.rs`].
+//!
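+//! A typical session might look like the sketch below. The trace
+//! directory and the workload command are illustrative; any program
+//! that exercises `wgpu-core` built with `observe_locks` will do:
+//!
+//! ```text
+//! $ mkdir /tmp/lock-observations
+//! $ export WGPU_CORE_LOCK_OBSERVE_DIR=/tmp/lock-observations
+//! $ cargo test                      # any workload that takes wgpu-core locks
+//! $ cargo run --bin lock-analyzer
+//! ```
+//!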
+//! If the `WGPU_CORE_LOCK_OBSERVE_DIR` environment variable is not
+//! set, then no instrumentation takes place, and the locks behave
+//! normally.
+//!
+//! To make sure we capture all acquisitions regardless of when the
+//! program exits, each thread writes events directly to its log file
+//! as they occur. A `write` system call is generally just a copy from
+//! userspace into the kernel's buffer, so hopefully this approach
+//! will still have tolerable performance.
+//!
+//! [`lock/rank.rs`]: ../../../src/wgpu_core/lock/rank.rs.html
+
+use crate::FastHashSet;
+
+use super::rank::{LockRank, LockRankSet};
+use std::{
+    cell::RefCell,
+    fs::File,
+    panic::Location,
+    path::{Path, PathBuf},
+};
+
+/// A `Mutex` instrumented for lock acquisition order observation.
+///
+/// This is just a wrapper around a [`parking_lot::Mutex`], along with
+/// its rank in the `wgpu_core` lock ordering.
+///
+/// For details, see [the module documentation][self].
+pub struct Mutex<T> {
+    inner: parking_lot::Mutex<T>,
+    rank: LockRank,
+}
+
+/// A guard produced by locking [`Mutex`].
+///
+/// This is just a wrapper around a [`parking_lot::MutexGuard`], along
+/// with the state needed to track lock acquisition.
+///
+/// For details, see [the module documentation][self].
+pub struct MutexGuard<'a, T> {
+    inner: parking_lot::MutexGuard<'a, T>,
+    _state: LockStateGuard,
+}
+
+impl<T> Mutex<T> {
+    pub fn new(rank: LockRank, value: T) -> Mutex<T> {
+        Mutex {
+            inner: parking_lot::Mutex::new(value),
+            rank,
+        }
+    }
+
+    #[track_caller]
+    pub fn lock(&self) -> MutexGuard<T> {
+        let saved = acquire(self.rank, Location::caller());
+        MutexGuard {
+            inner: self.inner.lock(),
+            _state: LockStateGuard { saved },
+        }
+    }
+}
+
+impl<'a, T> std::ops::Deref for MutexGuard<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.inner.deref()
+    }
+}
+
+impl<'a, T> std::ops::DerefMut for MutexGuard<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.inner.deref_mut()
+    }
+}
+
+impl<T: std::fmt::Debug> std::fmt::Debug for Mutex<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+/// An `RwLock` instrumented for lock acquisition order observation.
+///
+/// This is just a wrapper around a [`parking_lot::RwLock`], along with
+/// its rank in the `wgpu_core` lock ordering.
+///
+/// For details, see [the module documentation][self].
+pub struct RwLock<T> {
+    inner: parking_lot::RwLock<T>,
+    rank: LockRank,
+}
+
+/// A read guard produced by locking [`RwLock`] for reading.
+///
+/// This is just a wrapper around a [`parking_lot::RwLockReadGuard`], along with
+/// the state needed to track lock acquisition.
+///
+/// For details, see [the module documentation][self].
+pub struct RwLockReadGuard<'a, T> {
+    inner: parking_lot::RwLockReadGuard<'a, T>,
+    _state: LockStateGuard,
+}
+
+/// A write guard produced by locking [`RwLock`] for writing.
+///
+/// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`], along
+/// with the state needed to track lock acquisition.
+///
+/// For details, see [the module documentation][self].
+pub struct RwLockWriteGuard<'a, T> {
+    inner: parking_lot::RwLockWriteGuard<'a, T>,
+    _state: LockStateGuard,
+}
+
+impl<T> RwLock<T> {
+    pub fn new(rank: LockRank, value: T) -> RwLock<T> {
+        RwLock {
+            inner: parking_lot::RwLock::new(value),
+            rank,
+        }
+    }
+
+    #[track_caller]
+    pub fn read(&self) -> RwLockReadGuard<T> {
+        let saved = acquire(self.rank, Location::caller());
+        RwLockReadGuard {
+            inner: self.inner.read(),
+            _state: LockStateGuard { saved },
+        }
+    }
+
+    #[track_caller]
+    pub fn write(&self) -> RwLockWriteGuard<T> {
+        let saved = acquire(self.rank, Location::caller());
+        RwLockWriteGuard {
+            inner: self.inner.write(),
+            _state: LockStateGuard { saved },
+        }
+    }
+}
+
+impl<'a, T> RwLockWriteGuard<'a, T> {
+    pub fn downgrade(this: Self) -> RwLockReadGuard<'a, T> {
+        RwLockReadGuard {
+            inner: parking_lot::RwLockWriteGuard::downgrade(this.inner),
+            _state: this._state,
+        }
+    }
+}
+
+impl<T: std::fmt::Debug> std::fmt::Debug for RwLock<T> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.inner.fmt(f)
+    }
+}
+
+impl<'a, T> std::ops::Deref for RwLockReadGuard<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.inner.deref()
+    }
+}
+
+impl<'a, T> std::ops::Deref for RwLockWriteGuard<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.inner.deref()
+    }
+}
+
+impl<'a, T> std::ops::DerefMut for RwLockWriteGuard<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.inner.deref_mut()
+    }
+}
+
+/// A container that restores a prior per-thread lock state when dropped.
+///
+/// This type serves two purposes:
+///
+/// - Operations like `RwLockWriteGuard::downgrade` would like to be able to
+///   destructure lock guards and reassemble their pieces into new guards, but
+///   if the guard type itself implements `Drop`, we can't destructure it
+///   without unsafe code or pointless `Option`s whose state is almost always
+///   statically known.
+///
+/// - We can just implement `Drop` for this type once, and then use it in lock
+///   guards, rather than implementing `Drop` separately for each guard type.
+struct LockStateGuard {
+    /// The youngest lock that was already held when we acquired this
+    /// one, if any.
+    saved: Option<HeldLock>,
+}
+
+impl Drop for LockStateGuard {
+    fn drop(&mut self) {
+        release(self.saved)
+    }
+}
+
+/// Check and record the acquisition of a lock with `new_rank`.
+///
+/// Log the acquisition of a lock with `new_rank`, and
+/// update the per-thread state accordingly.
+///
+/// Return the `Option` state that must be restored when this lock is
+/// released.
+fn acquire(new_rank: LockRank, location: &'static Location<'static>) -> Option<HeldLock> {
+    LOCK_STATE.with_borrow_mut(|state| match *state {
+        ThreadState::Disabled => None,
+        ThreadState::Initial => {
+            let Ok(dir) = std::env::var("WGPU_CORE_LOCK_OBSERVE_DIR") else {
+                *state = ThreadState::Disabled;
+                return None;
+            };
+
+            // Create the observation log file.
+            let mut log = ObservationLog::create(dir)
+                .expect("Failed to open lock observation file (does the dir exist?)");
+
+            // Log the full set of lock ranks, so that the analysis can even see
+            // locks that are only acquired in isolation.
+            for rank in LockRankSet::all().iter() {
+                log.write_rank(rank);
+            }
+
+            // Update our state to reflect that we are logging acquisitions, and
+            // that we have acquired this lock.
+            *state = ThreadState::Enabled {
+                held_lock: Some(HeldLock {
+                    rank: new_rank,
+                    location,
+                }),
+                log,
+            };
+
+            // Since this is the first acquisition on this thread, we know that
+            // there is no prior lock held, and thus nothing to log yet.
+            None
+        }
+        ThreadState::Enabled {
+            ref mut held_lock,
+            ref mut log,
+        } => {
+            if let Some(ref held_lock) = held_lock {
+                log.write_acquisition(held_lock, new_rank, location);
+            }
+
+            std::mem::replace(
+                held_lock,
+                Some(HeldLock {
+                    rank: new_rank,
+                    location,
+                }),
+            )
+        }
+    })
+}
+
+/// Record the release of a lock whose saved state was `saved`.
+fn release(saved: Option<HeldLock>) {
+    LOCK_STATE.with_borrow_mut(|state| {
+        if let ThreadState::Enabled {
+            ref mut held_lock, ..
+        } = *state
+        {
+            *held_lock = saved;
+        }
+    });
+}
+
+thread_local! {
+    static LOCK_STATE: RefCell<ThreadState> = const { RefCell::new(ThreadState::Initial) };
+}
+
+/// Thread-local state for lock observation.
+enum ThreadState {
+    /// This thread hasn't yet checked the environment variable.
+    Initial,
+
+    /// This thread checked the environment variable, and it was
+    /// unset, so this thread is not observing lock acquisitions.
+    Disabled,
+
+    /// Lock observation is enabled for this thread.
+    Enabled {
+        held_lock: Option<HeldLock>,
+        log: ObservationLog,
+    },
+}
+
+/// Information about a currently held lock.
+#[derive(Debug, Copy, Clone)]
+struct HeldLock {
+    /// The lock's rank.
+    rank: LockRank,
+
+    /// Where we acquired the lock.
+    location: &'static Location<'static>,
+}
+
+/// A log to which we can write observations of lock activity.
+struct ObservationLog {
+    /// The file to which we are logging lock observations.
+    log_file: File,
+
+    /// [`Location`]s we've seen so far.
+    ///
+    /// This is a hashset of raw pointers because raw pointers have
+    /// the [`Eq`] and [`Hash`] relations we want: the pointer value, not
+    /// the contents. There's no unsafe code in this module.
+    locations_seen: FastHashSet<*const Location<'static>>,
+
+    /// Buffer for serializing events, retained for allocation reuse.
+    buffer: Vec<u8>,
+}
+
+#[allow(trivial_casts)]
+impl ObservationLog {
+    /// Create an observation log in `dir` for the current pid and thread.
+    fn create(dir: impl AsRef<Path>) -> Result<Self, std::io::Error> {
+        let mut path = PathBuf::from(dir.as_ref());
+        path.push(format!(
+            "locks-{}.{:?}.ron",
+            std::process::id(),
+            std::thread::current().id()
+        ));
+        let log_file = File::create(&path)?;
+        Ok(ObservationLog {
+            log_file,
+            locations_seen: FastHashSet::default(),
+            buffer: Vec::new(),
+        })
+    }
+
+    /// Record the acquisition of one lock while holding another.
+    ///
+    /// Log that we acquired a lock of `new_rank` at `new_location` while still
+    /// holding other locks, the most recently acquired of which has
+    /// `older_rank`.
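+    ///
+    /// With `ron`'s default serializer settings the resulting records
+    /// look roughly like this (the addresses, ranks, and source
+    /// locations are made up for illustration):
+    ///
+    /// ```text
+    /// Location(address: 4096, file: "wgpu-core/src/device/resource.rs", line: 345, column: 31)
+    /// Acquisition(older_rank: 2, older_location: 4096, newer_rank: 5, newer_location: 4160)
+    /// ```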
+    fn write_acquisition(
+        &mut self,
+        older_lock: &HeldLock,
+        new_rank: LockRank,
+        new_location: &'static Location<'static>,
+    ) {
+        self.write_location(older_lock.location);
+        self.write_location(new_location);
+        self.write_action(&Action::Acquisition {
+            older_rank: older_lock.rank.bit.number(),
+            older_location: older_lock.location as *const _ as usize,
+            newer_rank: new_rank.bit.number(),
+            newer_location: new_location as *const _ as usize,
+        });
+    }
+
+    fn write_location(&mut self, location: &'static Location<'static>) {
+        if self.locations_seen.insert(location) {
+            self.write_action(&Action::Location {
+                address: location as *const _ as usize,
+                file: location.file(),
+                line: location.line(),
+                column: location.column(),
+            });
+        }
+    }
+
+    fn write_rank(&mut self, rank: LockRankSet) {
+        self.write_action(&Action::Rank {
+            bit: rank.number(),
+            member_name: rank.member_name(),
+            const_name: rank.const_name(),
+        });
+    }
+
+    fn write_action(&mut self, action: &Action) {
+        use std::io::Write;
+
+        self.buffer.clear();
+        ron::ser::to_writer(&mut self.buffer, &action)
+            .expect("error serializing `lock::observing::Action`");
+        self.buffer.push(b'\n');
+        self.log_file
+            .write_all(&self.buffer)
+            .expect("error writing `lock::observing::Action`");
+    }
+}
+
+/// An action logged by a thread that is observing lock acquisition order.
+///
+/// Each thread's log file is a sequence of these enums, serialized
+/// using the [`ron`] crate, one action per line.
+///
+/// Lock observation cannot assume that there will be any convenient
+/// finalization point before the program exits, so in practice,
+/// actions must be written immediately when they occur. This means we
+/// can't, say, accumulate tables and write them out when they're
+/// complete. The `lock-analyzer` binary is then responsible for
+/// consolidating the data into a single table of observed transitions.
+#[derive(serde::Serialize)]
+enum Action {
+    /// A location that we will refer to in later actions.
+    ///
+    /// We write one of these events the first time we see a
+    /// particular `Location`. Treating this as a separate action
+    /// simply lets us avoid repeating the content over and over
+    /// again in every [`Acquisition`] action.
+    ///
+    /// [`Acquisition`]: Action::Acquisition
+    Location {
+        address: usize,
+        file: &'static str,
+        line: u32,
+        column: u32,
+    },
+
+    /// A lock rank that we will refer to in later actions.
+    ///
+    /// We write out one of these events for every lock rank at the
+    /// beginning of each thread's log file. Treating this as a
+    /// separate action simply lets us avoid repeating the names over
+    /// and over again in every [`Acquisition`] action.
+    ///
+    /// [`Acquisition`]: Action::Acquisition
+    Rank {
+        bit: u32,
+        member_name: &'static str,
+        const_name: &'static str,
+    },
+
+    /// An attempt to acquire a lock while holding another lock.
+    Acquisition {
+        /// The number of the already acquired lock's rank.
+        older_rank: u32,
+
+        /// The source position at which we acquired it. Specifically,
+        /// its `Location`'s address, as an integer.
+        older_location: usize,
+
+        /// The number of the rank of the lock we are acquiring.
+        newer_rank: u32,
+
+        /// The source position at which we are acquiring it.
+        /// Specifically, its `Location`'s address, as an integer.
+        newer_location: usize,
+    },
+}
+
+impl LockRankSet {
+    /// Return the number of this rank's first member.
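+    ///
+    /// For example, a rank set whose bit pattern is `0b0100` has two
+    /// trailing zeros, so its rank number is `2`.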
+    fn number(self) -> u32 {
+        self.bits().trailing_zeros()
+    }
+}
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index cfb93116d..abb5cb002 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -71,6 +71,16 @@ macro_rules! define_lock_ranks {
                     _ => "",
                 }
             }
+
+            #[cfg_attr(not(feature = "observe_locks"), allow(dead_code))]
+            pub fn const_name(self) -> &'static str {
+                match self {
+                    $(
+                        LockRankSet:: $name => stringify!($name),
+                    )*
+                    _ => "",
+                }
+            }
         }
 
         $(
diff --git a/wgpu-core/src/lock/ranked.rs b/wgpu-core/src/lock/ranked.rs
index 89805cb18..c3aedb1b0 100644
--- a/wgpu-core/src/lock/ranked.rs
+++ b/wgpu-core/src/lock/ranked.rs
@@ -63,9 +63,7 @@ use std::{cell::Cell, panic::Location};
 /// This is just a wrapper around a [`parking_lot::Mutex`], along with
 /// its rank in the `wgpu_core` lock ordering.
 ///
-/// For details, see [the module documentation][mod].
-///
-/// [mod]: crate::lock::ranked
+/// For details, see [the module documentation][self].
 pub struct Mutex<T> {
     inner: parking_lot::Mutex<T>,
     rank: LockRank,
@@ -76,9 +74,7 @@ pub struct Mutex<T> {
 /// This is just a wrapper around a [`parking_lot::MutexGuard`], along
 /// with the state needed to track lock acquisition.
 ///
-/// For details, see [the module documentation][mod].
-///
-/// [mod]: crate::lock::ranked
+/// For details, see [the module documentation][self].
 pub struct MutexGuard<'a, T> {
     inner: parking_lot::MutexGuard<'a, T>,
     saved: LockStateGuard,
@@ -220,9 +216,7 @@ impl<T: std::fmt::Debug> std::fmt::Debug for Mutex<T> {
 /// This is just a wrapper around a [`parking_lot::RwLock`], along with
 /// its rank in the `wgpu_core` lock ordering.
 ///
-/// For details, see [the module documentation][mod].
-///
-/// [mod]: crate::lock::ranked
+/// For details, see [the module documentation][self].
 pub struct RwLock<T> {
     inner: parking_lot::RwLock<T>,
     rank: LockRank,
@@ -233,9 +227,7 @@ pub struct RwLock<T> {
 /// This is just a wrapper around a [`parking_lot::RwLockReadGuard`], along with
 /// the state needed to track lock acquisition.
 ///
-/// For details, see [the module documentation][mod].
-///
-/// [mod]: crate::lock::ranked
+/// For details, see [the module documentation][self].
 pub struct RwLockReadGuard<'a, T> {
     inner: parking_lot::RwLockReadGuard<'a, T>,
     saved: LockStateGuard,
@@ -246,9 +238,7 @@ pub struct RwLockReadGuard<'a, T> {
 /// This is just a wrapper around a [`parking_lot::RwLockWriteGuard`], along
 /// with the state needed to track lock acquisition.
 ///
-/// For details, see [the module documentation][mod].
-///
-/// [mod]: crate::lock::ranked
+/// For details, see [the module documentation][self].
 pub struct RwLockWriteGuard<'a, T> {
     inner: parking_lot::RwLockWriteGuard<'a, T>,
     saved: LockStateGuard,