diff --git a/CHANGELOG.md b/CHANGELOG.md
index f0122c106..76742558a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -128,6 +128,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148]
 - Check for device mismatches when beginning render and compute passes. By @ErichDonGubler in [#6497](https://github.com/gfx-rs/wgpu/pull/6497). 
 - Lower `QUERY_SET_MAX_QUERIES` (and enforced limits) from 8192 to 4096 to match WebGPU spec. By @ErichDonGubler in [#6525](https://github.com/gfx-rs/wgpu/pull/6525).
 - Allow non-filterable float on texture bindings never used with samplers when using a derived bind group layout. By @ErichDonGubler in [#6531](https://github.com/gfx-rs/wgpu/pull/6531/).
+- Replace potentially unsound usage of `PreHashedMap` with `FastHashMap`. By @jamienicol in [#6541](https://github.com/gfx-rs/wgpu/pull/6541).
 
 #### Naga
 
diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs
index d13abb199..2e719055e 100644
--- a/naga/src/back/glsl/mod.rs
+++ b/naga/src/back/glsl/mod.rs
@@ -3497,13 +3497,13 @@ impl<'a, W: Write> Writer<'a, W> {
                     Mf::Transpose => "transpose",
                     Mf::Determinant => "determinant",
                     Mf::QuantizeToF16 => match *ctx.resolve_type(arg, &self.module.types) {
-                        crate::TypeInner::Scalar { .. } => {
+                        TypeInner::Scalar { .. } => {
                             write!(self.out, "unpackHalf2x16(packHalf2x16(vec2(")?;
                             self.write_expr(arg, ctx)?;
                             write!(self.out, "))).x")?;
                             return Ok(());
                         }
-                        crate::TypeInner::Vector {
+                        TypeInner::Vector {
                             size: crate::VectorSize::Bi,
                             ..
                         } => {
@@ -3512,7 +3512,7 @@ impl<'a, W: Write> Writer<'a, W> {
                             write!(self.out, "))")?;
                             return Ok(());
                         }
-                        crate::TypeInner::Vector {
+                        TypeInner::Vector {
                             size: crate::VectorSize::Tri,
                             ..
                         } => {
@@ -3523,7 +3523,7 @@ impl<'a, W: Write> Writer<'a, W> {
                             write!(self.out, ".zz)).x)")?;
                             return Ok(());
                         }
-                        crate::TypeInner::Vector {
+                        TypeInner::Vector {
                             size: crate::VectorSize::Quad,
                             ..
                         } => {
diff --git a/tests/tests/device.rs b/tests/tests/device.rs
index 2774bfd52..87935c831 100644
--- a/tests/tests/device.rs
+++ b/tests/tests/device.rs
@@ -630,7 +630,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(TestParameters::default().expect_fail(FailureCase::webgl2()))
     .run_sync(|ctx| {
         // This test checks that when the device is dropped (such as in a GC),
-        // the provided DeviceLostClosure is called with reason DeviceLostReason::Unknown.
+        // the provided DeviceLostClosure is called with reason DeviceLostReason::Dropped.
         // Fails on webgl because webgl doesn't implement drop.
         static WAS_CALLED: std::sync::atomic::AtomicBool = AtomicBool::new(false);
 
@@ -642,8 +642,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
         });
         ctx.device.set_device_lost_callback(callback);
 
-        // Drop the device.
-        drop(ctx.device);
+        drop(ctx);
 
         assert!(
             WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst),
@@ -676,35 +675,6 @@ static DEVICE_LOST_REPLACED_CALLBACK: GpuTestConfiguration = GpuTestConfiguratio
         );
     });
 
-#[gpu_test]
-static DROPPED_GLOBAL_THEN_DEVICE_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
-    .parameters(TestParameters::default().skip(FailureCase::always()))
-    .run_sync(|ctx| {
-        // What we want to do is to drop the Global, forcing a code path that
-        // eventually calls Device.prepare_to_die, without having first dropped
-        // the device. This models what might happen in a user agent that kills
-        // wgpu without providing a more orderly shutdown. In such a case, the
-        // device lost callback should be invoked with the message "Device is
-        // dying."
-        static WAS_CALLED: AtomicBool = AtomicBool::new(false);
-
-        // Set a LoseDeviceCallback on the device.
-        let callback = Box::new(|reason, message| {
-            WAS_CALLED.store(true, std::sync::atomic::Ordering::SeqCst);
-            assert_eq!(reason, wgt::DeviceLostReason::Dropped);
-            assert_eq!(message, "Device is dying.");
-        });
-        ctx.device.set_device_lost_callback(callback);
-
-        // TODO: Drop the Global, somehow.
-
-        // Confirm that the callback was invoked.
-        assert!(
-            WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst),
-            "Device lost callback should have been called."
-        );
-    });
-
 #[gpu_test]
 static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConfiguration::new()
     .parameters(TestParameters::default())
diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs
index ac3bbb67b..efc3ebbe2 100644
--- a/wgpu-core/src/command/ray_tracing.rs
+++ b/wgpu-core/src/command/ray_tracing.rs
@@ -1,5 +1,5 @@
 use crate::{
-    device::queue::TempResource,
+    device::{queue::TempResource, Device},
     global::Global,
     hub::Hub,
     id::CommandEncoderId,
@@ -20,10 +20,7 @@ use crate::{
 use wgt::{math::align_to, BufferUsages, Features};
 
 use super::CommandBufferMutable;
-use crate::device::queue::PendingWrites;
 use hal::BufferUses;
-use std::mem::ManuallyDrop;
-use std::ops::DerefMut;
 use std::{
     cmp::max,
     num::NonZeroU64,
@@ -184,7 +181,7 @@ impl Global {
             build_command_index,
             &mut buf_storage,
             hub,
-            device.pending_writes.lock().deref_mut(),
+            device,
         )?;
 
         let snatch_guard = device.snatchable_lock.read();
@@ -248,7 +245,9 @@ impl Global {
                 .get()
                 .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?;
             cmd_buf_data.trackers.tlas_s.set_single(tlas.clone());
-            device.pending_writes.lock().insert_tlas(&tlas);
+            if let Some(queue) = device.get_queue() {
+                queue.pending_writes.lock().insert_tlas(&tlas);
+            }
 
             cmd_buf_data.tlas_actions.push(TlasAction {
                 tlas: tlas.clone(),
@@ -349,10 +348,12 @@ impl Global {
             }
         }
 
-        device
-            .pending_writes
-            .lock()
-            .consume_temp(TempResource::ScratchBuffer(scratch_buffer));
+        if let Some(queue) = device.get_queue() {
+            queue
+                .pending_writes
+                .lock()
+                .consume_temp(TempResource::ScratchBuffer(scratch_buffer));
+        }
 
         Ok(())
     }
@@ -495,7 +496,7 @@ impl Global {
             build_command_index,
             &mut buf_storage,
             hub,
-            device.pending_writes.lock().deref_mut(),
+            device,
         )?;
 
         let snatch_guard = device.snatchable_lock.read();
@@ -516,7 +517,9 @@ impl Global {
                 .get(package.tlas_id)
                 .get()
                 .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?;
-            device.pending_writes.lock().insert_tlas(&tlas);
+            if let Some(queue) = device.get_queue() {
+                queue.pending_writes.lock().insert_tlas(&tlas);
+            }
             cmd_buf_data.trackers.tlas_s.set_single(tlas.clone());
 
             tlas_lock_store.push((Some(package), tlas.clone()))
@@ -689,7 +692,7 @@ impl Global {
                 if let Some(ref staging_buffer) = staging_buffer {
                     cmd_buf_raw.transition_buffers(&[hal::BufferBarrier::<dyn hal::DynBuffer> {
                         buffer: staging_buffer.raw(),
-                        usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
+                        usage: BufferUses::MAP_WRITE..BufferUses::COPY_SRC,
                     }]);
                 }
             }
@@ -711,7 +714,7 @@ impl Global {
                 unsafe {
                     cmd_buf_raw.transition_buffers(&[hal::BufferBarrier::<dyn hal::DynBuffer> {
                         buffer: tlas.instance_buffer.as_ref(),
-                        usage: hal::BufferUses::MAP_READ..hal::BufferUses::COPY_DST,
+                        usage: BufferUses::MAP_READ..BufferUses::COPY_DST,
                     }]);
                     let temp = hal::BufferCopy {
                         src_offset: range.start as u64,
@@ -742,17 +745,21 @@ impl Global {
             }
 
             if let Some(staging_buffer) = staging_buffer {
-                device
-                    .pending_writes
-                    .lock()
-                    .consume_temp(TempResource::StagingBuffer(staging_buffer));
+                if let Some(queue) = device.get_queue() {
+                    queue
+                        .pending_writes
+                        .lock()
+                        .consume_temp(TempResource::StagingBuffer(staging_buffer));
+                }
             }
         }
 
-        device
-            .pending_writes
-            .lock()
-            .consume_temp(TempResource::ScratchBuffer(scratch_buffer));
+        if let Some(queue) = device.get_queue() {
+            queue
+                .pending_writes
+                .lock()
+                .consume_temp(TempResource::ScratchBuffer(scratch_buffer));
+        }
 
         Ok(())
     }
@@ -839,7 +846,7 @@ fn iter_blas<'a>(
     build_command_index: NonZeroU64,
     buf_storage: &mut Vec<TriangleBufferStore<'a>>,
     hub: &Hub,
-    pending_writes: &mut ManuallyDrop<PendingWrites>,
+    device: &Device,
 ) -> Result<(), BuildAccelerationStructureError> {
     let mut temp_buffer = Vec::new();
     for entry in blas_iter {
@@ -849,7 +856,9 @@ fn iter_blas<'a>(
             .get()
             .map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?;
         cmd_buf_data.trackers.blas_s.set_single(blas.clone());
-        pending_writes.insert_blas(&blas);
+        if let Some(queue) = device.get_queue() {
+            queue.pending_writes.lock().insert_blas(&blas);
+        }
 
         cmd_buf_data.blas_actions.push(BlasAction {
             blas: blas.clone(),
@@ -951,7 +960,7 @@ fn iter_blas<'a>(
                         }
                         let data = cmd_buf_data.trackers.buffers.set_single(
                             &index_buffer,
-                            hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT,
+                            BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT,
                         );
                         Some((index_buffer.clone(), data))
                     } else {
diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs
index d87bd1bc8..cd0c14e12 100644
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@@ -219,9 +219,11 @@ impl Global {
         device.check_is_valid()?;
         buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?;
 
-        let last_submission = device
-            .lock_life()
-            .get_buffer_latest_submission_index(&buffer);
+        let last_submission = device.get_queue().and_then(|queue| {
+            queue
+                .lock_life()
+                .get_buffer_latest_submission_index(&buffer)
+        });
 
         if let Some(last_submission) = last_submission {
             device.wait_for_submit(last_submission)?;
@@ -2081,20 +2083,7 @@ impl Global {
         profiling::scope!("Device::drop");
         api_log!("Device::drop {device_id:?}");
 
-        let device = self.hub.devices.remove(device_id);
-        let device_lost_closure = device.lock_life().device_lost_closure.take();
-        if let Some(closure) = device_lost_closure {
-            closure.call(DeviceLostReason::Dropped, String::from("Device dropped."));
-        }
-
-        // The things `Device::prepare_to_die` takes care are mostly
-        // unnecessary here. We know our queue is empty, so we don't
-        // need to wait for submissions or triage them. We know we were
-        // just polled, so `life_tracker.free_resources` is empty.
-        debug_assert!(device.lock_life().queue_empty());
-        device.pending_writes.lock().deactivate();
-
-        drop(device);
+        self.hub.devices.remove(device_id);
     }
 
     // This closure will be called exactly once during "lose the device",
@@ -2106,14 +2095,14 @@ impl Global {
     ) {
         let device = self.hub.devices.get(device_id);
 
-        let mut life_tracker = device.lock_life();
-        if let Some(existing_closure) = life_tracker.device_lost_closure.take() {
-            // It's important to not hold the lock while calling the closure.
-            drop(life_tracker);
-            existing_closure.call(DeviceLostReason::ReplacedCallback, "".to_string());
-            life_tracker = device.lock_life();
+        let old_device_lost_closure = device
+            .device_lost_closure
+            .lock()
+            .replace(device_lost_closure);
+
+        if let Some(old_device_lost_closure) = old_device_lost_closure {
+            old_device_lost_closure.call(DeviceLostReason::ReplacedCallback, "".to_string());
         }
-        life_tracker.device_lost_closure = Some(device_lost_closure);
     }
 
     pub fn device_destroy(&self, device_id: DeviceId) {
diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs
index 70e5337a7..c7d3a59d6 100644
--- a/wgpu-core/src/device/life.rs
+++ b/wgpu-core/src/device/life.rs
@@ -1,9 +1,9 @@
 use crate::{
     device::{
         queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource},
-        DeviceError, DeviceLostClosure,
+        DeviceError,
     },
-    resource::{self, Buffer, Texture, Trackable},
+    resource::{Buffer, Texture, Trackable},
     snatch::SnatchGuard,
     SubmissionIndex,
 };
@@ -196,11 +196,6 @@ pub(crate) struct LifetimeTracker {
     /// must happen _after_ all mapped buffer callbacks are mapped, so we defer them
     /// here until the next time the device is maintained.
     work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
-
-    /// Closure to be called on "lose the device". This is invoked directly by
-    /// device.lose or by the UserCallbacks returned from maintain when the device
-    /// has been destroyed and its queues are empty.
-    pub device_lost_closure: Option<DeviceLostClosure>,
 }
 
 impl LifetimeTracker {
@@ -209,7 +204,6 @@ impl LifetimeTracker {
             active: Vec::new(),
             ready_to_map: Vec::new(),
             work_done_closures: SmallVec::new(),
-            device_lost_closure: None,
         }
     }
 
@@ -394,7 +388,6 @@ impl LifetimeTracker {
     #[must_use]
     pub(crate) fn handle_mapping(
         &mut self,
-        raw: &dyn hal::DynDevice,
         snatch_guard: &SnatchGuard,
     ) -> Vec<super::BufferMapPendingClosure> {
         if self.ready_to_map.is_empty() {
@@ -404,61 +397,10 @@ impl LifetimeTracker {
             Vec::with_capacity(self.ready_to_map.len());
 
         for buffer in self.ready_to_map.drain(..) {
-            // This _cannot_ be inlined into the match. If it is, the lock will be held
-            // open through the whole match, resulting in a deadlock when we try to re-lock
-            // the buffer back to active.
-            let mapping = std::mem::replace(
-                &mut *buffer.map_state.lock(),
-                resource::BufferMapState::Idle,
-            );
-            let pending_mapping = match mapping {
-                resource::BufferMapState::Waiting(pending_mapping) => pending_mapping,
-                // Mapping cancelled
-                resource::BufferMapState::Idle => continue,
-                // Mapping queued at least twice by map -> unmap -> map
-                // and was already successfully mapped below
-                resource::BufferMapState::Active { .. } => {
-                    *buffer.map_state.lock() = mapping;
-                    continue;
-                }
-                _ => panic!("No pending mapping."),
-            };
-            let status = if pending_mapping.range.start != pending_mapping.range.end {
-                let host = pending_mapping.op.host;
-                let size = pending_mapping.range.end - pending_mapping.range.start;
-                match super::map_buffer(
-                    raw,
-                    &buffer,
-                    pending_mapping.range.start,
-                    size,
-                    host,
-                    snatch_guard,
-                ) {
-                    Ok(mapping) => {
-                        *buffer.map_state.lock() = resource::BufferMapState::Active {
-                            mapping,
-                            range: pending_mapping.range.clone(),
-                            host,
-                        };
-                        Ok(())
-                    }
-                    Err(e) => {
-                        log::error!("Mapping failed: {e}");
-                        Err(e)
-                    }
-                }
-            } else {
-                *buffer.map_state.lock() = resource::BufferMapState::Active {
-                    mapping: hal::BufferMapping {
-                        ptr: std::ptr::NonNull::dangling(),
-                        is_coherent: true,
-                    },
-                    range: pending_mapping.range,
-                    host: pending_mapping.op.host,
-                };
-                Ok(())
-            };
-            pending_callbacks.push((pending_mapping.op, status));
+            match buffer.map(snatch_guard) {
+                Some(cb) => pending_callbacks.push(cb),
+                None => continue,
+            }
         }
         pending_callbacks
     }
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 81cb4654d..18e35206b 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -298,24 +298,25 @@ impl DeviceLostClosure {
     }
 }
 
-fn map_buffer(
-    raw: &dyn hal::DynDevice,
+pub(crate) fn map_buffer(
     buffer: &Buffer,
     offset: BufferAddress,
     size: BufferAddress,
     kind: HostMap,
     snatch_guard: &SnatchGuard,
 ) -> Result<hal::BufferMapping, BufferAccessError> {
+    let raw_device = buffer.device.raw();
     let raw_buffer = buffer.try_raw(snatch_guard)?;
     let mapping = unsafe {
-        raw.map_buffer(raw_buffer, offset..offset + size)
+        raw_device
+            .map_buffer(raw_buffer, offset..offset + size)
             .map_err(|e| buffer.device.handle_hal_error(e))?
     };
 
     if !mapping.is_coherent && kind == HostMap::Read {
         #[allow(clippy::single_range_in_vec_init)]
         unsafe {
-            raw.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
+            raw_device.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
         }
     }
 
@@ -370,7 +371,7 @@ fn map_buffer(
                 && kind == HostMap::Read
                 && buffer.usage.contains(wgt::BufferUsages::MAP_WRITE)
             {
-                unsafe { raw.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
+                unsafe { raw_device.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
             }
         }
     }
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index ade609f20..370880a93 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -14,13 +14,14 @@ use crate::{
     hal_label,
     id::{self, QueueId},
     init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange},
-    lock::RwLockWriteGuard,
+    lock::{rank, Mutex, MutexGuard, RwLockWriteGuard},
     resource::{
         Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError,
         DestroyedTexture, Fallible, FlushedStagingBuffer, InvalidResourceError, Labeled,
         ParentDevice, ResourceErrorIdent, StagingBuffer, Texture, TextureInner, Trackable,
     },
     resource_log,
+    snatch::SnatchGuard,
     track::{self, Tracker, TrackerIndex},
     FastHashMap, SubmissionIndex,
 };
@@ -37,24 +38,95 @@ use std::{
 };
 use thiserror::Error;
 
-use super::Device;
+use super::{life::LifetimeTracker, Device};
 
 pub struct Queue {
-    raw: ManuallyDrop<Box<dyn hal::DynQueue>>,
+    raw: Box<dyn hal::DynQueue>,
     pub(crate) device: Arc<Device>,
+    pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
+    life_tracker: Mutex<LifetimeTracker>,
 }
 
 impl Queue {
-    pub(crate) fn new(device: Arc<Device>, raw: Box<dyn hal::DynQueue>) -> Self {
-        Queue {
-            raw: ManuallyDrop::new(raw),
-            device,
+    pub(crate) fn new(
+        device: Arc<Device>,
+        raw: Box<dyn hal::DynQueue>,
+    ) -> Result<Self, DeviceError> {
+        let pending_encoder = device
+            .command_allocator
+            .acquire_encoder(device.raw(), raw.as_ref())
+            .map_err(DeviceError::from_hal);
+
+        let pending_encoder = match pending_encoder {
+            Ok(pending_encoder) => pending_encoder,
+            Err(e) => {
+                return Err(e);
+            }
+        };
+
+        let mut pending_writes = PendingWrites::new(pending_encoder);
+
+        let zero_buffer = device.zero_buffer.as_ref();
+        pending_writes.activate();
+        unsafe {
+            pending_writes
+                .command_encoder
+                .transition_buffers(&[hal::BufferBarrier {
+                    buffer: zero_buffer,
+                    usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
+                }]);
+            pending_writes
+                .command_encoder
+                .clear_buffer(zero_buffer, 0..super::ZERO_BUFFER_SIZE);
+            pending_writes
+                .command_encoder
+                .transition_buffers(&[hal::BufferBarrier {
+                    buffer: zero_buffer,
+                    usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
+                }]);
         }
+
+        let pending_writes = Mutex::new(
+            rank::QUEUE_PENDING_WRITES,
+            ManuallyDrop::new(pending_writes),
+        );
+
+        Ok(Queue {
+            raw,
+            device,
+            pending_writes,
+            life_tracker: Mutex::new(rank::QUEUE_LIFE_TRACKER, LifetimeTracker::new()),
+        })
     }
 
     pub(crate) fn raw(&self) -> &dyn hal::DynQueue {
         self.raw.as_ref()
     }
+
+    #[track_caller]
+    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
+        self.life_tracker.lock()
+    }
+
+    pub(crate) fn maintain(
+        &self,
+        submission_index: u64,
+        snatch_guard: &SnatchGuard,
+    ) -> (
+        SmallVec<[SubmittedWorkDoneClosure; 1]>,
+        Vec<super::BufferMapPendingClosure>,
+        bool,
+    ) {
+        let mut life_tracker = self.lock_life();
+        let submission_closures =
+            life_tracker.triage_submissions(submission_index, &self.device.command_allocator);
+
+        let mapping_closures = life_tracker.handle_mapping(snatch_guard);
+
+        let queue_empty = life_tracker.queue_empty();
+
+        (submission_closures, mapping_closures, queue_empty)
+    }
 }
 
 crate::impl_resource_type!(Queue);
@@ -70,9 +142,101 @@ crate::impl_storage_item!(Queue);
 impl Drop for Queue {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
-        // SAFETY: we never access `self.raw` beyond this point.
-        let queue = unsafe { ManuallyDrop::take(&mut self.raw) };
-        self.device.release_queue(queue);
+
+        let last_successful_submission_index = self
+            .device
+            .last_successful_submission_index
+            .load(Ordering::Acquire);
+
+        let fence = self.device.fence.read();
+
+        // Try waiting on the last submission using the following sequence of timeouts
+        let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200];
+
+        for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() {
+            let is_last_iter = i == timeouts_in_ms.len() - 1;
+
+            api_log!(
+                "Waiting on last submission. try: {}/{}. timeout: {}ms",
+                i + 1,
+                timeouts_in_ms.len(),
+                timeout_ms
+            );
+
+            let wait_res = unsafe {
+                self.device.raw().wait(
+                    fence.as_ref(),
+                    last_successful_submission_index,
+                    #[cfg(not(target_arch = "wasm32"))]
+                    timeout_ms,
+                    #[cfg(target_arch = "wasm32")]
+                    0, // WebKit and Chromium don't support a non-0 timeout
+                )
+            };
+            // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
+            match wait_res {
+                Ok(true) => break,
+                Ok(false) => {
+                    // It's fine that we timed out on WebGL; GL objects can be deleted early as they
+                    // will be kept around by the driver if GPU work hasn't finished.
+                    // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other
+                    // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
+                    // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
+                    // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538.
+                    #[cfg(target_arch = "wasm32")]
+                    {
+                        break;
+                    }
+                    #[cfg(not(target_arch = "wasm32"))]
+                    {
+                        if is_last_iter {
+                            panic!(
+                                "We timed out while waiting on the last successful submission to complete!"
+                            );
+                        }
+                    }
+                }
+                Err(e) => match e {
+                    hal::DeviceError::OutOfMemory => {
+                        if is_last_iter {
+                            panic!(
+                                "We ran into an OOM error while waiting on the last successful submission to complete!"
+                            );
+                        }
+                    }
+                    hal::DeviceError::Lost => {
+                        self.device.handle_hal_error(e); // will lose the device
+                        break;
+                    }
+                    hal::DeviceError::ResourceCreationFailed => unreachable!(),
+                    hal::DeviceError::Unexpected => {
+                        panic!(
+                            "We ran into an unexpected error while waiting on the last successful submission to complete!"
+                        );
+                    }
+                },
+            }
+        }
+        drop(fence);
+
+        let snatch_guard = self.device.snatchable_lock.read();
+        let (submission_closures, mapping_closures, queue_empty) =
+            self.maintain(last_successful_submission_index, &snatch_guard);
+        drop(snatch_guard);
+
+        assert!(queue_empty);
+
+        let closures = crate::device::UserClosures {
+            mappings: mapping_closures,
+            submissions: submission_closures,
+            device_lost_invocations: SmallVec::new(),
+        };
+
+        // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
+        let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
+        pending_writes.dispose(self.device.raw());
+
+        closures.fire();
     }
 }
 
@@ -345,15 +509,6 @@ impl PendingWrites {
         }
         self.command_encoder.as_mut()
     }
-
-    pub fn deactivate(&mut self) {
-        if self.is_recording {
-            unsafe {
-                self.command_encoder.discard_encoding();
-            }
-            self.is_recording = false;
-        }
-    }
 }
 
 #[derive(Clone, Debug, Error)]
@@ -427,7 +582,7 @@ impl Queue {
         // freed, even if an error occurs. All paths from here must call
         // `device.pending_writes.consume`.
         let mut staging_buffer = StagingBuffer::new(&self.device, data_size)?;
-        let mut pending_writes = self.device.pending_writes.lock();
+        let mut pending_writes = self.pending_writes.lock();
 
         let staging_buffer = {
             profiling::scope!("copy");
@@ -469,7 +624,7 @@ impl Queue {
 
         let buffer = buffer.get()?;
 
-        let mut pending_writes = self.device.pending_writes.lock();
+        let mut pending_writes = self.pending_writes.lock();
 
         // At this point, we have taken ownership of the staging_buffer from the
         // user. Platform validation requires that the staging buffer always
@@ -645,7 +800,7 @@ impl Queue {
                 .map_err(TransferError::from)?;
         }
 
-        let mut pending_writes = self.device.pending_writes.lock();
+        let mut pending_writes = self.pending_writes.lock();
         let encoder = pending_writes.activate();
 
         // If the copy does not fully cover the layers, we need to initialize to
@@ -897,7 +1052,7 @@ impl Queue {
 
         let (selector, dst_base) = extract_texture_selector(&destination, &size, &dst)?;
 
-        let mut pending_writes = self.device.pending_writes.lock();
+        let mut pending_writes = self.pending_writes.lock();
         let encoder = pending_writes.activate();
 
         // If the copy does not fully cover the layers, we need to initialize to
@@ -1054,6 +1209,13 @@ impl Queue {
                             }
                         }
 
+                        if first_error.is_some() {
+                            if let Ok(cmd_buf_data) = cmd_buf_data {
+                                cmd_buf_data.destroy(&command_buffer.device);
+                            }
+                            continue;
+                        }
+
                         let mut baked = match cmd_buf_data {
                             Ok(cmd_buf_data) => {
                                 let res = validate_command_buffer(
@@ -1077,10 +1239,6 @@ impl Queue {
                             }
                         };
 
-                        if first_error.is_some() {
-                            continue;
-                        }
-
                         // execute resource transitions
                         if let Err(e) = unsafe {
                             baked.encoder.begin_encoding(hal_label(
@@ -1166,7 +1324,7 @@ impl Queue {
                 }
             }
 
-            let mut pending_writes = self.device.pending_writes.lock();
+            let mut pending_writes = self.pending_writes.lock();
 
             {
                 used_surface_textures.set_size(self.device.tracker_indices.textures.size());
@@ -1253,7 +1411,7 @@ impl Queue {
             profiling::scope!("cleanup");
 
             // this will register the new submission to the life time tracker
-            self.device.lock_life().track_submission(
+            self.lock_life().track_submission(
                 submit_index,
                 pending_writes.temp_resources.drain(..),
                 active_executions,
@@ -1302,7 +1460,7 @@ impl Queue {
     ) -> Option<SubmissionIndex> {
         api_log!("Queue::on_submitted_work_done");
         //TODO: flush pending writes
-        self.device.lock_life().add_work_done_closure(closure)
+        self.lock_life().add_work_done_closure(closure)
     }
 }
 
@@ -1459,7 +1617,7 @@ fn validate_command_buffer(
     command_buffer: &CommandBuffer,
     queue: &Queue,
     cmd_buf_data: &crate::command::CommandBufferMutable,
-    snatch_guard: &crate::snatch::SnatchGuard<'_>,
+    snatch_guard: &SnatchGuard,
     submit_surface_textures_owned: &mut FastHashMap<*const Texture, Arc<Texture>>,
     used_surface_textures: &mut track::TextureUsageScope,
 ) -> Result<(), QueueSubmitError> {
diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs
index e374ade0b..061040338 100644
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@@ -4,12 +4,9 @@ use crate::{
     binding_model::{self, BindGroup, BindGroupLayout, BindGroupLayoutEntryError},
     command, conv,
     device::{
-        bgl, create_validator,
-        life::{LifetimeTracker, WaitIdleError},
-        map_buffer,
-        queue::PendingWrites,
-        AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures,
-        RenderPassContext, CLEANUP_WAIT_MS,
+        bgl, create_validator, life::WaitIdleError, map_buffer, AttachmentData,
+        DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext,
+        CLEANUP_WAIT_MS,
     },
     hal_label,
     init_tracker::{
@@ -17,7 +14,7 @@ use crate::{
         TextureInitTrackerAction,
     },
     instance::Adapter,
-    lock::{rank, Mutex, MutexGuard, RwLock},
+    lock::{rank, Mutex, RwLock},
     pipeline,
     pool::ResourcePool,
     resource::{
@@ -32,7 +29,7 @@ use crate::{
     },
     validation::{self, validate_color_attachment_bytes_per_sample},
     weak_vec::WeakVec,
-    FastHashMap, LabelHelpers, PreHashedKey, PreHashedMap,
+    FastHashMap, LabelHelpers,
 };
 
 use arrayvec::ArrayVec;
@@ -53,35 +50,16 @@ use std::{
 };
 
 use super::{
-    queue::Queue, DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR,
-    ZERO_BUFFER_SIZE,
+    queue::Queue, DeviceDescriptor, DeviceError, DeviceLostClosure, UserClosures,
+    ENTRYPOINT_FAILURE_ERROR, ZERO_BUFFER_SIZE,
 };
 
 /// Structure describing a logical device. Some members are internally mutable,
 /// stored behind mutexes.
-///
-/// TODO: establish clear order of locking for these:
-/// `life_tracker`, `trackers`, `render_passes`, `pending_writes`, `trace`.
-///
-/// Currently, the rules are:
-/// 1. `life_tracker` is locked after `hub.devices`, enforced by the type system
-/// 1. `self.trackers` is locked last (unenforced)
-/// 1. `self.trace` is locked last (unenforced)
-///
-/// Right now avoid locking twice same resource or registry in a call execution
-/// and minimize the locking to the minimum scope possible
-/// Unless otherwise specified, no lock may be acquired while holding another lock.
-/// This means that you must inspect function calls made while a lock is held
-/// to see what locks the callee may try to acquire.
-///
-/// Important:
-/// When locking pending_writes please check that trackers is not locked
-/// trackers should be locked only when needed for the shortest time possible
 pub struct Device {
-    raw: ManuallyDrop<Box<dyn hal::DynDevice>>,
+    raw: Box<dyn hal::DynDevice>,
     pub(crate) adapter: Arc<Adapter>,
     pub(crate) queue: OnceLock<Weak<Queue>>,
-    queue_to_drop: OnceLock<Box<dyn hal::DynQueue>>,
     pub(crate) zero_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
     /// The `label` from the descriptor used to create the resource.
     label: String,
@@ -126,14 +104,14 @@ pub struct Device {
     /// using ref-counted references for internal access.
     pub(crate) valid: AtomicBool,
 
-    /// All live resources allocated with this [`Device`].
-    ///
-    /// Has to be locked temporarily only (locked last)
-    /// and never before pending_writes
+    /// Closure to be called on "lose the device". This is invoked directly by
+    /// device.lose or by the UserCallbacks returned from maintain when the device
+    /// has been destroyed and its queues are empty.
+    pub(crate) device_lost_closure: Mutex<Option<DeviceLostClosure>>,
+
+    /// Stores the state of buffers and textures.
     pub(crate) trackers: Mutex<DeviceTracker>,
     pub(crate) tracker_indices: TrackerIndexAllocators,
-    // Life tracker should be locked right after the device and before anything else.
-    life_tracker: Mutex<LifetimeTracker>,
     /// Pool of bind group layouts, allowing deduplication.
     pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout>,
     pub(crate) alignments: hal::Alignments,
@@ -141,15 +119,14 @@ pub struct Device {
     pub(crate) features: wgt::Features,
     pub(crate) downlevel: wgt::DownlevelCapabilities,
     pub(crate) instance_flags: wgt::InstanceFlags,
-    pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
     pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy>>,
-    #[cfg(feature = "trace")]
-    pub(crate) trace: Mutex<Option<trace::Trace>>,
     pub(crate) usage_scopes: UsageScopePool,
     pub(crate) last_acceleration_structure_build_command_index: AtomicU64,
-
     #[cfg(feature = "indirect-validation")]
     pub(crate) indirect_validation: Option<crate::indirect_validation::IndirectValidation>,
+    // needs to be dropped last
+    #[cfg(feature = "trace")]
+    pub(crate) trace: Mutex<Option<trace::Trace>>,
 }
 
 pub(crate) enum DeferredDestroy {
@@ -171,26 +148,24 @@ impl std::fmt::Debug for Device {
 impl Drop for Device {
     fn drop(&mut self) {
         resource_log!("Drop {}", self.error_ident());
-        // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
-        let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
+
+        if let Some(closure) = self.device_lost_closure.lock().take() {
+            closure.call(DeviceLostReason::Dropped, String::from("Device dropped."));
+        }
+
         // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point.
         let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) };
-        // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
-        let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
         // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point.
         let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) };
-        pending_writes.dispose(raw.as_ref());
-        self.command_allocator.dispose(raw.as_ref());
+        self.command_allocator.dispose(self.raw.as_ref());
         #[cfg(feature = "indirect-validation")]
         self.indirect_validation
             .take()
             .unwrap()
-            .dispose(raw.as_ref());
+            .dispose(self.raw.as_ref());
         unsafe {
-            raw.destroy_buffer(zero_buffer);
-            raw.destroy_fence(fence);
-            let queue = self.queue_to_drop.take().unwrap();
-            raw.exit(queue);
+            self.raw.destroy_buffer(zero_buffer);
+            self.raw.destroy_fence(fence);
         }
     }
 }
@@ -222,7 +197,6 @@ impl Device {
 impl Device {
     pub(crate) fn new(
         raw_device: Box<dyn hal::DynDevice>,
-        raw_queue: &dyn hal::DynQueue,
         adapter: &Arc<Adapter>,
         desc: &DeviceDescriptor,
         trace_path: Option<&std::path::Path>,
@@ -235,10 +209,6 @@ impl Device {
         let fence = unsafe { raw_device.create_fence() }.map_err(DeviceError::from_hal)?;
 
         let command_allocator = command::CommandAllocator::new();
-        let pending_encoder = command_allocator
-            .acquire_encoder(raw_device.as_ref(), raw_queue)
-            .map_err(DeviceError::from_hal)?;
-        let mut pending_writes = PendingWrites::new(pending_encoder);
 
         // Create zeroed buffer used for texture clears.
         let zero_buffer = unsafe {
@@ -250,24 +220,6 @@ impl Device {
             })
         }
         .map_err(DeviceError::from_hal)?;
-        pending_writes.activate();
-        unsafe {
-            pending_writes
-                .command_encoder
-                .transition_buffers(&[hal::BufferBarrier {
-                    buffer: zero_buffer.as_ref(),
-                    usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
-                }]);
-            pending_writes
-                .command_encoder
-                .clear_buffer(zero_buffer.as_ref(), 0..ZERO_BUFFER_SIZE);
-            pending_writes
-                .command_encoder
-                .transition_buffers(&[hal::BufferBarrier {
-                    buffer: zero_buffer.as_ref(),
-                    usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
-                }]);
-        }
 
         let alignments = adapter.raw.capabilities.alignments.clone();
         let downlevel = adapter.raw.capabilities.downlevel.clone();
@@ -292,10 +244,9 @@ impl Device {
         };
 
         Ok(Self {
-            raw: ManuallyDrop::new(raw_device),
+            raw: raw_device,
             adapter: adapter.clone(),
             queue: OnceLock::new(),
-            queue_to_drop: OnceLock::new(),
             zero_buffer: ManuallyDrop::new(zero_buffer),
             label: desc.label.to_string(),
             command_allocator,
@@ -304,9 +255,9 @@ impl Device {
             fence: RwLock::new(rank::DEVICE_FENCE, ManuallyDrop::new(fence)),
             snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) },
             valid: AtomicBool::new(true),
+            device_lost_closure: Mutex::new(rank::DEVICE_LOST_CLOSURE, None),
             trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()),
             tracker_indices: TrackerIndexAllocators::new(),
-            life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, LifetimeTracker::new()),
             bgl_pool: ResourcePool::new(),
             #[cfg(feature = "trace")]
             trace: Mutex::new(
@@ -330,10 +281,6 @@ impl Device {
             features: desc.required_features,
             downlevel,
             instance_flags,
-            pending_writes: Mutex::new(
-                rank::DEVICE_PENDING_WRITES,
-                ManuallyDrop::new(pending_writes),
-            ),
             deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()),
             usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()),
             // By starting at one, we can put the result in a NonZeroU64.
@@ -372,15 +319,6 @@ impl Device {
         DeviceError::from_hal(error)
     }
 
-    pub(crate) fn release_queue(&self, queue: Box<dyn hal::DynQueue>) {
-        assert!(self.queue_to_drop.set(queue).is_ok());
-    }
-
-    #[track_caller]
-    pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
-        self.life_tracker.lock()
-    }
-
     /// Run some destroy operations that were deferred.
     ///
     /// Destroying the resources requires taking a write lock on the device's snatch lock,
@@ -493,13 +431,12 @@ impl Device {
             .map_err(|e| self.handle_hal_error(e))?;
         }
 
-        let mut life_tracker = self.lock_life();
-        let submission_closures =
-            life_tracker.triage_submissions(submission_index, &self.command_allocator);
-
-        let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard);
-
-        let queue_empty = life_tracker.queue_empty();
+        let (submission_closures, mapping_closures, queue_empty) =
+            if let Some(queue) = self.get_queue() {
+                queue.maintain(submission_index, &snatch_guard)
+            } else {
+                (SmallVec::new(), Vec::new(), true)
+            };
 
         // Detect if we have been destroyed and now need to lose the device.
         // If we are invalid (set at start of destroy) and our queue is empty,
@@ -515,9 +452,9 @@ impl Device {
 
             // If we have a DeviceLostClosure, build an invocation with the
             // reason DeviceLostReason::Destroyed and no message.
-            if life_tracker.device_lost_closure.is_some() {
+            if let Some(device_lost_closure) = self.device_lost_closure.lock().take() {
                 device_lost_invocations.push(DeviceLostInvocation {
-                    closure: life_tracker.device_lost_closure.take().unwrap(),
+                    closure: device_lost_closure,
                     reason: DeviceLostReason::Destroyed,
                     message: String::new(),
                 });
@@ -525,7 +462,6 @@ impl Device {
         }
 
         // Don't hold the locks while calling release_gpu_resources.
-        drop(life_tracker);
         drop(fence);
         drop(snatch_guard);
 
@@ -666,14 +602,7 @@ impl Device {
                 }
             } else {
                 let snatch_guard: SnatchGuard = self.snatchable_lock.read();
-                map_buffer(
-                    self.raw(),
-                    &buffer,
-                    0,
-                    map_size,
-                    HostMap::Write,
-                    &snatch_guard,
-                )?
+                map_buffer(&buffer, 0, map_size, HostMap::Write, &snatch_guard)?
             };
             *buffer.map_state.lock() = resource::BufferMapState::Active {
                 mapping,
@@ -2712,18 +2641,18 @@ impl Device {
             derived_group_layouts.pop();
         }
 
-        let mut unique_bind_group_layouts = PreHashedMap::default();
+        let mut unique_bind_group_layouts = FastHashMap::default();
 
         let bind_group_layouts = derived_group_layouts
             .into_iter()
             .map(|mut bgl_entry_map| {
                 bgl_entry_map.sort();
-                match unique_bind_group_layouts.entry(PreHashedKey::from_key(&bgl_entry_map)) {
+                match unique_bind_group_layouts.entry(bgl_entry_map) {
                     std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())),
                     std::collections::hash_map::Entry::Vacant(e) => {
                         match self.create_bind_group_layout(
                             &None,
-                            bgl_entry_map,
+                            e.key().clone(),
                             bgl::Origin::Derived,
                         ) {
                             Ok(bgl) => {
@@ -3609,13 +3538,15 @@ impl Device {
             unsafe { self.raw().wait(fence.as_ref(), submission_index, !0) }
                 .map_err(|e| self.handle_hal_error(e))?;
             drop(fence);
-            let closures = self
-                .lock_life()
-                .triage_submissions(submission_index, &self.command_allocator);
-            assert!(
-                closures.is_empty(),
-                "wait_for_submit is not expected to work with closures"
-            );
+            if let Some(queue) = self.get_queue() {
+                let closures = queue
+                    .lock_life()
+                    .triage_submissions(submission_index, &self.command_allocator);
+                assert!(
+                    closures.is_empty(),
+                    "wait_for_submit is not expected to work with closures"
+                );
+            }
         }
         Ok(())
     }
@@ -3675,13 +3606,7 @@ impl Device {
         self.valid.store(false, Ordering::Release);
 
         // 1) Resolve the GPUDevice device.lost promise.
-        let mut life_lock = self.lock_life();
-        let closure = life_lock.device_lost_closure.take();
-        // It's important to not hold the lock while calling the closure and while calling
-        // release_gpu_resources which may take the lock again.
-        drop(life_lock);
-
-        if let Some(device_lost_closure) = closure {
+        if let Some(device_lost_closure) = self.device_lost_closure.lock().take() {
             device_lost_closure.call(DeviceLostReason::Unknown, message.to_string());
         }
 
@@ -3733,34 +3658,6 @@ impl Device {
     }
 }
 
-impl Device {
-    /// Wait for idle and remove resources that we can, before we die.
-    pub(crate) fn prepare_to_die(&self) {
-        self.pending_writes.lock().deactivate();
-        let current_index = self
-            .last_successful_submission_index
-            .load(Ordering::Acquire);
-        if let Err(error) = unsafe {
-            let fence = self.fence.read();
-            self.raw()
-                .wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS)
-        } {
-            log::error!("failed to wait for the device: {error}");
-        }
-        let mut life_tracker = self.lock_life();
-        let _ = life_tracker.triage_submissions(current_index, &self.command_allocator);
-        if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() {
-            // It's important to not hold the lock while calling the closure.
-            drop(life_tracker);
-            device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string());
-        }
-        #[cfg(feature = "trace")]
-        {
-            *self.trace.lock() = None;
-        }
-    }
-}
-
 crate::impl_resource_type!(Device);
 crate::impl_labeled!(Device);
 crate::impl_storage_item!(Device);
diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs
index 25a864ec2..cd4508d14 100644
--- a/wgpu-core/src/global.rs
+++ b/wgpu-core/src/global.rs
@@ -89,10 +89,6 @@ impl Drop for Global {
     fn drop(&mut self) {
         profiling::scope!("Global::drop");
         resource_log!("Global::drop");
-
-        for (_, device) in self.hub.devices.read().iter() {
-            device.prepare_to_die();
-        }
     }
 }
 
diff --git a/wgpu-core/src/hash_utils.rs b/wgpu-core/src/hash_utils.rs
index f44aad2f1..056c84f53 100644
--- a/wgpu-core/src/hash_utils.rs
+++ b/wgpu-core/src/hash_utils.rs
@@ -12,75 +12,3 @@ pub type FastHashSet<K> =
 /// IndexMap using a fast, non-cryptographic hash algorithm.
 pub type FastIndexMap<K, V> =
     indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
-
-/// HashMap that uses pre-hashed keys and an identity hasher.
-///
-/// This is useful when you only need the key to lookup the value, and don't need to store the key,
-/// particularly when the key is large.
-pub type PreHashedMap<K, V> =
-    std::collections::HashMap<PreHashedKey<K>, V, std::hash::BuildHasherDefault<IdentityHasher>>;
-
-/// A pre-hashed key using FxHash which allows the hashing operation to be disconnected
-/// from the storage in the map.
-pub struct PreHashedKey<K>(u64, std::marker::PhantomData<fn() -> K>);
-
-impl<K> std::fmt::Debug for PreHashedKey<K> {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        f.debug_tuple("PreHashedKey").field(&self.0).finish()
-    }
-}
-
-impl<K> Copy for PreHashedKey<K> {}
-
-impl<K> Clone for PreHashedKey<K> {
-    fn clone(&self) -> Self {
-        *self
-    }
-}
-
-impl<K> PartialEq for PreHashedKey<K> {
-    fn eq(&self, other: &Self) -> bool {
-        self.0 == other.0
-    }
-}
-
-impl<K> Eq for PreHashedKey<K> {}
-
-impl<K> std::hash::Hash for PreHashedKey<K> {
-    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.0.hash(state);
-    }
-}
-
-impl<K: std::hash::Hash> PreHashedKey<K> {
-    pub fn from_key(key: &K) -> Self {
-        use std::hash::Hasher;
-
-        let mut hasher = rustc_hash::FxHasher::default();
-        key.hash(&mut hasher);
-        Self(hasher.finish(), std::marker::PhantomData)
-    }
-}
-
-/// A hasher which does nothing. Useful for when you want to use a map with pre-hashed keys.
-///
-/// When hashing with this hasher, you must provide exactly 8 bytes. Multiple calls to `write`
-/// will overwrite the previous value.
-#[derive(Default)]
-pub struct IdentityHasher {
-    hash: u64,
-}
-
-impl std::hash::Hasher for IdentityHasher {
-    fn write(&mut self, bytes: &[u8]) {
-        self.hash = u64::from_ne_bytes(
-            bytes
-                .try_into()
-                .expect("identity hasher must be given exactly 8 bytes"),
-        );
-    }
-
-    fn finish(&self) -> u64 {
-        self.hash
-    }
-}
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 2c517d1ab..53a498502 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -573,18 +573,14 @@ impl Adapter {
     ) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
         api_log!("Adapter::create_device");
 
-        let device = Device::new(
-            hal_device.device,
-            hal_device.queue.as_ref(),
-            self,
-            desc,
-            trace_path,
-            instance_flags,
-        )?;
-
+        let device = Device::new(hal_device.device, self, desc, trace_path, instance_flags)?;
         let device = Arc::new(device);
-        let queue = Arc::new(Queue::new(device.clone(), hal_device.queue));
+
+        let queue = Queue::new(device.clone(), hal_device.queue)?;
+        let queue = Arc::new(queue);
+
         device.set_queue(&queue);
+
         Ok((device, queue))
     }
 
diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs
index 842dadf26..51c6c5431 100644
--- a/wgpu-core/src/lock/rank.rs
+++ b/wgpu-core/src/lock/rank.rs
@@ -111,16 +111,16 @@ define_lock_ranks! {
         // COMMAND_BUFFER_DATA,
     }
     rank BUFFER_MAP_STATE "Buffer::map_state" followed by {
-        DEVICE_PENDING_WRITES,
+        QUEUE_PENDING_WRITES,
         SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
         DEVICE_TRACE,
     }
-    rank DEVICE_PENDING_WRITES "Device::pending_writes" followed by {
+    rank QUEUE_PENDING_WRITES "Queue::pending_writes" followed by {
         COMMAND_ALLOCATOR_FREE_ENCODERS,
         SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
-        DEVICE_LIFE_TRACKER,
+        QUEUE_LIFE_TRACKER,
     }
-    rank DEVICE_LIFE_TRACKER "Device::life_tracker" followed by {
+    rank QUEUE_LIFE_TRACKER "Queue::life_tracker" followed by {
         COMMAND_ALLOCATOR_FREE_ENCODERS,
         DEVICE_TRACE,
     }
@@ -135,6 +135,7 @@ define_lock_ranks! {
     #[allow(dead_code)]
     rank DEVICE_TRACE "Device::trace" followed by { }
     rank DEVICE_TRACKERS "Device::trackers" followed by { }
+    rank DEVICE_LOST_CLOSURE "Device::device_lost_closure" followed by { }
     rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { }
     rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { }
     rank REGISTRY_STORAGE "Registry::storage" followed by { }
diff --git a/wgpu-core/src/pool.rs b/wgpu-core/src/pool.rs
index 7d17f3a7a..d14b8162e 100644
--- a/wgpu-core/src/pool.rs
+++ b/wgpu-core/src/pool.rs
@@ -7,16 +7,13 @@ use std::{
 use once_cell::sync::OnceCell;
 
 use crate::lock::{rank, Mutex};
-use crate::{PreHashedKey, PreHashedMap};
+use crate::FastHashMap;
 
 type SlotInner<V> = Weak<V>;
 type ResourcePoolSlot<V> = Arc<OnceCell<SlotInner<V>>>;
 
 pub struct ResourcePool<K, V> {
-    // We use a pre-hashed map as we never actually need to read the keys.
-    //
-    // This additionally allows us to not need to hash more than once on get_or_init.
-    inner: Mutex<PreHashedMap<K, ResourcePoolSlot<V>>>,
+    inner: Mutex<FastHashMap<K, ResourcePoolSlot<V>>>,
 }
 
 impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
@@ -35,9 +32,6 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
     where
         F: FnOnce(K) -> Result<Arc<V>, E>,
     {
-        // Hash the key outside of the lock.
-        let hashed_key = PreHashedKey::from_key(&key);
-
         // We can't prove at compile time that these will only ever be consumed once,
         // so we need to do the check at runtime.
         let mut key = Some(key);
@@ -46,7 +40,7 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
         'race: loop {
             let mut map_guard = self.inner.lock();
 
-            let entry = match map_guard.entry(hashed_key) {
+            let entry = match map_guard.entry(key.clone().unwrap()) {
                 // An entry exists for this resource.
                 //
                 // We know that either:
@@ -86,9 +80,11 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
                 return Ok(strong);
             }
 
-            // The resource is in the process of being dropped, because upgrade failed. The entry still exists in the map, but it points to nothing.
+            // The resource is in the process of being dropped, because upgrade failed.
+            // The entry still exists in the map, but it points to nothing.
             //
-            // We're in a race with the drop implementation of the resource, so lets just go around again. When we go around again:
+            // We're in a race with the drop implementation of the resource,
+            //  so lets just go around again. When we go around again:
             // - If the entry exists, we might need to go around a few more times.
             // - If the entry doesn't exist, we'll create a new one.
             continue 'race;
@@ -101,13 +97,11 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
     ///
     /// [`BindGroupLayout`]: crate::binding_model::BindGroupLayout
     pub fn remove(&self, key: &K) {
-        let hashed_key = PreHashedKey::from_key(key);
-
         let mut map_guard = self.inner.lock();
 
         // Weak::upgrade will be failing long before this code is called. All threads trying to access the resource will be spinning,
         // waiting for the entry to be removed. It is safe to remove the entry from the map.
-        map_guard.remove(&hashed_key);
+        map_guard.remove(key);
     }
 }
 
diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs
index 11ccb714f..f6102b552 100644
--- a/wgpu-core/src/ray_tracing.rs
+++ b/wgpu-core/src/ray_tracing.rs
@@ -13,7 +13,7 @@ use crate::{
     id::{BlasId, BufferId, TlasId},
     resource::CreateBufferError,
 };
-use std::sync::Arc;
+use std::{mem::size_of, sync::Arc};
 use std::{num::NonZeroU64, slice};
 
 use crate::resource::{Blas, ResourceErrorIdent, Tlas};
@@ -325,11 +325,8 @@ pub(crate) fn tlas_instance_into_bytes(
             };
             let temp: *const _ = &temp;
             unsafe {
-                slice::from_raw_parts::<u8>(
-                    temp.cast::<u8>(),
-                    std::mem::size_of::<RawTlasInstance>(),
-                )
-                .to_vec()
+                slice::from_raw_parts::<u8>(temp.cast::<u8>(), size_of::<RawTlasInstance>())
+                    .to_vec()
             }
         }
         _ => unimplemented!(),
diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs
index 8ea9dd07e..0b9cdf821 100644
--- a/wgpu-core/src/resource.rs
+++ b/wgpu-core/src/resource.rs
@@ -634,11 +634,76 @@ impl Buffer {
             .buffers
             .set_single(self, internal_use);
 
-        let submit_index = device.lock_life().map(self).unwrap_or(0); // '0' means no wait is necessary
+        let submit_index = if let Some(queue) = device.get_queue() {
+            queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary
+        } else {
+            // We can safely unwrap below since we just set the `map_state` to `BufferMapState::Waiting`.
+            let (mut operation, status) = self.map(&device.snatchable_lock.read()).unwrap();
+            if let Some(callback) = operation.callback.take() {
+                callback.call(status);
+            }
+            0
+        };
 
         Ok(submit_index)
     }
 
+    /// This function returns [`None`] only if [`Self::map_state`] is not [`BufferMapState::Waiting`].
+    #[must_use]
+    pub(crate) fn map(&self, snatch_guard: &SnatchGuard) -> Option<BufferMapPendingClosure> {
+        // This _cannot_ be inlined into the match. If it is, the lock will be held
+        // open through the whole match, resulting in a deadlock when we try to re-lock
+        // the buffer back to active.
+        let mapping = mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle);
+        let pending_mapping = match mapping {
+            BufferMapState::Waiting(pending_mapping) => pending_mapping,
+            // Mapping cancelled
+            BufferMapState::Idle => return None,
+            // Mapping queued at least twice by map -> unmap -> map
+            // and was already successfully mapped below
+            BufferMapState::Active { .. } => {
+                *self.map_state.lock() = mapping;
+                return None;
+            }
+            _ => panic!("No pending mapping."),
+        };
+        let status = if pending_mapping.range.start != pending_mapping.range.end {
+            let host = pending_mapping.op.host;
+            let size = pending_mapping.range.end - pending_mapping.range.start;
+            match crate::device::map_buffer(
+                self,
+                pending_mapping.range.start,
+                size,
+                host,
+                snatch_guard,
+            ) {
+                Ok(mapping) => {
+                    *self.map_state.lock() = BufferMapState::Active {
+                        mapping,
+                        range: pending_mapping.range.clone(),
+                        host,
+                    };
+                    Ok(())
+                }
+                Err(e) => {
+                    log::error!("Mapping failed: {e}");
+                    Err(e)
+                }
+            }
+        } else {
+            *self.map_state.lock() = BufferMapState::Active {
+                mapping: hal::BufferMapping {
+                    ptr: NonNull::dangling(),
+                    is_coherent: true,
+                },
+                range: pending_mapping.range,
+                host: pending_mapping.op.host,
+            };
+            Ok(())
+        };
+        Some((pending_mapping.op, status))
+    }
+
     // Note: This must not be called while holding a lock.
     pub(crate) fn unmap(
         self: &Arc<Self>,
@@ -676,36 +741,37 @@ impl Buffer {
                     });
                 }
 
-                let mut pending_writes = device.pending_writes.lock();
-
                 let staging_buffer = staging_buffer.flush();
 
-                let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
-                    src_offset: 0,
-                    dst_offset: 0,
-                    size,
-                });
-                let transition_src = hal::BufferBarrier {
-                    buffer: staging_buffer.raw(),
-                    usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
-                };
-                let transition_dst = hal::BufferBarrier::<dyn hal::DynBuffer> {
-                    buffer: raw_buf,
-                    usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
-                };
-                let encoder = pending_writes.activate();
-                unsafe {
-                    encoder.transition_buffers(&[transition_src, transition_dst]);
-                    if self.size > 0 {
-                        encoder.copy_buffer_to_buffer(
-                            staging_buffer.raw(),
-                            raw_buf,
-                            region.as_slice(),
-                        );
+                if let Some(queue) = device.get_queue() {
+                    let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
+                        src_offset: 0,
+                        dst_offset: 0,
+                        size,
+                    });
+                    let transition_src = hal::BufferBarrier {
+                        buffer: staging_buffer.raw(),
+                        usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
+                    };
+                    let transition_dst = hal::BufferBarrier::<dyn hal::DynBuffer> {
+                        buffer: raw_buf,
+                        usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
+                    };
+                    let mut pending_writes = queue.pending_writes.lock();
+                    let encoder = pending_writes.activate();
+                    unsafe {
+                        encoder.transition_buffers(&[transition_src, transition_dst]);
+                        if self.size > 0 {
+                            encoder.copy_buffer_to_buffer(
+                                staging_buffer.raw(),
+                                raw_buf,
+                                region.as_slice(),
+                            );
+                        }
                     }
+                    pending_writes.consume(staging_buffer);
+                    pending_writes.insert_buffer(self);
                 }
-                pending_writes.consume(staging_buffer);
-                pending_writes.insert_buffer(self);
             }
             BufferMapState::Idle => {
                 return Err(BufferAccessError::NotMapped);
@@ -778,14 +844,16 @@ impl Buffer {
             })
         };
 
-        let mut pending_writes = device.pending_writes.lock();
-        if pending_writes.contains_buffer(self) {
-            pending_writes.consume_temp(temp);
-        } else {
-            let mut life_lock = device.lock_life();
-            let last_submit_index = life_lock.get_buffer_latest_submission_index(self);
-            if let Some(last_submit_index) = last_submit_index {
-                life_lock.schedule_resource_destruction(temp, last_submit_index);
+        if let Some(queue) = device.get_queue() {
+            let mut pending_writes = queue.pending_writes.lock();
+            if pending_writes.contains_buffer(self) {
+                pending_writes.consume_temp(temp);
+            } else {
+                let mut life_lock = queue.lock_life();
+                let last_submit_index = life_lock.get_buffer_latest_submission_index(self);
+                if let Some(last_submit_index) = last_submit_index {
+                    life_lock.schedule_resource_destruction(temp, last_submit_index);
+                }
             }
         }
 
@@ -1244,14 +1312,16 @@ impl Texture {
             })
         };
 
-        let mut pending_writes = device.pending_writes.lock();
-        if pending_writes.contains_texture(self) {
-            pending_writes.consume_temp(temp);
-        } else {
-            let mut life_lock = device.lock_life();
-            let last_submit_index = life_lock.get_texture_latest_submission_index(self);
-            if let Some(last_submit_index) = last_submit_index {
-                life_lock.schedule_resource_destruction(temp, last_submit_index);
+        if let Some(queue) = device.get_queue() {
+            let mut pending_writes = queue.pending_writes.lock();
+            if pending_writes.contains_texture(self) {
+                pending_writes.consume_temp(temp);
+            } else {
+                let mut life_lock = queue.lock_life();
+                let last_submit_index = life_lock.get_texture_latest_submission_index(self);
+                if let Some(last_submit_index) = last_submit_index {
+                    life_lock.schedule_resource_destruction(temp, last_submit_index);
+                }
             }
         }
 
@@ -1960,14 +2030,16 @@ impl Blas {
             })
         };
 
-        let mut pending_writes = device.pending_writes.lock();
-        if pending_writes.contains_blas(self) {
-            pending_writes.consume_temp(temp);
-        } else {
-            let mut life_lock = device.lock_life();
-            let last_submit_index = life_lock.get_blas_latest_submission_index(self);
-            if let Some(last_submit_index) = last_submit_index {
-                life_lock.schedule_resource_destruction(temp, last_submit_index);
+        if let Some(queue) = device.get_queue() {
+            let mut pending_writes = queue.pending_writes.lock();
+            if pending_writes.contains_blas(self) {
+                pending_writes.consume_temp(temp);
+            } else {
+                let mut life_lock = queue.lock_life();
+                let last_submit_index = life_lock.get_blas_latest_submission_index(self);
+                if let Some(last_submit_index) = last_submit_index {
+                    life_lock.schedule_resource_destruction(temp, last_submit_index);
+                }
             }
         }
 
@@ -2047,14 +2119,16 @@ impl Tlas {
             })
         };
 
-        let mut pending_writes = device.pending_writes.lock();
-        if pending_writes.contains_tlas(self) {
-            pending_writes.consume_temp(temp);
-        } else {
-            let mut life_lock = device.lock_life();
-            let last_submit_index = life_lock.get_tlas_latest_submission_index(self);
-            if let Some(last_submit_index) = last_submit_index {
-                life_lock.schedule_resource_destruction(temp, last_submit_index);
+        if let Some(queue) = device.get_queue() {
+            let mut pending_writes = queue.pending_writes.lock();
+            if pending_writes.contains_tlas(self) {
+                pending_writes.consume_temp(temp);
+            } else {
+                let mut life_lock = queue.lock_life();
+                let last_submit_index = life_lock.get_tlas_latest_submission_index(self);
+                if let Some(last_submit_index) = last_submit_index {
+                    life_lock.schedule_resource_destruction(temp, last_submit_index);
+                }
             }
         }
 
diff --git a/wgpu-core/src/scratch.rs b/wgpu-core/src/scratch.rs
index f5930a5e0..dcd2d28fb 100644
--- a/wgpu-core/src/scratch.rs
+++ b/wgpu-core/src/scratch.rs
@@ -21,7 +21,7 @@ impl ScratchBuffer {
                     usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
                     memory_flags: hal::MemoryFlags::empty(),
                 })
-                .map_err(crate::device::DeviceError::from_hal)?
+                .map_err(DeviceError::from_hal)?
         };
         Ok(Self {
             raw: ManuallyDrop::new(raw),
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index 010c74aa8..16b3f9181 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -581,7 +581,8 @@ impl<A: hal::Api> Example<A> {
             self.device.destroy_pipeline_layout(self.pipeline_layout);
 
             self.surface.unconfigure(&self.device);
-            self.device.exit(self.queue);
+            drop(self.queue);
+            drop(self.device);
             drop(self.surface);
             drop(self.adapter);
         }
diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
index de4e5a9b4..a4d51c712 100644
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -1069,7 +1069,8 @@ impl<A: hal::Api> Example<A> {
             self.device.destroy_shader_module(self.shader_module);
 
             self.surface.unconfigure(&self.device);
-            self.device.exit(self.queue);
+            drop(self.queue);
+            drop(self.device);
             drop(self.surface);
             drop(self.adapter);
         }
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index 12edf6179..b2b9629ed 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -391,10 +391,6 @@ impl super::Device {
 impl crate::Device for super::Device {
     type A = super::Api;
 
-    unsafe fn exit(self, _queue: super::Queue) {
-        self.rtv_pool.lock().free_handle(self.null_rtv_handle);
-    }
-
     unsafe fn create_buffer(
         &self,
         desc: &crate::BufferDescriptor,
diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs
index bc9f0db15..fdda89b16 100644
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@@ -602,6 +602,12 @@ pub struct Device {
     counters: wgt::HalCounters,
 }
 
+impl Drop for Device {
+    fn drop(&mut self) {
+        self.rtv_pool.lock().free_handle(self.null_rtv_handle);
+    }
+}
+
 unsafe impl Send for Device {}
 unsafe impl Sync for Device {}
 
diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs
index f044a001d..f38d9fd32 100644
--- a/wgpu-hal/src/dynamic/device.rs
+++ b/wgpu-hal/src/dynamic/device.rs
@@ -16,8 +16,6 @@ use super::{
 };
 
 pub trait DynDevice: DynResource {
-    unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>);
-
     unsafe fn create_buffer(
         &self,
         desc: &BufferDescriptor,
@@ -166,10 +164,6 @@ pub trait DynDevice: DynResource {
 }
 
 impl<D: Device + DynResource> DynDevice for D {
-    unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>) {
-        unsafe { D::exit(*self, queue.unbox()) }
-    }
-
     unsafe fn create_buffer(
         &self,
         desc: &BufferDescriptor,
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 72d9784d6..e74c91c07 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -163,7 +163,6 @@ impl crate::Queue for Context {
 impl crate::Device for Context {
     type A = Api;
 
-    unsafe fn exit(self, queue: Context) {}
     unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<Resource> {
         Ok(Resource)
     }
diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs
index 0421adc04..365ac7d72 100644
--- a/wgpu-hal/src/gles/device.rs
+++ b/wgpu-hal/src/gles/device.rs
@@ -501,14 +501,6 @@ impl super::Device {
 impl crate::Device for super::Device {
     type A = super::Api;
 
-    unsafe fn exit(self, queue: super::Queue) {
-        let gl = &self.shared.context.lock();
-        unsafe { gl.delete_vertex_array(self.main_vao) };
-        unsafe { gl.delete_framebuffer(queue.draw_fbo) };
-        unsafe { gl.delete_framebuffer(queue.copy_fbo) };
-        unsafe { gl.delete_buffer(queue.zero_buffer) };
-    }
-
     unsafe fn create_buffer(
         &self,
         desc: &crate::BufferDescriptor,
@@ -1563,6 +1555,10 @@ impl crate::Device for super::Device {
     ) -> Result<bool, crate::DeviceError> {
         if fence.last_completed.load(Ordering::Relaxed) < wait_value {
             let gl = &self.shared.context.lock();
+            // MAX_CLIENT_WAIT_TIMEOUT_WEBGL is:
+            // - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386
+            // - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110
+            // - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551
             let timeout_ns = if cfg!(any(webgl, Emscripten)) {
                 0
             } else {
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 55f853715..b6e690a5f 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -295,6 +295,13 @@ pub struct Device {
     counters: wgt::HalCounters,
 }
 
+impl Drop for Device {
+    fn drop(&mut self) {
+        let gl = &self.shared.context.lock();
+        unsafe { gl.delete_vertex_array(self.main_vao) };
+    }
+}
+
 pub struct ShaderClearProgram {
     pub program: glow::Program,
     pub color_uniform_location: glow::UniformLocation,
@@ -316,6 +323,15 @@ pub struct Queue {
     current_index_buffer: Mutex<Option<glow::Buffer>>,
 }
 
+impl Drop for Queue {
+    fn drop(&mut self) {
+        let gl = &self.shared.context.lock();
+        unsafe { gl.delete_framebuffer(self.draw_fbo) };
+        unsafe { gl.delete_framebuffer(self.copy_fbo) };
+        unsafe { gl.delete_buffer(self.zero_buffer) };
+    }
+}
+
 #[derive(Clone, Debug)]
 pub struct Buffer {
     raw: Option<glow::Buffer>,
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index 1daba7019..86e28aeb1 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -676,7 +676,7 @@ pub trait Adapter: WasmNotSendSync {
 /// 1)  Free resources with methods like [`Device::destroy_texture`] or
 ///     [`Device::destroy_shader_module`].
 ///
-/// 1)  Shut down the device by calling [`Device::exit`].
+/// 1)  Drop the device.
 ///
 /// [`vkDevice`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VkDevice
 /// [`ID3D12Device`]: https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12device
@@ -706,8 +706,6 @@ pub trait Adapter: WasmNotSendSync {
 pub trait Device: WasmNotSendSync {
     type A: Api;
 
-    /// Exit connection to this logical device.
-    unsafe fn exit(self, queue: <Self::A as Api>::Queue);
     /// Creates a new buffer.
     ///
     /// The initial usage is `BufferUses::empty()`.
diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs
index fc6704445..6f2ae7818 100644
--- a/wgpu-hal/src/metal/device.rs
+++ b/wgpu-hal/src/metal/device.rs
@@ -321,8 +321,6 @@ impl super::Device {
 impl crate::Device for super::Device {
     type A = super::Api;
 
-    unsafe fn exit(self, _queue: super::Queue) {}
-
     unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<super::Buffer> {
         let map_read = desc.usage.contains(crate::BufferUses::MAP_READ);
         let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE);
diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs
index 39afe88ef..fe72ec40f 100644
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@@ -289,18 +289,6 @@ impl super::DeviceShared {
                 .size((range.end - range.start + mask) & !mask)
         }))
     }
-
-    unsafe fn free_resources(&self) {
-        for &raw in self.render_passes.lock().values() {
-            unsafe { self.raw.destroy_render_pass(raw, None) };
-        }
-        for &raw in self.framebuffers.lock().values() {
-            unsafe { self.raw.destroy_framebuffer(raw, None) };
-        }
-        if self.drop_guard.is_none() {
-            unsafe { self.raw.destroy_device(None) };
-        }
-    }
 }
 
 impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared {
@@ -1023,18 +1011,6 @@ impl super::Device {
 impl crate::Device for super::Device {
     type A = super::Api;
 
-    unsafe fn exit(self, queue: super::Queue) {
-        unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) };
-        unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) };
-        unsafe {
-            queue
-                .relay_semaphores
-                .into_inner()
-                .destroy(&self.shared.raw)
-        };
-        unsafe { self.shared.free_resources() };
-    }
-
     unsafe fn create_buffer(
         &self,
         desc: &crate::BufferDescriptor,
diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs
index f81dc0893..ff3c865fc 100644
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@@ -646,6 +646,20 @@ struct DeviceShared {
     memory_allocations_counter: InternalCounter,
 }
 
+impl Drop for DeviceShared {
+    fn drop(&mut self) {
+        for &raw in self.render_passes.lock().values() {
+            unsafe { self.raw.destroy_render_pass(raw, None) };
+        }
+        for &raw in self.framebuffers.lock().values() {
+            unsafe { self.raw.destroy_framebuffer(raw, None) };
+        }
+        if self.drop_guard.is_none() {
+            unsafe { self.raw.destroy_device(None) };
+        }
+    }
+}
+
 pub struct Device {
     shared: Arc<DeviceShared>,
     mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
@@ -658,6 +672,13 @@ pub struct Device {
     counters: wgt::HalCounters,
 }
 
+impl Drop for Device {
+    fn drop(&mut self) {
+        unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
+        unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
+    }
+}
+
 /// Semaphores for forcing queue submissions to run in order.
 ///
 /// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
@@ -741,6 +762,12 @@ pub struct Queue {
     relay_semaphores: Mutex<RelaySemaphores>,
 }
 
+impl Drop for Queue {
+    fn drop(&mut self) {
+        unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
+    }
+}
+
 #[derive(Debug)]
 pub struct Buffer {
     raw: vk::Buffer,
diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs
index 4ba90a4e0..752bf757e 100644
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@@ -1350,9 +1350,6 @@ impl crate::Context for ContextWgpuCore {
     fn device_drop(&self, device_data: &Self::DeviceData) {
         #[cfg(any(native, Emscripten))]
         {
-            // Call device_poll, but don't check for errors. We have to use its
-            // return value, but we just drop it.
-            let _ = self.0.device_poll(device_data.id, wgt::Maintain::wait());
             self.0.device_drop(device_data.id);
         }
     }