From 3767a6233990f0eb7f5168c33a8d7e1eceaf43aa Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Wed, 13 Nov 2024 22:12:06 -0500 Subject: [PATCH 01/15] chore: qualify `std::mem::size_of` with `use` (again) --- wgpu-core/src/ray_tracing.rs | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/wgpu-core/src/ray_tracing.rs b/wgpu-core/src/ray_tracing.rs index 11ccb714f..f6102b552 100644 --- a/wgpu-core/src/ray_tracing.rs +++ b/wgpu-core/src/ray_tracing.rs @@ -13,7 +13,7 @@ use crate::{ id::{BlasId, BufferId, TlasId}, resource::CreateBufferError, }; -use std::sync::Arc; +use std::{mem::size_of, sync::Arc}; use std::{num::NonZeroU64, slice}; use crate::resource::{Blas, ResourceErrorIdent, Tlas}; @@ -325,11 +325,8 @@ pub(crate) fn tlas_instance_into_bytes( }; let temp: *const _ = &temp; unsafe { - slice::from_raw_parts::( - temp.cast::(), - std::mem::size_of::(), - ) - .to_vec() + slice::from_raw_parts::(temp.cast::(), size_of::()) + .to_vec() } } _ => unimplemented!(), From d0fd6a231ca6f03266f03a8fb2b86e12c3731346 Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Wed, 13 Nov 2024 22:12:06 -0500 Subject: [PATCH 02/15] chore: satisfy `unused_qualifications` (again) --- naga/src/back/glsl/mod.rs | 8 ++++---- wgpu-core/src/command/ray_tracing.rs | 6 +++--- wgpu-core/src/scratch.rs | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/naga/src/back/glsl/mod.rs b/naga/src/back/glsl/mod.rs index d13abb199..2e719055e 100644 --- a/naga/src/back/glsl/mod.rs +++ b/naga/src/back/glsl/mod.rs @@ -3497,13 +3497,13 @@ impl<'a, W: Write> Writer<'a, W> { Mf::Transpose => "transpose", Mf::Determinant => "determinant", Mf::QuantizeToF16 => match *ctx.resolve_type(arg, &self.module.types) { - crate::TypeInner::Scalar { .. } => { + TypeInner::Scalar { .. } => { write!(self.out, "unpackHalf2x16(packHalf2x16(vec2(")?; self.write_expr(arg, ctx)?; write!(self.out, "))).x")?; return Ok(()); } - crate::TypeInner::Vector { + TypeInner::Vector { size: crate::VectorSize::Bi, .. } => { @@ -3512,7 +3512,7 @@ impl<'a, W: Write> Writer<'a, W> { write!(self.out, "))")?; return Ok(()); } - crate::TypeInner::Vector { + TypeInner::Vector { size: crate::VectorSize::Tri, .. } => { @@ -3523,7 +3523,7 @@ impl<'a, W: Write> Writer<'a, W> { write!(self.out, ".zz)).x)")?; return Ok(()); } - crate::TypeInner::Vector { + TypeInner::Vector { size: crate::VectorSize::Quad, .. } => { diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index ac3bbb67b..08f720c75 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -689,7 +689,7 @@ impl Global { if let Some(ref staging_buffer) = staging_buffer { cmd_buf_raw.transition_buffers(&[hal::BufferBarrier:: { buffer: staging_buffer.raw(), - usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + usage: BufferUses::MAP_WRITE..BufferUses::COPY_SRC, }]); } } @@ -711,7 +711,7 @@ impl Global { unsafe { cmd_buf_raw.transition_buffers(&[hal::BufferBarrier:: { buffer: tlas.instance_buffer.as_ref(), - usage: hal::BufferUses::MAP_READ..hal::BufferUses::COPY_DST, + usage: BufferUses::MAP_READ..BufferUses::COPY_DST, }]); let temp = hal::BufferCopy { src_offset: range.start as u64, @@ -951,7 +951,7 @@ fn iter_blas<'a>( } let data = cmd_buf_data.trackers.buffers.set_single( &index_buffer, - hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, + BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT, ); Some((index_buffer.clone(), data)) } else { diff --git a/wgpu-core/src/scratch.rs b/wgpu-core/src/scratch.rs index f5930a5e0..dcd2d28fb 100644 --- a/wgpu-core/src/scratch.rs +++ b/wgpu-core/src/scratch.rs @@ -21,7 +21,7 @@ impl ScratchBuffer { usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH, memory_flags: hal::MemoryFlags::empty(), }) - .map_err(crate::device::DeviceError::from_hal)? + .map_err(DeviceError::from_hal)? }; Ok(Self { raw: ManuallyDrop::new(raw), From 4fb32170bfb41a74dd3ccbde0795377d45f466c7 Mon Sep 17 00:00:00 2001 From: Jamie Nicol Date: Thu, 14 Nov 2024 13:09:51 +0000 Subject: [PATCH 03/15] [wgpu-core] Replace usage of `PreHashedMap` with `FastHashMap` (#6541) PreHashedMap was introduced as a performance optimization. It is, however, potentially unsound as it cannot distinguish between keys that are not equal yet have the same hash. This patch replaces its usage with a simple FastHashMap, which should be good enough. If this is found to cause real life performance issues down the line then we can figure out a better solution. --- CHANGELOG.md | 1 + wgpu-core/src/device/resource.rs | 8 ++-- wgpu-core/src/hash_utils.rs | 72 -------------------------------- wgpu-core/src/pool.rs | 22 ++++------ 4 files changed, 13 insertions(+), 90 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0122c106..76742558a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -128,6 +128,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148] - Check for device mismatches when beginning render and compute passes. By @ErichDonGubler in [#6497](https://github.com/gfx-rs/wgpu/pull/6497). - Lower `QUERY_SET_MAX_QUERIES` (and enforced limits) from 8192 to 4096 to match WebGPU spec. By @ErichDonGubler in [#6525](https://github.com/gfx-rs/wgpu/pull/6525). - Allow non-filterable float on texture bindings never used with samplers when using a derived bind group layout. By @ErichDonGubler in [#6531](https://github.com/gfx-rs/wgpu/pull/6531/). +- Replace potentially unsound usage of `PreHashedMap` with `FastHashMap`. By @jamienicol in [#6541](https://github.com/gfx-rs/wgpu/pull/6541). #### Naga diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 2c511baa5..754d1d91f 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -32,7 +32,7 @@ use crate::{ }, validation::{self, validate_color_attachment_bytes_per_sample}, weak_vec::WeakVec, - FastHashMap, LabelHelpers, PreHashedKey, PreHashedMap, + FastHashMap, LabelHelpers, }; use arrayvec::ArrayVec; @@ -2712,18 +2712,18 @@ impl Device { derived_group_layouts.pop(); } - let mut unique_bind_group_layouts = PreHashedMap::default(); + let mut unique_bind_group_layouts = FastHashMap::default(); let bind_group_layouts = derived_group_layouts .into_iter() .map(|mut bgl_entry_map| { bgl_entry_map.sort(); - match unique_bind_group_layouts.entry(PreHashedKey::from_key(&bgl_entry_map)) { + match unique_bind_group_layouts.entry(bgl_entry_map) { std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())), std::collections::hash_map::Entry::Vacant(e) => { match self.create_bind_group_layout( &None, - bgl_entry_map, + e.key().clone(), bgl::Origin::Derived, ) { Ok(bgl) => { diff --git a/wgpu-core/src/hash_utils.rs b/wgpu-core/src/hash_utils.rs index f44aad2f1..056c84f53 100644 --- a/wgpu-core/src/hash_utils.rs +++ b/wgpu-core/src/hash_utils.rs @@ -12,75 +12,3 @@ pub type FastHashSet = /// IndexMap using a fast, non-cryptographic hash algorithm. pub type FastIndexMap = indexmap::IndexMap>; - -/// HashMap that uses pre-hashed keys and an identity hasher. -/// -/// This is useful when you only need the key to lookup the value, and don't need to store the key, -/// particularly when the key is large. -pub type PreHashedMap = - std::collections::HashMap, V, std::hash::BuildHasherDefault>; - -/// A pre-hashed key using FxHash which allows the hashing operation to be disconnected -/// from the storage in the map. -pub struct PreHashedKey(u64, std::marker::PhantomData K>); - -impl std::fmt::Debug for PreHashedKey { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_tuple("PreHashedKey").field(&self.0).finish() - } -} - -impl Copy for PreHashedKey {} - -impl Clone for PreHashedKey { - fn clone(&self) -> Self { - *self - } -} - -impl PartialEq for PreHashedKey { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} - -impl Eq for PreHashedKey {} - -impl std::hash::Hash for PreHashedKey { - fn hash(&self, state: &mut H) { - self.0.hash(state); - } -} - -impl PreHashedKey { - pub fn from_key(key: &K) -> Self { - use std::hash::Hasher; - - let mut hasher = rustc_hash::FxHasher::default(); - key.hash(&mut hasher); - Self(hasher.finish(), std::marker::PhantomData) - } -} - -/// A hasher which does nothing. Useful for when you want to use a map with pre-hashed keys. -/// -/// When hashing with this hasher, you must provide exactly 8 bytes. Multiple calls to `write` -/// will overwrite the previous value. -#[derive(Default)] -pub struct IdentityHasher { - hash: u64, -} - -impl std::hash::Hasher for IdentityHasher { - fn write(&mut self, bytes: &[u8]) { - self.hash = u64::from_ne_bytes( - bytes - .try_into() - .expect("identity hasher must be given exactly 8 bytes"), - ); - } - - fn finish(&self) -> u64 { - self.hash - } -} diff --git a/wgpu-core/src/pool.rs b/wgpu-core/src/pool.rs index 7d17f3a7a..d14b8162e 100644 --- a/wgpu-core/src/pool.rs +++ b/wgpu-core/src/pool.rs @@ -7,16 +7,13 @@ use std::{ use once_cell::sync::OnceCell; use crate::lock::{rank, Mutex}; -use crate::{PreHashedKey, PreHashedMap}; +use crate::FastHashMap; type SlotInner = Weak; type ResourcePoolSlot = Arc>>; pub struct ResourcePool { - // We use a pre-hashed map as we never actually need to read the keys. - // - // This additionally allows us to not need to hash more than once on get_or_init. - inner: Mutex>>, + inner: Mutex>>, } impl ResourcePool { @@ -35,9 +32,6 @@ impl ResourcePool { where F: FnOnce(K) -> Result, E>, { - // Hash the key outside of the lock. - let hashed_key = PreHashedKey::from_key(&key); - // We can't prove at compile time that these will only ever be consumed once, // so we need to do the check at runtime. let mut key = Some(key); @@ -46,7 +40,7 @@ impl ResourcePool { 'race: loop { let mut map_guard = self.inner.lock(); - let entry = match map_guard.entry(hashed_key) { + let entry = match map_guard.entry(key.clone().unwrap()) { // An entry exists for this resource. // // We know that either: @@ -86,9 +80,11 @@ impl ResourcePool { return Ok(strong); } - // The resource is in the process of being dropped, because upgrade failed. The entry still exists in the map, but it points to nothing. + // The resource is in the process of being dropped, because upgrade failed. + // The entry still exists in the map, but it points to nothing. // - // We're in a race with the drop implementation of the resource, so lets just go around again. When we go around again: + // We're in a race with the drop implementation of the resource, + // so lets just go around again. When we go around again: // - If the entry exists, we might need to go around a few more times. // - If the entry doesn't exist, we'll create a new one. continue 'race; @@ -101,13 +97,11 @@ impl ResourcePool { /// /// [`BindGroupLayout`]: crate::binding_model::BindGroupLayout pub fn remove(&self, key: &K) { - let hashed_key = PreHashedKey::from_key(key); - let mut map_guard = self.inner.lock(); // Weak::upgrade will be failing long before this code is called. All threads trying to access the resource will be spinning, // waiting for the entry to be removed. It is safe to remove the entry from the map. - map_guard.remove(&hashed_key); + map_guard.remove(key); } } From 97acfd26ce322148073b76749ad4f53c9cfae6ec Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Mon, 14 Oct 2024 19:12:12 +0200 Subject: [PATCH 04/15] rely on our ownership model to keep the device alive while there are still active submissions `Global::device_drop` was wrongly assuming `device_poll` with `Maintain::Wait` was called but this is not a documented invariant and only `wgpu` was upholding this. --- tests/tests/device.rs | 34 ++------------------------- wgpu-core/src/device/global.rs | 15 +----------- wgpu-core/src/device/queue.rs | 9 ------- wgpu-core/src/device/resource.rs | 40 +++++++------------------------- wgpu-core/src/global.rs | 4 ---- wgpu/src/backend/wgpu_core.rs | 3 --- 6 files changed, 12 insertions(+), 93 deletions(-) diff --git a/tests/tests/device.rs b/tests/tests/device.rs index 2774bfd52..87935c831 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -630,7 +630,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().expect_fail(FailureCase::webgl2())) .run_sync(|ctx| { // This test checks that when the device is dropped (such as in a GC), - // the provided DeviceLostClosure is called with reason DeviceLostReason::Unknown. + // the provided DeviceLostClosure is called with reason DeviceLostReason::Dropped. // Fails on webgl because webgl doesn't implement drop. static WAS_CALLED: std::sync::atomic::AtomicBool = AtomicBool::new(false); @@ -642,8 +642,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new() }); ctx.device.set_device_lost_callback(callback); - // Drop the device. - drop(ctx.device); + drop(ctx); assert!( WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst), @@ -676,35 +675,6 @@ static DEVICE_LOST_REPLACED_CALLBACK: GpuTestConfiguration = GpuTestConfiguratio ); }); -#[gpu_test] -static DROPPED_GLOBAL_THEN_DEVICE_LOST: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters(TestParameters::default().skip(FailureCase::always())) - .run_sync(|ctx| { - // What we want to do is to drop the Global, forcing a code path that - // eventually calls Device.prepare_to_die, without having first dropped - // the device. This models what might happen in a user agent that kills - // wgpu without providing a more orderly shutdown. In such a case, the - // device lost callback should be invoked with the message "Device is - // dying." - static WAS_CALLED: AtomicBool = AtomicBool::new(false); - - // Set a LoseDeviceCallback on the device. - let callback = Box::new(|reason, message| { - WAS_CALLED.store(true, std::sync::atomic::Ordering::SeqCst); - assert_eq!(reason, wgt::DeviceLostReason::Dropped); - assert_eq!(message, "Device is dying."); - }); - ctx.device.set_device_lost_callback(callback); - - // TODO: Drop the Global, somehow. - - // Confirm that the callback was invoked. - assert!( - WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst), - "Device lost callback should have been called." - ); - }); - #[gpu_test] static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default()) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index b6ad2354c..c53db14be 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2078,20 +2078,7 @@ impl Global { profiling::scope!("Device::drop"); api_log!("Device::drop {device_id:?}"); - let device = self.hub.devices.remove(device_id); - let device_lost_closure = device.lock_life().device_lost_closure.take(); - if let Some(closure) = device_lost_closure { - closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); - } - - // The things `Device::prepare_to_die` takes care are mostly - // unnecessary here. We know our queue is empty, so we don't - // need to wait for submissions or triage them. We know we were - // just polled, so `life_tracker.free_resources` is empty. - debug_assert!(device.lock_life().queue_empty()); - device.pending_writes.lock().deactivate(); - - drop(device); + self.hub.devices.remove(device_id); } // This closure will be called exactly once during "lose the device", diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index ade609f20..c4ed7b9c0 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -345,15 +345,6 @@ impl PendingWrites { } self.command_encoder.as_mut() } - - pub fn deactivate(&mut self) { - if self.is_recording { - unsafe { - self.command_encoder.discard_encoding(); - } - self.is_recording = false; - } - } } #[derive(Clone, Debug, Error)] diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 754d1d91f..f5ae66b44 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -143,13 +143,13 @@ pub struct Device { pub(crate) instance_flags: wgt::InstanceFlags, pub(crate) pending_writes: Mutex>, pub(crate) deferred_destroy: Mutex>, - #[cfg(feature = "trace")] - pub(crate) trace: Mutex>, pub(crate) usage_scopes: UsageScopePool, pub(crate) last_acceleration_structure_build_command_index: AtomicU64, - #[cfg(feature = "indirect-validation")] pub(crate) indirect_validation: Option, + // needs to be dropped last + #[cfg(feature = "trace")] + pub(crate) trace: Mutex>, } pub(crate) enum DeferredDestroy { @@ -171,6 +171,12 @@ impl std::fmt::Debug for Device { impl Drop for Device { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + + let device_lost_closure = self.lock_life().device_lost_closure.take(); + if let Some(closure) = device_lost_closure { + closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); + } + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point. @@ -3730,34 +3736,6 @@ impl Device { } } -impl Device { - /// Wait for idle and remove resources that we can, before we die. - pub(crate) fn prepare_to_die(&self) { - self.pending_writes.lock().deactivate(); - let current_index = self - .last_successful_submission_index - .load(Ordering::Acquire); - if let Err(error) = unsafe { - let fence = self.fence.read(); - self.raw() - .wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS) - } { - log::error!("failed to wait for the device: {error}"); - } - let mut life_tracker = self.lock_life(); - let _ = life_tracker.triage_submissions(current_index, &self.command_allocator); - if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() { - // It's important to not hold the lock while calling the closure. - drop(life_tracker); - device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string()); - } - #[cfg(feature = "trace")] - { - *self.trace.lock() = None; - } - } -} - crate::impl_resource_type!(Device); crate::impl_labeled!(Device); crate::impl_storage_item!(Device); diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs index 25a864ec2..cd4508d14 100644 --- a/wgpu-core/src/global.rs +++ b/wgpu-core/src/global.rs @@ -89,10 +89,6 @@ impl Drop for Global { fn drop(&mut self) { profiling::scope!("Global::drop"); resource_log!("Global::drop"); - - for (_, device) in self.hub.devices.read().iter() { - device.prepare_to_die(); - } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index e2b75d1e1..a3c867e5a 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1344,9 +1344,6 @@ impl crate::Context for ContextWgpuCore { fn device_drop(&self, device_data: &Self::DeviceData) { #[cfg(any(native, Emscripten))] { - // Call device_poll, but don't check for errors. We have to use its - // return value, but we just drop it. - let _ = self.0.device_poll(device_data.id, wgt::Maintain::wait()); self.0.device_drop(device_data.id); } } From 6c7dbba39932e64c0d3881891cb3d5c6778af665 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:23:34 +0200 Subject: [PATCH 05/15] move `PendingWrites` into the `Queue` The `Device` should not contain any `Arc`s to resources as that creates cycles (since all resources hold strong references to the `Device`). Note that `PendingWrites` internally has `Arc`s to resources. I think this change also makes more sense conceptually since most operations that use `PendingWrites` are on the `Queue`. --- wgpu-core/src/command/ray_tracing.rs | 53 ++++++----- wgpu-core/src/device/queue.rs | 66 ++++++++++++-- wgpu-core/src/device/resource.rs | 37 +------- wgpu-core/src/instance.rs | 16 ++-- wgpu-core/src/lock/rank.rs | 4 +- wgpu-core/src/resource.rs | 129 +++++++++++++++------------ 6 files changed, 169 insertions(+), 136 deletions(-) diff --git a/wgpu-core/src/command/ray_tracing.rs b/wgpu-core/src/command/ray_tracing.rs index 08f720c75..efc3ebbe2 100644 --- a/wgpu-core/src/command/ray_tracing.rs +++ b/wgpu-core/src/command/ray_tracing.rs @@ -1,5 +1,5 @@ use crate::{ - device::queue::TempResource, + device::{queue::TempResource, Device}, global::Global, hub::Hub, id::CommandEncoderId, @@ -20,10 +20,7 @@ use crate::{ use wgt::{math::align_to, BufferUsages, Features}; use super::CommandBufferMutable; -use crate::device::queue::PendingWrites; use hal::BufferUses; -use std::mem::ManuallyDrop; -use std::ops::DerefMut; use std::{ cmp::max, num::NonZeroU64, @@ -184,7 +181,7 @@ impl Global { build_command_index, &mut buf_storage, hub, - device.pending_writes.lock().deref_mut(), + device, )?; let snatch_guard = device.snatchable_lock.read(); @@ -248,7 +245,9 @@ impl Global { .get() .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?; cmd_buf_data.trackers.tlas_s.set_single(tlas.clone()); - device.pending_writes.lock().insert_tlas(&tlas); + if let Some(queue) = device.get_queue() { + queue.pending_writes.lock().insert_tlas(&tlas); + } cmd_buf_data.tlas_actions.push(TlasAction { tlas: tlas.clone(), @@ -349,10 +348,12 @@ impl Global { } } - device - .pending_writes - .lock() - .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); + if let Some(queue) = device.get_queue() { + queue + .pending_writes + .lock() + .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); + } Ok(()) } @@ -495,7 +496,7 @@ impl Global { build_command_index, &mut buf_storage, hub, - device.pending_writes.lock().deref_mut(), + device, )?; let snatch_guard = device.snatchable_lock.read(); @@ -516,7 +517,9 @@ impl Global { .get(package.tlas_id) .get() .map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?; - device.pending_writes.lock().insert_tlas(&tlas); + if let Some(queue) = device.get_queue() { + queue.pending_writes.lock().insert_tlas(&tlas); + } cmd_buf_data.trackers.tlas_s.set_single(tlas.clone()); tlas_lock_store.push((Some(package), tlas.clone())) @@ -742,17 +745,21 @@ impl Global { } if let Some(staging_buffer) = staging_buffer { - device - .pending_writes - .lock() - .consume_temp(TempResource::StagingBuffer(staging_buffer)); + if let Some(queue) = device.get_queue() { + queue + .pending_writes + .lock() + .consume_temp(TempResource::StagingBuffer(staging_buffer)); + } } } - device - .pending_writes - .lock() - .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); + if let Some(queue) = device.get_queue() { + queue + .pending_writes + .lock() + .consume_temp(TempResource::ScratchBuffer(scratch_buffer)); + } Ok(()) } @@ -839,7 +846,7 @@ fn iter_blas<'a>( build_command_index: NonZeroU64, buf_storage: &mut Vec>, hub: &Hub, - pending_writes: &mut ManuallyDrop, + device: &Device, ) -> Result<(), BuildAccelerationStructureError> { let mut temp_buffer = Vec::new(); for entry in blas_iter { @@ -849,7 +856,9 @@ fn iter_blas<'a>( .get() .map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?; cmd_buf_data.trackers.blas_s.set_single(blas.clone()); - pending_writes.insert_blas(&blas); + if let Some(queue) = device.get_queue() { + queue.pending_writes.lock().insert_blas(&blas); + } cmd_buf_data.blas_actions.push(BlasAction { blas: blas.clone(), diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index c4ed7b9c0..c8862a518 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -14,7 +14,7 @@ use crate::{ hal_label, id::{self, QueueId}, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, - lock::RwLockWriteGuard, + lock::{rank, Mutex, RwLockWriteGuard}, resource::{ Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError, DestroyedTexture, Fallible, FlushedStagingBuffer, InvalidResourceError, Labeled, @@ -42,14 +42,59 @@ use super::Device; pub struct Queue { raw: ManuallyDrop>, pub(crate) device: Arc, + pub(crate) pending_writes: Mutex>, } impl Queue { - pub(crate) fn new(device: Arc, raw: Box) -> Self { - Queue { + pub(crate) fn new( + device: Arc, + raw: Box, + ) -> Result { + let pending_encoder = device + .command_allocator + .acquire_encoder(device.raw(), raw.as_ref()) + .map_err(DeviceError::from_hal); + + let pending_encoder = match pending_encoder { + Ok(pending_encoder) => pending_encoder, + Err(e) => { + device.release_queue(raw); + return Err(e); + } + }; + + let mut pending_writes = PendingWrites::new(pending_encoder); + + let zero_buffer = device.zero_buffer.as_ref(); + pending_writes.activate(); + unsafe { + pending_writes + .command_encoder + .transition_buffers(&[hal::BufferBarrier { + buffer: zero_buffer, + usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, + }]); + pending_writes + .command_encoder + .clear_buffer(zero_buffer, 0..super::ZERO_BUFFER_SIZE); + pending_writes + .command_encoder + .transition_buffers(&[hal::BufferBarrier { + buffer: zero_buffer, + usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC, + }]); + } + + let pending_writes = Mutex::new( + rank::QUEUE_PENDING_WRITES, + ManuallyDrop::new(pending_writes), + ); + + Ok(Queue { raw: ManuallyDrop::new(raw), device, - } + pending_writes, + }) } pub(crate) fn raw(&self) -> &dyn hal::DynQueue { @@ -70,6 +115,9 @@ crate::impl_storage_item!(Queue); impl Drop for Queue { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point. + let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) }; + pending_writes.dispose(self.device.raw()); // SAFETY: we never access `self.raw` beyond this point. let queue = unsafe { ManuallyDrop::take(&mut self.raw) }; self.device.release_queue(queue); @@ -418,7 +466,7 @@ impl Queue { // freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. let mut staging_buffer = StagingBuffer::new(&self.device, data_size)?; - let mut pending_writes = self.device.pending_writes.lock(); + let mut pending_writes = self.pending_writes.lock(); let staging_buffer = { profiling::scope!("copy"); @@ -460,7 +508,7 @@ impl Queue { let buffer = buffer.get()?; - let mut pending_writes = self.device.pending_writes.lock(); + let mut pending_writes = self.pending_writes.lock(); // At this point, we have taken ownership of the staging_buffer from the // user. Platform validation requires that the staging buffer always @@ -636,7 +684,7 @@ impl Queue { .map_err(TransferError::from)?; } - let mut pending_writes = self.device.pending_writes.lock(); + let mut pending_writes = self.pending_writes.lock(); let encoder = pending_writes.activate(); // If the copy does not fully cover the layers, we need to initialize to @@ -888,7 +936,7 @@ impl Queue { let (selector, dst_base) = extract_texture_selector(&destination, &size, &dst)?; - let mut pending_writes = self.device.pending_writes.lock(); + let mut pending_writes = self.pending_writes.lock(); let encoder = pending_writes.activate(); // If the copy does not fully cover the layers, we need to initialize to @@ -1157,7 +1205,7 @@ impl Queue { } } - let mut pending_writes = self.device.pending_writes.lock(); + let mut pending_writes = self.pending_writes.lock(); { used_surface_textures.set_size(self.device.tracker_indices.textures.size()); diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index f5ae66b44..48e065d78 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -6,10 +6,8 @@ use crate::{ device::{ bgl, create_validator, life::{LifetimeTracker, WaitIdleError}, - map_buffer, - queue::PendingWrites, - AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, - RenderPassContext, CLEANUP_WAIT_MS, + map_buffer, AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, + MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS, }, hal_label, init_tracker::{ @@ -141,7 +139,6 @@ pub struct Device { pub(crate) features: wgt::Features, pub(crate) downlevel: wgt::DownlevelCapabilities, pub(crate) instance_flags: wgt::InstanceFlags, - pub(crate) pending_writes: Mutex>, pub(crate) deferred_destroy: Mutex>, pub(crate) usage_scopes: UsageScopePool, pub(crate) last_acceleration_structure_build_command_index: AtomicU64, @@ -181,11 +178,8 @@ impl Drop for Device { let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point. let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) }; - // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point. - let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) }; // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point. let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) }; - pending_writes.dispose(raw.as_ref()); self.command_allocator.dispose(raw.as_ref()); #[cfg(feature = "indirect-validation")] self.indirect_validation @@ -228,7 +222,6 @@ impl Device { impl Device { pub(crate) fn new( raw_device: Box, - raw_queue: &dyn hal::DynQueue, adapter: &Arc, desc: &DeviceDescriptor, trace_path: Option<&std::path::Path>, @@ -241,10 +234,6 @@ impl Device { let fence = unsafe { raw_device.create_fence() }.map_err(DeviceError::from_hal)?; let command_allocator = command::CommandAllocator::new(); - let pending_encoder = command_allocator - .acquire_encoder(raw_device.as_ref(), raw_queue) - .map_err(DeviceError::from_hal)?; - let mut pending_writes = PendingWrites::new(pending_encoder); // Create zeroed buffer used for texture clears. let zero_buffer = unsafe { @@ -256,24 +245,6 @@ impl Device { }) } .map_err(DeviceError::from_hal)?; - pending_writes.activate(); - unsafe { - pending_writes - .command_encoder - .transition_buffers(&[hal::BufferBarrier { - buffer: zero_buffer.as_ref(), - usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, - }]); - pending_writes - .command_encoder - .clear_buffer(zero_buffer.as_ref(), 0..ZERO_BUFFER_SIZE); - pending_writes - .command_encoder - .transition_buffers(&[hal::BufferBarrier { - buffer: zero_buffer.as_ref(), - usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC, - }]); - } let alignments = adapter.raw.capabilities.alignments.clone(); let downlevel = adapter.raw.capabilities.downlevel.clone(); @@ -336,10 +307,6 @@ impl Device { features: desc.required_features, downlevel, instance_flags, - pending_writes: Mutex::new( - rank::DEVICE_PENDING_WRITES, - ManuallyDrop::new(pending_writes), - ), deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()), usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()), // By starting at one, we can put the result in a NonZeroU64. diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 2c517d1ab..53a498502 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -573,18 +573,14 @@ impl Adapter { ) -> Result<(Arc, Arc), RequestDeviceError> { api_log!("Adapter::create_device"); - let device = Device::new( - hal_device.device, - hal_device.queue.as_ref(), - self, - desc, - trace_path, - instance_flags, - )?; - + let device = Device::new(hal_device.device, self, desc, trace_path, instance_flags)?; let device = Arc::new(device); - let queue = Arc::new(Queue::new(device.clone(), hal_device.queue)); + + let queue = Queue::new(device.clone(), hal_device.queue)?; + let queue = Arc::new(queue); + device.set_queue(&queue); + Ok((device, queue)) } diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 842dadf26..154d82b8d 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -111,11 +111,11 @@ define_lock_ranks! { // COMMAND_BUFFER_DATA, } rank BUFFER_MAP_STATE "Buffer::map_state" followed by { - DEVICE_PENDING_WRITES, + QUEUE_PENDING_WRITES, SHARED_TRACKER_INDEX_ALLOCATOR_INNER, DEVICE_TRACE, } - rank DEVICE_PENDING_WRITES "Device::pending_writes" followed by { + rank QUEUE_PENDING_WRITES "Queue::pending_writes" followed by { COMMAND_ALLOCATOR_FREE_ENCODERS, SHARED_TRACKER_INDEX_ALLOCATOR_INNER, DEVICE_LIFE_TRACKER, diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 8ea9dd07e..88b76bb67 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -676,36 +676,37 @@ impl Buffer { }); } - let mut pending_writes = device.pending_writes.lock(); - let staging_buffer = staging_buffer.flush(); - let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy { - src_offset: 0, - dst_offset: 0, - size, - }); - let transition_src = hal::BufferBarrier { - buffer: staging_buffer.raw(), - usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, - }; - let transition_dst = hal::BufferBarrier:: { - buffer: raw_buf, - usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, - }; - let encoder = pending_writes.activate(); - unsafe { - encoder.transition_buffers(&[transition_src, transition_dst]); - if self.size > 0 { - encoder.copy_buffer_to_buffer( - staging_buffer.raw(), - raw_buf, - region.as_slice(), - ); + if let Some(queue) = device.get_queue() { + let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: 0, + size, + }); + let transition_src = hal::BufferBarrier { + buffer: staging_buffer.raw(), + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }; + let transition_dst = hal::BufferBarrier:: { + buffer: raw_buf, + usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST, + }; + let mut pending_writes = queue.pending_writes.lock(); + let encoder = pending_writes.activate(); + unsafe { + encoder.transition_buffers(&[transition_src, transition_dst]); + if self.size > 0 { + encoder.copy_buffer_to_buffer( + staging_buffer.raw(), + raw_buf, + region.as_slice(), + ); + } } + pending_writes.consume(staging_buffer); + pending_writes.insert_buffer(self); } - pending_writes.consume(staging_buffer); - pending_writes.insert_buffer(self); } BufferMapState::Idle => { return Err(BufferAccessError::NotMapped); @@ -778,17 +779,20 @@ impl Buffer { }) }; - let mut pending_writes = device.pending_writes.lock(); - if pending_writes.contains_buffer(self) { - pending_writes.consume_temp(temp); - } else { - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_buffer_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); + if let Some(queue) = device.get_queue() { + let mut pending_writes = queue.pending_writes.lock(); + if pending_writes.contains_buffer(self) { + pending_writes.consume_temp(temp); + return Ok(()); } } + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_buffer_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } + Ok(()) } } @@ -1244,17 +1248,20 @@ impl Texture { }) }; - let mut pending_writes = device.pending_writes.lock(); - if pending_writes.contains_texture(self) { - pending_writes.consume_temp(temp); - } else { - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_texture_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); + if let Some(queue) = device.get_queue() { + let mut pending_writes = queue.pending_writes.lock(); + if pending_writes.contains_texture(self) { + pending_writes.consume_temp(temp); + return Ok(()); } } + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_texture_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } + Ok(()) } } @@ -1960,17 +1967,20 @@ impl Blas { }) }; - let mut pending_writes = device.pending_writes.lock(); - if pending_writes.contains_blas(self) { - pending_writes.consume_temp(temp); - } else { - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_blas_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); + if let Some(queue) = device.get_queue() { + let mut pending_writes = queue.pending_writes.lock(); + if pending_writes.contains_blas(self) { + pending_writes.consume_temp(temp); + return Ok(()); } } + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_blas_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } + Ok(()) } } @@ -2047,17 +2057,20 @@ impl Tlas { }) }; - let mut pending_writes = device.pending_writes.lock(); - if pending_writes.contains_tlas(self) { - pending_writes.consume_temp(temp); - } else { - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_tlas_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); + if let Some(queue) = device.get_queue() { + let mut pending_writes = queue.pending_writes.lock(); + if pending_writes.contains_tlas(self) { + pending_writes.consume_temp(temp); + return Ok(()); } } + let mut life_lock = device.lock_life(); + let last_submit_index = life_lock.get_tlas_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } + Ok(()) } } From 61c84f956ee84ceeb9f008d1eaad1064bdabad7b Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 11:41:48 +0200 Subject: [PATCH 06/15] Remove outdated locking comments We should rely on the ranks in `wgpu-core\src\lock\rank.rs`. --- wgpu-core/src/device/resource.rs | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 48e065d78..eba5a30bb 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -57,24 +57,6 @@ use super::{ /// Structure describing a logical device. Some members are internally mutable, /// stored behind mutexes. -/// -/// TODO: establish clear order of locking for these: -/// `life_tracker`, `trackers`, `render_passes`, `pending_writes`, `trace`. -/// -/// Currently, the rules are: -/// 1. `life_tracker` is locked after `hub.devices`, enforced by the type system -/// 1. `self.trackers` is locked last (unenforced) -/// 1. `self.trace` is locked last (unenforced) -/// -/// Right now avoid locking twice same resource or registry in a call execution -/// and minimize the locking to the minimum scope possible -/// Unless otherwise specified, no lock may be acquired while holding another lock. -/// This means that you must inspect function calls made while a lock is held -/// to see what locks the callee may try to acquire. -/// -/// Important: -/// When locking pending_writes please check that trackers is not locked -/// trackers should be locked only when needed for the shortest time possible pub struct Device { raw: ManuallyDrop>, pub(crate) adapter: Arc, @@ -124,13 +106,9 @@ pub struct Device { /// using ref-counted references for internal access. pub(crate) valid: AtomicBool, - /// All live resources allocated with this [`Device`]. - /// - /// Has to be locked temporarily only (locked last) - /// and never before pending_writes + /// Stores the state of buffers and textures. pub(crate) trackers: Mutex, pub(crate) tracker_indices: TrackerIndexAllocators, - // Life tracker should be locked right after the device and before anything else. life_tracker: Mutex, /// Pool of bind group layouts, allowing deduplication. pub(crate) bgl_pool: ResourcePool, From e934595bb2337175a5b6ea84d33786b58820f8e3 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:23:44 +0200 Subject: [PATCH 07/15] move `device_lost_closure` from `Device::life_tracker` to `Device` --- wgpu-core/src/device/global.rs | 14 +++++++------- wgpu-core/src/device/life.rs | 8 +------- wgpu-core/src/device/resource.rs | 25 ++++++++++++------------- wgpu-core/src/lock/rank.rs | 1 + 4 files changed, 21 insertions(+), 27 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index c53db14be..6f0359e21 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2090,14 +2090,14 @@ impl Global { ) { let device = self.hub.devices.get(device_id); - let mut life_tracker = device.lock_life(); - if let Some(existing_closure) = life_tracker.device_lost_closure.take() { - // It's important to not hold the lock while calling the closure. - drop(life_tracker); - existing_closure.call(DeviceLostReason::ReplacedCallback, "".to_string()); - life_tracker = device.lock_life(); + let old_device_lost_closure = device + .device_lost_closure + .lock() + .replace(device_lost_closure); + + if let Some(old_device_lost_closure) = old_device_lost_closure { + old_device_lost_closure.call(DeviceLostReason::ReplacedCallback, "".to_string()); } - life_tracker.device_lost_closure = Some(device_lost_closure); } pub fn device_destroy(&self, device_id: DeviceId) { diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 70e5337a7..dc9d4051e 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -1,7 +1,7 @@ use crate::{ device::{ queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource}, - DeviceError, DeviceLostClosure, + DeviceError, }, resource::{self, Buffer, Texture, Trackable}, snatch::SnatchGuard, @@ -196,11 +196,6 @@ pub(crate) struct LifetimeTracker { /// must happen _after_ all mapped buffer callbacks are mapped, so we defer them /// here until the next time the device is maintained. work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>, - - /// Closure to be called on "lose the device". This is invoked directly by - /// device.lose or by the UserCallbacks returned from maintain when the device - /// has been destroyed and its queues are empty. - pub device_lost_closure: Option, } impl LifetimeTracker { @@ -209,7 +204,6 @@ impl LifetimeTracker { active: Vec::new(), ready_to_map: Vec::new(), work_done_closures: SmallVec::new(), - device_lost_closure: None, } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index eba5a30bb..5f50feed8 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -51,8 +51,8 @@ use std::{ }; use super::{ - queue::Queue, DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR, - ZERO_BUFFER_SIZE, + queue::Queue, DeviceDescriptor, DeviceError, DeviceLostClosure, UserClosures, + ENTRYPOINT_FAILURE_ERROR, ZERO_BUFFER_SIZE, }; /// Structure describing a logical device. Some members are internally mutable, @@ -106,6 +106,11 @@ pub struct Device { /// using ref-counted references for internal access. pub(crate) valid: AtomicBool, + /// Closure to be called on "lose the device". This is invoked directly by + /// device.lose or by the UserCallbacks returned from maintain when the device + /// has been destroyed and its queues are empty. + pub(crate) device_lost_closure: Mutex>, + /// Stores the state of buffers and textures. pub(crate) trackers: Mutex, pub(crate) tracker_indices: TrackerIndexAllocators, @@ -147,8 +152,7 @@ impl Drop for Device { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); - let device_lost_closure = self.lock_life().device_lost_closure.take(); - if let Some(closure) = device_lost_closure { + if let Some(closure) = self.device_lost_closure.lock().take() { closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); } @@ -259,6 +263,7 @@ impl Device { fence: RwLock::new(rank::DEVICE_FENCE, ManuallyDrop::new(fence)), snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) }, valid: AtomicBool::new(true), + device_lost_closure: Mutex::new(rank::DEVICE_LOST_CLOSURE, None), trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()), tracker_indices: TrackerIndexAllocators::new(), life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, LifetimeTracker::new()), @@ -466,9 +471,9 @@ impl Device { // If we have a DeviceLostClosure, build an invocation with the // reason DeviceLostReason::Destroyed and no message. - if life_tracker.device_lost_closure.is_some() { + if let Some(device_lost_closure) = self.device_lost_closure.lock().take() { device_lost_invocations.push(DeviceLostInvocation { - closure: life_tracker.device_lost_closure.take().unwrap(), + closure: device_lost_closure, reason: DeviceLostReason::Destroyed, message: String::new(), }); @@ -3623,13 +3628,7 @@ impl Device { self.valid.store(false, Ordering::Release); // 1) Resolve the GPUDevice device.lost promise. - let mut life_lock = self.lock_life(); - let closure = life_lock.device_lost_closure.take(); - // It's important to not hold the lock while calling the closure and while calling - // release_gpu_resources which may take the lock again. - drop(life_lock); - - if let Some(device_lost_closure) = closure { + if let Some(device_lost_closure) = self.device_lost_closure.lock().take() { device_lost_closure.call(DeviceLostReason::Unknown, message.to_string()); } diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 154d82b8d..1e8b4d2ae 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -135,6 +135,7 @@ define_lock_ranks! { #[allow(dead_code)] rank DEVICE_TRACE "Device::trace" followed by { } rank DEVICE_TRACKERS "Device::trackers" followed by { } + rank DEVICE_LOST_CLOSURE "Device::device_lost_closure" followed by { } rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { } rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { } rank REGISTRY_STORAGE "Registry::storage" followed by { } From 6cc53421bfcd513bff4e3ceb7b434955443c3141 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:41:29 +0200 Subject: [PATCH 08/15] move `LifetimeTracker` into the `Queue` The `Device` should not contain any `Arc`s to resources as that creates cycles (since all resources hold strong references to the `Device`). Note that `LifetimeTracker` internally has `Arc`s to resources. --- wgpu-core/src/device/global.rs | 8 +++-- wgpu-core/src/device/queue.rs | 15 +++++--- wgpu-core/src/device/resource.rs | 50 +++++++++++++-------------- wgpu-core/src/lock/rank.rs | 4 +-- wgpu-core/src/resource.rs | 59 ++++++++++++++++---------------- 5 files changed, 73 insertions(+), 63 deletions(-) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 6f0359e21..f9b6794b4 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -219,9 +219,11 @@ impl Global { device.check_is_valid()?; buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?; - let last_submission = device - .lock_life() - .get_buffer_latest_submission_index(&buffer); + let last_submission = device.get_queue().and_then(|queue| { + queue + .lock_life() + .get_buffer_latest_submission_index(&buffer) + }); if let Some(last_submission) = last_submission { device.wait_for_submit(last_submission)?; diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index c8862a518..afe763d77 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -14,7 +14,7 @@ use crate::{ hal_label, id::{self, QueueId}, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, - lock::{rank, Mutex, RwLockWriteGuard}, + lock::{rank, Mutex, MutexGuard, RwLockWriteGuard}, resource::{ Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError, DestroyedTexture, Fallible, FlushedStagingBuffer, InvalidResourceError, Labeled, @@ -37,12 +37,13 @@ use std::{ }; use thiserror::Error; -use super::Device; +use super::{life::LifetimeTracker, Device}; pub struct Queue { raw: ManuallyDrop>, pub(crate) device: Arc, pub(crate) pending_writes: Mutex>, + life_tracker: Mutex, } impl Queue { @@ -94,12 +95,18 @@ impl Queue { raw: ManuallyDrop::new(raw), device, pending_writes, + life_tracker: Mutex::new(rank::QUEUE_LIFE_TRACKER, LifetimeTracker::new()), }) } pub(crate) fn raw(&self) -> &dyn hal::DynQueue { self.raw.as_ref() } + + #[track_caller] + pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> { + self.life_tracker.lock() + } } crate::impl_resource_type!(Queue); @@ -1292,7 +1299,7 @@ impl Queue { profiling::scope!("cleanup"); // this will register the new submission to the life time tracker - self.device.lock_life().track_submission( + self.lock_life().track_submission( submit_index, pending_writes.temp_resources.drain(..), active_executions, @@ -1341,7 +1348,7 @@ impl Queue { ) -> Option { api_log!("Queue::on_submitted_work_done"); //TODO: flush pending writes - self.device.lock_life().add_work_done_closure(closure) + self.lock_life().add_work_done_closure(closure) } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 5f50feed8..ee1f25233 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -4,10 +4,9 @@ use crate::{ binding_model::{self, BindGroup, BindGroupLayout, BindGroupLayoutEntryError}, command, conv, device::{ - bgl, create_validator, - life::{LifetimeTracker, WaitIdleError}, - map_buffer, AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, - MissingFeatures, RenderPassContext, CLEANUP_WAIT_MS, + bgl, create_validator, life::WaitIdleError, map_buffer, AttachmentData, + DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext, + CLEANUP_WAIT_MS, }, hal_label, init_tracker::{ @@ -15,7 +14,7 @@ use crate::{ TextureInitTrackerAction, }, instance::Adapter, - lock::{rank, Mutex, MutexGuard, RwLock}, + lock::{rank, Mutex, RwLock}, pipeline, pool::ResourcePool, resource::{ @@ -114,7 +113,6 @@ pub struct Device { /// Stores the state of buffers and textures. pub(crate) trackers: Mutex, pub(crate) tracker_indices: TrackerIndexAllocators, - life_tracker: Mutex, /// Pool of bind group layouts, allowing deduplication. pub(crate) bgl_pool: ResourcePool, pub(crate) alignments: hal::Alignments, @@ -266,7 +264,6 @@ impl Device { device_lost_closure: Mutex::new(rank::DEVICE_LOST_CLOSURE, None), trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()), tracker_indices: TrackerIndexAllocators::new(), - life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, LifetimeTracker::new()), bgl_pool: ResourcePool::new(), #[cfg(feature = "trace")] trace: Mutex::new( @@ -332,11 +329,6 @@ impl Device { assert!(self.queue_to_drop.set(queue).is_ok()); } - #[track_caller] - pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> { - self.life_tracker.lock() - } - /// Run some destroy operations that were deferred. /// /// Destroying the resources requires taking a write lock on the device's snatch lock, @@ -449,13 +441,20 @@ impl Device { .map_err(|e| self.handle_hal_error(e))?; } - let mut life_tracker = self.lock_life(); - let submission_closures = - life_tracker.triage_submissions(submission_index, &self.command_allocator); + let (submission_closures, mapping_closures, queue_empty) = + if let Some(queue) = self.get_queue() { + let mut life_tracker = queue.lock_life(); + let submission_closures = + life_tracker.triage_submissions(submission_index, &self.command_allocator); - let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard); + let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard); - let queue_empty = life_tracker.queue_empty(); + let queue_empty = life_tracker.queue_empty(); + + (submission_closures, mapping_closures, queue_empty) + } else { + (SmallVec::new(), Vec::new(), true) + }; // Detect if we have been destroyed and now need to lose the device. // If we are invalid (set at start of destroy) and our queue is empty, @@ -481,7 +480,6 @@ impl Device { } // Don't hold the locks while calling release_gpu_resources. - drop(life_tracker); drop(fence); drop(snatch_guard); @@ -3562,13 +3560,15 @@ impl Device { unsafe { self.raw().wait(fence.as_ref(), submission_index, !0) } .map_err(|e| self.handle_hal_error(e))?; drop(fence); - let closures = self - .lock_life() - .triage_submissions(submission_index, &self.command_allocator); - assert!( - closures.is_empty(), - "wait_for_submit is not expected to work with closures" - ); + if let Some(queue) = self.get_queue() { + let closures = queue + .lock_life() + .triage_submissions(submission_index, &self.command_allocator); + assert!( + closures.is_empty(), + "wait_for_submit is not expected to work with closures" + ); + } } Ok(()) } diff --git a/wgpu-core/src/lock/rank.rs b/wgpu-core/src/lock/rank.rs index 1e8b4d2ae..51c6c5431 100644 --- a/wgpu-core/src/lock/rank.rs +++ b/wgpu-core/src/lock/rank.rs @@ -118,9 +118,9 @@ define_lock_ranks! { rank QUEUE_PENDING_WRITES "Queue::pending_writes" followed by { COMMAND_ALLOCATOR_FREE_ENCODERS, SHARED_TRACKER_INDEX_ALLOCATOR_INNER, - DEVICE_LIFE_TRACKER, + QUEUE_LIFE_TRACKER, } - rank DEVICE_LIFE_TRACKER "Device::life_tracker" followed by { + rank QUEUE_LIFE_TRACKER "Queue::life_tracker" followed by { COMMAND_ALLOCATOR_FREE_ENCODERS, DEVICE_TRACE, } diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 88b76bb67..6d7544d9b 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -634,7 +634,12 @@ impl Buffer { .buffers .set_single(self, internal_use); - let submit_index = device.lock_life().map(self).unwrap_or(0); // '0' means no wait is necessary + let submit_index = if let Some(queue) = device.get_queue() { + queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary + } else { + // TODO: map immediately + 0 + }; Ok(submit_index) } @@ -783,16 +788,15 @@ impl Buffer { let mut pending_writes = queue.pending_writes.lock(); if pending_writes.contains_buffer(self) { pending_writes.consume_temp(temp); - return Ok(()); + } else { + let mut life_lock = queue.lock_life(); + let last_submit_index = life_lock.get_buffer_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } } - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_buffer_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); - } - Ok(()) } } @@ -1252,16 +1256,15 @@ impl Texture { let mut pending_writes = queue.pending_writes.lock(); if pending_writes.contains_texture(self) { pending_writes.consume_temp(temp); - return Ok(()); + } else { + let mut life_lock = queue.lock_life(); + let last_submit_index = life_lock.get_texture_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } } - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_texture_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); - } - Ok(()) } } @@ -1971,16 +1974,15 @@ impl Blas { let mut pending_writes = queue.pending_writes.lock(); if pending_writes.contains_blas(self) { pending_writes.consume_temp(temp); - return Ok(()); + } else { + let mut life_lock = queue.lock_life(); + let last_submit_index = life_lock.get_blas_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } } - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_blas_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); - } - Ok(()) } } @@ -2061,16 +2063,15 @@ impl Tlas { let mut pending_writes = queue.pending_writes.lock(); if pending_writes.contains_tlas(self) { pending_writes.consume_temp(temp); - return Ok(()); + } else { + let mut life_lock = queue.lock_life(); + let last_submit_index = life_lock.get_tlas_latest_submission_index(self); + if let Some(last_submit_index) = last_submit_index { + life_lock.schedule_resource_destruction(temp, last_submit_index); + } } } - let mut life_lock = device.lock_life(); - let last_submit_index = life_lock.get_tlas_latest_submission_index(self); - if let Some(last_submit_index) = last_submit_index { - life_lock.schedule_resource_destruction(temp, last_submit_index); - } - Ok(()) } } From 5061ae3e5e52326135274ccf00ab0dba246f67f3 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 12:58:17 +0200 Subject: [PATCH 09/15] map buffers immediately in `map_async` if the `Queue` has been dropped --- wgpu-core/src/device/life.rs | 62 +++----------------------------- wgpu-core/src/device/mod.rs | 11 +++--- wgpu-core/src/device/resource.rs | 11 ++---- wgpu-core/src/resource.rs | 62 +++++++++++++++++++++++++++++++- 4 files changed, 74 insertions(+), 72 deletions(-) diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index dc9d4051e..c7d3a59d6 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -3,7 +3,7 @@ use crate::{ queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource}, DeviceError, }, - resource::{self, Buffer, Texture, Trackable}, + resource::{Buffer, Texture, Trackable}, snatch::SnatchGuard, SubmissionIndex, }; @@ -388,7 +388,6 @@ impl LifetimeTracker { #[must_use] pub(crate) fn handle_mapping( &mut self, - raw: &dyn hal::DynDevice, snatch_guard: &SnatchGuard, ) -> Vec { if self.ready_to_map.is_empty() { @@ -398,61 +397,10 @@ impl LifetimeTracker { Vec::with_capacity(self.ready_to_map.len()); for buffer in self.ready_to_map.drain(..) { - // This _cannot_ be inlined into the match. If it is, the lock will be held - // open through the whole match, resulting in a deadlock when we try to re-lock - // the buffer back to active. - let mapping = std::mem::replace( - &mut *buffer.map_state.lock(), - resource::BufferMapState::Idle, - ); - let pending_mapping = match mapping { - resource::BufferMapState::Waiting(pending_mapping) => pending_mapping, - // Mapping cancelled - resource::BufferMapState::Idle => continue, - // Mapping queued at least twice by map -> unmap -> map - // and was already successfully mapped below - resource::BufferMapState::Active { .. } => { - *buffer.map_state.lock() = mapping; - continue; - } - _ => panic!("No pending mapping."), - }; - let status = if pending_mapping.range.start != pending_mapping.range.end { - let host = pending_mapping.op.host; - let size = pending_mapping.range.end - pending_mapping.range.start; - match super::map_buffer( - raw, - &buffer, - pending_mapping.range.start, - size, - host, - snatch_guard, - ) { - Ok(mapping) => { - *buffer.map_state.lock() = resource::BufferMapState::Active { - mapping, - range: pending_mapping.range.clone(), - host, - }; - Ok(()) - } - Err(e) => { - log::error!("Mapping failed: {e}"); - Err(e) - } - } - } else { - *buffer.map_state.lock() = resource::BufferMapState::Active { - mapping: hal::BufferMapping { - ptr: std::ptr::NonNull::dangling(), - is_coherent: true, - }, - range: pending_mapping.range, - host: pending_mapping.op.host, - }; - Ok(()) - }; - pending_callbacks.push((pending_mapping.op, status)); + match buffer.map(snatch_guard) { + Some(cb) => pending_callbacks.push(cb), + None => continue, + } } pending_callbacks } diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 81cb4654d..18e35206b 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -298,24 +298,25 @@ impl DeviceLostClosure { } } -fn map_buffer( - raw: &dyn hal::DynDevice, +pub(crate) fn map_buffer( buffer: &Buffer, offset: BufferAddress, size: BufferAddress, kind: HostMap, snatch_guard: &SnatchGuard, ) -> Result { + let raw_device = buffer.device.raw(); let raw_buffer = buffer.try_raw(snatch_guard)?; let mapping = unsafe { - raw.map_buffer(raw_buffer, offset..offset + size) + raw_device + .map_buffer(raw_buffer, offset..offset + size) .map_err(|e| buffer.device.handle_hal_error(e))? }; if !mapping.is_coherent && kind == HostMap::Read { #[allow(clippy::single_range_in_vec_init)] unsafe { - raw.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]); + raw_device.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]); } } @@ -370,7 +371,7 @@ fn map_buffer( && kind == HostMap::Read && buffer.usage.contains(wgt::BufferUsages::MAP_WRITE) { - unsafe { raw.flush_mapped_ranges(raw_buffer, &[uninitialized]) }; + unsafe { raw_device.flush_mapped_ranges(raw_buffer, &[uninitialized]) }; } } } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index ee1f25233..95274c4d0 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -447,7 +447,7 @@ impl Device { let submission_closures = life_tracker.triage_submissions(submission_index, &self.command_allocator); - let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard); + let mapping_closures = life_tracker.handle_mapping(&snatch_guard); let queue_empty = life_tracker.queue_empty(); @@ -620,14 +620,7 @@ impl Device { } } else { let snatch_guard: SnatchGuard = self.snatchable_lock.read(); - map_buffer( - self.raw(), - &buffer, - 0, - map_size, - HostMap::Write, - &snatch_guard, - )? + map_buffer(&buffer, 0, map_size, HostMap::Write, &snatch_guard)? }; *buffer.map_state.lock() = resource::BufferMapState::Active { mapping, diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 6d7544d9b..0b9cdf821 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -637,13 +637,73 @@ impl Buffer { let submit_index = if let Some(queue) = device.get_queue() { queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary } else { - // TODO: map immediately + // We can safely unwrap below since we just set the `map_state` to `BufferMapState::Waiting`. + let (mut operation, status) = self.map(&device.snatchable_lock.read()).unwrap(); + if let Some(callback) = operation.callback.take() { + callback.call(status); + } 0 }; Ok(submit_index) } + /// This function returns [`None`] only if [`Self::map_state`] is not [`BufferMapState::Waiting`]. + #[must_use] + pub(crate) fn map(&self, snatch_guard: &SnatchGuard) -> Option { + // This _cannot_ be inlined into the match. If it is, the lock will be held + // open through the whole match, resulting in a deadlock when we try to re-lock + // the buffer back to active. + let mapping = mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle); + let pending_mapping = match mapping { + BufferMapState::Waiting(pending_mapping) => pending_mapping, + // Mapping cancelled + BufferMapState::Idle => return None, + // Mapping queued at least twice by map -> unmap -> map + // and was already successfully mapped below + BufferMapState::Active { .. } => { + *self.map_state.lock() = mapping; + return None; + } + _ => panic!("No pending mapping."), + }; + let status = if pending_mapping.range.start != pending_mapping.range.end { + let host = pending_mapping.op.host; + let size = pending_mapping.range.end - pending_mapping.range.start; + match crate::device::map_buffer( + self, + pending_mapping.range.start, + size, + host, + snatch_guard, + ) { + Ok(mapping) => { + *self.map_state.lock() = BufferMapState::Active { + mapping, + range: pending_mapping.range.clone(), + host, + }; + Ok(()) + } + Err(e) => { + log::error!("Mapping failed: {e}"); + Err(e) + } + } + } else { + *self.map_state.lock() = BufferMapState::Active { + mapping: hal::BufferMapping { + ptr: NonNull::dangling(), + is_coherent: true, + }, + range: pending_mapping.range, + host: pending_mapping.op.host, + }; + Ok(()) + }; + Some((pending_mapping.op, status)) + } + // Note: This must not be called while holding a lock. pub(crate) fn unmap( self: &Arc, From 9e17e32686a142ba98408939adb5ee76a80d9fef Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:16:24 +0200 Subject: [PATCH 10/15] extract `Queue.maintain` --- wgpu-core/src/device/queue.rs | 23 ++++++++++++++++++++++- wgpu-core/src/device/resource.rs | 10 +--------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index afe763d77..c21395542 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -21,6 +21,7 @@ use crate::{ ParentDevice, ResourceErrorIdent, StagingBuffer, Texture, TextureInner, Trackable, }, resource_log, + snatch::SnatchGuard, track::{self, Tracker, TrackerIndex}, FastHashMap, SubmissionIndex, }; @@ -107,6 +108,26 @@ impl Queue { pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> { self.life_tracker.lock() } + + pub(crate) fn maintain( + &self, + submission_index: u64, + snatch_guard: &SnatchGuard, + ) -> ( + SmallVec<[SubmittedWorkDoneClosure; 1]>, + Vec, + bool, + ) { + let mut life_tracker = self.lock_life(); + let submission_closures = + life_tracker.triage_submissions(submission_index, &self.device.command_allocator); + + let mapping_closures = life_tracker.handle_mapping(snatch_guard); + + let queue_empty = life_tracker.queue_empty(); + + (submission_closures, mapping_closures, queue_empty) + } } crate::impl_resource_type!(Queue); @@ -1505,7 +1526,7 @@ fn validate_command_buffer( command_buffer: &CommandBuffer, queue: &Queue, cmd_buf_data: &crate::command::CommandBufferMutable, - snatch_guard: &crate::snatch::SnatchGuard<'_>, + snatch_guard: &SnatchGuard, submit_surface_textures_owned: &mut FastHashMap<*const Texture, Arc>, used_surface_textures: &mut track::TextureUsageScope, ) -> Result<(), QueueSubmitError> { diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 95274c4d0..2abc9ddaa 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -443,15 +443,7 @@ impl Device { let (submission_closures, mapping_closures, queue_empty) = if let Some(queue) = self.get_queue() { - let mut life_tracker = queue.lock_life(); - let submission_closures = - life_tracker.triage_submissions(submission_index, &self.command_allocator); - - let mapping_closures = life_tracker.handle_mapping(&snatch_guard); - - let queue_empty = life_tracker.queue_empty(); - - (submission_closures, mapping_closures, queue_empty) + queue.maintain(submission_index, &snatch_guard) } else { (SmallVec::new(), Vec::new(), true) }; From ddf0e67da7582969e7d16d74fd600bc0ad9f31e5 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 13:49:01 +0200 Subject: [PATCH 11/15] wait for the last successful submission to complete before dropping the `Queue` --- wgpu-core/src/device/queue.rs | 44 +++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index c21395542..8b1feefab 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -143,12 +143,56 @@ crate::impl_storage_item!(Queue); impl Drop for Queue { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + + let last_successful_submission_index = self + .device + .last_successful_submission_index + .load(Ordering::Acquire); + + let fence = self.device.fence.read(); + let wait_res = unsafe { + self.device.raw().wait( + fence.as_ref(), + last_successful_submission_index, + crate::device::CLEANUP_WAIT_MS, + ) + }; + drop(fence); + + match wait_res { + Ok(true) => {} + // Note: If we don't panic here we are in UB land (destroying resources while they are still in use by the GPU). + Ok(false) => { + panic!("We timed out while waiting on the last successful submission to complete!"); + } + Err(e) => { + panic!( + "We ran into an error while waiting on the last successful submission to complete! - {e}" + ); + } + } + + let snatch_guard = self.device.snatchable_lock.read(); + let (submission_closures, mapping_closures, queue_empty) = + self.maintain(last_successful_submission_index, &snatch_guard); + drop(snatch_guard); + + assert!(queue_empty); + + let closures = crate::device::UserClosures { + mappings: mapping_closures, + submissions: submission_closures, + device_lost_invocations: SmallVec::new(), + }; + // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point. let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) }; pending_writes.dispose(self.device.raw()); // SAFETY: we never access `self.raw` beyond this point. let queue = unsafe { ManuallyDrop::take(&mut self.raw) }; self.device.release_queue(queue); + + closures.fire(); } } From 394bf37f98a70084bba441b929fce43fb8c5ac11 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 15 Oct 2024 19:11:02 +0200 Subject: [PATCH 12/15] expect to timeout when targeting WebGL --- wgpu-core/src/device/queue.rs | 10 ++++++++++ wgpu-hal/src/gles/device.rs | 4 ++++ 2 files changed, 14 insertions(+) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 8b1feefab..ba3974fa9 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -154,7 +154,10 @@ impl Drop for Queue { self.device.raw().wait( fence.as_ref(), last_successful_submission_index, + #[cfg(not(target_arch = "wasm32"))] crate::device::CLEANUP_WAIT_MS, + #[cfg(target_arch = "wasm32")] + 0, // WebKit and Chromium don't support a non-0 timeout ) }; drop(fence); @@ -163,6 +166,13 @@ impl Drop for Queue { Ok(true) => {} // Note: If we don't panic here we are in UB land (destroying resources while they are still in use by the GPU). Ok(false) => { + // It's fine that we timed out on WebGL; GL objects can be deleted early as they + // will be kept around by the driver if GPU work hasn't finished. + // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other + // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. + // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and + // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. + #[cfg(not(target_arch = "wasm32"))] panic!("We timed out while waiting on the last successful submission to complete!"); } Err(e) => { diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 0421adc04..27aabc51e 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -1563,6 +1563,10 @@ impl crate::Device for super::Device { ) -> Result { if fence.last_completed.load(Ordering::Relaxed) < wait_value { let gl = &self.shared.context.lock(); + // MAX_CLIENT_WAIT_TIMEOUT_WEBGL is: + // - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386 + // - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110 + // - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551 let timeout_ns = if cfg!(any(webgl, Emscripten)) { 0 } else { From d489e4c2e8f2fe494f4871ee90a0dd42086dd4ae Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Thu, 7 Nov 2024 12:13:25 +0100 Subject: [PATCH 13/15] remove `hal::Device::exit`, add `Drop` implementations to `Device` and `Queue` instead --- wgpu-core/src/device/queue.rs | 8 ++---- wgpu-core/src/device/resource.rs | 22 +++++---------- wgpu-hal/examples/halmark/main.rs | 3 ++- wgpu-hal/examples/ray-traced-triangle/main.rs | 3 ++- wgpu-hal/src/dx12/device.rs | 4 --- wgpu-hal/src/dx12/mod.rs | 6 +++++ wgpu-hal/src/dynamic/device.rs | 6 ----- wgpu-hal/src/empty.rs | 1 - wgpu-hal/src/gles/device.rs | 8 ------ wgpu-hal/src/gles/mod.rs | 16 +++++++++++ wgpu-hal/src/lib.rs | 4 +-- wgpu-hal/src/metal/device.rs | 2 -- wgpu-hal/src/vulkan/device.rs | 24 ----------------- wgpu-hal/src/vulkan/mod.rs | 27 +++++++++++++++++++ 14 files changed, 62 insertions(+), 72 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index ba3974fa9..efcf50752 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -41,7 +41,7 @@ use thiserror::Error; use super::{life::LifetimeTracker, Device}; pub struct Queue { - raw: ManuallyDrop>, + raw: Box, pub(crate) device: Arc, pub(crate) pending_writes: Mutex>, life_tracker: Mutex, @@ -60,7 +60,6 @@ impl Queue { let pending_encoder = match pending_encoder { Ok(pending_encoder) => pending_encoder, Err(e) => { - device.release_queue(raw); return Err(e); } }; @@ -93,7 +92,7 @@ impl Queue { ); Ok(Queue { - raw: ManuallyDrop::new(raw), + raw, device, pending_writes, life_tracker: Mutex::new(rank::QUEUE_LIFE_TRACKER, LifetimeTracker::new()), @@ -198,9 +197,6 @@ impl Drop for Queue { // SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point. let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) }; pending_writes.dispose(self.device.raw()); - // SAFETY: we never access `self.raw` beyond this point. - let queue = unsafe { ManuallyDrop::take(&mut self.raw) }; - self.device.release_queue(queue); closures.fire(); } diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 2abc9ddaa..618cfb6ea 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -57,10 +57,9 @@ use super::{ /// Structure describing a logical device. Some members are internally mutable, /// stored behind mutexes. pub struct Device { - raw: ManuallyDrop>, + raw: Box, pub(crate) adapter: Arc, pub(crate) queue: OnceLock>, - queue_to_drop: OnceLock>, pub(crate) zero_buffer: ManuallyDrop>, /// The `label` from the descriptor used to create the resource. label: String, @@ -154,23 +153,19 @@ impl Drop for Device { closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); } - // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. - let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point. let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) }; // SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point. let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) }; - self.command_allocator.dispose(raw.as_ref()); + self.command_allocator.dispose(self.raw.as_ref()); #[cfg(feature = "indirect-validation")] self.indirect_validation .take() .unwrap() - .dispose(raw.as_ref()); + .dispose(self.raw.as_ref()); unsafe { - raw.destroy_buffer(zero_buffer); - raw.destroy_fence(fence); - let queue = self.queue_to_drop.take().unwrap(); - raw.exit(queue); + self.raw.destroy_buffer(zero_buffer); + self.raw.destroy_fence(fence); } } } @@ -249,10 +244,9 @@ impl Device { }; Ok(Self { - raw: ManuallyDrop::new(raw_device), + raw: raw_device, adapter: adapter.clone(), queue: OnceLock::new(), - queue_to_drop: OnceLock::new(), zero_buffer: ManuallyDrop::new(zero_buffer), label: desc.label.to_string(), command_allocator, @@ -325,10 +319,6 @@ impl Device { DeviceError::from_hal(error) } - pub(crate) fn release_queue(&self, queue: Box) { - assert!(self.queue_to_drop.set(queue).is_ok()); - } - /// Run some destroy operations that were deferred. /// /// Destroying the resources requires taking a write lock on the device's snatch lock, diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index 8ab7f1cb4..020d362e8 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -579,7 +579,8 @@ impl Example { self.device.destroy_pipeline_layout(self.pipeline_layout); self.surface.unconfigure(&self.device); - self.device.exit(self.queue); + drop(self.queue); + drop(self.device); drop(self.surface); drop(self.adapter); } diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 4eedfe781..7212988f4 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -1068,7 +1068,8 @@ impl Example { self.device.destroy_shader_module(self.shader_module); self.surface.unconfigure(&self.device); - self.device.exit(self.queue); + drop(self.queue); + drop(self.device); drop(self.surface); drop(self.adapter); } diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 12edf6179..b2b9629ed 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -391,10 +391,6 @@ impl super::Device { impl crate::Device for super::Device { type A = super::Api; - unsafe fn exit(self, _queue: super::Queue) { - self.rtv_pool.lock().free_handle(self.null_rtv_handle); - } - unsafe fn create_buffer( &self, desc: &crate::BufferDescriptor, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index bc9f0db15..fdda89b16 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -602,6 +602,12 @@ pub struct Device { counters: wgt::HalCounters, } +impl Drop for Device { + fn drop(&mut self) { + self.rtv_pool.lock().free_handle(self.null_rtv_handle); + } +} + unsafe impl Send for Device {} unsafe impl Sync for Device {} diff --git a/wgpu-hal/src/dynamic/device.rs b/wgpu-hal/src/dynamic/device.rs index f044a001d..f38d9fd32 100644 --- a/wgpu-hal/src/dynamic/device.rs +++ b/wgpu-hal/src/dynamic/device.rs @@ -16,8 +16,6 @@ use super::{ }; pub trait DynDevice: DynResource { - unsafe fn exit(self: Box, queue: Box); - unsafe fn create_buffer( &self, desc: &BufferDescriptor, @@ -166,10 +164,6 @@ pub trait DynDevice: DynResource { } impl DynDevice for D { - unsafe fn exit(self: Box, queue: Box) { - unsafe { D::exit(*self, queue.unbox()) } - } - unsafe fn create_buffer( &self, desc: &BufferDescriptor, diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 72d9784d6..e74c91c07 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -163,7 +163,6 @@ impl crate::Queue for Context { impl crate::Device for Context { type A = Api; - unsafe fn exit(self, queue: Context) {} unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { Ok(Resource) } diff --git a/wgpu-hal/src/gles/device.rs b/wgpu-hal/src/gles/device.rs index 27aabc51e..365ac7d72 100644 --- a/wgpu-hal/src/gles/device.rs +++ b/wgpu-hal/src/gles/device.rs @@ -501,14 +501,6 @@ impl super::Device { impl crate::Device for super::Device { type A = super::Api; - unsafe fn exit(self, queue: super::Queue) { - let gl = &self.shared.context.lock(); - unsafe { gl.delete_vertex_array(self.main_vao) }; - unsafe { gl.delete_framebuffer(queue.draw_fbo) }; - unsafe { gl.delete_framebuffer(queue.copy_fbo) }; - unsafe { gl.delete_buffer(queue.zero_buffer) }; - } - unsafe fn create_buffer( &self, desc: &crate::BufferDescriptor, diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs index 55f853715..b6e690a5f 100644 --- a/wgpu-hal/src/gles/mod.rs +++ b/wgpu-hal/src/gles/mod.rs @@ -295,6 +295,13 @@ pub struct Device { counters: wgt::HalCounters, } +impl Drop for Device { + fn drop(&mut self) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_vertex_array(self.main_vao) }; + } +} + pub struct ShaderClearProgram { pub program: glow::Program, pub color_uniform_location: glow::UniformLocation, @@ -316,6 +323,15 @@ pub struct Queue { current_index_buffer: Mutex>, } +impl Drop for Queue { + fn drop(&mut self) { + let gl = &self.shared.context.lock(); + unsafe { gl.delete_framebuffer(self.draw_fbo) }; + unsafe { gl.delete_framebuffer(self.copy_fbo) }; + unsafe { gl.delete_buffer(self.zero_buffer) }; + } +} + #[derive(Clone, Debug)] pub struct Buffer { raw: Option, diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index 0cddb6997..ad4b458cd 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -676,7 +676,7 @@ pub trait Adapter: WasmNotSendSync { /// 1) Free resources with methods like [`Device::destroy_texture`] or /// [`Device::destroy_shader_module`]. /// -/// 1) Shut down the device by calling [`Device::exit`]. +/// 1) Drop the device. /// /// [`vkDevice`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VkDevice /// [`ID3D12Device`]: https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12device @@ -706,8 +706,6 @@ pub trait Adapter: WasmNotSendSync { pub trait Device: WasmNotSendSync { type A: Api; - /// Exit connection to this logical device. - unsafe fn exit(self, queue: ::Queue); /// Creates a new buffer. /// /// The initial usage is `BufferUses::empty()`. diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index 4cc8ef0eb..81887aac7 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -320,8 +320,6 @@ impl super::Device { impl crate::Device for super::Device { type A = super::Api; - unsafe fn exit(self, _queue: super::Queue) {} - unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult { let map_read = desc.usage.contains(crate::BufferUses::MAP_READ); let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE); diff --git a/wgpu-hal/src/vulkan/device.rs b/wgpu-hal/src/vulkan/device.rs index 39afe88ef..fe72ec40f 100644 --- a/wgpu-hal/src/vulkan/device.rs +++ b/wgpu-hal/src/vulkan/device.rs @@ -289,18 +289,6 @@ impl super::DeviceShared { .size((range.end - range.start + mask) & !mask) })) } - - unsafe fn free_resources(&self) { - for &raw in self.render_passes.lock().values() { - unsafe { self.raw.destroy_render_pass(raw, None) }; - } - for &raw in self.framebuffers.lock().values() { - unsafe { self.raw.destroy_framebuffer(raw, None) }; - } - if self.drop_guard.is_none() { - unsafe { self.raw.destroy_device(None) }; - } - } } impl gpu_alloc::MemoryDevice for super::DeviceShared { @@ -1023,18 +1011,6 @@ impl super::Device { impl crate::Device for super::Device { type A = super::Api; - unsafe fn exit(self, queue: super::Queue) { - unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) }; - unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) }; - unsafe { - queue - .relay_semaphores - .into_inner() - .destroy(&self.shared.raw) - }; - unsafe { self.shared.free_resources() }; - } - unsafe fn create_buffer( &self, desc: &crate::BufferDescriptor, diff --git a/wgpu-hal/src/vulkan/mod.rs b/wgpu-hal/src/vulkan/mod.rs index f81dc0893..ff3c865fc 100644 --- a/wgpu-hal/src/vulkan/mod.rs +++ b/wgpu-hal/src/vulkan/mod.rs @@ -646,6 +646,20 @@ struct DeviceShared { memory_allocations_counter: InternalCounter, } +impl Drop for DeviceShared { + fn drop(&mut self) { + for &raw in self.render_passes.lock().values() { + unsafe { self.raw.destroy_render_pass(raw, None) }; + } + for &raw in self.framebuffers.lock().values() { + unsafe { self.raw.destroy_framebuffer(raw, None) }; + } + if self.drop_guard.is_none() { + unsafe { self.raw.destroy_device(None) }; + } + } +} + pub struct Device { shared: Arc, mem_allocator: Mutex>, @@ -658,6 +672,13 @@ pub struct Device { counters: wgt::HalCounters, } +impl Drop for Device { + fn drop(&mut self) { + unsafe { self.mem_allocator.lock().cleanup(&*self.shared) }; + unsafe { self.desc_allocator.lock().cleanup(&*self.shared) }; + } +} + /// Semaphores for forcing queue submissions to run in order. /// /// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are @@ -741,6 +762,12 @@ pub struct Queue { relay_semaphores: Mutex, } +impl Drop for Queue { + fn drop(&mut self) { + unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) }; + } +} + #[derive(Debug)] pub struct Buffer { raw: vk::Buffer, From 5a3de2d3a89490c243562f52e7249e2d8c1dfc29 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Thu, 7 Nov 2024 17:24:40 +0100 Subject: [PATCH 14/15] add a retry mechanism for waiting on the last submission in `Queue::drop` --- wgpu-core/src/device/queue.rs | 94 ++++++++++++++++++++++++----------- 1 file changed, 66 insertions(+), 28 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index efcf50752..fe53fb652 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -149,37 +149,75 @@ impl Drop for Queue { .load(Ordering::Acquire); let fence = self.device.fence.read(); - let wait_res = unsafe { - self.device.raw().wait( - fence.as_ref(), - last_successful_submission_index, - #[cfg(not(target_arch = "wasm32"))] - crate::device::CLEANUP_WAIT_MS, - #[cfg(target_arch = "wasm32")] - 0, // WebKit and Chromium don't support a non-0 timeout - ) - }; - drop(fence); - match wait_res { - Ok(true) => {} - // Note: If we don't panic here we are in UB land (destroying resources while they are still in use by the GPU). - Ok(false) => { - // It's fine that we timed out on WebGL; GL objects can be deleted early as they - // will be kept around by the driver if GPU work hasn't finished. - // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other - // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. - // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and - // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. - #[cfg(not(target_arch = "wasm32"))] - panic!("We timed out while waiting on the last successful submission to complete!"); - } - Err(e) => { - panic!( - "We ran into an error while waiting on the last successful submission to complete! - {e}" - ); + // Try waiting on the last submission using the following sequence of timeouts + let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200]; + + for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() { + let is_last_iter = i == timeouts_in_ms.len() - 1; + + api_log!( + "Waiting on last submission. try: {}/{}. timeout: {}ms", + i + 1, + timeouts_in_ms.len(), + timeout_ms + ); + + let wait_res = unsafe { + self.device.raw().wait( + fence.as_ref(), + last_successful_submission_index, + #[cfg(not(target_arch = "wasm32"))] + timeout_ms, + #[cfg(target_arch = "wasm32")] + 0, // WebKit and Chromium don't support a non-0 timeout + ) + }; + // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU). + match wait_res { + Ok(true) => break, + Ok(false) => { + // It's fine that we timed out on WebGL; GL objects can be deleted early as they + // will be kept around by the driver if GPU work hasn't finished. + // Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other + // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. + // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and + // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. + #[cfg(target_arch = "wasm32")] + { + break; + } + #[cfg(not(target_arch = "wasm32"))] + { + if is_last_iter { + panic!( + "We timed out while waiting on the last successful submission to complete!" + ); + } + } + } + Err(e) => match e { + hal::DeviceError::OutOfMemory => { + if is_last_iter { + panic!( + "We ran into an OOM error while waiting on the last successful submission to complete!" + ); + } + } + hal::DeviceError::Lost => { + self.device.handle_hal_error(e); // will lose the device + break; + } + hal::DeviceError::ResourceCreationFailed => unreachable!(), + hal::DeviceError::Unexpected => { + panic!( + "We ran into an unexpected error while waiting on the last successful submission to complete!" + ); + } + }, } } + drop(fence); let snatch_guard = self.device.snatchable_lock.read(); let (submission_closures, mapping_closures, queue_empty) = From 278620be266cbc85c5cb86f7a160f5b348b92e36 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Tue, 12 Nov 2024 20:20:09 +0100 Subject: [PATCH 15/15] destroy subsequent command buffers if we encountered an error at submission This gets the `wgpu_test::ray_tracing::as_build::out_of_order_as_build` test to pass. This seems to be an issue even on trunk, looking at the nr of calls to `create_command_encoder` & `destroy_command_encoder` in hal, they are not equal. So, I'm not sure why the validation layers don't raise the `VUID-vkDestroyDevice-device-05137`. There is still an issue with previous command buffers being leaked but I will fix this in a follow-up. --- wgpu-core/src/device/queue.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index fe53fb652..370880a93 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -1209,6 +1209,13 @@ impl Queue { } } + if first_error.is_some() { + if let Ok(cmd_buf_data) = cmd_buf_data { + cmd_buf_data.destroy(&command_buffer.device); + } + continue; + } + let mut baked = match cmd_buf_data { Ok(cmd_buf_data) => { let res = validate_command_buffer( @@ -1232,10 +1239,6 @@ impl Queue { } }; - if first_error.is_some() { - continue; - } - // execute resource transitions if let Err(e) = unsafe { baked.encoder.begin_encoding(hal_label(