From 97acfd26ce322148073b76749ad4f53c9cfae6ec Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Mon, 14 Oct 2024 19:12:12 +0200 Subject: [PATCH] rely on our ownership model to keep the device alive while there are still active submissions `Global::device_drop` was wrongly assuming `device_poll` with `Maintain::Wait` was called but this is not a documented invariant and only `wgpu` was upholding this. --- tests/tests/device.rs | 34 ++------------------------- wgpu-core/src/device/global.rs | 15 +----------- wgpu-core/src/device/queue.rs | 9 ------- wgpu-core/src/device/resource.rs | 40 +++++++------------------------- wgpu-core/src/global.rs | 4 ---- wgpu/src/backend/wgpu_core.rs | 3 --- 6 files changed, 12 insertions(+), 93 deletions(-) diff --git a/tests/tests/device.rs b/tests/tests/device.rs index 2774bfd52..87935c831 100644 --- a/tests/tests/device.rs +++ b/tests/tests/device.rs @@ -630,7 +630,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default().expect_fail(FailureCase::webgl2())) .run_sync(|ctx| { // This test checks that when the device is dropped (such as in a GC), - // the provided DeviceLostClosure is called with reason DeviceLostReason::Unknown. + // the provided DeviceLostClosure is called with reason DeviceLostReason::Dropped. // Fails on webgl because webgl doesn't implement drop. static WAS_CALLED: std::sync::atomic::AtomicBool = AtomicBool::new(false); @@ -642,8 +642,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new() }); ctx.device.set_device_lost_callback(callback); - // Drop the device. - drop(ctx.device); + drop(ctx); assert!( WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst), @@ -676,35 +675,6 @@ static DEVICE_LOST_REPLACED_CALLBACK: GpuTestConfiguration = GpuTestConfiguratio ); }); -#[gpu_test] -static DROPPED_GLOBAL_THEN_DEVICE_LOST: GpuTestConfiguration = GpuTestConfiguration::new() - .parameters(TestParameters::default().skip(FailureCase::always())) - .run_sync(|ctx| { - // What we want to do is to drop the Global, forcing a code path that - // eventually calls Device.prepare_to_die, without having first dropped - // the device. This models what might happen in a user agent that kills - // wgpu without providing a more orderly shutdown. In such a case, the - // device lost callback should be invoked with the message "Device is - // dying." - static WAS_CALLED: AtomicBool = AtomicBool::new(false); - - // Set a LoseDeviceCallback on the device. - let callback = Box::new(|reason, message| { - WAS_CALLED.store(true, std::sync::atomic::Ordering::SeqCst); - assert_eq!(reason, wgt::DeviceLostReason::Dropped); - assert_eq!(message, "Device is dying."); - }); - ctx.device.set_device_lost_callback(callback); - - // TODO: Drop the Global, somehow. - - // Confirm that the callback was invoked. - assert!( - WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst), - "Device lost callback should have been called." - ); - }); - #[gpu_test] static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConfiguration::new() .parameters(TestParameters::default()) diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index b6ad2354c..c53db14be 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -2078,20 +2078,7 @@ impl Global { profiling::scope!("Device::drop"); api_log!("Device::drop {device_id:?}"); - let device = self.hub.devices.remove(device_id); - let device_lost_closure = device.lock_life().device_lost_closure.take(); - if let Some(closure) = device_lost_closure { - closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); - } - - // The things `Device::prepare_to_die` takes care are mostly - // unnecessary here. We know our queue is empty, so we don't - // need to wait for submissions or triage them. We know we were - // just polled, so `life_tracker.free_resources` is empty. - debug_assert!(device.lock_life().queue_empty()); - device.pending_writes.lock().deactivate(); - - drop(device); + self.hub.devices.remove(device_id); } // This closure will be called exactly once during "lose the device", diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index ade609f20..c4ed7b9c0 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -345,15 +345,6 @@ impl PendingWrites { } self.command_encoder.as_mut() } - - pub fn deactivate(&mut self) { - if self.is_recording { - unsafe { - self.command_encoder.discard_encoding(); - } - self.is_recording = false; - } - } } #[derive(Clone, Debug, Error)] diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 754d1d91f..f5ae66b44 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -143,13 +143,13 @@ pub struct Device { pub(crate) instance_flags: wgt::InstanceFlags, pub(crate) pending_writes: Mutex>, pub(crate) deferred_destroy: Mutex>, - #[cfg(feature = "trace")] - pub(crate) trace: Mutex>, pub(crate) usage_scopes: UsageScopePool, pub(crate) last_acceleration_structure_build_command_index: AtomicU64, - #[cfg(feature = "indirect-validation")] pub(crate) indirect_validation: Option, + // needs to be dropped last + #[cfg(feature = "trace")] + pub(crate) trace: Mutex>, } pub(crate) enum DeferredDestroy { @@ -171,6 +171,12 @@ impl std::fmt::Debug for Device { impl Drop for Device { fn drop(&mut self) { resource_log!("Drop {}", self.error_ident()); + + let device_lost_closure = self.lock_life().device_lost_closure.take(); + if let Some(closure) = device_lost_closure { + closure.call(DeviceLostReason::Dropped, String::from("Device dropped.")); + } + // SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point. let raw = unsafe { ManuallyDrop::take(&mut self.raw) }; // SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point. @@ -3730,34 +3736,6 @@ impl Device { } } -impl Device { - /// Wait for idle and remove resources that we can, before we die. - pub(crate) fn prepare_to_die(&self) { - self.pending_writes.lock().deactivate(); - let current_index = self - .last_successful_submission_index - .load(Ordering::Acquire); - if let Err(error) = unsafe { - let fence = self.fence.read(); - self.raw() - .wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS) - } { - log::error!("failed to wait for the device: {error}"); - } - let mut life_tracker = self.lock_life(); - let _ = life_tracker.triage_submissions(current_index, &self.command_allocator); - if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() { - // It's important to not hold the lock while calling the closure. - drop(life_tracker); - device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string()); - } - #[cfg(feature = "trace")] - { - *self.trace.lock() = None; - } - } -} - crate::impl_resource_type!(Device); crate::impl_labeled!(Device); crate::impl_storage_item!(Device); diff --git a/wgpu-core/src/global.rs b/wgpu-core/src/global.rs index 25a864ec2..cd4508d14 100644 --- a/wgpu-core/src/global.rs +++ b/wgpu-core/src/global.rs @@ -89,10 +89,6 @@ impl Drop for Global { fn drop(&mut self) { profiling::scope!("Global::drop"); resource_log!("Global::drop"); - - for (_, device) in self.hub.devices.read().iter() { - device.prepare_to_die(); - } } } diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index e2b75d1e1..a3c867e5a 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1344,9 +1344,6 @@ impl crate::Context for ContextWgpuCore { fn device_drop(&self, device_data: &Self::DeviceData) { #[cfg(any(native, Emscripten))] { - // Call device_poll, but don't check for errors. We have to use its - // return value, but we just drop it. - let _ = self.0.device_poll(device_data.id, wgt::Maintain::wait()); self.0.device_drop(device_data.id); } }