rely on our ownership model to keep the device alive while there are still active submissions

`Global::device_drop` was wrongly assuming `device_poll` with `Maintain::Wait` was called but this is not a documented invariant and only `wgpu` was upholding this.
This commit is contained in:
teoxoy 2024-10-14 19:12:12 +02:00 committed by Teodor Tanasoaia
parent 4fb32170bf
commit 97acfd26ce
6 changed files with 12 additions and 93 deletions

View File

@ -630,7 +630,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().expect_fail(FailureCase::webgl2()))
.run_sync(|ctx| {
// This test checks that when the device is dropped (such as in a GC),
// the provided DeviceLostClosure is called with reason DeviceLostReason::Unknown.
// the provided DeviceLostClosure is called with reason DeviceLostReason::Dropped.
// Fails on webgl because webgl doesn't implement drop.
static WAS_CALLED: std::sync::atomic::AtomicBool = AtomicBool::new(false);
@ -642,8 +642,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
});
ctx.device.set_device_lost_callback(callback);
// Drop the device.
drop(ctx.device);
drop(ctx);
assert!(
WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst),
@ -676,35 +675,6 @@ static DEVICE_LOST_REPLACED_CALLBACK: GpuTestConfiguration = GpuTestConfiguratio
);
});
#[gpu_test]
static DROPPED_GLOBAL_THEN_DEVICE_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().skip(FailureCase::always()))
.run_sync(|ctx| {
// What we want to do is to drop the Global, forcing a code path that
// eventually calls Device.prepare_to_die, without having first dropped
// the device. This models what might happen in a user agent that kills
// wgpu without providing a more orderly shutdown. In such a case, the
// device lost callback should be invoked with the message "Device is
// dying."
static WAS_CALLED: AtomicBool = AtomicBool::new(false);
// Set a LoseDeviceCallback on the device.
let callback = Box::new(|reason, message| {
WAS_CALLED.store(true, std::sync::atomic::Ordering::SeqCst);
assert_eq!(reason, wgt::DeviceLostReason::Dropped);
assert_eq!(message, "Device is dying.");
});
ctx.device.set_device_lost_callback(callback);
// TODO: Drop the Global, somehow.
// Confirm that the callback was invoked.
assert!(
WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst),
"Device lost callback should have been called."
);
});
#[gpu_test]
static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default())

View File

@ -2078,20 +2078,7 @@ impl Global {
profiling::scope!("Device::drop");
api_log!("Device::drop {device_id:?}");
let device = self.hub.devices.remove(device_id);
let device_lost_closure = device.lock_life().device_lost_closure.take();
if let Some(closure) = device_lost_closure {
closure.call(DeviceLostReason::Dropped, String::from("Device dropped."));
}
// The things `Device::prepare_to_die` takes care are mostly
// unnecessary here. We know our queue is empty, so we don't
// need to wait for submissions or triage them. We know we were
// just polled, so `life_tracker.free_resources` is empty.
debug_assert!(device.lock_life().queue_empty());
device.pending_writes.lock().deactivate();
drop(device);
self.hub.devices.remove(device_id);
}
// This closure will be called exactly once during "lose the device",

View File

@ -345,15 +345,6 @@ impl PendingWrites {
}
self.command_encoder.as_mut()
}
pub fn deactivate(&mut self) {
if self.is_recording {
unsafe {
self.command_encoder.discard_encoding();
}
self.is_recording = false;
}
}
}
#[derive(Clone, Debug, Error)]

View File

@ -143,13 +143,13 @@ pub struct Device {
pub(crate) instance_flags: wgt::InstanceFlags,
pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy>>,
#[cfg(feature = "trace")]
pub(crate) trace: Mutex<Option<trace::Trace>>,
pub(crate) usage_scopes: UsageScopePool,
pub(crate) last_acceleration_structure_build_command_index: AtomicU64,
#[cfg(feature = "indirect-validation")]
pub(crate) indirect_validation: Option<crate::indirect_validation::IndirectValidation>,
// needs to be dropped last
#[cfg(feature = "trace")]
pub(crate) trace: Mutex<Option<trace::Trace>>,
}
pub(crate) enum DeferredDestroy {
@ -171,6 +171,12 @@ impl std::fmt::Debug for Device {
impl Drop for Device {
fn drop(&mut self) {
resource_log!("Drop {}", self.error_ident());
let device_lost_closure = self.lock_life().device_lost_closure.take();
if let Some(closure) = device_lost_closure {
closure.call(DeviceLostReason::Dropped, String::from("Device dropped."));
}
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
// SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point.
@ -3730,34 +3736,6 @@ impl Device {
}
}
impl Device {
/// Wait for idle and remove resources that we can, before we die.
pub(crate) fn prepare_to_die(&self) {
self.pending_writes.lock().deactivate();
let current_index = self
.last_successful_submission_index
.load(Ordering::Acquire);
if let Err(error) = unsafe {
let fence = self.fence.read();
self.raw()
.wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS)
} {
log::error!("failed to wait for the device: {error}");
}
let mut life_tracker = self.lock_life();
let _ = life_tracker.triage_submissions(current_index, &self.command_allocator);
if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() {
// It's important to not hold the lock while calling the closure.
drop(life_tracker);
device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string());
}
#[cfg(feature = "trace")]
{
*self.trace.lock() = None;
}
}
}
crate::impl_resource_type!(Device);
crate::impl_labeled!(Device);
crate::impl_storage_item!(Device);

View File

@ -89,10 +89,6 @@ impl Drop for Global {
fn drop(&mut self) {
profiling::scope!("Global::drop");
resource_log!("Global::drop");
for (_, device) in self.hub.devices.read().iter() {
device.prepare_to_die();
}
}
}

View File

@ -1344,9 +1344,6 @@ impl crate::Context for ContextWgpuCore {
fn device_drop(&self, device_data: &Self::DeviceData) {
#[cfg(any(native, Emscripten))]
{
// Call device_poll, but don't check for errors. We have to use its
// return value, but we just drop it.
let _ = self.0.device_poll(device_data.id, wgt::Maintain::wait());
self.0.device_drop(device_data.id);
}
}