Call device lost callback when it is replaced, or when the global is dropped. (#5168)

This fixes two cases where a DeviceLostClosureC might not be consumed
before it is dropped, which is a requirement:

1) When the closure is replaced, this ensures the to-be-dropped closure
is invoked.
2)  When the global is dropped, this ensures that the closure is invoked
before it is dropped.

The first of these two cases is tested in a new test,
DEVICE_LOST_REPLACED_CALLBACK. The second case has a stub,
always-skipped test, DROPPED_GLOBAL_THEN_DEVICE_LOST. The test is
always-skipped because there does not appear to be a way to drop the
global from within a test. Nor is there any other way to reach
Device.prepare_to_die without having first dropping the device.
This commit is contained in:
Brad Werth 2024-01-30 23:15:45 -08:00 committed by Connor Fitzgerald
parent 446a3dcdbf
commit e029858a18
No known key found for this signature in database
GPG Key ID: CF0A1F83B4E1A995
5 changed files with 96 additions and 2 deletions

View File

@ -65,6 +65,9 @@ Bottom level categories:
### Bug Fixes
#### General
- Device lost callbacks are invoked when replaced and when global is dropped. By @bradwerth in [#5168](https://github.com/gfx-rs/wgpu/pull/5168)
#### DX12
- Fix `panic!` when dropping `Instance` without `InstanceFlags::VALIDATION`. By @hakolao in [#5134](https://github.com/gfx-rs/wgpu/pull/5134)

View File

@ -549,3 +549,69 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
"Device lost callback should have been called."
);
});
#[gpu_test]
static DEVICE_LOST_REPLACED_CALLBACK: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default())
.run_sync(|ctx| {
// This test checks that a device_lost_callback is called when it is
// replaced by another callback.
let was_called = std::sync::Arc::<std::sync::atomic::AtomicBool>::new(false.into());
// Set a LoseDeviceCallback on the device.
let was_called_clone = was_called.clone();
let callback = Box::new(move |reason, _m| {
was_called_clone.store(true, std::sync::atomic::Ordering::SeqCst);
assert!(
matches!(reason, wgt::DeviceLostReason::ReplacedCallback),
"Device lost info reason should match DeviceLostReason::ReplacedCallback."
);
});
ctx.device.set_device_lost_callback(callback);
// Replace the callback.
let replacement_callback = Box::new(move |_r, _m| {});
ctx.device.set_device_lost_callback(replacement_callback);
assert!(
was_called.load(std::sync::atomic::Ordering::SeqCst),
"Device lost callback should have been called."
);
});
#[gpu_test]
static DROPPED_GLOBAL_THEN_DEVICE_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().skip(FailureCase::always()))
.run_sync(|ctx| {
// What we want to do is to drop the Global, forcing a code path that
// eventually calls Device.prepare_to_die, without having first dropped
// the device. This models what might happen in a user agent that kills
// wgpu without providing a more orderly shutdown. In such a case, the
// device lost callback should be invoked with the message "Device is
// dying."
let was_called = std::sync::Arc::<std::sync::atomic::AtomicBool>::new(false.into());
// Set a LoseDeviceCallback on the device.
let was_called_clone = was_called.clone();
let callback = Box::new(move |reason, message| {
was_called_clone.store(true, std::sync::atomic::Ordering::SeqCst);
assert!(
matches!(reason, wgt::DeviceLostReason::Dropped),
"Device lost info reason should match DeviceLostReason::Dropped."
);
assert!(
message == "Device is dying.",
"Device lost info message is \"{}\" and it should be \"Device is dying.\".",
message
);
});
ctx.device.set_device_lost_callback(callback);
// TODO: Drop the Global, somehow.
// Confirm that the callback was invoked.
assert!(
was_called.load(std::sync::atomic::Ordering::SeqCst),
"Device lost callback should have been called."
);
});

View File

@ -2247,8 +2247,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}
// This closure will be called exactly once during "lose the device"
// or when the device is dropped, if it was never lost.
// This closure will be called exactly once during "lose the device",
// or when it is replaced.
pub fn device_set_device_lost_closure<A: HalApi>(
&self,
device_id: DeviceId,
@ -2258,6 +2258,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
if let Ok(device) = hub.devices.get(device_id) {
let mut life_tracker = device.lock_life();
if let Some(existing_closure) = life_tracker.device_lost_closure.take() {
// It's important to not hold the lock while calling the closure.
drop(life_tracker);
existing_closure.call(DeviceLostReason::ReplacedCallback, "".to_string());
life_tracker = device.lock_life();
}
life_tracker.device_lost_closure = Some(device_lost_closure);
}
}

View File

@ -3380,6 +3380,11 @@ impl<A: HalApi> Device<A> {
current_index,
self.command_allocator.lock().as_mut().unwrap(),
);
if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() {
// It's important to not hold the lock while calling the closure.
drop(life_tracker);
device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string());
}
#[cfg(feature = "trace")]
{
*self.trace.lock() = None;

View File

@ -7099,4 +7099,18 @@ pub enum DeviceLostReason {
Unknown = 0,
/// After Device::destroy
Destroyed = 1,
/// After Device::drop
///
/// WebGPU does not invoke the device lost callback when the device is
/// dropped to prevent garbage collection from being observable. In wgpu,
/// we invoke the callback on drop to help with managing memory owned by
/// the callback.
Dropped = 2,
/// After replacing the device_lost_callback
///
/// WebGPU does not have a concept of a device lost callback, but wgpu
/// does. wgpu guarantees that any supplied callback will be invoked
/// exactly once before it is dropped, which helps with managing the
/// memory owned by the callback.
ReplacedCallback = 3,
}