mirror of
https://github.com/gfx-rs/wgpu.git
synced 2024-11-25 00:03:29 +00:00
[core] Make poll(Wait)
not hang after bad command submission.
Add `wgpu_core::device::Device::last_successful_submission_index`, which records the fence value that `Maintain::Wait` should actually wait for. See comments for details. Fixes #5969.
This commit is contained in:
parent
2bc328c46f
commit
aeb2067e81
@ -125,3 +125,37 @@ static WAIT_OUT_OF_ORDER: GpuTestConfiguration =
|
||||
.await
|
||||
.panic_on_timeout();
|
||||
});
|
||||
|
||||
/// Submit a command buffer to the wrong device. A wait poll shouldn't hang.
|
||||
///
|
||||
/// We can't catch panics on Wasm, since they get reported directly to the
|
||||
/// console.
|
||||
#[gpu_test]
|
||||
static WAIT_AFTER_BAD_SUBMISSION: GpuTestConfiguration = GpuTestConfiguration::new()
|
||||
.parameters(wgpu_test::TestParameters::default().skip(wgpu_test::FailureCase::webgl2()))
|
||||
.run_async(wait_after_bad_submission);
|
||||
|
||||
async fn wait_after_bad_submission(ctx: TestingContext) {
|
||||
let (device2, queue2) =
|
||||
wgpu_test::initialize_device(&ctx.adapter, ctx.device_features, ctx.device_limits.clone())
|
||||
.await;
|
||||
|
||||
let command_buffer1 = ctx
|
||||
.device
|
||||
.create_command_encoder(&CommandEncoderDescriptor::default())
|
||||
.finish();
|
||||
|
||||
// This should panic, since the command buffer belongs to the wrong
|
||||
// device, and queue submission errors seem to be fatal errors?
|
||||
let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
|
||||
queue2.submit([command_buffer1]);
|
||||
}));
|
||||
assert!(result.is_err());
|
||||
|
||||
// This should not hang.
|
||||
//
|
||||
// Specifically, the failed submission should not cause a new fence value to
|
||||
// be allocated that will not be signalled until further work is
|
||||
// successfully submitted, causing a greater fence value to be signalled.
|
||||
device2.poll(wgpu::Maintain::Wait);
|
||||
}
|
||||
|
@ -1069,7 +1069,7 @@ impl Global {
|
||||
let fence = fence_guard.as_mut().unwrap();
|
||||
let submit_index = device
|
||||
.active_submission_index
|
||||
.fetch_add(1, Ordering::Relaxed)
|
||||
.fetch_add(1, Ordering::SeqCst)
|
||||
+ 1;
|
||||
let mut active_executions = Vec::new();
|
||||
|
||||
@ -1392,6 +1392,11 @@ impl Global {
|
||||
)
|
||||
.map_err(DeviceError::from)?;
|
||||
}
|
||||
|
||||
// Advance the successful submission index.
|
||||
device
|
||||
.last_successful_submission_index
|
||||
.fetch_max(submit_index, Ordering::SeqCst);
|
||||
}
|
||||
|
||||
profiling::scope!("cleanup");
|
||||
|
@ -88,7 +88,27 @@ pub struct Device<A: HalApi> {
|
||||
label: String,
|
||||
|
||||
pub(crate) command_allocator: command::CommandAllocator<A>,
|
||||
|
||||
/// The index of the last command submission that was attempted.
|
||||
///
|
||||
/// Note that `fence` may never be signalled with this value, if the command
|
||||
/// submission failed. If you need to wait for everything running on a
|
||||
/// `Queue` to complete, wait for [`last_successful_submission_index`].
|
||||
///
|
||||
/// [`last_successful_submission_index`]: Device::last_successful_submission_index
|
||||
pub(crate) active_submission_index: hal::AtomicFenceValue,
|
||||
|
||||
/// The index of the last successful submission to this device's
|
||||
/// [`hal::Queue`].
|
||||
///
|
||||
/// Unlike [`active_submission_index`], which is incremented each time
|
||||
/// submission is attempted, this is updated only when submission succeeds,
|
||||
/// so waiting for this value won't hang waiting for work that was never
|
||||
/// submitted.
|
||||
///
|
||||
/// [`active_submission_index`]: Device::active_submission_index
|
||||
pub(crate) last_successful_submission_index: hal::AtomicFenceValue,
|
||||
|
||||
// NOTE: if both are needed, the `snatchable_lock` must be consistently acquired before the
|
||||
// `fence` lock to avoid deadlocks.
|
||||
pub(crate) fence: RwLock<Option<A::Fence>>,
|
||||
@ -257,6 +277,7 @@ impl<A: HalApi> Device<A> {
|
||||
label: desc.label.to_string(),
|
||||
command_allocator,
|
||||
active_submission_index: AtomicU64::new(0),
|
||||
last_successful_submission_index: AtomicU64::new(0),
|
||||
fence: RwLock::new(rank::DEVICE_FENCE, Some(fence)),
|
||||
snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) },
|
||||
valid: AtomicBool::new(true),
|
||||
@ -387,37 +408,41 @@ impl<A: HalApi> Device<A> {
|
||||
profiling::scope!("Device::maintain");
|
||||
|
||||
let fence = fence_guard.as_ref().unwrap();
|
||||
let last_done_index = if maintain.is_wait() {
|
||||
let index_to_wait_for = match maintain {
|
||||
wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
|
||||
// We don't need to check to see if the queue id matches
|
||||
// as we already checked this from inside the poll call.
|
||||
submission_index.index
|
||||
}
|
||||
_ => self.active_submission_index.load(Ordering::Relaxed),
|
||||
};
|
||||
unsafe {
|
||||
self.raw
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.wait(fence, index_to_wait_for, CLEANUP_WAIT_MS)
|
||||
.map_err(DeviceError::from)?
|
||||
};
|
||||
index_to_wait_for
|
||||
} else {
|
||||
unsafe {
|
||||
|
||||
// Determine which submission index `maintain` represents.
|
||||
let submission_index = match maintain {
|
||||
wgt::Maintain::WaitForSubmissionIndex(submission_index) => {
|
||||
// We don't need to check to see if the queue id matches
|
||||
// as we already checked this from inside the poll call.
|
||||
submission_index.index
|
||||
}
|
||||
wgt::Maintain::Wait => self
|
||||
.last_successful_submission_index
|
||||
.load(Ordering::Acquire),
|
||||
wgt::Maintain::Poll => unsafe {
|
||||
self.raw
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.get_fence_value(fence)
|
||||
.map_err(DeviceError::from)?
|
||||
}
|
||||
},
|
||||
};
|
||||
log::info!("Device::maintain: last done index {last_done_index}");
|
||||
|
||||
// If necessary, wait for that submission to complete.
|
||||
if maintain.is_wait() {
|
||||
unsafe {
|
||||
self.raw
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.wait(fence, submission_index, CLEANUP_WAIT_MS)
|
||||
.map_err(DeviceError::from)?
|
||||
};
|
||||
}
|
||||
log::info!("Device::maintain: waiting for submission index {submission_index}");
|
||||
|
||||
let mut life_tracker = self.lock_life();
|
||||
let submission_closures =
|
||||
life_tracker.triage_submissions(last_done_index, &self.command_allocator);
|
||||
life_tracker.triage_submissions(submission_index, &self.command_allocator);
|
||||
|
||||
life_tracker.triage_mapped();
|
||||
|
||||
@ -3586,7 +3611,9 @@ impl<A: HalApi> Device<A> {
|
||||
/// Wait for idle and remove resources that we can, before we die.
|
||||
pub(crate) fn prepare_to_die(&self) {
|
||||
self.pending_writes.lock().as_mut().unwrap().deactivate();
|
||||
let current_index = self.active_submission_index.load(Ordering::Relaxed);
|
||||
let current_index = self
|
||||
.last_successful_submission_index
|
||||
.load(Ordering::Acquire);
|
||||
if let Err(error) = unsafe {
|
||||
let fence = self.fence.read();
|
||||
let fence = fence.as_ref().unwrap();
|
||||
|
Loading…
Reference in New Issue
Block a user