add a retry mechanism for waiting on the last submission in Queue::drop

This commit is contained in:
teoxoy 2024-11-07 17:24:40 +01:00 committed by Teodor Tanasoaia
parent d489e4c2e8
commit 5a3de2d3a8

View File

@ -149,21 +149,33 @@ impl Drop for Queue {
.load(Ordering::Acquire); .load(Ordering::Acquire);
let fence = self.device.fence.read(); let fence = self.device.fence.read();
// Try waiting on the last submission using the following sequence of timeouts
let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200];
for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() {
let is_last_iter = i == timeouts_in_ms.len() - 1;
api_log!(
"Waiting on last submission. try: {}/{}. timeout: {}ms",
i + 1,
timeouts_in_ms.len(),
timeout_ms
);
let wait_res = unsafe { let wait_res = unsafe {
self.device.raw().wait( self.device.raw().wait(
fence.as_ref(), fence.as_ref(),
last_successful_submission_index, last_successful_submission_index,
#[cfg(not(target_arch = "wasm32"))] #[cfg(not(target_arch = "wasm32"))]
crate::device::CLEANUP_WAIT_MS, timeout_ms,
#[cfg(target_arch = "wasm32")] #[cfg(target_arch = "wasm32")]
0, // WebKit and Chromium don't support a non-0 timeout 0, // WebKit and Chromium don't support a non-0 timeout
) )
}; };
drop(fence); // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
match wait_res { match wait_res {
Ok(true) => {} Ok(true) => break,
// Note: If we don't panic here we are in UB land (destroying resources while they are still in use by the GPU).
Ok(false) => { Ok(false) => {
// It's fine that we timed out on WebGL; GL objects can be deleted early as they // It's fine that we timed out on WebGL; GL objects can be deleted early as they
// will be kept around by the driver if GPU work hasn't finished. // will be kept around by the driver if GPU work hasn't finished.
@ -171,15 +183,41 @@ impl Drop for Queue {
// backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands. // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
// Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
// we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538. // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538.
#[cfg(not(target_arch = "wasm32"))] #[cfg(target_arch = "wasm32")]
panic!("We timed out while waiting on the last successful submission to complete!"); {
break;
} }
Err(e) => { #[cfg(not(target_arch = "wasm32"))]
{
if is_last_iter {
panic!( panic!(
"We ran into an error while waiting on the last successful submission to complete! - {e}" "We timed out while waiting on the last successful submission to complete!"
); );
} }
} }
}
Err(e) => match e {
hal::DeviceError::OutOfMemory => {
if is_last_iter {
panic!(
"We ran into an OOM error while waiting on the last successful submission to complete!"
);
}
}
hal::DeviceError::Lost => {
self.device.handle_hal_error(e); // will lose the device
break;
}
hal::DeviceError::ResourceCreationFailed => unreachable!(),
hal::DeviceError::Unexpected => {
panic!(
"We ran into an unexpected error while waiting on the last successful submission to complete!"
);
}
},
}
}
drop(fence);
let snatch_guard = self.device.snatchable_lock.read(); let snatch_guard = self.device.snatchable_lock.read();
let (submission_closures, mapping_closures, queue_empty) = let (submission_closures, mapping_closures, queue_empty) =