Add a retry mechanism for waiting on the last submission in `Queue::drop`

2024-11-21 22:33:49 +00:00 · 2024-11-07 17:24:40 +01:00 · 2024-11-07 17:24:40 +01:00 · 5a3de2d3a8
commit 5a3de2d3a8
parent d489e4c2e8
1 changed file with 66 additions and 28 deletions
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@ -149,21 +149,33 @@ impl Drop for Queue {
            .load(Ordering::Acquire);
        let fence = self.device.fence.read();
        // Try waiting on the last submission using the following sequence of timeouts
        let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200];
        for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() {
            let is_last_iter = i == timeouts_in_ms.len() - 1;
            api_log!(
                "Waiting on last submission. try: {}/{}. timeout: {}ms",
                i + 1,
                timeouts_in_ms.len(),
                timeout_ms
            );
            let wait_res = unsafe {
                self.device.raw().wait(
                    fence.as_ref(),
                    last_successful_submission_index,
                    #[cfg(not(target_arch = "wasm32"))]
-                crate::device::CLEANUP_WAIT_MS,
+                    timeout_ms,
                    #[cfg(target_arch = "wasm32")]
                    0, // WebKit and Chromium don't support a non-0 timeout
                )
            };
-        drop(fence);
+            // Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
            match wait_res {
-            Ok(true) => {}
+                Ok(true) => break,
            // Note: If we don't panic here we are in UB land (destroying resources while they are still in use by the GPU).
                Ok(false) => {
                    // It's fine that we timed out on WebGL; GL objects can be deleted early as they
                    // will be kept around by the driver if GPU work hasn't finished.
@ -171,15 +183,41 @@ impl Drop for Queue {
                    // backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
                    // Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
                    // we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538.
-                #[cfg(not(target_arch = "wasm32"))]
+                    #[cfg(target_arch = "wasm32")]
-                panic!("We timed out while waiting on the last successful submission to complete!");
+                    {
                        break;
                    }
-            Err(e) => {
+                    #[cfg(not(target_arch = "wasm32"))]
                    {
                        if is_last_iter {
                            panic!(
-                    "We ran into an error while waiting on the last successful submission to complete! - {e}"
+                                "We timed out while waiting on the last successful submission to complete!"
                            );
                        }
                    }
                }
                Err(e) => match e {
                    hal::DeviceError::OutOfMemory => {
                        if is_last_iter {
                            panic!(
                                "We ran into an OOM error while waiting on the last successful submission to complete!"
                            );
                        }
                    }
                    hal::DeviceError::Lost => {
                        self.device.handle_hal_error(e); // will lose the device
                        break;
                    }
                    hal::DeviceError::ResourceCreationFailed => unreachable!(),
                    hal::DeviceError::Unexpected => {
                        panic!(
                            "We ran into an unexpected error while waiting on the last successful submission to complete!"
                        );
                    }
                },
            }
        }
        drop(fence);
        let snatch_guard = self.device.snatchable_lock.read();
        let (submission_closures, mapping_closures, queue_empty) =