From 0ff7bb4dec4d701060c955bcd86a7bc9714a08ad Mon Sep 17 00:00:00 2001 From: Gabriel Majeri Date: Sat, 18 Jul 2020 10:58:46 +0300 Subject: [PATCH 1/2] Error type for `WaitIdle` --- wgpu-core/src/device/life.rs | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index a8e29da97..5c2f8c84c 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -14,8 +14,9 @@ use crate::{ use copyless::VecHelper as _; use gfx_descriptor::{DescriptorAllocator, DescriptorSet}; use gfx_memory::{Heaps, MemoryBlock}; -use hal::device::Device as _; +use hal::device::{Device as _, OomOrDeviceLost}; use parking_lot::Mutex; +use thiserror::Error; use std::sync::atomic::Ordering; @@ -186,6 +187,14 @@ struct ActiveSubmission { mapped: Vec, } +#[derive(Clone, Debug, Error)] +pub enum WaitIdleError { + #[error(transparent)] + OomOrDeviceLost(#[from] OomOrDeviceLost), + #[error("GPU got stuck :(")] + StuckGpu, +} + /// A struct responsible for tracking resource lifetimes. /// /// Here is how host mapping is handled: @@ -259,7 +268,7 @@ impl LifetimeTracker { }); } - fn wait_idle(&self, device: &B::Device) { + fn wait_idle(&self, device: &B::Device) -> Result<(), WaitIdleError> { if !self.active.is_empty() { log::debug!("Waiting for IDLE..."); let status = unsafe { @@ -267,17 +276,22 @@ impl LifetimeTracker { self.active.iter().map(|a| &a.fence), hal::device::WaitFor::All, CLEANUP_WAIT_MS * 1_000_000, - ) + )? }; log::debug!("...Done"); - assert_eq!(status, Ok(true), "GPU got stuck :("); + + if status == false { + // We timed out while waiting for the fences + return Err(WaitIdleError::StuckGpu); + } } + Ok(()) } /// Returns the last submission index that is done. pub fn triage_submissions(&mut self, device: &B::Device, force_wait: bool) -> SubmissionIndex { if force_wait { - self.wait_idle(device); + self.wait_idle(device).unwrap(); } //TODO: enable when `is_sorted_by_key` is stable //debug_assert!(self.active.is_sorted_by_key(|a| a.index)); From fc460f4eab1422279010e6170b147ed335416a86 Mon Sep 17 00:00:00 2001 From: Gabriel Majeri Date: Sat, 18 Jul 2020 20:39:11 +0300 Subject: [PATCH 2/2] Propagate `WaitIdleError` up through the API --- player/src/bin/play.rs | 2 +- player/src/lib.rs | 4 ++-- player/tests/test.rs | 2 +- wgpu-core/src/device/life.rs | 12 ++++++---- wgpu-core/src/device/mod.rs | 43 +++++++++++++++++++++++------------ wgpu-core/src/device/queue.rs | 14 ++++++++++-- 6 files changed, 52 insertions(+), 25 deletions(-) diff --git a/player/src/bin/play.rs b/player/src/bin/play.rs index 03cf63d2d..8c1f18a79 100644 --- a/player/src/bin/play.rs +++ b/player/src/bin/play.rs @@ -101,7 +101,7 @@ fn main() { #[cfg(feature = "renderdoc")] rd.end_frame_capture(std::ptr::null(), std::ptr::null()); - gfx_select!(device => global.device_poll(device, true)); + gfx_select!(device => global.device_poll(device, true)).unwrap(); } #[cfg(feature = "winit")] { diff --git a/player/src/lib.rs b/player/src/lib.rs index 47799ef17..84194f874 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -386,7 +386,7 @@ impl GlobalPlay for wgc::hub::Global { if queued { self.queue_write_buffer::(device, id, range.start, &bin); } else { - self.device_wait_for_buffer::(device, id); + self.device_wait_for_buffer::(device, id).unwrap(); self.device_set_buffer_sub_data::(device, id, range.start, &bin[..size]); } } @@ -406,7 +406,7 @@ impl GlobalPlay for wgc::hub::Global { comb_manager.alloc(device.backend()), ); let comb = self.encode_commands::(encoder, commands); - self.queue_submit::(device, &[comb]); + self.queue_submit::(device, &[comb]).unwrap(); } } } diff --git a/player/tests/test.rs b/player/tests/test.rs index 6c3f2bab1..a223b41b7 100644 --- a/player/tests/test.rs +++ b/player/tests/test.rs @@ -101,7 +101,7 @@ impl Test { } println!("\t\t\tWaiting..."); - gfx_select!(device => global.device_poll(device, true)); + gfx_select!(device => global.device_poll(device, true)).unwrap(); for expect in self.expectations { println!("\t\t\tChecking {}", expect.name); diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 5c2f8c84c..99dcb182a 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -289,9 +289,13 @@ impl LifetimeTracker { } /// Returns the last submission index that is done. - pub fn triage_submissions(&mut self, device: &B::Device, force_wait: bool) -> SubmissionIndex { + pub fn triage_submissions( + &mut self, + device: &B::Device, + force_wait: bool, + ) -> Result { if force_wait { - self.wait_idle(device).unwrap(); + self.wait_idle(device)?; } //TODO: enable when `is_sorted_by_key` is stable //debug_assert!(self.active.is_sorted_by_key(|a| a.index)); @@ -303,7 +307,7 @@ impl LifetimeTracker { let last_done = if done_count != 0 { self.active[done_count - 1].index } else { - return 0; + return Ok(0); }; for a in self.active.drain(..done_count) { @@ -315,7 +319,7 @@ impl LifetimeTracker { } } - last_done + Ok(last_done) } pub fn cleanup( diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 6498f7b92..0cc566da9 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -5,6 +5,7 @@ use crate::{ binding_model::{self, CreateBindGroupError, PipelineLayoutError}, command, conv, + device::life::WaitIdleError, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Hub, Input, Token}, id, pipeline, resource, span, swap_chain, track::{BufferState, TextureState, TrackerSet}, @@ -295,7 +296,7 @@ impl Device { hub: &Hub, force_wait: bool, token: &mut Token<'token, Self>, - ) -> Vec { + ) -> Result, WaitIdleError> { let mut life_tracker = self.lock_life(token); life_tracker.triage_suspected( @@ -307,7 +308,7 @@ impl Device { ); life_tracker.triage_mapped(hub, token); life_tracker.triage_framebuffers(hub, &mut *self.framebuffers.lock(), token); - let last_done = life_tracker.triage_submissions(&self.raw, force_wait); + let last_done = life_tracker.triage_submissions(&self.raw, force_wait)?; let callbacks = life_tracker.handle_mapping(hub, &self.raw, &self.trackers, token); life_tracker.cleanup(&self.raw, &self.mem_allocator, &self.desc_allocator); @@ -315,7 +316,7 @@ impl Device { .submission_index .store(last_done, Ordering::Release); self.com_allocator.maintain(&self.raw, last_done); - callbacks + Ok(callbacks) } fn untrack<'this, 'token: 'this, G: GlobalIdentityHandlerFactory>( @@ -609,7 +610,9 @@ impl Device { /// Wait for idle and remove resources that we can, before we die. pub(crate) fn prepare_to_die(&mut self) { let mut life_tracker = self.life_tracker.lock(); - life_tracker.triage_submissions(&self.raw, true); + if let Err(error) = life_tracker.triage_submissions(&self.raw, true) { + log::error!("failed to triage submissions: {}", error); + } life_tracker.cleanup(&self.raw, &self.mem_allocator, &self.desc_allocator); } @@ -757,7 +760,7 @@ impl Global { &self, device_id: id::DeviceId, buffer_id: id::BufferId, - ) { + ) -> Result<(), WaitIdleError> { let hub = B::hub(self); let mut token = Token::root(); let (device_guard, mut token) = hub.devices.read(&mut token); @@ -778,8 +781,10 @@ impl Global { ); device .lock_life(&mut token) - .triage_submissions(&device.raw, true); + .triage_submissions(&device.raw, true)?; } + + Ok(()) } pub fn device_set_buffer_sub_data( @@ -2774,54 +2779,62 @@ impl Global { ); } - pub fn device_poll(&self, device_id: id::DeviceId, force_wait: bool) { + pub fn device_poll( + &self, + device_id: id::DeviceId, + force_wait: bool, + ) -> Result<(), WaitIdleError> { span!(_guard, INFO, "Device::poll"); let hub = B::hub(self); let mut token = Token::root(); let callbacks = { let (device_guard, mut token) = hub.devices.read(&mut token); - device_guard[device_id].maintain(&hub, force_wait, &mut token) + device_guard[device_id].maintain(&hub, force_wait, &mut token)? }; fire_map_callbacks(callbacks); + Ok(()) } fn poll_devices( &self, force_wait: bool, callbacks: &mut Vec, - ) { + ) -> Result<(), WaitIdleError> { span!(_guard, INFO, "Device::poll_devices"); let hub = B::hub(self); let mut token = Token::root(); let (device_guard, mut token) = hub.devices.read(&mut token); for (_, device) in device_guard.iter(B::VARIANT) { - let cbs = device.maintain(&hub, force_wait, &mut token); + let cbs = device.maintain(&hub, force_wait, &mut token)?; callbacks.extend(cbs); } + Ok(()) } - pub fn poll_all_devices(&self, force_wait: bool) { + pub fn poll_all_devices(&self, force_wait: bool) -> Result<(), WaitIdleError> { use crate::backend; let mut callbacks = Vec::new(); backends! { #[vulkan] { - self.poll_devices::(force_wait, &mut callbacks); + self.poll_devices::(force_wait, &mut callbacks)?; } #[metal] { - self.poll_devices::(force_wait, &mut callbacks); + self.poll_devices::(force_wait, &mut callbacks)?; } #[dx12] { - self.poll_devices::(force_wait, &mut callbacks); + self.poll_devices::(force_wait, &mut callbacks)?; } #[dx11] { - self.poll_devices::(force_wait, &mut callbacks); + self.poll_devices::(force_wait, &mut callbacks)?; } } fire_map_callbacks(callbacks); + + Ok(()) } pub fn device_destroy(&self, device_id: id::DeviceId) { diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 56e5934f2..effcb9ebd 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -7,6 +7,7 @@ use crate::device::trace::Action; use crate::{ command::{CommandAllocator, CommandBuffer, TextureCopyView, BITS_PER_BYTE}, conv, + device::WaitIdleError, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Token}, id, resource::{BufferMapState, BufferUse, TextureUse}, @@ -17,6 +18,7 @@ use gfx_memory::{Block, Heaps, MemoryBlock}; use hal::{command::CommandBuffer as _, device::Device as _, queue::CommandQueue as _}; use smallvec::SmallVec; use std::iter; +use thiserror::Error; struct StagingData { buffer: B::Buffer, @@ -371,7 +373,7 @@ impl Global { &self, queue_id: id::QueueId, command_buffer_ids: &[id::CommandBufferId], - ) { + ) -> Result<(), QueueSubmitError> { span!(_guard, INFO, "Queue::submit"); let hub = B::hub(self); @@ -538,7 +540,7 @@ impl Global { .after_submit_internal(comb_raw, submit_index); } - let callbacks = device.maintain(&hub, false, &mut token); + let callbacks = device.maintain(&hub, false, &mut token)?; super::Device::lock_life_internal(&device.life_tracker, &mut token).track_submission( submit_index, fence, @@ -556,9 +558,17 @@ impl Global { }; super::fire_map_callbacks(callbacks); + + Ok(()) } } +#[derive(Clone, Debug, Error)] +pub enum QueueSubmitError { + #[error(transparent)] + WaitIdle(#[from] WaitIdleError), +} + fn get_lowest_common_denom(a: u32, b: u32) -> u32 { let gcd = if a >= b { get_greatest_common_divisor(a, b)