Fix D3D12 Surface Leak (#4106)

This commit is contained in:
Connor Fitzgerald 2023-09-05 14:06:33 -04:00
parent 8ea270cd16
commit 1d0a22b1a8
8 changed files with 83 additions and 25 deletions

View File

@ -2110,7 +2110,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (mut surface_guard, mut token) = self.surfaces.write(&mut token);
let (adapter_guard, mut token) = hub.adapters.read(&mut token);
let (device_guard, _token) = hub.devices.read(&mut token);
let (device_guard, mut token) = hub.devices.read(&mut token);
let error = 'outer: loop {
let device = match device_guard.get(device_id) {
@ -2183,6 +2183,24 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
break error;
}
// Wait for all work to finish before configuring the surface.
if let Err(e) = device.maintain(hub, wgt::Maintain::Wait, &mut token) {
break e.into();
}
// All textures must be destroyed before the surface can be re-configured.
if let Some(present) = surface.presentation.take() {
if present.acquired_texture.is_some() {
break E::PreviousOutputExists;
}
}
// TODO: Texture views may still be alive that point to the texture.
// this will allow the user to render to the surface texture, long after
// it has been removed.
//
// https://github.com/gfx-rs/wgpu/issues/4105
match unsafe {
A::get_surface_mut(surface)
.unwrap()
@ -2202,12 +2220,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
}
}
if let Some(present) = surface.presentation.take() {
if present.acquired_texture.is_some() {
break E::PreviousOutputExists;
}
}
surface.presentation = Some(present::Presentation {
device_id: Stored {
value: id::Valid(device_id),

View File

@ -1,6 +1,5 @@
use crate::{
binding_model,
device::life::WaitIdleError,
hal_api::HalApi,
hub::Hub,
id,
@ -24,7 +23,7 @@ pub mod queue;
pub mod resource;
#[cfg(any(feature = "trace", feature = "replay"))]
pub mod trace;
pub use resource::Device;
pub use {life::WaitIdleError, resource::Device};
pub const SHADER_STAGE_COUNT: usize = 3;
// Should be large enough for the largest possible texture row. This

View File

@ -15,7 +15,7 @@ use std::borrow::Borrow;
use crate::device::trace::Action;
use crate::{
conv,
device::{DeviceError, MissingDownlevelFlags},
device::{DeviceError, MissingDownlevelFlags, WaitIdleError},
global::Global,
hal_api::HalApi,
hub::Token,
@ -96,6 +96,18 @@ pub enum ConfigureSurfaceError {
},
#[error("Requested usage is not supported")]
UnsupportedUsage,
#[error("Gpu got stuck :(")]
StuckGpu,
}
impl From<WaitIdleError> for ConfigureSurfaceError {
fn from(e: WaitIdleError) -> Self {
match e {
WaitIdleError::Device(d) => ConfigureSurfaceError::Device(d),
WaitIdleError::WrongSubmissionIndex(..) => unreachable!(),
WaitIdleError::StuckGpu => ConfigureSurfaceError::StuckGpu,
}
}
}
#[repr(C)]

View File

@ -180,7 +180,10 @@ impl super::Device {
})
}
pub(super) unsafe fn wait_idle(&self) -> Result<(), crate::DeviceError> {
// Blocks until the dedicated present queue is finished with all of its work.
//
// Once this method completes, the surface is able to be resized or deleted.
pub(super) unsafe fn wait_for_present_queue_idle(&self) -> Result<(), crate::DeviceError> {
let cur_value = self.idler.fence.get_value();
if cur_value == !0 {
return Err(crate::DeviceError::Lost);

View File

@ -609,19 +609,23 @@ impl crate::Surface<Api> for Surface {
let mut flags = dxgi::DXGI_SWAP_CHAIN_FLAG_FRAME_LATENCY_WAITABLE_OBJECT;
// We always set ALLOW_TEARING on the swapchain no matter
// what kind of swapchain we want because ResizeBuffers
// cannot change if ALLOW_TEARING is applied to the swapchain.
// cannot change the swapchain's ALLOW_TEARING flag.
//
// This does not change the behavior of the swapchain, just
// allow present calls to use tearing.
if self.supports_allow_tearing {
flags |= dxgi::DXGI_SWAP_CHAIN_FLAG_ALLOW_TEARING;
}
// While `configure`s contract ensures that no work on the GPU's main queues
// are in flight, we still need to wait for the present queue to be idle.
unsafe { device.wait_for_present_queue_idle() }?;
let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format);
let swap_chain = match self.swap_chain.take() {
//Note: this path doesn't properly re-initialize all of the things
Some(sc) => {
// can't have image resources in flight used by GPU
let _ = unsafe { device.wait_idle() };
let raw = unsafe { sc.release_resources() };
let result = unsafe {
raw.ResizeBuffers(
@ -769,12 +773,16 @@ impl crate::Surface<Api> for Surface {
}
unsafe fn unconfigure(&mut self, device: &Device) {
if let Some(mut sc) = self.swap_chain.take() {
if let Some(sc) = self.swap_chain.take() {
unsafe {
let _ = sc.wait(None);
//TODO: this shouldn't be needed,
// but it complains that the queue is still used otherwise
let _ = device.wait_idle();
// While `unconfigure`s contract ensures that no work on the GPU's main queues
// are in flight, we still need to wait for the present queue to be idle.
// The major failure mode of this function is device loss,
// which if we have lost the device, we should just continue
// cleaning up, without error.
let _ = device.wait_for_present_queue_idle();
let _raw = sc.release_resources();
}
}
@ -833,6 +841,13 @@ impl crate::Queue<Api> for Queue {
.signal(&fence.raw, value)
.into_device_result("Signal fence")?;
}
// Note the lack of synchronization here between the main Direct queue
// and the dedicated presentation queue. This is automatically handled
// by the D3D runtime by detecting uses of resources derived from the
// swapchain. This automatic detection is why you cannot use a swapchain
// as an UAV in D3D12.
Ok(())
}
unsafe fn present(

View File

@ -192,12 +192,28 @@ pub trait Instance<A: Api>: Sized + WasmNotSend + WasmNotSync {
}
pub trait Surface<A: Api>: WasmNotSend + WasmNotSync {
/// Configures the surface to use the given device.
///
/// # Safety
///
/// - All gpu work that uses the surface must have been completed.
/// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - All surfaces created using other devices must have been unconfigured before this call.
unsafe fn configure(
&mut self,
device: &A::Device,
config: &SurfaceConfiguration,
) -> Result<(), SurfaceError>;
/// Unconfigures the surface on the given device.
///
/// # Safety
///
/// - All gpu work that uses the surface must have been completed.
/// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - The surface must have been configured on the given device.
unsafe fn unconfigure(&mut self, device: &A::Device);
/// Returns the next texture to be presented by the swapchain for drawing

View File

@ -1143,7 +1143,7 @@ impl crate::Device<super::Api> for super::Device {
}
if desc.anisotropy_clamp != 1 {
// We only enable anisotropy if it is supported, and wgpu-hal interface guarentees
// We only enable anisotropy if it is supported, and wgpu-hal interface guarantees
// the clamp is in the range [1, 16] which is always supported if anisotropy is.
vk_info = vk_info
.anisotropy_enable(true)

View File

@ -152,12 +152,11 @@ unsafe extern "system" fn debug_utils_messenger_callback(
}
impl super::Swapchain {
/// # Safety
///
/// - The device must have been made idle before calling this function.
unsafe fn release_resources(self, device: &ash::Device) -> Self {
profiling::scope!("Swapchain::release_resources");
{
profiling::scope!("vkDeviceWaitIdle");
let _ = unsafe { device.device_wait_idle() };
};
unsafe { device.destroy_fence(self.fence, None) };
self
}
@ -784,6 +783,7 @@ impl crate::Surface<super::Api> for super::Surface {
device: &super::Device,
config: &crate::SurfaceConfiguration,
) -> Result<(), crate::SurfaceError> {
// Safety: `configure`'s contract guarantees there are no resources derived from the swapchain in use.
let old = self
.swapchain
.take()
@ -797,6 +797,7 @@ impl crate::Surface<super::Api> for super::Surface {
unsafe fn unconfigure(&mut self, device: &super::Device) {
if let Some(sc) = self.swapchain.take() {
// Safety: `unconfigure`'s contract guarantees there are no resources derived from the swapchain in use.
let swapchain = unsafe { sc.release_resources(&device.shared.raw) };
unsafe { swapchain.functor.destroy_swapchain(swapchain.raw, None) };
}