mirror of
https://github.com/gfx-rs/wgpu.git
synced 2024-11-21 22:33:49 +00:00
[hal/vk] Rework Submission and Surface Synchronization (#5681)
Fix two major synchronization issues in `wgpu_hal::vulkan`: - Properly order queue command buffer submissions. Due to Mesa bugs, two semaphores are required even though the Vulkan spec says that only one should be necessary. - Properly manage surface texture acquisition and presentation: - Acquiring a surface texture can return while the presentation engine is still displaying the texture. Applications must wait for a semaphore to be signaled before using the acquired texture. - Presenting a surface texture requires a semaphore to ensure that drawing is complete before presentation occurs. Co-authored-by: Jim Blandy <jimb@red-bean.com>
This commit is contained in:
parent
9b7a965667
commit
c7458638d1
@ -1499,7 +1499,7 @@ impl Global {
|
||||
.raw
|
||||
.as_ref()
|
||||
.unwrap()
|
||||
.submit(&refs, &submit_surface_textures, Some((fence, submit_index)))
|
||||
.submit(&refs, &submit_surface_textures, (fence, submit_index))
|
||||
.map_err(DeviceError::from)?;
|
||||
}
|
||||
|
||||
|
@ -154,17 +154,20 @@ impl Global {
|
||||
parent_id: surface_id,
|
||||
});
|
||||
}
|
||||
#[cfg(not(feature = "trace"))]
|
||||
let _ = device;
|
||||
|
||||
let fence_guard = device.fence.read();
|
||||
let fence = fence_guard.as_ref().unwrap();
|
||||
|
||||
let suf = A::surface_as_hal(surface.as_ref());
|
||||
let (texture_id, status) = match unsafe {
|
||||
suf.unwrap()
|
||||
.acquire_texture(Some(std::time::Duration::from_millis(
|
||||
FRAME_TIMEOUT_MS as u64,
|
||||
)))
|
||||
suf.unwrap().acquire_texture(
|
||||
Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
|
||||
fence,
|
||||
)
|
||||
} {
|
||||
Ok(Some(ast)) => {
|
||||
drop(fence_guard);
|
||||
|
||||
let texture_desc = wgt::TextureDescriptor {
|
||||
label: (),
|
||||
size: wgt::Extent3d {
|
||||
|
@ -22,7 +22,6 @@ const MAX_BUNNIES: usize = 1 << 20;
|
||||
const BUNNY_SIZE: f32 = 0.15 * 256.0;
|
||||
const GRAVITY: f32 = -9.8 * 100.0;
|
||||
const MAX_VELOCITY: f32 = 750.0;
|
||||
const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
|
||||
const DESIRED_MAX_LATENCY: u32 = 2;
|
||||
|
||||
#[repr(C)]
|
||||
@ -498,7 +497,7 @@ impl<A: hal::Api> Example<A> {
|
||||
let mut fence = device.create_fence().unwrap();
|
||||
let init_cmd = cmd_encoder.end_encoding().unwrap();
|
||||
queue
|
||||
.submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value)))
|
||||
.submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
|
||||
.unwrap();
|
||||
device.wait(&fence, init_fence_value, !0).unwrap();
|
||||
device.destroy_buffer(staging_buffer);
|
||||
@ -550,7 +549,7 @@ impl<A: hal::Api> Example<A> {
|
||||
{
|
||||
let ctx = &mut self.contexts[self.context_index];
|
||||
self.queue
|
||||
.submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value)))
|
||||
.submit(&[], &[], (&mut ctx.fence, ctx.fence_value))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
@ -650,7 +649,13 @@ impl<A: hal::Api> Example<A> {
|
||||
|
||||
let ctx = &mut self.contexts[self.context_index];
|
||||
|
||||
let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture };
|
||||
let surface_tex = unsafe {
|
||||
self.surface
|
||||
.acquire_texture(None, &ctx.fence)
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.texture
|
||||
};
|
||||
|
||||
let target_barrier0 = hal::TextureBarrier {
|
||||
texture: surface_tex.borrow(),
|
||||
@ -718,7 +723,6 @@ impl<A: hal::Api> Example<A> {
|
||||
}
|
||||
|
||||
ctx.frames_recorded += 1;
|
||||
let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT;
|
||||
|
||||
let target_barrier1 = hal::TextureBarrier {
|
||||
texture: surface_tex.borrow(),
|
||||
@ -732,20 +736,18 @@ impl<A: hal::Api> Example<A> {
|
||||
|
||||
unsafe {
|
||||
let cmd_buf = ctx.encoder.end_encoding().unwrap();
|
||||
let fence_param = if do_fence {
|
||||
Some((&mut ctx.fence, ctx.fence_value))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.queue
|
||||
.submit(&[&cmd_buf], &[&surface_tex], fence_param)
|
||||
.submit(
|
||||
&[&cmd_buf],
|
||||
&[&surface_tex],
|
||||
(&mut ctx.fence, ctx.fence_value),
|
||||
)
|
||||
.unwrap();
|
||||
self.queue.present(&self.surface, surface_tex).unwrap();
|
||||
ctx.used_cmd_bufs.push(cmd_buf);
|
||||
ctx.used_views.push(surface_tex_view);
|
||||
};
|
||||
|
||||
if do_fence {
|
||||
log::debug!("Context switch from {}", self.context_index);
|
||||
let old_fence_value = ctx.fence_value;
|
||||
if self.contexts.len() == 1 {
|
||||
@ -771,7 +773,6 @@ impl<A: hal::Api> Example<A> {
|
||||
}
|
||||
next.fence_value = old_fence_value + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
|
@ -156,6 +156,7 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
|
||||
})
|
||||
.unwrap()
|
||||
};
|
||||
let mut fence = unsafe { od.device.create_fence().unwrap() };
|
||||
let rp_desc = hal::RenderPassDescriptor {
|
||||
label: None,
|
||||
extent: wgt::Extent3d {
|
||||
@ -183,6 +184,6 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
|
||||
encoder.begin_render_pass(&rp_desc);
|
||||
encoder.end_render_pass();
|
||||
let cmd_buf = encoder.end_encoding().unwrap();
|
||||
od.queue.submit(&[&cmd_buf], &[], None).unwrap();
|
||||
od.queue.submit(&[&cmd_buf], &[], (&mut fence, 0)).unwrap();
|
||||
}
|
||||
}
|
||||
|
@ -13,7 +13,6 @@ use std::{
|
||||
};
|
||||
use winit::window::WindowButtons;
|
||||
|
||||
const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
|
||||
const DESIRED_MAX_LATENCY: u32 = 2;
|
||||
|
||||
/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc)
|
||||
@ -759,7 +758,7 @@ impl<A: hal::Api> Example<A> {
|
||||
let mut fence = device.create_fence().unwrap();
|
||||
let init_cmd = cmd_encoder.end_encoding().unwrap();
|
||||
queue
|
||||
.submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value)))
|
||||
.submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
|
||||
.unwrap();
|
||||
device.wait(&fence, init_fence_value, !0).unwrap();
|
||||
cmd_encoder.reset_all(iter::once(init_cmd));
|
||||
@ -808,7 +807,13 @@ impl<A: hal::Api> Example<A> {
|
||||
fn render(&mut self) {
|
||||
let ctx = &mut self.contexts[self.context_index];
|
||||
|
||||
let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture };
|
||||
let surface_tex = unsafe {
|
||||
self.surface
|
||||
.acquire_texture(None, &ctx.fence)
|
||||
.unwrap()
|
||||
.unwrap()
|
||||
.texture
|
||||
};
|
||||
|
||||
let target_barrier0 = hal::TextureBarrier {
|
||||
texture: surface_tex.borrow(),
|
||||
@ -909,7 +914,6 @@ impl<A: hal::Api> Example<A> {
|
||||
}
|
||||
|
||||
ctx.frames_recorded += 1;
|
||||
let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT;
|
||||
|
||||
let target_barrier1 = hal::TextureBarrier {
|
||||
texture: surface_tex.borrow(),
|
||||
@ -959,20 +963,18 @@ impl<A: hal::Api> Example<A> {
|
||||
|
||||
unsafe {
|
||||
let cmd_buf = ctx.encoder.end_encoding().unwrap();
|
||||
let fence_param = if do_fence {
|
||||
Some((&mut ctx.fence, ctx.fence_value))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.queue
|
||||
.submit(&[&cmd_buf], &[&surface_tex], fence_param)
|
||||
.submit(
|
||||
&[&cmd_buf],
|
||||
&[&surface_tex],
|
||||
(&mut ctx.fence, ctx.fence_value),
|
||||
)
|
||||
.unwrap();
|
||||
self.queue.present(&self.surface, surface_tex).unwrap();
|
||||
ctx.used_cmd_bufs.push(cmd_buf);
|
||||
ctx.used_views.push(surface_tex_view);
|
||||
};
|
||||
|
||||
if do_fence {
|
||||
log::info!("Context switch from {}", self.context_index);
|
||||
let old_fence_value = ctx.fence_value;
|
||||
if self.contexts.len() == 1 {
|
||||
@ -998,14 +1000,13 @@ impl<A: hal::Api> Example<A> {
|
||||
}
|
||||
next.fence_value = old_fence_value + 1;
|
||||
}
|
||||
}
|
||||
|
||||
fn exit(mut self) {
|
||||
unsafe {
|
||||
{
|
||||
let ctx = &mut self.contexts[self.context_index];
|
||||
self.queue
|
||||
.submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value)))
|
||||
.submit(&[], &[], (&mut ctx.fence, ctx.fence_value))
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
|
@ -857,6 +857,7 @@ impl crate::Surface for Surface {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
timeout: Option<std::time::Duration>,
|
||||
_fence: &Fence,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> {
|
||||
let mut swapchain = self.swap_chain.write();
|
||||
let sc = swapchain.as_mut().unwrap();
|
||||
@ -895,7 +896,7 @@ impl crate::Queue for Queue {
|
||||
&self,
|
||||
command_buffers: &[&CommandBuffer],
|
||||
_surface_textures: &[&Texture],
|
||||
signal_fence: Option<(&mut Fence, crate::FenceValue)>,
|
||||
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
|
||||
) -> Result<(), crate::DeviceError> {
|
||||
let mut temp_lists = self.temp_lists.lock();
|
||||
temp_lists.clear();
|
||||
@ -908,11 +909,9 @@ impl crate::Queue for Queue {
|
||||
self.raw.execute_command_lists(&temp_lists);
|
||||
}
|
||||
|
||||
if let Some((fence, value)) = signal_fence {
|
||||
self.raw
|
||||
.signal(&fence.raw, value)
|
||||
.signal(&signal_fence.raw, signal_value)
|
||||
.into_device_result("Signal fence")?;
|
||||
}
|
||||
|
||||
// Note the lack of synchronization here between the main Direct queue
|
||||
// and the dedicated presentation queue. This is automatically handled
|
||||
|
@ -75,6 +75,7 @@ impl crate::Surface for Context {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
timeout: Option<std::time::Duration>,
|
||||
fence: &Resource,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> {
|
||||
Ok(None)
|
||||
}
|
||||
@ -114,7 +115,7 @@ impl crate::Queue for Context {
|
||||
&self,
|
||||
command_buffers: &[&Resource],
|
||||
surface_textures: &[&Resource],
|
||||
signal_fence: Option<(&mut Resource, crate::FenceValue)>,
|
||||
signal_fence: (&mut Resource, crate::FenceValue),
|
||||
) -> DeviceResult<()> {
|
||||
Ok(())
|
||||
}
|
||||
|
@ -1432,6 +1432,7 @@ impl crate::Surface for Surface {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
_timeout_ms: Option<Duration>, //TODO
|
||||
_fence: &super::Fence,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
|
||||
let swapchain = self.swapchain.read();
|
||||
let sc = swapchain.as_ref().unwrap();
|
||||
|
@ -1740,7 +1740,7 @@ impl crate::Queue for super::Queue {
|
||||
&self,
|
||||
command_buffers: &[&super::CommandBuffer],
|
||||
_surface_textures: &[&super::Texture],
|
||||
signal_fence: Option<(&mut super::Fence, crate::FenceValue)>,
|
||||
(signal_fence, signal_value): (&mut super::Fence, crate::FenceValue),
|
||||
) -> Result<(), crate::DeviceError> {
|
||||
let shared = Arc::clone(&self.shared);
|
||||
let gl = &shared.context.lock();
|
||||
@ -1774,12 +1774,10 @@ impl crate::Queue for super::Queue {
|
||||
}
|
||||
}
|
||||
|
||||
if let Some((fence, value)) = signal_fence {
|
||||
fence.maintain(gl);
|
||||
signal_fence.maintain(gl);
|
||||
let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) }
|
||||
.map_err(|_| crate::DeviceError::OutOfMemory)?;
|
||||
fence.pending.push((value, sync));
|
||||
}
|
||||
signal_fence.pending.push((signal_value, sync));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
@ -427,6 +427,7 @@ impl crate::Surface for Surface {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
_timeout_ms: Option<std::time::Duration>, //TODO
|
||||
_fence: &super::Fence,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
|
||||
let swapchain = self.swapchain.read();
|
||||
let sc = swapchain.as_ref().unwrap();
|
||||
|
@ -798,6 +798,7 @@ impl crate::Surface for Surface {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
_timeout_ms: Option<Duration>,
|
||||
_fence: &super::Fence,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
|
||||
let swapchain = self.swapchain.read();
|
||||
let sc = swapchain.as_ref().unwrap();
|
||||
|
@ -459,44 +459,101 @@ pub trait Instance: Sized + WasmNotSendSync {
|
||||
pub trait Surface: WasmNotSendSync {
|
||||
type A: Api;
|
||||
|
||||
/// Configures the surface to use the given device.
|
||||
/// Configure `self` to use `device`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// - All gpu work that uses the surface must have been completed.
|
||||
/// - All GPU work using `self` must have been completed.
|
||||
/// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
|
||||
/// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
|
||||
/// - All surfaces created using other devices must have been unconfigured before this call.
|
||||
/// - The surface `self` must not currently be configured to use any other [`Device`].
|
||||
unsafe fn configure(
|
||||
&self,
|
||||
device: &<Self::A as Api>::Device,
|
||||
config: &SurfaceConfiguration,
|
||||
) -> Result<(), SurfaceError>;
|
||||
|
||||
/// Unconfigures the surface on the given device.
|
||||
/// Unconfigure `self` on `device`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// - All gpu work that uses the surface must have been completed.
|
||||
/// - All GPU work that uses `self` must have been completed.
|
||||
/// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
|
||||
/// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
|
||||
/// - The surface must have been configured on the given device.
|
||||
/// - The surface `self` must have been configured on `device`.
|
||||
unsafe fn unconfigure(&self, device: &<Self::A as Api>::Device);
|
||||
|
||||
/// Returns the next texture to be presented by the swapchain for drawing
|
||||
/// Return the next texture to be presented by `self`, for the caller to draw on.
|
||||
///
|
||||
/// A `timeout` of `None` means to wait indefinitely, with no timeout.
|
||||
/// On success, return an [`AcquiredSurfaceTexture`] representing the
|
||||
/// texture into which the caller should draw the image to be displayed on
|
||||
/// `self`.
|
||||
///
|
||||
/// If `timeout` elapses before `self` has a texture ready to be acquired,
|
||||
/// return `Ok(None)`. If `timeout` is `None`, wait indefinitely, with no
|
||||
/// timeout.
|
||||
///
|
||||
/// # Using an [`AcquiredSurfaceTexture`]
|
||||
///
|
||||
/// On success, this function returns an [`AcquiredSurfaceTexture`] whose
|
||||
/// [`texture`] field is a [`SurfaceTexture`] from which the caller can
|
||||
/// [`borrow`] a [`Texture`] to draw on. The [`AcquiredSurfaceTexture`] also
|
||||
/// carries some metadata about that [`SurfaceTexture`].
|
||||
///
|
||||
/// All calls to [`Queue::submit`] that draw on that [`Texture`] must also
|
||||
/// include the [`SurfaceTexture`] in the `surface_textures` argument.
|
||||
///
|
||||
/// When you are done drawing on the texture, you can display it on `self`
|
||||
/// by passing the [`SurfaceTexture`] and `self` to [`Queue::present`].
|
||||
///
|
||||
/// If you do not wish to display the texture, you must pass the
|
||||
/// [`SurfaceTexture`] to [`self.discard_texture`], so that it can be reused
|
||||
/// by future acquisitions.
|
||||
///
|
||||
/// # Portability
|
||||
///
|
||||
/// Some backends can't support a timeout when acquiring a texture and
|
||||
/// the timeout will be ignored.
|
||||
/// Some backends can't support a timeout when acquiring a texture. On these
|
||||
/// backends, `timeout` is ignored.
|
||||
///
|
||||
/// Returns `None` on timing out.
|
||||
/// # Safety
|
||||
///
|
||||
/// - The surface `self` must currently be configured on some [`Device`].
|
||||
///
|
||||
/// - The `fence` argument must be the same [`Fence`] passed to all calls to
|
||||
/// [`Queue::submit`] that used [`Texture`]s acquired from this surface.
|
||||
///
|
||||
/// - You may only have one texture acquired from `self` at a time. When
|
||||
/// `acquire_texture` returns `Ok(Some(ast))`, you must pass the returned
|
||||
/// [`SurfaceTexture`] `ast.texture` to either [`Queue::present`] or
|
||||
/// [`Surface::discard_texture`] before calling `acquire_texture` again.
|
||||
///
|
||||
/// [`texture`]: AcquiredSurfaceTexture::texture
|
||||
/// [`SurfaceTexture`]: Api::SurfaceTexture
|
||||
/// [`borrow`]: std::borrow::Borrow::borrow
|
||||
/// [`Texture`]: Api::Texture
|
||||
/// [`Fence`]: Api::Fence
|
||||
/// [`self.discard_texture`]: Surface::discard_texture
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
timeout: Option<std::time::Duration>,
|
||||
fence: &<Self::A as Api>::Fence,
|
||||
) -> Result<Option<AcquiredSurfaceTexture<Self::A>>, SurfaceError>;
|
||||
|
||||
/// Relinquish an acquired texture without presenting it.
|
||||
///
|
||||
/// After this call, the texture underlying [`SurfaceTexture`] may be
|
||||
/// returned by subsequent calls to [`self.acquire_texture`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// - The surface `self` must currently be configured on some [`Device`].
|
||||
///
|
||||
/// - `texture` must be a [`SurfaceTexture`] returned by a call to
|
||||
/// [`self.acquire_texture`] that has not yet been passed to
|
||||
/// [`Queue::present`].
|
||||
///
|
||||
/// [`SurfaceTexture`]: Api::SurfaceTexture
|
||||
/// [`self.acquire_texture`]: Surface::acquire_texture
|
||||
unsafe fn discard_texture(&self, texture: <Self::A as Api>::SurfaceTexture);
|
||||
}
|
||||
|
||||
@ -762,17 +819,21 @@ pub trait Queue: WasmNotSendSync {
|
||||
|
||||
/// Submit `command_buffers` for execution on GPU.
|
||||
///
|
||||
/// If `signal_fence` is `Some(fence, value)`, update `fence` to `value`
|
||||
/// when the operation is complete. See [`Fence`] for details.
|
||||
/// Update `fence` to `value` when the operation is complete. See
|
||||
/// [`Fence`] for details.
|
||||
///
|
||||
/// If two calls to `submit` on a single `Queue` occur in a particular order
|
||||
/// (that is, they happen on the same thread, or on two threads that have
|
||||
/// synchronized to establish an ordering), then the first submission's
|
||||
/// commands all complete execution before any of the second submission's
|
||||
/// commands begin. All results produced by one submission are visible to
|
||||
/// the next.
|
||||
/// A `wgpu_hal` queue is "single threaded": all command buffers are
|
||||
/// executed in the order they're submitted, with each buffer able to see
|
||||
/// previous buffers' results. Specifically:
|
||||
///
|
||||
/// Within a submission, command buffers execute in the order in which they
|
||||
/// - If two calls to `submit` on a single `Queue` occur in a particular
|
||||
/// order (that is, they happen on the same thread, or on two threads that
|
||||
/// have synchronized to establish an ordering), then the first
|
||||
/// submission's commands all complete execution before any of the second
|
||||
/// submission's commands begin. All results produced by one submission
|
||||
/// are visible to the next.
|
||||
///
|
||||
/// - Within a submission, command buffers execute in the order in which they
|
||||
/// appear in `command_buffers`. All results produced by one buffer are
|
||||
/// visible to the next.
|
||||
///
|
||||
@ -803,10 +864,16 @@ pub trait Queue: WasmNotSendSync {
|
||||
/// - Every [`SurfaceTexture`][st] that any command in `command_buffers`
|
||||
/// writes to must appear in the `surface_textures` argument.
|
||||
///
|
||||
/// - No [`SurfaceTexture`][st] may appear in the `surface_textures`
|
||||
/// argument more than once.
|
||||
///
|
||||
/// - Each [`SurfaceTexture`][st] in `surface_textures` must be configured
|
||||
/// for use with the [`Device`][d] associated with this [`Queue`],
|
||||
/// typically by calling [`Surface::configure`].
|
||||
///
|
||||
/// - All calls to this function that include a given [`SurfaceTexture`][st]
|
||||
/// in `surface_textures` must use the same [`Fence`].
|
||||
///
|
||||
/// [`Fence`]: Api::Fence
|
||||
/// [cb]: Api::CommandBuffer
|
||||
/// [ce]: Api::CommandEncoder
|
||||
@ -819,7 +886,7 @@ pub trait Queue: WasmNotSendSync {
|
||||
&self,
|
||||
command_buffers: &[&<Self::A as Api>::CommandBuffer],
|
||||
surface_textures: &[&<Self::A as Api>::SurfaceTexture],
|
||||
signal_fence: Option<(&mut <Self::A as Api>::Fence, FenceValue)>,
|
||||
signal_fence: (&mut <Self::A as Api>::Fence, FenceValue),
|
||||
) -> Result<(), DeviceError>;
|
||||
unsafe fn present(
|
||||
&self,
|
||||
|
@ -377,14 +377,13 @@ impl crate::Queue for Queue {
|
||||
&self,
|
||||
command_buffers: &[&CommandBuffer],
|
||||
_surface_textures: &[&SurfaceTexture],
|
||||
signal_fence: Option<(&mut Fence, crate::FenceValue)>,
|
||||
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
|
||||
) -> Result<(), crate::DeviceError> {
|
||||
objc::rc::autoreleasepool(|| {
|
||||
let extra_command_buffer = match signal_fence {
|
||||
Some((fence, value)) => {
|
||||
let completed_value = Arc::clone(&fence.completed_value);
|
||||
let extra_command_buffer = {
|
||||
let completed_value = Arc::clone(&signal_fence.completed_value);
|
||||
let block = block::ConcreteBlock::new(move |_cmd_buf| {
|
||||
completed_value.store(value, atomic::Ordering::Release);
|
||||
completed_value.store(signal_value, atomic::Ordering::Release);
|
||||
})
|
||||
.copy();
|
||||
|
||||
@ -400,15 +399,15 @@ impl crate::Queue for Queue {
|
||||
raw.set_label("(wgpu internal) Signal");
|
||||
raw.add_completed_handler(&block);
|
||||
|
||||
fence.maintain();
|
||||
fence.pending_command_buffers.push((value, raw.to_owned()));
|
||||
signal_fence.maintain();
|
||||
signal_fence
|
||||
.pending_command_buffers
|
||||
.push((signal_value, raw.to_owned()));
|
||||
// only return an extra one if it's extra
|
||||
match command_buffers.last() {
|
||||
Some(_) => None,
|
||||
None => Some(raw),
|
||||
}
|
||||
}
|
||||
None => None,
|
||||
};
|
||||
|
||||
for cmd_buffer in command_buffers {
|
||||
|
@ -242,6 +242,7 @@ impl crate::Surface for super::Surface {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
_timeout_ms: Option<std::time::Duration>, //TODO
|
||||
_fence: &super::Fence,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
|
||||
let render_layer = self.render_layer.lock();
|
||||
let (drawable, texture) = match autoreleasepool(|| {
|
||||
|
@ -3,11 +3,7 @@ use super::conv;
|
||||
use ash::{amd, ext, khr, vk};
|
||||
use parking_lot::Mutex;
|
||||
|
||||
use std::{
|
||||
collections::BTreeMap,
|
||||
ffi::CStr,
|
||||
sync::{atomic::AtomicIsize, Arc},
|
||||
};
|
||||
use std::{collections::BTreeMap, ffi::CStr, sync::Arc};
|
||||
|
||||
fn depth_stencil_required_flags() -> vk::FormatFeatureFlags {
|
||||
vk::FormatFeatureFlags::SAMPLED_IMAGE | vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT
|
||||
@ -1783,21 +1779,15 @@ impl super::Adapter {
|
||||
render_passes: Mutex::new(Default::default()),
|
||||
framebuffers: Mutex::new(Default::default()),
|
||||
});
|
||||
let mut relay_semaphores = [vk::Semaphore::null(); 2];
|
||||
for sem in relay_semaphores.iter_mut() {
|
||||
unsafe {
|
||||
*sem = shared
|
||||
.raw
|
||||
.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)?
|
||||
};
|
||||
}
|
||||
|
||||
let relay_semaphores = super::RelaySemaphores::new(&shared)?;
|
||||
|
||||
let queue = super::Queue {
|
||||
raw: raw_queue,
|
||||
swapchain_fn,
|
||||
device: Arc::clone(&shared),
|
||||
family_index,
|
||||
relay_semaphores,
|
||||
relay_index: AtomicIsize::new(-1),
|
||||
relay_semaphores: Mutex::new(relay_semaphores),
|
||||
};
|
||||
|
||||
let mem_allocator = {
|
||||
|
@ -612,17 +612,16 @@ impl super::Device {
|
||||
let images =
|
||||
unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?;
|
||||
|
||||
// NOTE: It's important that we define at least images.len() + 1 wait
|
||||
// NOTE: It's important that we define at least images.len() wait
|
||||
// semaphores, since we prospectively need to provide the call to
|
||||
// acquire the next image with an unsignaled semaphore.
|
||||
let surface_semaphores = (0..images.len() + 1)
|
||||
.map(|_| unsafe {
|
||||
self.shared
|
||||
.raw
|
||||
.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)
|
||||
let surface_semaphores = (0..=images.len())
|
||||
.map(|_| {
|
||||
super::SwapchainImageSemaphores::new(&self.shared)
|
||||
.map(Mutex::new)
|
||||
.map(Arc::new)
|
||||
})
|
||||
.collect::<Result<Vec<_>, _>>()
|
||||
.map_err(crate::DeviceError::from)?;
|
||||
.collect::<Result<Vec<_>, _>>()?;
|
||||
|
||||
Ok(super::Swapchain {
|
||||
raw,
|
||||
@ -633,7 +632,7 @@ impl super::Device {
|
||||
config: config.clone(),
|
||||
view_formats: wgt_view_formats,
|
||||
surface_semaphores,
|
||||
next_surface_index: 0,
|
||||
next_semaphore_index: 0,
|
||||
})
|
||||
}
|
||||
|
||||
@ -836,9 +835,12 @@ impl crate::Device for super::Device {
|
||||
unsafe fn exit(self, queue: super::Queue) {
|
||||
unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) };
|
||||
unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) };
|
||||
for &sem in queue.relay_semaphores.iter() {
|
||||
unsafe { self.shared.raw.destroy_semaphore(sem, None) };
|
||||
}
|
||||
unsafe {
|
||||
queue
|
||||
.relay_semaphores
|
||||
.into_inner()
|
||||
.destroy(&self.shared.raw)
|
||||
};
|
||||
unsafe { self.shared.free_resources() };
|
||||
}
|
||||
|
||||
@ -2055,54 +2057,7 @@ impl crate::Device for super::Device {
|
||||
timeout_ms: u32,
|
||||
) -> Result<bool, crate::DeviceError> {
|
||||
let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS;
|
||||
match *fence {
|
||||
super::Fence::TimelineSemaphore(raw) => {
|
||||
let semaphores = [raw];
|
||||
let values = [wait_value];
|
||||
let vk_info = vk::SemaphoreWaitInfo::default()
|
||||
.semaphores(&semaphores)
|
||||
.values(&values);
|
||||
let result = match self.shared.extension_fns.timeline_semaphore {
|
||||
Some(super::ExtensionFn::Extension(ref ext)) => unsafe {
|
||||
ext.wait_semaphores(&vk_info, timeout_ns)
|
||||
},
|
||||
Some(super::ExtensionFn::Promoted) => unsafe {
|
||||
self.shared.raw.wait_semaphores(&vk_info, timeout_ns)
|
||||
},
|
||||
None => unreachable!(),
|
||||
};
|
||||
match result {
|
||||
Ok(()) => Ok(true),
|
||||
Err(vk::Result::TIMEOUT) => Ok(false),
|
||||
Err(other) => Err(other.into()),
|
||||
}
|
||||
}
|
||||
super::Fence::FencePool {
|
||||
last_completed,
|
||||
ref active,
|
||||
free: _,
|
||||
} => {
|
||||
if wait_value <= last_completed {
|
||||
Ok(true)
|
||||
} else {
|
||||
match active.iter().find(|&&(value, _)| value >= wait_value) {
|
||||
Some(&(_, raw)) => {
|
||||
match unsafe {
|
||||
self.shared.raw.wait_for_fences(&[raw], true, timeout_ns)
|
||||
} {
|
||||
Ok(()) => Ok(true),
|
||||
Err(vk::Result::TIMEOUT) => Ok(false),
|
||||
Err(other) => Err(other.into()),
|
||||
}
|
||||
}
|
||||
None => {
|
||||
log::error!("No signals reached value {}", wait_value);
|
||||
Err(crate::DeviceError::Lost)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
self.shared.wait_for_fence(fence, wait_value, timeout_ns)
|
||||
}
|
||||
|
||||
unsafe fn start_capture(&self) -> bool {
|
||||
@ -2364,6 +2319,71 @@ impl crate::Device for super::Device {
|
||||
}
|
||||
}
|
||||
|
||||
impl super::DeviceShared {
|
||||
pub(super) fn new_binary_semaphore(&self) -> Result<vk::Semaphore, crate::DeviceError> {
|
||||
unsafe {
|
||||
self.raw
|
||||
.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)
|
||||
.map_err(crate::DeviceError::from)
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn wait_for_fence(
|
||||
&self,
|
||||
fence: &super::Fence,
|
||||
wait_value: crate::FenceValue,
|
||||
timeout_ns: u64,
|
||||
) -> Result<bool, crate::DeviceError> {
|
||||
profiling::scope!("Device::wait");
|
||||
match *fence {
|
||||
super::Fence::TimelineSemaphore(raw) => {
|
||||
let semaphores = [raw];
|
||||
let values = [wait_value];
|
||||
let vk_info = vk::SemaphoreWaitInfo::default()
|
||||
.semaphores(&semaphores)
|
||||
.values(&values);
|
||||
let result = match self.extension_fns.timeline_semaphore {
|
||||
Some(super::ExtensionFn::Extension(ref ext)) => unsafe {
|
||||
ext.wait_semaphores(&vk_info, timeout_ns)
|
||||
},
|
||||
Some(super::ExtensionFn::Promoted) => unsafe {
|
||||
self.raw.wait_semaphores(&vk_info, timeout_ns)
|
||||
},
|
||||
None => unreachable!(),
|
||||
};
|
||||
match result {
|
||||
Ok(()) => Ok(true),
|
||||
Err(vk::Result::TIMEOUT) => Ok(false),
|
||||
Err(other) => Err(other.into()),
|
||||
}
|
||||
}
|
||||
super::Fence::FencePool {
|
||||
last_completed,
|
||||
ref active,
|
||||
free: _,
|
||||
} => {
|
||||
if wait_value <= last_completed {
|
||||
Ok(true)
|
||||
} else {
|
||||
match active.iter().find(|&&(value, _)| value >= wait_value) {
|
||||
Some(&(_, raw)) => {
|
||||
match unsafe { self.raw.wait_for_fences(&[raw], true, timeout_ns) } {
|
||||
Ok(()) => Ok(true),
|
||||
Err(vk::Result::TIMEOUT) => Ok(false),
|
||||
Err(other) => Err(other.into()),
|
||||
}
|
||||
}
|
||||
None => {
|
||||
log::error!("No signals reached value {}", wait_value);
|
||||
Err(crate::DeviceError::Lost)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<gpu_alloc::AllocationError> for crate::DeviceError {
|
||||
fn from(error: gpu_alloc::AllocationError) -> Self {
|
||||
use gpu_alloc::AllocationError as Ae;
|
||||
|
@ -164,10 +164,14 @@ impl super::Swapchain {
|
||||
let _ = unsafe { device.device_wait_idle() };
|
||||
};
|
||||
|
||||
// We cannot take this by value, as the function returns `self`.
|
||||
for semaphore in self.surface_semaphores.drain(..) {
|
||||
unsafe {
|
||||
device.destroy_semaphore(semaphore, None);
|
||||
}
|
||||
let arc_removed = Arc::into_inner(semaphore).expect(
|
||||
"Trying to destroy a SurfaceSemaphores that is still in use by a SurfaceTexture",
|
||||
);
|
||||
let mutex_removed = arc_removed.into_inner();
|
||||
|
||||
unsafe { mutex_removed.destroy(device) };
|
||||
}
|
||||
|
||||
self
|
||||
@ -966,9 +970,10 @@ impl crate::Surface for super::Surface {
|
||||
unsafe fn acquire_texture(
|
||||
&self,
|
||||
timeout: Option<std::time::Duration>,
|
||||
fence: &super::Fence,
|
||||
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
|
||||
let mut swapchain = self.swapchain.write();
|
||||
let sc = swapchain.as_mut().unwrap();
|
||||
let swapchain = swapchain.as_mut().unwrap();
|
||||
|
||||
let mut timeout_ns = match timeout {
|
||||
Some(duration) => duration.as_nanos() as u64,
|
||||
@ -988,12 +993,40 @@ impl crate::Surface for super::Surface {
|
||||
timeout_ns = u64::MAX;
|
||||
}
|
||||
|
||||
let wait_semaphore = sc.surface_semaphores[sc.next_surface_index];
|
||||
let swapchain_semaphores_arc = swapchain.get_surface_semaphores();
|
||||
// Nothing should be using this, so we don't block, but panic if we fail to lock.
|
||||
let locked_swapchain_semaphores = swapchain_semaphores_arc
|
||||
.try_lock()
|
||||
.expect("Failed to lock a SwapchainSemaphores.");
|
||||
|
||||
// Wait for all commands writing to the previously acquired image to
|
||||
// complete.
|
||||
//
|
||||
// Almost all the steps in the usual acquire-draw-present flow are
|
||||
// asynchronous: they get something started on the presentation engine
|
||||
// or the GPU, but on the CPU, control returns immediately. Without some
|
||||
// sort of intervention, the CPU could crank out frames much faster than
|
||||
// the presentation engine can display them.
|
||||
//
|
||||
// This is the intervention: if any submissions drew on this image, and
|
||||
// thus waited for `locked_swapchain_semaphores.acquire`, wait for all
|
||||
// of them to finish, thus ensuring that it's okay to pass `acquire` to
|
||||
// `vkAcquireNextImageKHR` again.
|
||||
swapchain.device.wait_for_fence(
|
||||
fence,
|
||||
locked_swapchain_semaphores.previously_used_submission_index,
|
||||
timeout_ns,
|
||||
)?;
|
||||
|
||||
// will block if no image is available
|
||||
let (index, suboptimal) = match unsafe {
|
||||
sc.functor
|
||||
.acquire_next_image(sc.raw, timeout_ns, wait_semaphore, vk::Fence::null())
|
||||
profiling::scope!("vkAcquireNextImageKHR");
|
||||
swapchain.functor.acquire_next_image(
|
||||
swapchain.raw,
|
||||
timeout_ns,
|
||||
locked_swapchain_semaphores.acquire,
|
||||
vk::Fence::null(),
|
||||
)
|
||||
} {
|
||||
// We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
|
||||
// See the comment in `Queue::present`.
|
||||
@ -1013,16 +1046,18 @@ impl crate::Surface for super::Surface {
|
||||
}
|
||||
};
|
||||
|
||||
sc.next_surface_index += 1;
|
||||
sc.next_surface_index %= sc.surface_semaphores.len();
|
||||
drop(locked_swapchain_semaphores);
|
||||
// We only advance the surface semaphores if we successfully acquired an image, otherwise
|
||||
// we should try to re-acquire using the same semaphores.
|
||||
swapchain.advance_surface_semaphores();
|
||||
|
||||
// special case for Intel Vulkan returning bizarre values (ugh)
|
||||
if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 {
|
||||
if swapchain.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 {
|
||||
return Err(crate::SurfaceError::Outdated);
|
||||
}
|
||||
|
||||
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209
|
||||
let raw_flags = if sc
|
||||
let raw_flags = if swapchain
|
||||
.raw_flags
|
||||
.contains(vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT)
|
||||
{
|
||||
@ -1034,20 +1069,20 @@ impl crate::Surface for super::Surface {
|
||||
let texture = super::SurfaceTexture {
|
||||
index,
|
||||
texture: super::Texture {
|
||||
raw: sc.images[index as usize],
|
||||
raw: swapchain.images[index as usize],
|
||||
drop_guard: None,
|
||||
block: None,
|
||||
usage: sc.config.usage,
|
||||
format: sc.config.format,
|
||||
usage: swapchain.config.usage,
|
||||
format: swapchain.config.format,
|
||||
raw_flags,
|
||||
copy_size: crate::CopyExtent {
|
||||
width: sc.config.extent.width,
|
||||
height: sc.config.extent.height,
|
||||
width: swapchain.config.extent.width,
|
||||
height: swapchain.config.extent.height,
|
||||
depth: 1,
|
||||
},
|
||||
view_formats: sc.view_formats.clone(),
|
||||
view_formats: swapchain.view_formats.clone(),
|
||||
},
|
||||
wait_semaphore,
|
||||
surface_semaphores: swapchain_semaphores_arc,
|
||||
};
|
||||
Ok(Some(crate::AcquiredSurfaceTexture {
|
||||
texture,
|
||||
|
@ -33,13 +33,11 @@ mod instance;
|
||||
|
||||
use std::{
|
||||
borrow::Borrow,
|
||||
collections::HashSet,
|
||||
ffi::{CStr, CString},
|
||||
fmt,
|
||||
fmt, mem,
|
||||
num::NonZeroU32,
|
||||
sync::{
|
||||
atomic::{AtomicIsize, Ordering},
|
||||
Arc,
|
||||
},
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
use arrayvec::ArrayVec;
|
||||
@ -147,6 +145,173 @@ pub struct Instance {
|
||||
shared: Arc<InstanceShared>,
|
||||
}
|
||||
|
||||
/// The semaphores needed to use one image in a swapchain.
|
||||
#[derive(Debug)]
|
||||
struct SwapchainImageSemaphores {
|
||||
/// A semaphore that is signaled when this image is safe for us to modify.
|
||||
///
|
||||
/// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
|
||||
/// image that we should use, that image may actually still be in use by the
|
||||
/// presentation engine, and is not yet safe to modify. However, that
|
||||
/// function does accept a semaphore that it will signal when the image is
|
||||
/// indeed safe to begin messing with.
|
||||
///
|
||||
/// This semaphore is:
|
||||
///
|
||||
/// - waited for by the first queue submission to operate on this image
|
||||
/// since it was acquired, and
|
||||
///
|
||||
/// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
|
||||
/// for us to use.
|
||||
///
|
||||
/// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
|
||||
acquire: vk::Semaphore,
|
||||
|
||||
/// True if the next command submission operating on this image should wait
|
||||
/// for [`acquire`].
|
||||
///
|
||||
/// We must wait for `acquire` before drawing to this swapchain image, but
|
||||
/// because `wgpu-hal` queue submissions are always strongly ordered, only
|
||||
/// the first submission that works with a swapchain image actually needs to
|
||||
/// wait. We set this flag when this image is acquired, and clear it the
|
||||
/// first time it's passed to [`Queue::submit`] as a surface texture.
|
||||
///
|
||||
/// [`acquire`]: SwapchainImageSemaphores::acquire
|
||||
/// [`Queue::submit`]: crate::Queue::submit
|
||||
should_wait_for_acquire: bool,
|
||||
|
||||
/// A pool of semaphores for ordering presentation after drawing.
|
||||
///
|
||||
/// The first [`present_index`] semaphores in this vector are:
|
||||
///
|
||||
/// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
|
||||
/// image, and
|
||||
///
|
||||
/// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
|
||||
/// this image, when the submission finishes execution.
|
||||
///
|
||||
/// This vector accumulates one semaphore per submission that writes to this
|
||||
/// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
|
||||
/// requires a semaphore to order it with respect to drawing commands, and
|
||||
/// we can't attach new completion semaphores to a command submission after
|
||||
/// it's been submitted. This means that, at submission time, we must create
|
||||
/// the semaphore we might need if the caller's next action is to enqueue a
|
||||
/// presentation of this image.
|
||||
///
|
||||
/// An alternative strategy would be for presentation to enqueue an empty
|
||||
/// submit, ordered relative to other submits in the usual way, and
|
||||
/// signaling a single presentation semaphore. But we suspect that submits
|
||||
/// are usually expensive enough, and semaphores usually cheap enough, that
|
||||
/// performance-sensitive users will avoid making many submits, so that the
|
||||
/// cost of accumulated semaphores will usually be less than the cost of an
|
||||
/// additional submit.
|
||||
///
|
||||
/// Only the first [`present_index`] semaphores in the vector are actually
|
||||
/// going to be signalled by submitted commands, and need to be waited for
|
||||
/// by the next present call. Any semaphores beyond that index were created
|
||||
/// for prior presents and are simply being retained for recycling.
|
||||
///
|
||||
/// [`present_index`]: SwapchainImageSemaphores::present_index
|
||||
/// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
|
||||
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
|
||||
present: Vec<vk::Semaphore>,
|
||||
|
||||
/// The number of semaphores in [`present`] to be signalled for this submission.
|
||||
///
|
||||
/// [`present`]: SwapchainImageSemaphores::present
|
||||
present_index: usize,
|
||||
|
||||
/// The fence value of the last command submission that wrote to this image.
|
||||
///
|
||||
/// The next time we try to acquire this image, we'll block until
|
||||
/// this submission finishes, proving that [`acquire`] is ready to
|
||||
/// pass to `vkAcquireNextImageKHR` again.
|
||||
///
|
||||
/// [`acquire`]: SwapchainImageSemaphores::acquire
|
||||
previously_used_submission_index: crate::FenceValue,
|
||||
}
|
||||
|
||||
impl SwapchainImageSemaphores {
|
||||
fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
|
||||
Ok(Self {
|
||||
acquire: device.new_binary_semaphore()?,
|
||||
should_wait_for_acquire: true,
|
||||
present: Vec::new(),
|
||||
present_index: 0,
|
||||
previously_used_submission_index: 0,
|
||||
})
|
||||
}
|
||||
|
||||
fn set_used_fence_value(&mut self, value: crate::FenceValue) {
|
||||
self.previously_used_submission_index = value;
|
||||
}
|
||||
|
||||
/// Return the semaphore that commands drawing to this image should wait for, if any.
|
||||
///
|
||||
/// This only returns `Some` once per acquisition; see
|
||||
/// [`SwapchainImageSemaphores::should_wait_for_acquire`] for details.
|
||||
fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
|
||||
if self.should_wait_for_acquire {
|
||||
self.should_wait_for_acquire = false;
|
||||
Some(self.acquire)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Return a semaphore that a submission that writes to this image should
|
||||
/// signal when it's done.
|
||||
///
|
||||
/// See [`SwapchainImageSemaphores::present`] for details.
|
||||
fn get_submit_signal_semaphore(
|
||||
&mut self,
|
||||
device: &DeviceShared,
|
||||
) -> Result<vk::Semaphore, crate::DeviceError> {
|
||||
// Try to recycle a semaphore we created for a previous presentation.
|
||||
let sem = match self.present.get(self.present_index) {
|
||||
Some(sem) => *sem,
|
||||
None => {
|
||||
let sem = device.new_binary_semaphore()?;
|
||||
self.present.push(sem);
|
||||
sem
|
||||
}
|
||||
};
|
||||
|
||||
self.present_index += 1;
|
||||
|
||||
Ok(sem)
|
||||
}
|
||||
|
||||
/// Return the semaphores that a presentation of this image should wait on.
|
||||
///
|
||||
/// Return a slice of semaphores that the call to [`vkQueueSubmit`] that
|
||||
/// ends this image's acquisition should wait for. See
|
||||
/// [`SwapchainImageSemaphores::present`] for details.
|
||||
///
|
||||
/// Reset `self` to be ready for the next acquisition cycle.
|
||||
///
|
||||
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
|
||||
fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
|
||||
let old_index = self.present_index;
|
||||
|
||||
// Since this marks the end of this acquire/draw/present cycle, take the
|
||||
// opportunity to reset `self` in preparation for the next acquisition.
|
||||
self.present_index = 0;
|
||||
self.should_wait_for_acquire = true;
|
||||
|
||||
&self.present[0..old_index]
|
||||
}
|
||||
|
||||
unsafe fn destroy(&self, device: &ash::Device) {
|
||||
unsafe {
|
||||
device.destroy_semaphore(self.acquire, None);
|
||||
for sem in &self.present {
|
||||
device.destroy_semaphore(*sem, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Swapchain {
|
||||
raw: vk::SwapchainKHR,
|
||||
raw_flags: vk::SwapchainCreateFlagsKHR,
|
||||
@ -157,9 +322,25 @@ struct Swapchain {
|
||||
view_formats: Vec<wgt::TextureFormat>,
|
||||
/// One wait semaphore per swapchain image. This will be associated with the
|
||||
/// surface texture, and later collected during submission.
|
||||
surface_semaphores: Vec<vk::Semaphore>,
|
||||
/// Current semaphore index to use when acquiring a surface.
|
||||
next_surface_index: usize,
|
||||
///
|
||||
/// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
|
||||
/// data into the surface texture, so submit/present can use it.
|
||||
surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
|
||||
/// The index of the next semaphore to use. Ideally we would use the same
|
||||
/// index as the image index, but we need to specify the semaphore as an argument
|
||||
/// to the acquire_next_image function which is what tells us which image to use.
|
||||
next_semaphore_index: usize,
|
||||
}
|
||||
|
||||
impl Swapchain {
|
||||
fn advance_surface_semaphores(&mut self) {
|
||||
let semaphore_count = self.surface_semaphores.len();
|
||||
self.next_semaphore_index = (self.next_semaphore_index + 1) % semaphore_count;
|
||||
}
|
||||
|
||||
fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
|
||||
self.surface_semaphores[self.next_semaphore_index].clone()
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Surface {
|
||||
@ -173,7 +354,7 @@ pub struct Surface {
|
||||
pub struct SurfaceTexture {
|
||||
index: u32,
|
||||
texture: Texture,
|
||||
wait_semaphore: vk::Semaphore,
|
||||
surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
|
||||
}
|
||||
|
||||
impl Borrow<Texture> for SurfaceTexture {
|
||||
@ -359,18 +540,87 @@ pub struct Device {
|
||||
render_doc: crate::auxil::renderdoc::RenderDoc,
|
||||
}
|
||||
|
||||
/// Semaphores for forcing queue submissions to run in order.
|
||||
///
|
||||
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
|
||||
/// ordered, then the first submission will finish on the GPU before the second
|
||||
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
|
||||
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
|
||||
/// and to signal when their execution is done.
|
||||
///
|
||||
/// Normally this can be done with a single semaphore, waited on and then
|
||||
/// signalled for each submission. At any given time there's exactly one
|
||||
/// submission that would signal the semaphore, and exactly one waiting on it,
|
||||
/// as Vulkan requires.
|
||||
///
|
||||
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
|
||||
/// hang if we use a single semaphore. The workaround is to alternate between
|
||||
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
|
||||
/// the workaround until, say, Oct 2026.
|
||||
///
|
||||
/// [`wgpu_hal::Queue`]: crate::Queue
|
||||
/// [`submit`]: crate::Queue::submit
|
||||
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
|
||||
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
|
||||
#[derive(Clone)]
|
||||
struct RelaySemaphores {
|
||||
/// The semaphore the next submission should wait on before beginning
|
||||
/// execution on the GPU. This is `None` for the first submission, which
|
||||
/// should not wait on anything at all.
|
||||
wait: Option<vk::Semaphore>,
|
||||
|
||||
/// The semaphore the next submission should signal when it has finished
|
||||
/// execution on the GPU.
|
||||
signal: vk::Semaphore,
|
||||
}
|
||||
|
||||
impl RelaySemaphores {
|
||||
fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
|
||||
Ok(Self {
|
||||
wait: None,
|
||||
signal: device.new_binary_semaphore()?,
|
||||
})
|
||||
}
|
||||
|
||||
/// Advances the semaphores, returning the semaphores that should be used for a submission.
|
||||
fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
|
||||
let old = self.clone();
|
||||
|
||||
// Build the state for the next submission.
|
||||
match self.wait {
|
||||
None => {
|
||||
// The `old` values describe the first submission to this queue.
|
||||
// The second submission should wait on `old.signal`, and then
|
||||
// signal a new semaphore which we'll create now.
|
||||
self.wait = Some(old.signal);
|
||||
self.signal = device.new_binary_semaphore()?;
|
||||
}
|
||||
Some(ref mut wait) => {
|
||||
// What this submission signals, the next should wait.
|
||||
mem::swap(wait, &mut self.signal);
|
||||
}
|
||||
};
|
||||
|
||||
Ok(old)
|
||||
}
|
||||
|
||||
/// Destroys the semaphores.
|
||||
unsafe fn destroy(&self, device: &ash::Device) {
|
||||
unsafe {
|
||||
if let Some(wait) = self.wait {
|
||||
device.destroy_semaphore(wait, None);
|
||||
}
|
||||
device.destroy_semaphore(self.signal, None);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub struct Queue {
|
||||
raw: vk::Queue,
|
||||
swapchain_fn: khr::swapchain::Device,
|
||||
device: Arc<DeviceShared>,
|
||||
family_index: u32,
|
||||
/// We use a redundant chain of semaphores to pass on the signal
|
||||
/// from submissions to the last present, since it's required by the
|
||||
/// specification.
|
||||
/// It would be correct to use a single semaphore there, but
|
||||
/// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508).
|
||||
relay_semaphores: [vk::Semaphore; 2],
|
||||
relay_index: AtomicIsize,
|
||||
relay_semaphores: Mutex<RelaySemaphores>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -702,42 +952,74 @@ impl crate::Queue for Queue {
|
||||
&self,
|
||||
command_buffers: &[&CommandBuffer],
|
||||
surface_textures: &[&SurfaceTexture],
|
||||
signal_fence: Option<(&mut Fence, crate::FenceValue)>,
|
||||
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
|
||||
) -> Result<(), crate::DeviceError> {
|
||||
let mut fence_raw = vk::Fence::null();
|
||||
|
||||
let mut wait_stage_masks = Vec::new();
|
||||
let mut wait_semaphores = Vec::new();
|
||||
let mut signal_semaphores = ArrayVec::<_, 2>::new();
|
||||
let mut signal_values = ArrayVec::<_, 2>::new();
|
||||
let mut signal_semaphores = Vec::new();
|
||||
let mut signal_values = Vec::new();
|
||||
|
||||
for &surface_texture in surface_textures {
|
||||
// Double check that the same swapchain image isn't being given to us multiple times,
|
||||
// as that will deadlock when we try to lock them all.
|
||||
debug_assert!(
|
||||
{
|
||||
let mut check = HashSet::with_capacity(surface_textures.len());
|
||||
// We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
|
||||
for st in surface_textures {
|
||||
check.insert(Arc::as_ptr(&st.surface_semaphores));
|
||||
}
|
||||
check.len() == surface_textures.len()
|
||||
},
|
||||
"More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
|
||||
);
|
||||
|
||||
let locked_swapchain_semaphores = surface_textures
|
||||
.iter()
|
||||
.map(|st| {
|
||||
st.surface_semaphores
|
||||
.try_lock()
|
||||
.expect("Failed to lock surface semaphore.")
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
for mut swapchain_semaphore in locked_swapchain_semaphores {
|
||||
swapchain_semaphore.set_used_fence_value(signal_value);
|
||||
|
||||
// If we're the first submission to operate on this image, wait on
|
||||
// its acquire semaphore, to make sure the presentation engine is
|
||||
// done with it.
|
||||
if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
|
||||
wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
|
||||
wait_semaphores.push(surface_texture.wait_semaphore);
|
||||
wait_semaphores.push(sem);
|
||||
}
|
||||
|
||||
let old_index = self.relay_index.load(Ordering::Relaxed);
|
||||
// Get a semaphore to signal when we're done writing to this surface
|
||||
// image. Presentation of this image will wait for this.
|
||||
let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
|
||||
signal_semaphores.push(signal_semaphore);
|
||||
signal_values.push(!0);
|
||||
}
|
||||
|
||||
let sem_index = if old_index >= 0 {
|
||||
// In order for submissions to be strictly ordered, we encode a dependency between each submission
|
||||
// using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
|
||||
let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
|
||||
|
||||
if let Some(sem) = semaphore_state.wait {
|
||||
wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
|
||||
wait_semaphores.push(self.relay_semaphores[old_index as usize]);
|
||||
(old_index as usize + 1) % self.relay_semaphores.len()
|
||||
} else {
|
||||
0
|
||||
};
|
||||
wait_semaphores.push(sem);
|
||||
}
|
||||
|
||||
signal_semaphores.push(self.relay_semaphores[sem_index]);
|
||||
signal_semaphores.push(semaphore_state.signal);
|
||||
signal_values.push(!0);
|
||||
|
||||
self.relay_index
|
||||
.store(sem_index as isize, Ordering::Relaxed);
|
||||
|
||||
if let Some((fence, value)) = signal_fence {
|
||||
fence.maintain(&self.device.raw)?;
|
||||
match *fence {
|
||||
// We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
|
||||
signal_fence.maintain(&self.device.raw)?;
|
||||
match *signal_fence {
|
||||
Fence::TimelineSemaphore(raw) => {
|
||||
signal_semaphores.push(raw);
|
||||
signal_values.push(!0);
|
||||
signal_values.push(value);
|
||||
signal_values.push(signal_value);
|
||||
}
|
||||
Fence::FencePool {
|
||||
ref mut active,
|
||||
@ -752,8 +1034,7 @@ impl crate::Queue for Queue {
|
||||
.create_fence(&vk::FenceCreateInfo::default(), None)?
|
||||
},
|
||||
};
|
||||
active.push((value, fence_raw));
|
||||
}
|
||||
active.push((signal_value, fence_raw));
|
||||
}
|
||||
}
|
||||
|
||||
@ -771,7 +1052,7 @@ impl crate::Queue for Queue {
|
||||
|
||||
let mut vk_timeline_info;
|
||||
|
||||
if !signal_values.is_empty() {
|
||||
if self.device.private_caps.timeline_semaphores {
|
||||
vk_timeline_info =
|
||||
vk::TimelineSemaphoreSubmitInfo::default().signal_semaphore_values(&signal_values);
|
||||
vk_info = vk_info.push_next(&mut vk_timeline_info);
|
||||
@ -793,19 +1074,14 @@ impl crate::Queue for Queue {
|
||||
) -> Result<(), crate::SurfaceError> {
|
||||
let mut swapchain = surface.swapchain.write();
|
||||
let ssc = swapchain.as_mut().unwrap();
|
||||
let mut swapchain_semaphores = texture.surface_semaphores.lock();
|
||||
|
||||
let swapchains = [ssc.raw];
|
||||
let image_indices = [texture.index];
|
||||
let mut vk_info = vk::PresentInfoKHR::default()
|
||||
let vk_info = vk::PresentInfoKHR::default()
|
||||
.swapchains(&swapchains)
|
||||
.image_indices(&image_indices);
|
||||
|
||||
let old_index = self.relay_index.swap(-1, Ordering::Relaxed);
|
||||
if old_index >= 0 {
|
||||
vk_info = vk_info.wait_semaphores(
|
||||
&self.relay_semaphores[old_index as usize..old_index as usize + 1],
|
||||
);
|
||||
}
|
||||
.image_indices(&image_indices)
|
||||
.wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());
|
||||
|
||||
let suboptimal = {
|
||||
profiling::scope!("vkQueuePresentKHR");
|
||||
|
Loading…
Reference in New Issue
Block a user