[hal/vk] Rework Submission and Surface Synchronization (#5681)

Fix two major synchronization issues in `wgpu_hal::vulkan`:

- Properly order queue command buffer submissions. Due to Mesa bugs, two semaphores are required even though the Vulkan spec says that only one should be necessary.

- Properly manage surface texture acquisition and presentation:

    - Acquiring a surface texture can return while the presentation engine is still displaying the texture. Applications must wait for a semaphore to be signaled before using the acquired texture.

    - Presenting a surface texture requires a semaphore to ensure that drawing is complete before presentation occurs.

Co-authored-by: Jim Blandy <jimb@red-bean.com>
This commit is contained in:
Connor Fitzgerald 2024-05-30 16:53:34 -04:00 committed by GitHub
parent 9b7a965667
commit c7458638d1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
18 changed files with 698 additions and 303 deletions

View File

@ -1499,7 +1499,7 @@ impl Global {
.raw
.as_ref()
.unwrap()
.submit(&refs, &submit_surface_textures, Some((fence, submit_index)))
.submit(&refs, &submit_surface_textures, (fence, submit_index))
.map_err(DeviceError::from)?;
}

View File

@ -154,17 +154,20 @@ impl Global {
parent_id: surface_id,
});
}
#[cfg(not(feature = "trace"))]
let _ = device;
let fence_guard = device.fence.read();
let fence = fence_guard.as_ref().unwrap();
let suf = A::surface_as_hal(surface.as_ref());
let (texture_id, status) = match unsafe {
suf.unwrap()
.acquire_texture(Some(std::time::Duration::from_millis(
FRAME_TIMEOUT_MS as u64,
)))
suf.unwrap().acquire_texture(
Some(std::time::Duration::from_millis(FRAME_TIMEOUT_MS as u64)),
fence,
)
} {
Ok(Some(ast)) => {
drop(fence_guard);
let texture_desc = wgt::TextureDescriptor {
label: (),
size: wgt::Extent3d {

View File

@ -22,7 +22,6 @@ const MAX_BUNNIES: usize = 1 << 20;
const BUNNY_SIZE: f32 = 0.15 * 256.0;
const GRAVITY: f32 = -9.8 * 100.0;
const MAX_VELOCITY: f32 = 750.0;
const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
const DESIRED_MAX_LATENCY: u32 = 2;
#[repr(C)]
@ -498,7 +497,7 @@ impl<A: hal::Api> Example<A> {
let mut fence = device.create_fence().unwrap();
let init_cmd = cmd_encoder.end_encoding().unwrap();
queue
.submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value)))
.submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
.unwrap();
device.wait(&fence, init_fence_value, !0).unwrap();
device.destroy_buffer(staging_buffer);
@ -550,7 +549,7 @@ impl<A: hal::Api> Example<A> {
{
let ctx = &mut self.contexts[self.context_index];
self.queue
.submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value)))
.submit(&[], &[], (&mut ctx.fence, ctx.fence_value))
.unwrap();
}
@ -650,7 +649,13 @@ impl<A: hal::Api> Example<A> {
let ctx = &mut self.contexts[self.context_index];
let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture };
let surface_tex = unsafe {
self.surface
.acquire_texture(None, &ctx.fence)
.unwrap()
.unwrap()
.texture
};
let target_barrier0 = hal::TextureBarrier {
texture: surface_tex.borrow(),
@ -718,7 +723,6 @@ impl<A: hal::Api> Example<A> {
}
ctx.frames_recorded += 1;
let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT;
let target_barrier1 = hal::TextureBarrier {
texture: surface_tex.borrow(),
@ -732,20 +736,18 @@ impl<A: hal::Api> Example<A> {
unsafe {
let cmd_buf = ctx.encoder.end_encoding().unwrap();
let fence_param = if do_fence {
Some((&mut ctx.fence, ctx.fence_value))
} else {
None
};
self.queue
.submit(&[&cmd_buf], &[&surface_tex], fence_param)
.submit(
&[&cmd_buf],
&[&surface_tex],
(&mut ctx.fence, ctx.fence_value),
)
.unwrap();
self.queue.present(&self.surface, surface_tex).unwrap();
ctx.used_cmd_bufs.push(cmd_buf);
ctx.used_views.push(surface_tex_view);
};
if do_fence {
log::debug!("Context switch from {}", self.context_index);
let old_fence_value = ctx.fence_value;
if self.contexts.len() == 1 {
@ -772,7 +774,6 @@ impl<A: hal::Api> Example<A> {
next.fence_value = old_fence_value + 1;
}
}
}
cfg_if::cfg_if! {
// Apple + Metal

View File

@ -156,6 +156,7 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
})
.unwrap()
};
let mut fence = unsafe { od.device.create_fence().unwrap() };
let rp_desc = hal::RenderPassDescriptor {
label: None,
extent: wgt::Extent3d {
@ -183,6 +184,6 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
encoder.begin_render_pass(&rp_desc);
encoder.end_render_pass();
let cmd_buf = encoder.end_encoding().unwrap();
od.queue.submit(&[&cmd_buf], &[], None).unwrap();
od.queue.submit(&[&cmd_buf], &[], (&mut fence, 0)).unwrap();
}
}

View File

@ -13,7 +13,6 @@ use std::{
};
use winit::window::WindowButtons;
const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
const DESIRED_MAX_LATENCY: u32 = 2;
/// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc)
@ -759,7 +758,7 @@ impl<A: hal::Api> Example<A> {
let mut fence = device.create_fence().unwrap();
let init_cmd = cmd_encoder.end_encoding().unwrap();
queue
.submit(&[&init_cmd], &[], Some((&mut fence, init_fence_value)))
.submit(&[&init_cmd], &[], (&mut fence, init_fence_value))
.unwrap();
device.wait(&fence, init_fence_value, !0).unwrap();
cmd_encoder.reset_all(iter::once(init_cmd));
@ -808,7 +807,13 @@ impl<A: hal::Api> Example<A> {
fn render(&mut self) {
let ctx = &mut self.contexts[self.context_index];
let surface_tex = unsafe { self.surface.acquire_texture(None).unwrap().unwrap().texture };
let surface_tex = unsafe {
self.surface
.acquire_texture(None, &ctx.fence)
.unwrap()
.unwrap()
.texture
};
let target_barrier0 = hal::TextureBarrier {
texture: surface_tex.borrow(),
@ -909,7 +914,6 @@ impl<A: hal::Api> Example<A> {
}
ctx.frames_recorded += 1;
let do_fence = ctx.frames_recorded > COMMAND_BUFFER_PER_CONTEXT;
let target_barrier1 = hal::TextureBarrier {
texture: surface_tex.borrow(),
@ -959,20 +963,18 @@ impl<A: hal::Api> Example<A> {
unsafe {
let cmd_buf = ctx.encoder.end_encoding().unwrap();
let fence_param = if do_fence {
Some((&mut ctx.fence, ctx.fence_value))
} else {
None
};
self.queue
.submit(&[&cmd_buf], &[&surface_tex], fence_param)
.submit(
&[&cmd_buf],
&[&surface_tex],
(&mut ctx.fence, ctx.fence_value),
)
.unwrap();
self.queue.present(&self.surface, surface_tex).unwrap();
ctx.used_cmd_bufs.push(cmd_buf);
ctx.used_views.push(surface_tex_view);
};
if do_fence {
log::info!("Context switch from {}", self.context_index);
let old_fence_value = ctx.fence_value;
if self.contexts.len() == 1 {
@ -998,14 +1000,13 @@ impl<A: hal::Api> Example<A> {
}
next.fence_value = old_fence_value + 1;
}
}
fn exit(mut self) {
unsafe {
{
let ctx = &mut self.contexts[self.context_index];
self.queue
.submit(&[], &[], Some((&mut ctx.fence, ctx.fence_value)))
.submit(&[], &[], (&mut ctx.fence, ctx.fence_value))
.unwrap();
}

View File

@ -857,6 +857,7 @@ impl crate::Surface for Surface {
unsafe fn acquire_texture(
&self,
timeout: Option<std::time::Duration>,
_fence: &Fence,
) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> {
let mut swapchain = self.swap_chain.write();
let sc = swapchain.as_mut().unwrap();
@ -895,7 +896,7 @@ impl crate::Queue for Queue {
&self,
command_buffers: &[&CommandBuffer],
_surface_textures: &[&Texture],
signal_fence: Option<(&mut Fence, crate::FenceValue)>,
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
) -> Result<(), crate::DeviceError> {
let mut temp_lists = self.temp_lists.lock();
temp_lists.clear();
@ -908,11 +909,9 @@ impl crate::Queue for Queue {
self.raw.execute_command_lists(&temp_lists);
}
if let Some((fence, value)) = signal_fence {
self.raw
.signal(&fence.raw, value)
.signal(&signal_fence.raw, signal_value)
.into_device_result("Signal fence")?;
}
// Note the lack of synchronization here between the main Direct queue
// and the dedicated presentation queue. This is automatically handled

View File

@ -75,6 +75,7 @@ impl crate::Surface for Context {
/// Stub acquisition for the empty backend.
///
/// Neither `timeout` nor `fence` is read; the body unconditionally returns
/// `Ok(None)`, so no surface texture is ever produced.
unsafe fn acquire_texture(
&self,
timeout: Option<std::time::Duration>,
fence: &Resource,
) -> Result<Option<crate::AcquiredSurfaceTexture<Api>>, crate::SurfaceError> {
Ok(None)
}
@ -114,7 +115,7 @@ impl crate::Queue for Context {
&self,
command_buffers: &[&Resource],
surface_textures: &[&Resource],
signal_fence: Option<(&mut Resource, crate::FenceValue)>,
signal_fence: (&mut Resource, crate::FenceValue),
) -> DeviceResult<()> {
Ok(())
}

View File

@ -1432,6 +1432,7 @@ impl crate::Surface for Surface {
unsafe fn acquire_texture(
&self,
_timeout_ms: Option<Duration>, //TODO
_fence: &super::Fence,
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
let swapchain = self.swapchain.read();
let sc = swapchain.as_ref().unwrap();

View File

@ -1740,7 +1740,7 @@ impl crate::Queue for super::Queue {
&self,
command_buffers: &[&super::CommandBuffer],
_surface_textures: &[&super::Texture],
signal_fence: Option<(&mut super::Fence, crate::FenceValue)>,
(signal_fence, signal_value): (&mut super::Fence, crate::FenceValue),
) -> Result<(), crate::DeviceError> {
let shared = Arc::clone(&self.shared);
let gl = &shared.context.lock();
@ -1774,12 +1774,10 @@ impl crate::Queue for super::Queue {
}
}
if let Some((fence, value)) = signal_fence {
fence.maintain(gl);
signal_fence.maintain(gl);
let sync = unsafe { gl.fence_sync(glow::SYNC_GPU_COMMANDS_COMPLETE, 0) }
.map_err(|_| crate::DeviceError::OutOfMemory)?;
fence.pending.push((value, sync));
}
signal_fence.pending.push((signal_value, sync));
Ok(())
}

View File

@ -427,6 +427,7 @@ impl crate::Surface for Surface {
unsafe fn acquire_texture(
&self,
_timeout_ms: Option<std::time::Duration>, //TODO
_fence: &super::Fence,
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
let swapchain = self.swapchain.read();
let sc = swapchain.as_ref().unwrap();

View File

@ -798,6 +798,7 @@ impl crate::Surface for Surface {
unsafe fn acquire_texture(
&self,
_timeout_ms: Option<Duration>,
_fence: &super::Fence,
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
let swapchain = self.swapchain.read();
let sc = swapchain.as_ref().unwrap();

View File

@ -459,44 +459,101 @@ pub trait Instance: Sized + WasmNotSendSync {
pub trait Surface: WasmNotSendSync {
type A: Api;
/// Configures the surface to use the given device.
/// Configure `self` to use `device`.
///
/// # Safety
///
/// - All gpu work that uses the surface must have been completed.
/// - All GPU work using `self` must have been completed.
/// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - All surfaces created using other devices must have been unconfigured before this call.
/// - The surface `self` must not currently be configured to use any other [`Device`].
unsafe fn configure(
&self,
device: &<Self::A as Api>::Device,
config: &SurfaceConfiguration,
) -> Result<(), SurfaceError>;
/// Unconfigures the surface on the given device.
/// Unconfigure `self` on `device`.
///
/// # Safety
///
/// - All gpu work that uses the surface must have been completed.
/// - All GPU work that uses `self` must have been completed.
/// - All [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - All [`Api::TextureView`]s derived from the [`AcquiredSurfaceTexture`]s must have been destroyed.
/// - The surface must have been configured on the given device.
/// - The surface `self` must have been configured on `device`.
unsafe fn unconfigure(&self, device: &<Self::A as Api>::Device);
/// Returns the next texture to be presented by the swapchain for drawing
/// Return the next texture to be presented by `self`, for the caller to draw on.
///
/// A `timeout` of `None` means to wait indefinitely, with no timeout.
/// On success, return an [`AcquiredSurfaceTexture`] representing the
/// texture into which the caller should draw the image to be displayed on
/// `self`.
///
/// If `timeout` elapses before `self` has a texture ready to be acquired,
/// return `Ok(None)`. If `timeout` is `None`, wait indefinitely, with no
/// timeout.
///
/// # Using an [`AcquiredSurfaceTexture`]
///
/// On success, this function returns an [`AcquiredSurfaceTexture`] whose
/// [`texture`] field is a [`SurfaceTexture`] from which the caller can
/// [`borrow`] a [`Texture`] to draw on. The [`AcquiredSurfaceTexture`] also
/// carries some metadata about that [`SurfaceTexture`].
///
/// All calls to [`Queue::submit`] that draw on that [`Texture`] must also
/// include the [`SurfaceTexture`] in the `surface_textures` argument.
///
/// When you are done drawing on the texture, you can display it on `self`
/// by passing the [`SurfaceTexture`] and `self` to [`Queue::present`].
///
/// If you do not wish to display the texture, you must pass the
/// [`SurfaceTexture`] to [`self.discard_texture`], so that it can be reused
/// by future acquisitions.
///
/// # Portability
///
/// Some backends can't support a timeout when acquiring a texture and
/// the timeout will be ignored.
/// Some backends can't support a timeout when acquiring a texture. On these
/// backends, `timeout` is ignored.
///
/// Returns `None` on timing out.
/// # Safety
///
/// - The surface `self` must currently be configured on some [`Device`].
///
/// - The `fence` argument must be the same [`Fence`] passed to all calls to
/// [`Queue::submit`] that used [`Texture`]s acquired from this surface.
///
/// - You may only have one texture acquired from `self` at a time. When
/// `acquire_texture` returns `Ok(Some(ast))`, you must pass the returned
/// [`SurfaceTexture`] `ast.texture` to either [`Queue::present`] or
/// [`Surface::discard_texture`] before calling `acquire_texture` again.
///
/// [`texture`]: AcquiredSurfaceTexture::texture
/// [`SurfaceTexture`]: Api::SurfaceTexture
/// [`borrow`]: std::borrow::Borrow::borrow
/// [`Texture`]: Api::Texture
/// [`Fence`]: Api::Fence
/// [`self.discard_texture`]: Surface::discard_texture
unsafe fn acquire_texture(
&self,
timeout: Option<std::time::Duration>,
fence: &<Self::A as Api>::Fence,
) -> Result<Option<AcquiredSurfaceTexture<Self::A>>, SurfaceError>;
/// Relinquish an acquired texture without presenting it.
///
/// After this call, the texture underlying [`SurfaceTexture`] may be
/// returned by subsequent calls to [`self.acquire_texture`].
///
/// # Safety
///
/// - The surface `self` must currently be configured on some [`Device`].
///
/// - `texture` must be a [`SurfaceTexture`] returned by a call to
/// [`self.acquire_texture`] that has not yet been passed to
/// [`Queue::present`].
///
/// [`SurfaceTexture`]: Api::SurfaceTexture
/// [`self.acquire_texture`]: Surface::acquire_texture
unsafe fn discard_texture(&self, texture: <Self::A as Api>::SurfaceTexture);
}
@ -762,17 +819,21 @@ pub trait Queue: WasmNotSendSync {
/// Submit `command_buffers` for execution on GPU.
///
/// If `signal_fence` is `Some(fence, value)`, update `fence` to `value`
/// when the operation is complete. See [`Fence`] for details.
/// Update the fence in `signal_fence` to its accompanying value when the
/// operation is complete. See [`Fence`] for details.
///
/// If two calls to `submit` on a single `Queue` occur in a particular order
/// (that is, they happen on the same thread, or on two threads that have
/// synchronized to establish an ordering), then the first submission's
/// commands all complete execution before any of the second submission's
/// commands begin. All results produced by one submission are visible to
/// the next.
/// A `wgpu_hal` queue is "single threaded": all command buffers are
/// executed in the order they're submitted, with each buffer able to see
/// previous buffers' results. Specifically:
///
/// Within a submission, command buffers execute in the order in which they
/// - If two calls to `submit` on a single `Queue` occur in a particular
/// order (that is, they happen on the same thread, or on two threads that
/// have synchronized to establish an ordering), then the first
/// submission's commands all complete execution before any of the second
/// submission's commands begin. All results produced by one submission
/// are visible to the next.
///
/// - Within a submission, command buffers execute in the order in which they
/// appear in `command_buffers`. All results produced by one buffer are
/// visible to the next.
///
@ -803,10 +864,16 @@ pub trait Queue: WasmNotSendSync {
/// - Every [`SurfaceTexture`][st] that any command in `command_buffers`
/// writes to must appear in the `surface_textures` argument.
///
/// - No [`SurfaceTexture`][st] may appear in the `surface_textures`
/// argument more than once.
///
/// - Each [`SurfaceTexture`][st] in `surface_textures` must be configured
/// for use with the [`Device`][d] associated with this [`Queue`],
/// typically by calling [`Surface::configure`].
///
/// - All calls to this function that include a given [`SurfaceTexture`][st]
/// in `surface_textures` must use the same [`Fence`].
///
/// [`Fence`]: Api::Fence
/// [cb]: Api::CommandBuffer
/// [ce]: Api::CommandEncoder
@ -819,7 +886,7 @@ pub trait Queue: WasmNotSendSync {
&self,
command_buffers: &[&<Self::A as Api>::CommandBuffer],
surface_textures: &[&<Self::A as Api>::SurfaceTexture],
signal_fence: Option<(&mut <Self::A as Api>::Fence, FenceValue)>,
signal_fence: (&mut <Self::A as Api>::Fence, FenceValue),
) -> Result<(), DeviceError>;
unsafe fn present(
&self,

View File

@ -377,14 +377,13 @@ impl crate::Queue for Queue {
&self,
command_buffers: &[&CommandBuffer],
_surface_textures: &[&SurfaceTexture],
signal_fence: Option<(&mut Fence, crate::FenceValue)>,
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
) -> Result<(), crate::DeviceError> {
objc::rc::autoreleasepool(|| {
let extra_command_buffer = match signal_fence {
Some((fence, value)) => {
let completed_value = Arc::clone(&fence.completed_value);
let extra_command_buffer = {
let completed_value = Arc::clone(&signal_fence.completed_value);
let block = block::ConcreteBlock::new(move |_cmd_buf| {
completed_value.store(value, atomic::Ordering::Release);
completed_value.store(signal_value, atomic::Ordering::Release);
})
.copy();
@ -400,15 +399,15 @@ impl crate::Queue for Queue {
raw.set_label("(wgpu internal) Signal");
raw.add_completed_handler(&block);
fence.maintain();
fence.pending_command_buffers.push((value, raw.to_owned()));
signal_fence.maintain();
signal_fence
.pending_command_buffers
.push((signal_value, raw.to_owned()));
// only return an extra one if it's extra
match command_buffers.last() {
Some(_) => None,
None => Some(raw),
}
}
None => None,
};
for cmd_buffer in command_buffers {

View File

@ -242,6 +242,7 @@ impl crate::Surface for super::Surface {
unsafe fn acquire_texture(
&self,
_timeout_ms: Option<std::time::Duration>, //TODO
_fence: &super::Fence,
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
let render_layer = self.render_layer.lock();
let (drawable, texture) = match autoreleasepool(|| {

View File

@ -3,11 +3,7 @@ use super::conv;
use ash::{amd, ext, khr, vk};
use parking_lot::Mutex;
use std::{
collections::BTreeMap,
ffi::CStr,
sync::{atomic::AtomicIsize, Arc},
};
use std::{collections::BTreeMap, ffi::CStr, sync::Arc};
fn depth_stencil_required_flags() -> vk::FormatFeatureFlags {
vk::FormatFeatureFlags::SAMPLED_IMAGE | vk::FormatFeatureFlags::DEPTH_STENCIL_ATTACHMENT
@ -1783,21 +1779,15 @@ impl super::Adapter {
render_passes: Mutex::new(Default::default()),
framebuffers: Mutex::new(Default::default()),
});
let mut relay_semaphores = [vk::Semaphore::null(); 2];
for sem in relay_semaphores.iter_mut() {
unsafe {
*sem = shared
.raw
.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)?
};
}
let relay_semaphores = super::RelaySemaphores::new(&shared)?;
let queue = super::Queue {
raw: raw_queue,
swapchain_fn,
device: Arc::clone(&shared),
family_index,
relay_semaphores,
relay_index: AtomicIsize::new(-1),
relay_semaphores: Mutex::new(relay_semaphores),
};
let mem_allocator = {

View File

@ -612,17 +612,16 @@ impl super::Device {
let images =
unsafe { functor.get_swapchain_images(raw) }.map_err(crate::DeviceError::from)?;
// NOTE: It's important that we define at least images.len() + 1 wait
// NOTE: It's important that we define at least images.len() wait
// semaphores, since we prospectively need to provide the call to
// acquire the next image with an unsignaled semaphore.
let surface_semaphores = (0..images.len() + 1)
.map(|_| unsafe {
self.shared
.raw
.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)
let surface_semaphores = (0..=images.len())
.map(|_| {
super::SwapchainImageSemaphores::new(&self.shared)
.map(Mutex::new)
.map(Arc::new)
})
.collect::<Result<Vec<_>, _>>()
.map_err(crate::DeviceError::from)?;
.collect::<Result<Vec<_>, _>>()?;
Ok(super::Swapchain {
raw,
@ -633,7 +632,7 @@ impl super::Device {
config: config.clone(),
view_formats: wgt_view_formats,
surface_semaphores,
next_surface_index: 0,
next_semaphore_index: 0,
})
}
@ -836,9 +835,12 @@ impl crate::Device for super::Device {
unsafe fn exit(self, queue: super::Queue) {
unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) };
unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) };
for &sem in queue.relay_semaphores.iter() {
unsafe { self.shared.raw.destroy_semaphore(sem, None) };
}
unsafe {
queue
.relay_semaphores
.into_inner()
.destroy(&self.shared.raw)
};
unsafe { self.shared.free_resources() };
}
@ -2055,54 +2057,7 @@ impl crate::Device for super::Device {
timeout_ms: u32,
) -> Result<bool, crate::DeviceError> {
let timeout_ns = timeout_ms as u64 * super::MILLIS_TO_NANOS;
match *fence {
super::Fence::TimelineSemaphore(raw) => {
let semaphores = [raw];
let values = [wait_value];
let vk_info = vk::SemaphoreWaitInfo::default()
.semaphores(&semaphores)
.values(&values);
let result = match self.shared.extension_fns.timeline_semaphore {
Some(super::ExtensionFn::Extension(ref ext)) => unsafe {
ext.wait_semaphores(&vk_info, timeout_ns)
},
Some(super::ExtensionFn::Promoted) => unsafe {
self.shared.raw.wait_semaphores(&vk_info, timeout_ns)
},
None => unreachable!(),
};
match result {
Ok(()) => Ok(true),
Err(vk::Result::TIMEOUT) => Ok(false),
Err(other) => Err(other.into()),
}
}
super::Fence::FencePool {
last_completed,
ref active,
free: _,
} => {
if wait_value <= last_completed {
Ok(true)
} else {
match active.iter().find(|&&(value, _)| value >= wait_value) {
Some(&(_, raw)) => {
match unsafe {
self.shared.raw.wait_for_fences(&[raw], true, timeout_ns)
} {
Ok(()) => Ok(true),
Err(vk::Result::TIMEOUT) => Ok(false),
Err(other) => Err(other.into()),
}
}
None => {
log::error!("No signals reached value {}", wait_value);
Err(crate::DeviceError::Lost)
}
}
}
}
}
self.shared.wait_for_fence(fence, wait_value, timeout_ns)
}
unsafe fn start_capture(&self) -> bool {
@ -2364,6 +2319,71 @@ impl crate::Device for super::Device {
}
}
impl super::DeviceShared {
/// Create a new semaphore from a default-initialized
/// `vk::SemaphoreCreateInfo`.
///
/// Any error returned by `create_semaphore` is converted into a
/// [`crate::DeviceError`].
pub(super) fn new_binary_semaphore(&self) -> Result<vk::Semaphore, crate::DeviceError> {
unsafe {
self.raw
.create_semaphore(&vk::SemaphoreCreateInfo::default(), None)
.map_err(crate::DeviceError::from)
}
}
/// Wait for `fence` to reach `wait_value`, giving up after `timeout_ns`
/// nanoseconds.
///
/// Returns `Ok(true)` when the wait succeeds, `Ok(false)` when the
/// underlying Vulkan wait reports `vk::Result::TIMEOUT`, and `Err` for any
/// other Vulkan error.
///
/// For a `FencePool` fence, returns `Ok(true)` immediately if `wait_value`
/// is not greater than `last_completed`. Otherwise, if no entry in `active`
/// has a value of at least `wait_value`, an error is logged and
/// `DeviceError::Lost` is returned.
pub(super) fn wait_for_fence(
&self,
fence: &super::Fence,
wait_value: crate::FenceValue,
timeout_ns: u64,
) -> Result<bool, crate::DeviceError> {
profiling::scope!("Device::wait");
match *fence {
super::Fence::TimelineSemaphore(raw) => {
let semaphores = [raw];
let values = [wait_value];
let vk_info = vk::SemaphoreWaitInfo::default()
.semaphores(&semaphores)
.values(&values);
// Call `wait_semaphores` through the extension table or the core
// device, depending on how the timeline semaphore functions were
// loaded.
let result = match self.extension_fns.timeline_semaphore {
Some(super::ExtensionFn::Extension(ref ext)) => unsafe {
ext.wait_semaphores(&vk_info, timeout_ns)
},
Some(super::ExtensionFn::Promoted) => unsafe {
self.raw.wait_semaphores(&vk_info, timeout_ns)
},
// A `TimelineSemaphore` fence without timeline semaphore
// functions is treated as impossible.
None => unreachable!(),
};
match result {
Ok(()) => Ok(true),
Err(vk::Result::TIMEOUT) => Ok(false),
Err(other) => Err(other.into()),
}
}
super::Fence::FencePool {
last_completed,
ref active,
free: _,
} => {
// The requested value is already recorded as completed, so no
// Vulkan call is needed.
if wait_value <= last_completed {
Ok(true)
} else {
// Wait on the first entry in `active` whose value is at least
// `wait_value`.
match active.iter().find(|&&(value, _)| value >= wait_value) {
Some(&(_, raw)) => {
match unsafe { self.raw.wait_for_fences(&[raw], true, timeout_ns) } {
Ok(()) => Ok(true),
Err(vk::Result::TIMEOUT) => Ok(false),
Err(other) => Err(other.into()),
}
}
None => {
log::error!("No signals reached value {}", wait_value);
Err(crate::DeviceError::Lost)
}
}
}
}
}
}
}
impl From<gpu_alloc::AllocationError> for crate::DeviceError {
fn from(error: gpu_alloc::AllocationError) -> Self {
use gpu_alloc::AllocationError as Ae;

View File

@ -164,10 +164,14 @@ impl super::Swapchain {
let _ = unsafe { device.device_wait_idle() };
};
// We cannot take this by value, as the function returns `self`.
for semaphore in self.surface_semaphores.drain(..) {
unsafe {
device.destroy_semaphore(semaphore, None);
}
let arc_removed = Arc::into_inner(semaphore).expect(
"Trying to destroy a SurfaceSemaphores that is still in use by a SurfaceTexture",
);
let mutex_removed = arc_removed.into_inner();
unsafe { mutex_removed.destroy(device) };
}
self
@ -966,9 +970,10 @@ impl crate::Surface for super::Surface {
unsafe fn acquire_texture(
&self,
timeout: Option<std::time::Duration>,
fence: &super::Fence,
) -> Result<Option<crate::AcquiredSurfaceTexture<super::Api>>, crate::SurfaceError> {
let mut swapchain = self.swapchain.write();
let sc = swapchain.as_mut().unwrap();
let swapchain = swapchain.as_mut().unwrap();
let mut timeout_ns = match timeout {
Some(duration) => duration.as_nanos() as u64,
@ -988,12 +993,40 @@ impl crate::Surface for super::Surface {
timeout_ns = u64::MAX;
}
let wait_semaphore = sc.surface_semaphores[sc.next_surface_index];
let swapchain_semaphores_arc = swapchain.get_surface_semaphores();
// Nothing should be using this, so we don't block, but panic if we fail to lock.
let locked_swapchain_semaphores = swapchain_semaphores_arc
.try_lock()
.expect("Failed to lock a SwapchainSemaphores.");
// Wait for all commands writing to the previously acquired image to
// complete.
//
// Almost all the steps in the usual acquire-draw-present flow are
// asynchronous: they get something started on the presentation engine
// or the GPU, but on the CPU, control returns immediately. Without some
// sort of intervention, the CPU could crank out frames much faster than
// the presentation engine can display them.
//
// This is the intervention: if any submissions drew on this image, and
// thus waited for `locked_swapchain_semaphores.acquire`, wait for all
// of them to finish, thus ensuring that it's okay to pass `acquire` to
// `vkAcquireNextImageKHR` again.
swapchain.device.wait_for_fence(
fence,
locked_swapchain_semaphores.previously_used_submission_index,
timeout_ns,
)?;
// will block if no image is available
let (index, suboptimal) = match unsafe {
sc.functor
.acquire_next_image(sc.raw, timeout_ns, wait_semaphore, vk::Fence::null())
profiling::scope!("vkAcquireNextImageKHR");
swapchain.functor.acquire_next_image(
swapchain.raw,
timeout_ns,
locked_swapchain_semaphores.acquire,
vk::Fence::null(),
)
} {
// We treat `VK_SUBOPTIMAL_KHR` as `VK_SUCCESS` on Android.
// See the comment in `Queue::present`.
@ -1013,16 +1046,18 @@ impl crate::Surface for super::Surface {
}
};
sc.next_surface_index += 1;
sc.next_surface_index %= sc.surface_semaphores.len();
drop(locked_swapchain_semaphores);
// We only advance the surface semaphores if we successfully acquired an image, otherwise
// we should try to re-acquire using the same semaphores.
swapchain.advance_surface_semaphores();
// special case for Intel Vulkan returning bizarre values (ugh)
if sc.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 {
if swapchain.device.vendor_id == crate::auxil::db::intel::VENDOR && index > 0x100 {
return Err(crate::SurfaceError::Outdated);
}
// https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkRenderPassBeginInfo.html#VUID-VkRenderPassBeginInfo-framebuffer-03209
let raw_flags = if sc
let raw_flags = if swapchain
.raw_flags
.contains(vk::SwapchainCreateFlagsKHR::MUTABLE_FORMAT)
{
@ -1034,20 +1069,20 @@ impl crate::Surface for super::Surface {
let texture = super::SurfaceTexture {
index,
texture: super::Texture {
raw: sc.images[index as usize],
raw: swapchain.images[index as usize],
drop_guard: None,
block: None,
usage: sc.config.usage,
format: sc.config.format,
usage: swapchain.config.usage,
format: swapchain.config.format,
raw_flags,
copy_size: crate::CopyExtent {
width: sc.config.extent.width,
height: sc.config.extent.height,
width: swapchain.config.extent.width,
height: swapchain.config.extent.height,
depth: 1,
},
view_formats: sc.view_formats.clone(),
view_formats: swapchain.view_formats.clone(),
},
wait_semaphore,
surface_semaphores: swapchain_semaphores_arc,
};
Ok(Some(crate::AcquiredSurfaceTexture {
texture,

View File

@ -33,13 +33,11 @@ mod instance;
use std::{
borrow::Borrow,
collections::HashSet,
ffi::{CStr, CString},
fmt,
fmt, mem,
num::NonZeroU32,
sync::{
atomic::{AtomicIsize, Ordering},
Arc,
},
sync::Arc,
};
use arrayvec::ArrayVec;
@ -147,6 +145,173 @@ pub struct Instance {
shared: Arc<InstanceShared>,
}
/// The semaphores needed to use one image in a swapchain.
#[derive(Debug)]
struct SwapchainImageSemaphores {
/// A semaphore that is signaled when this image is safe for us to modify.
///
/// When [`vkAcquireNextImageKHR`] returns the index of the next swapchain
/// image that we should use, that image may actually still be in use by the
/// presentation engine, and is not yet safe to modify. However, that
/// function does accept a semaphore that it will signal when the image is
/// indeed safe to begin messing with.
///
/// This semaphore is:
///
/// - waited for by the first queue submission to operate on this image
/// since it was acquired, and
///
/// - signaled by [`vkAcquireNextImageKHR`] when the acquired image is ready
/// for us to use.
///
/// [`vkAcquireNextImageKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkAcquireNextImageKHR
acquire: vk::Semaphore,
/// True if the next command submission operating on this image should wait
/// for [`acquire`].
///
/// We must wait for `acquire` before drawing to this swapchain image, but
/// because `wgpu-hal` queue submissions are always strongly ordered, only
/// the first submission that works with a swapchain image actually needs to
/// wait. We set this flag when this image is acquired, and clear it the
/// first time it's passed to [`Queue::submit`] as a surface texture.
///
/// [`acquire`]: SwapchainImageSemaphores::acquire
/// [`Queue::submit`]: crate::Queue::submit
should_wait_for_acquire: bool,
/// A pool of semaphores for ordering presentation after drawing.
///
/// The first [`present_index`] semaphores in this vector are:
///
/// - all waited on by the call to [`vkQueuePresentKHR`] that presents this
/// image, and
///
/// - each signaled by some [`vkQueueSubmit`] queue submission that draws to
/// this image, when the submission finishes execution.
///
/// This vector accumulates one semaphore per submission that writes to this
/// image. This is awkward, but hard to avoid: [`vkQueuePresentKHR`]
/// requires a semaphore to order it with respect to drawing commands, and
/// we can't attach new completion semaphores to a command submission after
/// it's been submitted. This means that, at submission time, we must create
/// the semaphore we might need if the caller's next action is to enqueue a
/// presentation of this image.
///
/// An alternative strategy would be for presentation to enqueue an empty
/// submit, ordered relative to other submits in the usual way, and
/// signaling a single presentation semaphore. But we suspect that submits
/// are usually expensive enough, and semaphores usually cheap enough, that
/// performance-sensitive users will avoid making many submits, so that the
/// cost of accumulated semaphores will usually be less than the cost of an
/// additional submit.
///
/// Only the first [`present_index`] semaphores in the vector are actually
/// going to be signaled by submitted commands, and need to be waited for
/// by the next present call. Any semaphores beyond that index were created
/// for prior presents and are simply being retained for recycling.
///
/// [`present_index`]: SwapchainImageSemaphores::present_index
/// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
present: Vec<vk::Semaphore>,
/// The number of semaphores in [`present`] to be signaled for this submission.
///
/// [`present`]: SwapchainImageSemaphores::present
present_index: usize,
/// The fence value of the last command submission that wrote to this image.
///
/// The next time we try to acquire this image, we'll block until
/// this submission finishes, proving that [`acquire`] is ready to
/// pass to `vkAcquireNextImageKHR` again.
///
/// [`acquire`]: SwapchainImageSemaphores::acquire
previously_used_submission_index: crate::FenceValue,
}
impl SwapchainImageSemaphores {
    /// Build the semaphore state for one swapchain image.
    ///
    /// Creates the `acquire` semaphore up front; the `present` pool starts
    /// empty and grows on demand in
    /// [`get_submit_signal_semaphore`](Self::get_submit_signal_semaphore).
    ///
    /// # Errors
    ///
    /// Propagates the error from `DeviceShared::new_binary_semaphore`.
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let acquire = device.new_binary_semaphore()?;
        Ok(Self {
            acquire,
            should_wait_for_acquire: true,
            present: Vec::new(),
            present_index: 0,
            previously_used_submission_index: 0,
        })
    }

    /// Record `value` as the fence value of the most recent submission that
    /// used this image (stored in `previously_used_submission_index`).
    fn set_used_fence_value(&mut self, value: crate::FenceValue) {
        self.previously_used_submission_index = value;
    }

    /// Return the semaphore that commands drawing to this image must wait
    /// for, if any.
    ///
    /// Yields `Some(acquire)` only while `should_wait_for_acquire` is set, and
    /// clears the flag in the same call, so at most one caller per
    /// acquisition cycle receives the semaphore.
    fn get_acquire_wait_semaphore(&mut self) -> Option<vk::Semaphore> {
        // Read the flag and clear it in one step.
        let must_wait = std::mem::replace(&mut self.should_wait_for_acquire, false);
        must_wait.then_some(self.acquire)
    }

    /// Return a semaphore that a submission writing to this image should
    /// signal when it finishes.
    ///
    /// Hands out `present[present_index]`, creating and appending a new
    /// binary semaphore when the pool has no entry at that index, then bumps
    /// `present_index`.
    ///
    /// # Errors
    ///
    /// Propagates the error from `DeviceShared::new_binary_semaphore`; in that
    /// case neither `present` nor `present_index` has been modified.
    fn get_submit_signal_semaphore(
        &mut self,
        device: &DeviceShared,
    ) -> Result<vk::Semaphore, crate::DeviceError> {
        // Prefer a semaphore left over from an earlier presentation cycle.
        let recycled = self.present.get(self.present_index).copied();
        let sem = if let Some(existing) = recycled {
            existing
        } else {
            let fresh = device.new_binary_semaphore()?;
            self.present.push(fresh);
            fresh
        };
        self.present_index += 1;
        Ok(sem)
    }

    /// Return the semaphores that a presentation of this image should wait on.
    ///
    /// The returned slice is the first `present_index` entries of `present`,
    /// i.e. the semaphores handed out by
    /// [`get_submit_signal_semaphore`](Self::get_submit_signal_semaphore)
    /// since the last call to this method. It is meant to be passed as the
    /// wait list of the [`vkQueuePresentKHR`] call that presents this image.
    ///
    /// As a side effect this resets `self` for the next acquisition cycle:
    /// `present_index` goes back to zero and `should_wait_for_acquire` is set
    /// again.
    ///
    /// [`vkQueuePresentKHR`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueuePresentKHR
    fn get_present_wait_semaphores(&mut self) -> &[vk::Semaphore] {
        // This call marks the end of an acquire/draw/present cycle, so rewind
        // the bookkeeping while remembering how many semaphores were in use.
        let in_use = std::mem::replace(&mut self.present_index, 0);
        self.should_wait_for_acquire = true;
        &self.present[..in_use]
    }

    /// Destroy the `acquire` semaphore and every semaphore in the `present`
    /// pool (including recycled ones beyond `present_index`).
    ///
    /// # Safety
    ///
    /// Forwards to `vkDestroySemaphore`; the caller must uphold that
    /// function's requirements for every semaphore owned by `self`
    /// (presumably: none of them is still in use by the device).
    unsafe fn destroy(&self, device: &ash::Device) {
        let owned = std::iter::once(&self.acquire).chain(self.present.iter());
        for &sem in owned {
            unsafe {
                device.destroy_semaphore(sem, None);
            }
        }
    }
}
struct Swapchain {
raw: vk::SwapchainKHR,
raw_flags: vk::SwapchainCreateFlagsKHR,
@ -157,9 +322,25 @@ struct Swapchain {
view_formats: Vec<wgt::TextureFormat>,
/// One wait semaphore per swapchain image. This will be associated with the
/// surface texture, and later collected during submission.
surface_semaphores: Vec<vk::Semaphore>,
/// Current semaphore index to use when acquiring a surface.
next_surface_index: usize,
///
/// We need this to be `Arc<Mutex<>>` because we need to be able to pass this
/// data into the surface texture, so submit/present can use it.
surface_semaphores: Vec<Arc<Mutex<SwapchainImageSemaphores>>>,
/// The index of the next semaphore to use. Ideally we would use the same
/// index as the image index, but we need to specify the semaphore as an argument
/// to the acquire_next_image function which is what tells us which image to use.
next_semaphore_index: usize,
}
impl Swapchain {
    /// Step `next_semaphore_index` to the following entry of
    /// `surface_semaphores`, wrapping back to zero after the last one.
    fn advance_surface_semaphores(&mut self) {
        let following = self.next_semaphore_index + 1;
        self.next_semaphore_index = following % self.surface_semaphores.len();
    }

    /// Return a new shared handle to the semaphore set currently selected by
    /// `next_semaphore_index`.
    fn get_surface_semaphores(&self) -> Arc<Mutex<SwapchainImageSemaphores>> {
        let current = &self.surface_semaphores[self.next_semaphore_index];
        Arc::clone(current)
    }
}
pub struct Surface {
@ -173,7 +354,7 @@ pub struct Surface {
/// A swapchain image handed out by a surface, together with the
/// synchronization state needed to draw to it and present it.
// NOTE(review): this text is a flattened diff, so both the old
// `wait_semaphore` field and its replacement `surface_semaphores` appear
// here. Confirm that only one of them exists in the real struct.
pub struct SurfaceTexture {
/// Image index within the swapchain; `Queue::present` passes it to
/// `vk::PresentInfoKHR::image_indices`.
index: u32,
/// The texture wrapping the swapchain image.
texture: Texture,
/// Semaphore that the earlier form of `Queue::submit` pushed onto its wait
/// list for each surface texture.
wait_semaphore: vk::Semaphore,
/// Shared per-image semaphore state. `Queue::submit` locks it to obtain
/// the acquire-wait and submit-signal semaphores, and `Queue::present`
/// locks it to obtain the semaphores presentation must wait on.
surface_semaphores: Arc<Mutex<SwapchainImageSemaphores>>,
}
impl Borrow<Texture> for SurfaceTexture {
@ -359,18 +540,87 @@ pub struct Device {
render_doc: crate::auxil::renderdoc::RenderDoc,
}
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
/// ordered, then the first submission will finish on the GPU before the second
/// submission begins. To get this behavior on Vulkan we need to pass semaphores
/// to [`vkQueueSubmit`] for the commands to wait on before beginning execution,
/// and to signal when their execution is done.
///
/// Normally this can be done with a single semaphore, waited on and then
/// signalled for each submission. At any given time there's exactly one
/// submission that would signal the semaphore, and exactly one waiting on it,
/// as Vulkan requires.
///
/// However, as of Oct 2021, bug [#5508] in the Mesa ANV drivers caused them to
/// hang if we use a single semaphore. The workaround is to alternate between
/// two semaphores. The bug has been fixed in Mesa, but we should probably keep
/// the workaround until, say, Oct 2026.
///
/// A value of this type describes the pair of semaphores for *one*
/// submission: what to wait on and what to signal. `RelaySemaphores::advance`
/// returns the pair for the current submission and updates the stored state
/// to describe the next one.
///
/// [`wgpu_hal::Queue`]: crate::Queue
/// [`submit`]: crate::Queue::submit
/// [`vkQueueSubmit`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#vkQueueSubmit
/// [#5508]: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508
// `Clone` is needed because `advance` returns a snapshot of the state taken
// before it is updated.
#[derive(Clone)]
struct RelaySemaphores {
/// The semaphore the next submission should wait on before beginning
/// execution on the GPU. This is `None` for the first submission, which
/// should not wait on anything at all.
///
/// Once `advance` has succeeded, this is always `Some`.
wait: Option<vk::Semaphore>,
/// The semaphore the next submission should signal when it has finished
/// execution on the GPU.
signal: vk::Semaphore,
}
impl RelaySemaphores {
    /// Create the relay state for a queue that has not submitted anything yet.
    ///
    /// The first submission has nothing to wait on, so `wait` starts as `None`
    /// and only the `signal` semaphore is created.
    ///
    /// # Errors
    ///
    /// Propagates the error from `DeviceShared::new_binary_semaphore`.
    fn new(device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        Ok(Self {
            wait: None,
            signal: device.new_binary_semaphore()?,
        })
    }

    /// Advances the semaphores, returning the semaphores that should be used for a submission.
    ///
    /// The returned value is a snapshot of `self` as it was on entry: the
    /// caller's submission should wait on its `wait` (if any) and signal its
    /// `signal`. `self` is updated to describe the *following* submission.
    ///
    /// # Errors
    ///
    /// Propagates the error from `DeviceShared::new_binary_semaphore`, which is
    /// only called on the first advance. On error `self` is left unchanged.
    fn advance(&mut self, device: &DeviceShared) -> Result<Self, crate::DeviceError> {
        let old = self.clone();

        // Build the state for the next submission.
        match self.wait {
            None => {
                // The `old` values describe the first submission to this queue.
                // The second submission should wait on `old.signal`, and then
                // signal a new semaphore.
                //
                // Create that semaphore *before* touching `self`: if creation
                // fails after `wait` has been set, `wait` and `signal` would
                // both hold the same handle, so a later submission would wait
                // on and signal one semaphore, and `destroy` would destroy it
                // twice.
                let next_signal = device.new_binary_semaphore()?;
                self.wait = Some(old.signal);
                self.signal = next_signal;
            }
            Some(ref mut wait) => {
                // What this submission signals, the next should wait.
                mem::swap(wait, &mut self.signal);
            }
        }

        Ok(old)
    }

    /// Destroys the semaphores.
    ///
    /// Destroys `wait` (when present) and then `signal`.
    ///
    /// # Safety
    ///
    /// Forwards to `vkDestroySemaphore`; the caller must uphold that
    /// function's requirements for both semaphores (presumably: no submitted
    /// work still references them).
    unsafe fn destroy(&self, device: &ash::Device) {
        unsafe {
            if let Some(wait) = self.wait {
                device.destroy_semaphore(wait, None);
            }
            device.destroy_semaphore(self.signal, None);
        }
    }
}
/// A Vulkan queue plus the state used to order submissions on it.
// NOTE(review): this text is a flattened diff, so both the old relay fields
// (`relay_semaphores: [vk::Semaphore; 2]` and `relay_index`) and the new
// `relay_semaphores: Mutex<RelaySemaphores>` appear here. Two fields with the
// same name cannot coexist; confirm that only the new one remains.
pub struct Queue {
/// The raw Vulkan queue handle.
raw: vk::Queue,
/// Loaded `VK_KHR_swapchain` device-level functions; presumably used to
/// issue `vkQueuePresentKHR` (confirm against `present`).
swapchain_fn: khr::swapchain::Device,
/// Shared device state. `submit` uses it to create semaphores, maintain the
/// fence, and query `private_caps.timeline_semaphores`.
device: Arc<DeviceShared>,
/// Presumably the index of the queue family this queue belongs to; not
/// used in the code visible here.
family_index: u32,
/// We use a redundant chain of semaphores to pass on the signal
/// from submissions to the last present, since it's required by the
/// specification.
/// It would be correct to use a single semaphore there, but
/// [Intel hangs in `anv_queue_finish`](https://gitlab.freedesktop.org/mesa/mesa/-/issues/5508).
relay_semaphores: [vk::Semaphore; 2],
/// Index into the two-element `relay_semaphores` array of the semaphore
/// signalled by the most recent submission. A negative value means no
/// relay semaphore is pending: `submit` only waits when the index is
/// `>= 0`, and `present` swaps in `-1`.
relay_index: AtomicIsize,
/// Wait/signal semaphore pair used to order submissions. `submit` locks
/// this and calls `RelaySemaphores::advance` once per submission.
relay_semaphores: Mutex<RelaySemaphores>,
}
#[derive(Debug)]
@ -702,42 +952,74 @@ impl crate::Queue for Queue {
&self,
command_buffers: &[&CommandBuffer],
surface_textures: &[&SurfaceTexture],
signal_fence: Option<(&mut Fence, crate::FenceValue)>,
(signal_fence, signal_value): (&mut Fence, crate::FenceValue),
) -> Result<(), crate::DeviceError> {
let mut fence_raw = vk::Fence::null();
let mut wait_stage_masks = Vec::new();
let mut wait_semaphores = Vec::new();
let mut signal_semaphores = ArrayVec::<_, 2>::new();
let mut signal_values = ArrayVec::<_, 2>::new();
let mut signal_semaphores = Vec::new();
let mut signal_values = Vec::new();
for &surface_texture in surface_textures {
// Double check that the same swapchain image isn't being given to us multiple times,
// as that will deadlock when we try to lock them all.
debug_assert!(
{
let mut check = HashSet::with_capacity(surface_textures.len());
// We compare the Arcs by pointer, as Eq isn't well defined for SurfaceSemaphores.
for st in surface_textures {
check.insert(Arc::as_ptr(&st.surface_semaphores));
}
check.len() == surface_textures.len()
},
"More than one surface texture is being used from the same swapchain. This will cause a deadlock in release."
);
let locked_swapchain_semaphores = surface_textures
.iter()
.map(|st| {
st.surface_semaphores
.try_lock()
.expect("Failed to lock surface semaphore.")
})
.collect::<Vec<_>>();
for mut swapchain_semaphore in locked_swapchain_semaphores {
swapchain_semaphore.set_used_fence_value(signal_value);
// If we're the first submission to operate on this image, wait on
// its acquire semaphore, to make sure the presentation engine is
// done with it.
if let Some(sem) = swapchain_semaphore.get_acquire_wait_semaphore() {
wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
wait_semaphores.push(surface_texture.wait_semaphore);
wait_semaphores.push(sem);
}
let old_index = self.relay_index.load(Ordering::Relaxed);
// Get a semaphore to signal when we're done writing to this surface
// image. Presentation of this image will wait for this.
let signal_semaphore = swapchain_semaphore.get_submit_signal_semaphore(&self.device)?;
signal_semaphores.push(signal_semaphore);
signal_values.push(!0);
}
let sem_index = if old_index >= 0 {
// In order for submissions to be strictly ordered, we encode a dependency between each submission
// using a pair of semaphores. This adds a wait if it is needed, and signals the next semaphore.
let semaphore_state = self.relay_semaphores.lock().advance(&self.device)?;
if let Some(sem) = semaphore_state.wait {
wait_stage_masks.push(vk::PipelineStageFlags::TOP_OF_PIPE);
wait_semaphores.push(self.relay_semaphores[old_index as usize]);
(old_index as usize + 1) % self.relay_semaphores.len()
} else {
0
};
wait_semaphores.push(sem);
}
signal_semaphores.push(self.relay_semaphores[sem_index]);
signal_semaphores.push(semaphore_state.signal);
signal_values.push(!0);
self.relay_index
.store(sem_index as isize, Ordering::Relaxed);
if let Some((fence, value)) = signal_fence {
fence.maintain(&self.device.raw)?;
match *fence {
// We need to signal our wgpu::Fence if we have one, this adds it to the signal list.
signal_fence.maintain(&self.device.raw)?;
match *signal_fence {
Fence::TimelineSemaphore(raw) => {
signal_semaphores.push(raw);
signal_values.push(!0);
signal_values.push(value);
signal_values.push(signal_value);
}
Fence::FencePool {
ref mut active,
@ -752,8 +1034,7 @@ impl crate::Queue for Queue {
.create_fence(&vk::FenceCreateInfo::default(), None)?
},
};
active.push((value, fence_raw));
}
active.push((signal_value, fence_raw));
}
}
@ -771,7 +1052,7 @@ impl crate::Queue for Queue {
let mut vk_timeline_info;
if !signal_values.is_empty() {
if self.device.private_caps.timeline_semaphores {
vk_timeline_info =
vk::TimelineSemaphoreSubmitInfo::default().signal_semaphore_values(&signal_values);
vk_info = vk_info.push_next(&mut vk_timeline_info);
@ -793,19 +1074,14 @@ impl crate::Queue for Queue {
) -> Result<(), crate::SurfaceError> {
let mut swapchain = surface.swapchain.write();
let ssc = swapchain.as_mut().unwrap();
let mut swapchain_semaphores = texture.surface_semaphores.lock();
let swapchains = [ssc.raw];
let image_indices = [texture.index];
let mut vk_info = vk::PresentInfoKHR::default()
let vk_info = vk::PresentInfoKHR::default()
.swapchains(&swapchains)
.image_indices(&image_indices);
let old_index = self.relay_index.swap(-1, Ordering::Relaxed);
if old_index >= 0 {
vk_info = vk_info.wait_semaphores(
&self.relay_semaphores[old_index as usize..old_index as usize + 1],
);
}
.image_indices(&image_indices)
.wait_semaphores(swapchain_semaphores.get_present_wait_semaphores());
let suboptimal = {
profiling::scope!("vkQueuePresentKHR");