[hal] Document Api::Fence, its users, and its Vulkan impl. (#5618)

Also, rename `wgpu_hal::vulkan::Fence::check_active` argument to `last_completed`, and explain better its rationale.
2024-11-21 22:33:49 +00:00 · 2024-04-27 22:46:06 -07:00 · 2024-04-27 22:46:06 -07:00 · 1ea96391ea
commit 1ea96391ea
parent 4af2e7b8fb
2 changed files with 107 additions and 7 deletions
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@ -406,6 +406,24 @@ pub trait Api: Clone + fmt::Debug + Sized {
    type TextureView: fmt::Debug + WasmNotSendSync;
    type Sampler: fmt::Debug + WasmNotSendSync;
    type QuerySet: fmt::Debug + WasmNotSendSync;
+
+    /// A value you can block on to wait for something to finish.
+    ///
+    /// A `Fence` holds a monotonically increasing [`FenceValue`]. You can call
+    /// [`Device::wait`] to block until a fence reaches or passes a value you
+    /// choose. [`Queue::submit`] can take a `Fence` and a [`FenceValue`] to
+    /// store in it when the submitted work is complete.
+    ///
+    /// Attempting to set a fence to a value less than its current value has no
+    /// effect.
+    ///
+    /// Waiting on a fence returns as soon as the fence reaches *or passes* the
+    /// requested value. This implies that, in order to reliably determine when
+    /// an operation has completed, operations must finish in order of
+    /// increasing fence values: if a higher-valued operation were to finish
+    /// before a lower-valued operation, then waiting for the fence to reach the
+    /// lower value could return before the lower-valued operation has actually
+    /// finished.
    type Fence: fmt::Debug + WasmNotSendSync;

    type BindGroupLayout: fmt::Debug + WasmNotSendSync;
@ -605,7 +623,25 @@ pub trait Device: WasmNotSendSync {
        &self,
        fence: &<Self::A as Api>::Fence,
    ) -> Result<FenceValue, DeviceError>;
-    /// Calling wait with a lower value than the current fence value will immediately return.
+
+    /// Wait for `fence` to reach `value`.
+    ///
+    /// Operations like [`Queue::submit`] can accept a [`Fence`] and a
+    /// [`FenceValue`] to store in it, so you can use this `wait` function
+    /// to wait for a given queue submission to finish execution.
+    ///
+    /// The `value` argument must be a value that some actual operation you have
+    /// already presented to the device is going to store in `fence`. You cannot
+    /// wait for values yet to be submitted. (This restriction accommodates
+    /// implementations like the `vulkan` backend's [`FencePool`] that must
+    /// allocate a distinct synchronization object for each fence value one is
+    /// able to wait for.)
+    ///
+    /// Calling `wait` with a lower [`FenceValue`] than `fence`'s current value
+    /// returns immediately.
+    ///
+    /// [`Fence`]: Api::Fence
+    /// [`FencePool`]: vulkan/enum.Fence.html#variant.FencePool
    unsafe fn wait(
        &self,
        fence: &<Self::A as Api>::Fence,
@ -637,7 +673,10 @@ pub trait Device: WasmNotSendSync {
 pub trait Queue: WasmNotSendSync {
    type A: Api;

-    /// Submits the command buffers for execution on GPU.
+    /// Submit `command_buffers` for execution on GPU.
+    ///
+    /// If `signal_fence` is `Some(fence, value)`, update `fence` to `value`
+    /// when the operation is complete. See [`Fence`] for details.
    ///
    /// If two calls to `submit` on a single `Queue` occur in a particular order
    /// (that is, they happen on the same thread, or on two threads that have
@ -672,7 +711,7 @@ pub trait Queue: WasmNotSendSync {
    /// - All of the [`SurfaceTexture`][st]s that the command buffers
    ///   write to appear in the `surface_textures` argument.
    ///
-    /// [`Fence`]: crate::Api::Fence
+    /// [`Fence`]: Api::Fence
    /// [cb]: Api::CommandBuffer
    /// [ce]: Api::CommandEncoder
    /// [st]: Api::SurfaceTexture
--- a/wgpu-hal/src/vulkan/mod.rs
+++ b/wgpu-hal/src/vulkan/mod.rs
@ -559,9 +559,47 @@ pub struct QuerySet {
    raw: vk::QueryPool,
 }

+/// The [`Api::Fence`] type for [`vulkan::Api`].
+///
+/// This is an `enum` because there are two possible implementations of
+/// `wgpu-hal` fences on Vulkan: Vulkan fences, which work on any version of
+/// Vulkan, and Vulkan timeline semaphores, which are easier and cheaper but
+/// require non-1.0 features.
+///
+/// [`Device::create_fence`] returns a [`TimelineSemaphore`] if
+/// [`VK_KHR_timeline_semaphore`] is available and enabled, and a [`FencePool`]
+/// otherwise.
+///
+/// [`Api::Fence`]: crate::Api::Fence
+/// [`vulkan::Api`]: Api
+/// [`Device::create_fence`]: crate::Device::create_fence
+/// [`TimelineSemaphore`]: Fence::TimelineSemaphore
+/// [`VK_KHR_timeline_semaphore`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VK_KHR_timeline_semaphore
+/// [`FencePool`]: Fence::FencePool
 #[derive(Debug)]
 pub enum Fence {
+    /// A Vulkan [timeline semaphore].
+    ///
+    /// These are simpler to use than Vulkan fences, since timeline semaphores
+    /// work exactly the way [`wpgu_hal::Api::Fence`] is specified to work.
+    ///
+    /// [timeline semaphore]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-semaphores
+    /// [`wpgu_hal::Api::Fence`]: crate::Api::Fence
    TimelineSemaphore(vk::Semaphore),
+
+    /// A collection of Vulkan [fence]s, each associated with a [`FenceValue`].
+    ///
+    /// The effective [`FenceValue`] of this variant is the greater of
+    /// `last_completed` and the maximum value associated with a signalled fence
+    /// in `active`.
+    ///
+    /// Fences are available in all versions of Vulkan, but since they only have
+    /// two states, "signaled" and "unsignaled", we need to use a separate fence
+    /// for each queue submission we might want to wait for, and remember which
+    /// [`FenceValue`] each one represents.
+    ///
+    /// [fence]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-fences
+    /// [`FenceValue`]: crate::FenceValue
    FencePool {
        last_completed: crate::FenceValue,
        /// The pending fence values have to be ascending.
@ -571,21 +609,32 @@ pub enum Fence {
 }

 impl Fence {
+    /// Return the highest [`FenceValue`] among the signalled fences in `active`.
+    ///
+    /// As an optimization, assume that we already know that the fence has
+    /// reached `last_completed`, and don't bother checking fences whose values
+    /// are less than that: those fences remain in the `active` array only
+    /// because we haven't called `maintain` yet to clean them up.
+    ///
+    /// [`FenceValue`]: crate::FenceValue
    fn check_active(
        device: &ash::Device,
-        mut max_value: crate::FenceValue,
+        mut last_completed: crate::FenceValue,
        active: &[(crate::FenceValue, vk::Fence)],
    ) -> Result<crate::FenceValue, crate::DeviceError> {
        for &(value, raw) in active.iter() {
            unsafe {
-                if value > max_value && device.get_fence_status(raw)? {
-                    max_value = value;
+                if value > last_completed && device.get_fence_status(raw)? {
+                    last_completed = value;
                }
            }
        }
-        Ok(max_value)
+        Ok(last_completed)
    }

+    /// Return the highest signalled [`FenceValue`] for `self`.
+    ///
+    /// [`FenceValue`]: crate::FenceValue
    fn get_latest(
        &self,
        device: &ash::Device,
@ -606,6 +655,18 @@ impl Fence {
        }
    }

+    /// Trim the internal state of this [`Fence`].
+    ///
+    /// This function has no externally visible effect, but you should call it
+    /// periodically to keep this fence's resource consumption under control.
+    ///
+    /// For fences using the [`FencePool`] implementation, this function
+    /// recycles fences that have been signaled. If you don't call this,
+    /// [`Queue::submit`] will just keep allocating a new Vulkan fence every
+    /// time it's called.
+    ///
+    /// [`FencePool`]: Fence::FencePool
+    /// [`Queue::submit`]: crate::Queue::submit
    fn maintain(&mut self, device: &ash::Device) -> Result<(), crate::DeviceError> {
        match *self {
            Self::TimelineSemaphore(_) => {}