Expose maximum_frame_latency (#4899)

Co-authored-by: Emil Ernerfeldt <emil.ernerfeldt@gmail.com>
2024-11-21 22:33:49 +00:00 · 2024-01-17 18:46:34 +01:00 · 2024-01-17 18:46:34 +01:00 · b8f27c7284
commit b8f27c7284
parent 2e38187954
19 changed files with 83 additions and 70 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -98,6 +98,7 @@ By @wumpf in [#5044](https://github.com/gfx-rs/wgpu/pull/5044)
 - Added support for the float32-filterable feature. By @almarklein in [#4759](https://github.com/gfx-rs/wgpu/pull/4759)
 - GPU buffer memory is released during "lose the device". By @bradwerth in [#4851](https://github.com/gfx-rs/wgpu/pull/4851)
 - wgpu and wgpu-core features are now documented on docs.rs. By @wumpf in [#4886](https://github.com/gfx-rs/wgpu/pull/4886)
+- `SurfaceConfiguration` now exposes `desired_maximum_frame_latency` which was previously hard-coded to 2. By setting it to 1 you can reduce latency at the risk of making GPU & CPU work sequential. Currently, on DX12 this affects the `MaximumFrameLatency`; on all other backends except OpenGL it affects the size of the swapchain (on OpenGL this has no effect). By @emilk & @wumpf in [#4899](https://github.com/gfx-rs/wgpu/pull/4899)
 - DeviceLostClosure is guaranteed to be invoked exactly once. By @bradwerth in [#4862](https://github.com/gfx-rs/wgpu/pull/4862)

 #### OpenGL
--- a/examples/src/framework.rs
+++ b/examples/src/framework.rs
@ -571,6 +571,7 @@ impl<E: Example + wgpu::WasmNotSendSync> From<ExampleTestParams<E>>
                        format,
                        width: params.width,
                        height: params.height,
+                        desired_maximum_frame_latency: 2,
                        present_mode: wgpu::PresentMode::Fifo,
                        alpha_mode: wgpu::CompositeAlphaMode::Auto,
                        view_formats: vec![format],
--- a/examples/src/hello_triangle/mod.rs
+++ b/examples/src/hello_triangle/mod.rs
@ -72,16 +72,9 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
        multiview: None,
    });

-    let mut config = wgpu::SurfaceConfiguration {
-        usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-        format: swapchain_format,
-        width: size.width,
-        height: size.height,
-        present_mode: wgpu::PresentMode::Fifo,
-        alpha_mode: swapchain_capabilities.alpha_modes[0],
-        view_formats: vec![],
-    };
-
+    let mut config = surface
+        .get_default_config(&adapter, size.width, size.height)
+        .unwrap();
    surface.configure(&device, &config);

    let window = &window;
--- a/examples/src/hello_windows/mod.rs
+++ b/examples/src/hello_windows/mod.rs
@ -30,20 +30,11 @@ impl ViewportDesc {

    fn build(self, adapter: &wgpu::Adapter, device: &wgpu::Device) -> Viewport {
        let size = self.window.inner_size();
-
-        let caps = self.surface.get_capabilities(adapter);
-        let config = wgpu::SurfaceConfiguration {
-            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-            format: caps.formats[0],
-            width: size.width,
-            height: size.height,
-            present_mode: wgpu::PresentMode::Fifo,
-            alpha_mode: caps.alpha_modes[0],
-            view_formats: vec![],
-        };
-
+        let config = self
+            .surface
+            .get_default_config(adapter, size.width, size.height)
+            .unwrap();
        self.surface.configure(device, &config);
-
        Viewport { desc: self, config }
    }
 }
--- a/examples/src/uniform_values/mod.rs
+++ b/examples/src/uniform_values/mod.rs
@ -192,15 +192,9 @@ impl WgpuContext {
            multiview: None,
        });

-        let surface_config = wgpu::SurfaceConfiguration {
-            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
-            format: swapchain_format,
-            width: size.width,
-            height: size.height,
-            present_mode: wgpu::PresentMode::Fifo,
-            alpha_mode: swapchain_capabilities.alpha_modes[0],
-            view_formats: vec![],
-        };
+        let surface_config = surface
+            .get_default_config(&adapter, size.width, size.height)
+            .unwrap();
        surface.configure(&device, &surface_config);

        // (5)
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@ -1982,10 +1982,12 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                    }
                }

-                let num_frames = present::DESIRED_NUM_FRAMES
-                    .clamp(*caps.swap_chain_sizes.start(), *caps.swap_chain_sizes.end());
+                let maximum_frame_latency = config.desired_maximum_frame_latency.clamp(
+                    *caps.maximum_frame_latency.start(),
+                    *caps.maximum_frame_latency.end(),
+                );
                let mut hal_config = hal::SurfaceConfiguration {
-                    swap_chain_size: num_frames,
+                    maximum_frame_latency,
                    present_mode: config.present_mode,
                    composite_alpha_mode: config.alpha_mode,
                    format: config.format,
@ -2056,7 +2058,6 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
                *presentation = Some(present::Presentation {
                    device: super::any_device::AnyDevice::new(device.clone()),
                    config: config.clone(),
-                    num_frames,
                    acquired_texture: None,
                });
            }
--- a/wgpu-core/src/present.rs
+++ b/wgpu-core/src/present.rs
@ -37,14 +37,11 @@ use thiserror::Error;
 use wgt::SurfaceStatus as Status;

 const FRAME_TIMEOUT_MS: u32 = 1000;
-pub const DESIRED_NUM_FRAMES: u32 = 3;

 #[derive(Debug)]
 pub(crate) struct Presentation {
    pub(crate) device: AnyDevice,
    pub(crate) config: wgt::SurfaceConfiguration<Vec<wgt::TextureFormat>>,
-    #[allow(unused)]
-    pub(crate) num_frames: u32,
    pub(crate) acquired_texture: Option<TextureId>,
 }

--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@ -23,7 +23,7 @@ const BUNNY_SIZE: f32 = 0.15 * 256.0;
 const GRAVITY: f32 = -9.8 * 100.0;
 const MAX_VELOCITY: f32 = 750.0;
 const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
-const DESIRED_FRAMES: u32 = 3;
+const DESIRED_MAX_LATENCY: u32 = 2;

 #[repr(C)]
 #[derive(Clone, Copy)]
@ -132,9 +132,9 @@ impl<A: hal::Api> Example<A> {

        let window_size: (u32, u32) = window.inner_size().into();
        let surface_config = hal::SurfaceConfiguration {
-            swap_chain_size: DESIRED_FRAMES.clamp(
-                *surface_caps.swap_chain_sizes.start(),
-                *surface_caps.swap_chain_sizes.end(),
+            maximum_frame_latency: DESIRED_MAX_LATENCY.clamp(
+                *surface_caps.maximum_frame_latency.start(),
+                *surface_caps.maximum_frame_latency.end(),
            ),
            present_mode: wgt::PresentMode::Fifo,
            composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@ -14,7 +14,7 @@ use std::{
 use winit::window::WindowButtons;

 const COMMAND_BUFFER_PER_CONTEXT: usize = 100;
-const DESIRED_FRAMES: u32 = 3;
+const DESIRED_MAX_LATENCY: u32 = 2;

 /// [D3D12_RAYTRACING_INSTANCE_DESC](https://microsoft.github.io/DirectX-Specs/d3d/Raytracing.html#d3d12_raytracing_instance_desc)
 /// [VkAccelerationStructureInstanceKHR](https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkAccelerationStructureInstanceKHR.html)
@ -264,9 +264,9 @@ impl<A: hal::Api> Example<A> {
            *surface_caps.formats.first().unwrap()
        };
        let surface_config = hal::SurfaceConfiguration {
-            swap_chain_size: DESIRED_FRAMES
-                .max(*surface_caps.swap_chain_sizes.start())
-                .min(*surface_caps.swap_chain_sizes.end()),
+            maximum_frame_latency: DESIRED_MAX_LATENCY
+                .max(*surface_caps.maximum_frame_latency.start())
+                .min(*surface_caps.maximum_frame_latency.end()),
            present_mode: wgt::PresentMode::Fifo,
            composite_alpha_mode: wgt::CompositeAlphaMode::Opaque,
            format: surface_format,
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@ -626,8 +626,8 @@ impl crate::Adapter<super::Api> for super::Adapter {
                wgt::TextureFormat::Rgb10a2Unorm,
                wgt::TextureFormat::Rgba16Float,
            ],
-            // we currently use a flip effect which supports 2..=16 buffers
-            swap_chain_sizes: 2..=16,
+            // See https://learn.microsoft.com/en-us/windows/win32/api/dxgi/nf-dxgi-idxgidevice1-setmaximumframelatency
+            maximum_frame_latency: 1..=16,
            current_extent,
            usage: crate::TextureUses::COLOR_TARGET
                | crate::TextureUses::COPY_SRC
--- a/wgpu-hal/src/dx12/mod.rs
+++ b/wgpu-hal/src/dx12/mod.rs
@ -660,13 +660,18 @@ impl crate::Surface<Api> for Surface {

        let non_srgb_format = auxil::dxgi::conv::map_texture_format_nosrgb(config.format);

+        // Nvidia recommends using 1-2 more buffers than the maximum latency
+        // https://developer.nvidia.com/blog/advanced-api-performance-swap-chains/
+        // For high latency, extra buffers seem excessive, so go with a minimum of 3 and beyond that add 1. NOTE(review): `.min(3)` below caps the count at 3 rather than enforcing a minimum - confirm intent.
+        let swap_chain_buffer = (config.maximum_frame_latency + 1).min(3);
+
        let swap_chain = match self.swap_chain.write().take() {
            //Note: this path doesn't properly re-initialize all of the things
            Some(sc) => {
                let raw = unsafe { sc.release_resources() };
                let result = unsafe {
                    raw.ResizeBuffers(
-                        config.swap_chain_size,
+                        swap_chain_buffer,
                        config.extent.width,
                        config.extent.height,
                        non_srgb_format,
@ -693,7 +698,7 @@ impl crate::Surface<Api> for Surface {
                        quality: 0,
                    },
                    buffer_usage: dxgitype::DXGI_USAGE_RENDER_TARGET_OUTPUT,
-                    buffer_count: config.swap_chain_size,
+                    buffer_count: swap_chain_buffer,
                    scaling: d3d12::Scaling::Stretch,
                    swap_effect: d3d12::SwapEffect::FlipDiscard,
                    flags,
@ -797,11 +802,11 @@ impl crate::Surface<Api> for Surface {
            | SurfaceTarget::SwapChainPanel(_) => {}
        }

-        unsafe { swap_chain.SetMaximumFrameLatency(config.swap_chain_size) };
+        unsafe { swap_chain.SetMaximumFrameLatency(config.maximum_frame_latency) };
        let waitable = unsafe { swap_chain.GetFrameLatencyWaitableObject() };

-        let mut resources = Vec::with_capacity(config.swap_chain_size as usize);
-        for i in 0..config.swap_chain_size {
+        let mut resources = Vec::with_capacity(config.maximum_frame_latency as usize);
+        for i in 0..config.maximum_frame_latency {
            let mut resource = d3d12::Resource::null();
            unsafe {
                swap_chain.GetBuffer(i, &d3d12_ty::ID3D12Resource::uuidof(), resource.mut_void())
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@ -1138,7 +1138,7 @@ impl crate::Adapter<super::Api> for super::Adapter {
                    vec![wgt::PresentMode::Fifo] //TODO
                },
                composite_alpha_modes: vec![wgt::CompositeAlphaMode::Opaque], //TODO
-                swap_chain_sizes: 2..=2,
+                maximum_frame_latency: 2..=2, //TODO, unused currently
                current_extent: None,
                usage: crate::TextureUses::COLOR_TARGET,
            })
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@ -922,11 +922,14 @@ pub struct SurfaceCapabilities {
    /// Must be at least one.
    pub formats: Vec<wgt::TextureFormat>,

-    /// Range for the swap chain sizes.
+    /// Range for the number of queued frames.
    ///
-    /// - `swap_chain_sizes.start` must be at least 1.
-    /// - `swap_chain_sizes.end` must be larger or equal to `swap_chain_sizes.start`.
-    pub swap_chain_sizes: RangeInclusive<u32>,
+    /// Depending on the backend, this either sets the swapchain frame count to `value + 1`, calls `SetMaximumFrameLatency` with the given value,
+    /// or uses a wait-for-present in the acquire method to limit rendering such that it behaves like a swapchain with `value + 1` frames.
+    ///
+    /// - `maximum_frame_latency.start` must be at least 1.
+    /// - `maximum_frame_latency.end` must be larger or equal to `maximum_frame_latency.start`.
+    pub maximum_frame_latency: RangeInclusive<u32>,

    /// Current extent of the surface, if known.
    pub current_extent: Option<wgt::Extent3d>,
@ -1252,9 +1255,9 @@ pub struct RenderPipelineDescriptor<'a, A: Api> {

 #[derive(Debug, Clone)]
 pub struct SurfaceConfiguration {
-    /// Number of textures in the swap chain. Must be in
-    /// `SurfaceCapabilities::swap_chain_size` range.
-    pub swap_chain_size: u32,
+    /// Maximum number of queued frames. Must be in
+    /// `SurfaceCapabilities::maximum_frame_latency` range.
+    pub maximum_frame_latency: u32,
    /// Vertical synchronization mode.
    pub present_mode: wgt::PresentMode,
    /// Alpha composition mode.
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@ -320,13 +320,14 @@ impl crate::Adapter<super::Api> for super::Adapter {
        let pc = &self.shared.private_caps;
        Some(crate::SurfaceCapabilities {
            formats,
-            //Note: this is hardcoded in `CAMetalLayer` documentation
-            swap_chain_sizes: if pc.can_set_maximum_drawables_count {
-                2..=3
+            // The maximum number of drawables is set to this value + 1.
+            // See https://developer.apple.com/documentation/quartzcore/cametallayer/2938720-maximumdrawablecount
+            maximum_frame_latency: if pc.can_set_maximum_drawables_count {
+                1..=2
            } else {
-                // 3 is the default in `CAMetalLayer` documentation
+                // 3 is the default value for maximum drawables in `CAMetalLayer` documentation
                // iOS 10.3 was tested to use 3 on iphone5s
-                3..=3
+                2..=2
            },
            present_modes: if pc.can_set_display_sync {
                vec![wgt::PresentMode::Fifo, wgt::PresentMode::Immediate]
--- a/wgpu-hal/src/metal/surface.rs
+++ b/wgpu-hal/src/metal/surface.rs
@ -221,7 +221,7 @@ impl crate::Surface<super::Api> for super::Surface {
        }

        // this gets ignored on iOS for certain OS/device combinations (iphone5s iOS 10.3)
-        render_layer.set_maximum_drawable_count(config.swap_chain_size as _);
+        render_layer.set_maximum_drawable_count(config.maximum_frame_latency as u64 + 1);
        render_layer.set_drawable_size(drawable_size);
        if caps.can_set_next_drawable_timeout {
            let () = msg_send![*render_layer, setAllowsNextDrawableTimeout:false];
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@ -1848,7 +1848,11 @@ impl crate::Adapter<super::Api> for super::Adapter {
            .collect();
        Some(crate::SurfaceCapabilities {
            formats,
-            swap_chain_sizes: caps.min_image_count..=max_image_count,
+            // TODO: Right now we're always truncating the swap chain
+            // (presumably - we're actually setting the min image count which isn't necessarily the swap chain size)
+            // Instead, we should use extensions when available to wait in present.
+            // See https://github.com/gfx-rs/wgpu/issues/2869
+            maximum_frame_latency: (caps.min_image_count - 1)..=(max_image_count - 1), // Note this can't underflow since both `min_image_count` is at least one and we already patched `max_image_count`.
            current_extent,
            usage: conv::map_vk_image_usage(caps.supported_usage_flags),
            present_modes: raw_present_modes
--- a/wgpu-hal/src/vulkan/device.rs
+++ b/wgpu-hal/src/vulkan/device.rs
@ -579,7 +579,7 @@ impl super::Device {
        let mut info = vk::SwapchainCreateInfoKHR::builder()
            .flags(raw_flags)
            .surface(surface.raw)
-            .min_image_count(config.swap_chain_size)
+            .min_image_count(config.maximum_frame_latency + 1) // TODO: https://github.com/gfx-rs/wgpu/issues/2869
            .image_format(original_format)
            .image_color_space(color_space)
            .image_extent(vk::Extent2D {
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@ -5151,6 +5151,26 @@ pub struct SurfaceConfiguration<V> {
    /// AutoNoVsync will gracefully do a designed sets of fallbacks if their primary modes are
    /// unsupported.
    pub present_mode: PresentMode,
+    /// Desired maximum number of frames that the presentation engine should queue in advance.
+    ///
+    /// This is a hint to the backend implementation and will always be clamped to the supported range.
+    /// As a consequence, either the maximum frame latency is set directly on the swap chain,
+    /// or waits on present are scheduled to avoid exceeding the maximum frame latency if supported,
+    /// or the swap chain size is set to (max-latency + 1).
+    ///
+    /// Defaults to 2 when created via `wgpu::Surface::get_default_config`.
+    ///
+    /// Typical values range from 1 to 3, but higher values are possible:
+    /// * Choose 2 or higher for potentially smoother frame display, as it allows at least one frame
+    /// to be queued up. This typically avoids starving the GPU's work queue.
+    /// Higher values are useful for achieving a constant flow of frames to the display under varying load.
+    /// * Choose 1 for low latency from frame recording to frame display.
+    /// ⚠️ If the backend does not support waiting on present, this will cause the CPU to wait for the GPU
+    /// to finish all work related to the previous frame when calling `wgpu::Surface::get_current_texture`,
+    /// causing CPU-GPU serialization (i.e. when `wgpu::Surface::get_current_texture` returns, the GPU might be idle).
+    /// It is currently not possible to query this. See <https://github.com/gfx-rs/wgpu/issues/2869>.
+    /// * A value of 0 is generally not supported and always clamped to a higher value.
+    pub desired_maximum_frame_latency: u32,
    /// Specifies how the alpha channel of the textures should be handled during compositing.
    pub alpha_mode: CompositeAlphaMode,
    /// Specifies what view formats will be allowed when calling create_view() on texture returned by get_current_texture().
@ -5170,6 +5190,7 @@ impl<V: Clone> SurfaceConfiguration<V> {
            width: self.width,
            height: self.height,
            present_mode: self.present_mode,
+            desired_maximum_frame_latency: self.desired_maximum_frame_latency,
            alpha_mode: self.alpha_mode,
            view_formats: fun(self.view_formats.clone()),
        }
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@ -4793,6 +4793,7 @@ impl Surface<'_> {
            format: *caps.formats.get(0)?,
            width,
            height,
+            desired_maximum_frame_latency: 2,
            present_mode: *caps.present_modes.get(0)?,
            alpha_mode: wgt::CompositeAlphaMode::Auto,
            view_formats: vec![],