Add downlevel limit for vertex shader access to SSBO

2024-11-22 06:44:14 +00:00 · 2021-07-02 23:24:23 -04:00 · 2021-07-02 23:24:23 -04:00 · 26f3c03150
commit 26f3c03150
parent 93cca6d182
8 changed files with 78 additions and 11 deletions
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@ -980,6 +980,7 @@ impl<A: HalApi> Device<A> {
            use wgt::BindingType as Bt;

            let mut required_features = wgt::Features::empty();
+            let mut required_downlevel_flags = wgt::DownlevelFlags::empty();
            let (array_feature, is_writable_storage) = match entry.ty {
                Bt::Buffer {
                    ty: wgt::BufferBindingType::Uniform,
@ -1029,6 +1030,9 @@ impl<A: HalApi> Device<A> {
                        error,
                    })?;
            }
+            if entry.visibility.contains(wgt::ShaderStages::VERTEX) {
+                required_downlevel_flags |= wgt::DownlevelFlags::VERTEX_ACCESSABLE_STORAGE_BUFFERS;
+            }
            if is_writable_storage && entry.visibility.contains(wgt::ShaderStages::VERTEX) {
                required_features |= wgt::Features::VERTEX_WRITABLE_STORAGE;
            }
@ -1047,6 +1051,13 @@ impl<A: HalApi> Device<A> {
                    binding: entry.binding,
                    error,
                })?;
+
+            self.require_downlevel_flags(required_downlevel_flags)
+                .map_err(binding_model::BindGroupLayoutEntryError::MissingDownlevelFlags)
+                .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry {
+                    binding: entry.binding,
+                    error,
+                })?;
        }

        let mut hal_bindings = entry_map.values().cloned().collect::<Vec<_>>();
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@ -114,6 +114,7 @@ impl super::Adapter {
            "igp",
            "mali",
            "intel",
+            "v3d",
        ];
        let strings_that_imply_cpu = ["mesa offscreen", "swiftshader", "llvmpipe"];

@ -144,6 +145,8 @@ impl super::Adapter {
            0x5143
        } else if vendor.contains("intel") {
            0x8086
+        } else if vendor.contains("broadcom") {
+            0x14e4
        } else {
            0
        };
@ -184,6 +187,11 @@ impl super::Adapter {
        );
        features.set(wgt::Features::VERTEX_WRITABLE_STORAGE, ver >= (3, 1));

+        let vertex_shader_storage_blocks =
+            gl.get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS);
+        let fragment_shader_storage_blocks =
+            gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS);
+
        let mut downlevel_flags = wgt::DownlevelFlags::empty()
            | wgt::DownlevelFlags::DEVICE_LOCAL_IMAGE_COPIES
            | wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES
@ -202,6 +210,10 @@ impl super::Adapter {
            wgt::DownlevelFlags::INDEPENDENT_BLENDING,
            ver >= (3, 2) || extensions.contains("GL_EXT_draw_buffers_indexed"),
        );
+        downlevel_flags.set(
+            wgt::DownlevelFlags::VERTEX_ACCESSABLE_STORAGE_BUFFERS,
+            vertex_shader_storage_blocks > 0,
+        );

        let max_texture_size = gl.get_parameter_i32(glow::MAX_TEXTURE_SIZE) as u32;
        let max_texture_3d_size = gl.get_parameter_i32(glow::MAX_3D_TEXTURE_SIZE) as u32;
@ -216,10 +228,12 @@ impl super::Adapter {
        let max_uniform_buffers_per_shader_stage =
            gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS)
                .min(gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS)) as u32;
-        let max_storage_buffers_per_shader_stage = gl
-            .get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS)
-            .min(gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS))
-            as u32;
+        let max_storage_buffers_per_shader_stage = if vertex_shader_storage_blocks > 0 {
+            vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) as u32
+        } else {
+            fragment_shader_storage_blocks as u32 // vertex stage has no SSBOs; use fragment limit
+        };
+
        let max_storage_textures_per_shader_stage =
            gl.get_parameter_i32(glow::MAX_FRAGMENT_IMAGE_UNIFORMS) as u32;

--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@ -497,6 +497,20 @@ impl Features {

 /// Represents the sets of limits an adapter/device supports.
 ///
+/// We provide two different defaults.
+/// - [`Limits::downlevel_limits()`]. This is a set of limits that is guaranteed to
+///   work on all backends, including "downlevel" backends such
+///   as OpenGL and D3D11. For most applications we recommend using these
+///   limits, assuming they are high enough for your application.
+/// - [`Limits::default()`]. This is the set of limits that is guaranteed to
+///   work on all modern backends and is guaranteed to be supported by WebGPU.
+///   Applications needing more modern features can use this as a reasonable set of
+///   limits if they are targeting only desktop and modern mobile devices.
+///
+/// We recommend starting with the most restrictive limits you can and manually
+/// increasing the limits you need boosted. This will let you stay running on
+/// all hardware that supports the limits you need.
+///
 /// Limits "better" than the default must be supported by the adapter and requested when requesting
 /// a device. If limits "better" than the adapter supports are requested, requesting a device will panic.
 /// Once a device is requested, you may only use resources up to the limits requested _even_ if the
@ -593,6 +607,32 @@ impl Default for Limits {
    }
 }

+impl Limits {
+    /// These default limits are guaranteed to be compatible with GLES3, WebGL, and D3D11
+    pub fn downlevel_limits() -> Self {
+        Self {
+            max_texture_dimension_1d: 2048, // GLES 3.0 only guarantees MAX_TEXTURE_SIZE >= 2048
+            max_texture_dimension_2d: 2048, // GLES 3.0 only guarantees MAX_TEXTURE_SIZE >= 2048
+            max_texture_dimension_3d: 256,
+            max_texture_array_layers: 256,
+            max_bind_groups: 4,
+            max_dynamic_uniform_buffers_per_pipeline_layout: 8,
+            max_dynamic_storage_buffers_per_pipeline_layout: 4,
+            max_sampled_textures_per_shader_stage: 16,
+            max_samplers_per_shader_stage: 16,
+            max_storage_buffers_per_shader_stage: 4,
+            max_storage_textures_per_shader_stage: 4,
+            max_uniform_buffers_per_shader_stage: 12,
+            max_uniform_buffer_binding_size: 16384,
+            max_storage_buffer_binding_size: 128 << 20,
+            max_vertex_buffers: 8,
+            max_vertex_attributes: 16,
+            max_vertex_buffer_array_stride: 2048,
+            max_push_constant_size: 0, // push constants are not requested by the downlevel defaults
+        }
+    }
+}
+
 /// Represents the sets of additional limits on an adapter,
 /// which take place when running on downlevel backends.
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -662,8 +702,10 @@ bitflags::bitflags! {
        const COMPARISON_SAMPLERS = 0x0000_0100;
        /// Supports different blending modes per color target.
        const INDEPENDENT_BLENDING = 0x0000_0200;
-        /// Supports samplers with anisotropic filtering
-        const ANISOTROPIC_FILTERING = 0x0001_0000;
+        /// Supports samplers with anisotropic filtering.
+        const ANISOTROPIC_FILTERING = 0x0000_0400;
+        /// Supports attaching storage buffers to vertex shaders.
+        const VERTEX_ACCESSABLE_STORAGE_BUFFERS = 0x0000_0800;
    }
 }

--- a/wgpu/examples/capture/main.rs
+++ b/wgpu/examples/capture/main.rs
@ -38,7 +38,7 @@ async fn create_red_image_with_dimensions(
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )
--- a/wgpu/examples/framework.rs
+++ b/wgpu/examples/framework.rs
@ -41,7 +41,7 @@ pub trait Example: 'static + Sized {
        wgpu::Features::empty()
    }
    fn required_limits() -> wgpu::Limits {
-        wgpu::Limits::default()
+        wgpu::Limits::downlevel_limits() // These downlevel limits will allow the code to run on all possible hardware
    }
    fn init(
        sc_desc: &wgpu::SwapChainDescriptor,
--- a/wgpu/examples/hello-compute/main.rs
+++ b/wgpu/examples/hello-compute/main.rs
@ -47,7 +47,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option<Vec<u32>> {
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )
--- a/wgpu/examples/hello-triangle/main.rs
+++ b/wgpu/examples/hello-triangle/main.rs
@ -24,7 +24,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )
--- a/wgpu/examples/hello-windows/main.rs
+++ b/wgpu/examples/hello-windows/main.rs
@ -85,7 +85,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Window, wgpu::Color)>) {
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )