Add downlevel limit for vertex shader access to SSBO

2024-11-26 00:33:51 +00:00 · 2021-07-02 23:24:23 -04:00 · 2021-07-02 23:24:23 -04:00 · 26f3c03150
commit 26f3c03150
parent 93cca6d182
8 changed files with 78 additions and 11 deletions
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@ -980,6 +980,7 @@ impl<A: HalApi> Device<A> {
            use wgt::BindingType as Bt;
            let mut required_features = wgt::Features::empty();
            let mut required_downlevel_flags = wgt::DownlevelFlags::empty();
            let (array_feature, is_writable_storage) = match entry.ty {
                Bt::Buffer {
                    ty: wgt::BufferBindingType::Uniform,
@ -1029,6 +1030,9 @@ impl<A: HalApi> Device<A> {
                        error,
                    })?;
            }
            if entry.visibility.contains(wgt::ShaderStages::VERTEX) {
                required_downlevel_flags |= wgt::DownlevelFlags::VERTEX_ACCESSABLE_STORAGE_BUFFERS;
            }
            if is_writable_storage && entry.visibility.contains(wgt::ShaderStages::VERTEX) {
                required_features |= wgt::Features::VERTEX_WRITABLE_STORAGE;
            }
@ -1047,6 +1051,13 @@ impl<A: HalApi> Device<A> {
                    binding: entry.binding,
                    error,
                })?;
            self.require_downlevel_flags(required_downlevel_flags)
                .map_err(binding_model::BindGroupLayoutEntryError::MissingDownlevelFlags)
                .map_err(|error| binding_model::CreateBindGroupLayoutError::Entry {
                    binding: entry.binding,
                    error,
                })?;
        }
        let mut hal_bindings = entry_map.values().cloned().collect::<Vec<_>>();
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@ -114,6 +114,7 @@ impl super::Adapter {
            "igp",
            "mali",
            "intel",
            "v3d",
        ];
        let strings_that_imply_cpu = ["mesa offscreen", "swiftshader", "llvmpipe"];
@ -144,6 +145,8 @@ impl super::Adapter {
            0x5143
        } else if vendor.contains("intel") {
            0x8086
        } else if vendor.contains("broadcom") {
            0x14e4
        } else {
            0
        };
@ -184,6 +187,11 @@ impl super::Adapter {
        );
        features.set(wgt::Features::VERTEX_WRITABLE_STORAGE, ver >= (3, 1));
        let vertex_shader_storage_blocks =
            gl.get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS);
        let fragment_shader_storage_blocks =
            gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS);
        let mut downlevel_flags = wgt::DownlevelFlags::empty()
            | wgt::DownlevelFlags::DEVICE_LOCAL_IMAGE_COPIES
            | wgt::DownlevelFlags::NON_POWER_OF_TWO_MIPMAPPED_TEXTURES
@ -202,6 +210,10 @@ impl super::Adapter {
            wgt::DownlevelFlags::INDEPENDENT_BLENDING,
            ver >= (3, 2) || extensions.contains("GL_EXT_draw_buffers_indexed"),
        );
        downlevel_flags.set(
            wgt::DownlevelFlags::VERTEX_ACCESSABLE_STORAGE_BUFFERS,
            vertex_shader_storage_blocks > 0,
        );
        let max_texture_size = gl.get_parameter_i32(glow::MAX_TEXTURE_SIZE) as u32;
        let max_texture_3d_size = gl.get_parameter_i32(glow::MAX_3D_TEXTURE_SIZE) as u32;
@ -216,10 +228,12 @@ impl super::Adapter {
        let max_uniform_buffers_per_shader_stage =
            gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS)
                .min(gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS)) as u32;
-        let max_storage_buffers_per_shader_stage = gl
+        // Per-stage storage buffer limit. When the vertex stage reports zero storage
+        // blocks, the fragment-stage count is used on its own instead of taking the
+        // minimum of both stages.
+        let max_storage_buffers_per_shader_stage = if vertex_shader_storage_blocks > 0 {
-            .get_parameter_i32(glow::MAX_VERTEX_SHADER_STORAGE_BLOCKS)
+            vertex_shader_storage_blocks.min(fragment_shader_storage_blocks) as u32
-            .min(gl.get_parameter_i32(glow::MAX_FRAGMENT_SHADER_STORAGE_BLOCKS))
+        } else {
-            as u32;
+            // Both arms must produce `u32`; the queried value is an `i32`.
+            fragment_shader_storage_blocks as u32
        };
        let max_storage_textures_per_shader_stage =
            gl.get_parameter_i32(glow::MAX_FRAGMENT_IMAGE_UNIFORMS) as u32;
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@ -497,6 +497,20 @@ impl Features {
 /// Represents the sets of limits an adapter/device supports.
 ///
 /// We provide two different defaults. 
 /// - [`Limits::downlevel_limits()`]. This is a set of limits that is guaranteed to
 ///   work on all backends, including "downlevel" backends such
 ///   as OpenGL and D3D11. For most applications we recommend using these
 ///   limits, assuming they are high enough for your application.
 /// - [`Limits::default()`]. This is the set of limits that is guaranteed to
 ///   work on all modern backends and is guaranteed to be supported by WebGPU.
 ///   Applications needing more modern features can use this as a reasonable set of
 ///   limits if they are targeting only desktop and modern mobile devices.
 ///
 /// We recommend starting with the most restrictive limits you can and manually
 /// increasing the limits you need boosted. This will let you stay running on
 /// all hardware that supports the limits you need.
 ///
 /// Limits "better" than the default must be supported by the adapter and requested when requesting
 /// a device. If limits "better" than the adapter supports are requested, requesting a device will panic.
 /// Once a device is requested, you may only use resources up to the limits requested _even_ if the
@ -593,6 +607,32 @@ impl Default for Limits {
    }
 }
 impl Limits {
    /// Returns a conservative set of limits intended to be compatible with GLES3,
    /// WebGL, and D3D11.
    ///
    /// Use this instead of [`Limits::default()`] when a device must be creatable on
    /// "downlevel" backends; individual fields can be raised afterwards if the adapter
    /// supports more.
    pub fn downlevel_limits() -> Self {
        Self {
            // 2048 is the minimum `MAX_TEXTURE_SIZE` an OpenGL ES 3.0 implementation must
            // support; anything larger is not guaranteed to be available.
            max_texture_dimension_1d: 2048,
            max_texture_dimension_2d: 2048,
            max_texture_dimension_3d: 256,
            max_texture_array_layers: 256,
            max_bind_groups: 4,
            max_dynamic_uniform_buffers_per_pipeline_layout: 8,
            max_dynamic_storage_buffers_per_pipeline_layout: 4,
            max_sampled_textures_per_shader_stage: 16,
            max_samplers_per_shader_stage: 16,
            max_storage_buffers_per_shader_stage: 4,
            max_storage_textures_per_shader_stage: 4,
            max_uniform_buffers_per_shader_stage: 12,
            max_uniform_buffer_binding_size: 16384,
            // 128 MiB.
            max_storage_buffer_binding_size: 128 << 20,
            max_vertex_buffers: 8,
            max_vertex_attributes: 16,
            max_vertex_buffer_array_stride: 2048,
            max_push_constant_size: 0,
        }
    }
 }
 /// Represents the sets of additional limits on an adapter,
 /// which take place when running on downlevel backends.
 #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
@ -662,8 +702,10 @@ bitflags::bitflags! {
        const COMPARISON_SAMPLERS = 0x0000_0100;
        /// Supports different blending modes per color target.
        const INDEPENDENT_BLENDING = 0x0000_0200;
-        /// Supports samplers with anisotropic filtering
+        /// Supports samplers with anisotropic filtering.
-        const ANISOTROPIC_FILTERING = 0x0001_0000;
+        const ANISOTROPIC_FILTERING = 0x0000_0400;
        /// Supports attaching storage buffers to vertex shaders.
        const VERTEX_ACCESSABLE_STORAGE_BUFFERS = 0x0000_0800;
    }
 }
--- a/wgpu/examples/capture/main.rs
+++ b/wgpu/examples/capture/main.rs
@ -38,7 +38,7 @@ async fn create_red_image_with_dimensions(
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )
--- a/wgpu/examples/framework.rs
+++ b/wgpu/examples/framework.rs
@ -41,7 +41,7 @@ pub trait Example: 'static + Sized {
        wgpu::Features::empty()
    }
    fn required_limits() -> wgpu::Limits {
-        wgpu::Limits::default()
+        wgpu::Limits::downlevel_limits() // These downlevel limits will allow the code to run on all possible hardware
    }
    fn init(
        sc_desc: &wgpu::SwapChainDescriptor,
--- a/wgpu/examples/hello-compute/main.rs
+++ b/wgpu/examples/hello-compute/main.rs
@ -47,7 +47,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option<Vec<u32>> {
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )
--- a/wgpu/examples/hello-triangle/main.rs
+++ b/wgpu/examples/hello-triangle/main.rs
@ -24,7 +24,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )
--- a/wgpu/examples/hello-windows/main.rs
+++ b/wgpu/examples/hello-windows/main.rs
@ -85,7 +85,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Window, wgpu::Color)>) {
            &wgpu::DeviceDescriptor {
                label: None,
                features: wgpu::Features::empty(),
-                limits: wgpu::Limits::default(),
+                limits: wgpu::Limits::downlevel_limits(),
            },
            None,
        )