hal/gles: totally rework the vertex data binding

Author: Dzmitry Malyshau
Date: 2021-06-29 23:40:14 -04:00 (committed by Dzmitry Malyshau)
parent 40e2c33c6f
commit 4be8864b38
10 changed files with 256 additions and 71 deletions

View File

@ -1171,6 +1171,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.inputs
.extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots));
let vertex_state = &mut state.vertex.inputs[slot as usize];
//TODO: where are we checking that the offset is in bounds?
vertex_state.total_size = match size {
Some(s) => s.get(),
None => buffer.size - offset,

View File

@ -2518,7 +2518,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (device_guard, _) = hub.devices.read(&mut token);
let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?;
Ok(device.downlevel)
Ok(device.downlevel.clone())
}
pub fn device_create_buffer<A: HalApi>(
@ -3640,7 +3640,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
encoder,
dev_stored,
device.limits.clone(),
device.downlevel,
device.downlevel.clone(),
device.features,
#[cfg(feature = "trace")]
device.trace.is_some(),

View File

@ -216,6 +216,7 @@ impl<A: HalApi> Adapter<A> {
missing_flags,
DOWNLEVEL_WARNING_MESSAGE
);
log::info!("{:#?}", caps.downlevel);
}
// Verify feature preconditions
@ -257,7 +258,7 @@ impl<A: HalApi> Adapter<A> {
ref_count: self.life_guard.add_ref(),
},
caps.alignments.clone(),
caps.downlevel,
caps.downlevel.clone(),
desc,
trace_path,
)
@ -658,7 +659,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (adapter_guard, _) = hub.adapters.read(&mut token);
adapter_guard
.get(adapter_id)
.map(|adapter| adapter.raw.capabilities.downlevel)
.map(|adapter| adapter.raw.capabilities.downlevel.clone())
.map_err(|_| InvalidAdapter)
}

View File

@ -261,6 +261,12 @@ impl super::Adapter {
extensions.contains("GL_EXT_texture_shadow_lod"),
);
private_caps.set(super::PrivateCapability::MEMORY_BARRIERS, ver >= (3, 1));
private_caps.set(
super::PrivateCapability::VERTEX_BUFFER_LAYOUT,
ver >= (3, 1),
);
let downlevel_limits = wgt::DownlevelLimits {};
Some(crate::ExposedAdapter {
adapter: super::Adapter {
@ -276,6 +282,7 @@ impl super::Adapter {
limits,
downlevel: wgt::DownlevelCapabilities {
flags: downlevel_flags,
limits: downlevel_limits,
shader_model: wgt::ShaderModel::Sm5,
},
alignments: crate::Alignments {

View File

@ -2,13 +2,6 @@ use super::{conv, Command as C};
use arrayvec::ArrayVec;
use std::{mem, ops::Range};
bitflags::bitflags! {
#[derive(Default)]
struct Dirty: u32 {
const VERTEX_BUFFERS = 0x0001;
}
}
#[derive(Clone, Copy, Debug, Default)]
struct TextureSlotDesc {
tex_target: super::BindTarget,
@ -32,7 +25,8 @@ pub(super) struct State {
resolve_attachments: ArrayVec<[(u32, super::TextureView); crate::MAX_COLOR_TARGETS]>,
invalidate_attachments: ArrayVec<[u32; crate::MAX_COLOR_TARGETS + 2]>,
has_pass_label: bool,
dirty: Dirty,
instance_vbuf_mask: usize,
dirty_vbuf_mask: usize,
}
impl super::CommandBuffer {
@ -75,21 +69,48 @@ impl super::CommandEncoder {
}
}
fn rebind_vertex_attributes(&mut self, first_instance: u32) {
for attribute in self.state.vertex_attributes.iter() {
let (buffer_desc, buffer) =
self.state.vertex_buffers[attribute.buffer_index as usize].clone();
let mut attribute_desc = attribute.clone();
if buffer_desc.step == wgt::InputStepMode::Instance {
attribute_desc.offset += buffer_desc.stride * first_instance;
fn rebind_vertex_data(&mut self, first_instance: u32) {
if self
.private_caps
.contains(super::PrivateCapability::VERTEX_BUFFER_LAYOUT)
{
for (index, &(ref vb_desc, ref vb)) in self.state.vertex_buffers.iter().enumerate() {
if self.state.dirty_vbuf_mask & (1 << index) == 0 {
continue;
}
let instance_offset = match vb_desc.step {
wgt::InputStepMode::Vertex => 0,
wgt::InputStepMode::Instance => first_instance * vb_desc.stride,
};
self.cmd_buffer.commands.push(C::SetVertexBuffer {
index: index as u32,
buffer: super::BufferBinding {
raw: vb.raw,
offset: vb.offset + instance_offset as wgt::BufferAddress,
},
buffer_desc: vb_desc.clone(),
});
}
} else {
for attribute in self.state.vertex_attributes.iter() {
if self.state.dirty_vbuf_mask & (1 << attribute.buffer_index) == 0 {
continue;
}
let (buffer_desc, buffer) =
self.state.vertex_buffers[attribute.buffer_index as usize].clone();
self.cmd_buffer.commands.push(C::SetVertexAttribute {
buffer_desc,
buffer,
attribute_desc,
});
let mut attribute_desc = attribute.clone();
attribute_desc.offset += buffer.offset as u32;
if buffer_desc.step == wgt::InputStepMode::Instance {
attribute_desc.offset += buffer_desc.stride * first_instance;
}
self.cmd_buffer.commands.push(C::SetVertexAttribute {
buffer: Some(buffer.raw),
buffer_desc,
attribute_desc,
});
}
}
}
@ -111,11 +132,13 @@ impl super::CommandEncoder {
fn prepare_draw(&mut self, first_instance: u32) {
if first_instance != 0 {
self.rebind_vertex_attributes(first_instance);
self.state.dirty.set(Dirty::VERTEX_BUFFERS, true);
} else if self.state.dirty.contains(Dirty::VERTEX_BUFFERS) {
self.rebind_vertex_attributes(0);
self.state.dirty.set(Dirty::VERTEX_BUFFERS, false);
self.state.dirty_vbuf_mask = self.state.instance_vbuf_mask;
}
if self.state.dirty_vbuf_mask != 0 {
self.rebind_vertex_data(first_instance);
if first_instance == 0 {
self.state.dirty_vbuf_mask = 0;
}
}
}
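For illustration, here is a hypothetical trace of this logic against a single instance-rate vertex buffer, showing why the dirty mask is only cleared when `first_instance` is zero (editorial sketch, not part of the commit):

// draw(.., first_instance: 3): instance_vbuf_mask is OR-ed into the dirty mask,
// and the buffers are rebound with `offset += 3 * stride`. The mask stays set,
// because the offsets currently bound are skewed by the base instance.
// draw(.., first_instance: 0): the mask is still set, so the buffers are rebound
// with their original offsets, and only then is the mask cleared.
// draw(.., first_instance: 0): the mask is clear; nothing needs rebinding.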
@ -488,7 +511,8 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.cmd_buffer.commands.push(C::PopDebugGroup);
self.state.has_pass_label = false;
}
self.state.dirty = Dirty::empty();
self.state.instance_vbuf_mask = 0;
self.state.dirty_vbuf_mask = 0;
self.state.color_targets.clear();
self.state.vertex_attributes.clear();
self.state.primitive = super::PrimitiveState::default();
@ -591,25 +615,56 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) {
self.state.topology = conv::map_primitive_topology(pipeline.primitive.topology);
self.state.dirty |= Dirty::VERTEX_BUFFERS;
self.set_pipeline_inner(&pipeline.inner);
// set vertex state
self.state.vertex_attributes.clear();
for vat in pipeline.vertex_attributes.iter() {
self.state.vertex_attributes.push(vat.clone());
for index in self.state.vertex_attributes.len()..pipeline.vertex_attributes.len() {
self.cmd_buffer
.commands
.push(C::UnsetVertexAttribute(index as u32));
}
for (&mut (ref mut state_desc, _), pipe_desc) in self
if self
.private_caps
.contains(super::PrivateCapability::VERTEX_BUFFER_LAYOUT)
{
for vat in pipeline.vertex_attributes.iter() {
let vb = &pipeline.vertex_buffers[vat.buffer_index as usize];
// set the layout
self.cmd_buffer.commands.push(C::SetVertexAttribute {
buffer: None,
buffer_desc: vb.clone(),
attribute_desc: vat.clone(),
});
}
} else {
self.state.dirty_vbuf_mask = 0;
// copy vertex attributes
for vat in pipeline.vertex_attributes.iter() {
//Note: we can invalidate more carefully here.
self.state.dirty_vbuf_mask |= 1 << vat.buffer_index;
self.state.vertex_attributes.push(vat.clone());
}
}
self.state.instance_vbuf_mask = 0;
// copy vertex state
for (index, (&mut (ref mut state_desc, _), pipe_desc)) in self
.state
.vertex_buffers
.iter_mut()
.zip(pipeline.vertex_buffers.iter())
.enumerate()
{
state_desc.step = pipe_desc.step;
state_desc.stride = pipe_desc.stride;
if pipe_desc.step == wgt::InputStepMode::Instance {
self.state.instance_vbuf_mask |= 1 << index;
}
if state_desc != pipe_desc {
self.state.dirty_vbuf_mask |= 1 << index;
*state_desc = pipe_desc.clone();
}
}
self.set_pipeline_inner(&pipeline.inner);
// set primitive state
let prim_state = conv::map_primitive_state(&pipeline.primitive);
if prim_state != self.state.primitive {
@ -703,8 +758,8 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
index: u32,
binding: crate::BufferBinding<'a, super::Api>,
) {
self.state.dirty |= Dirty::VERTEX_BUFFERS;
let vb = &mut self.state.vertex_buffers[index as usize].1;
self.state.dirty_vbuf_mask |= 1 << index;
let (_, ref mut vb) = self.state.vertex_buffers[index as usize];
vb.raw = binding.buffer.raw;
vb.offset = binding.offset;
}
@ -854,7 +909,6 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.cmd_buffer.commands.push(C::PopDebugGroup);
self.state.has_pass_label = false;
}
self.state.dirty = Dirty::empty();
}
unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) {

View File

@ -1,3 +1,61 @@
/*!
# OpenGL ES3 API (aka GLES3).
Designed to work on Linux and Android, with context provided by EGL.
## Texture views
GLES3 doesn't really have separate texture view objects. We have to remember the
original texture and the sub-range into it. The problem, however, is that there is
no way to expose a subset of array layers or mip levels of a sampled texture.
## Binding model
The binding model is very different from WebGPU's, especially with regard to samplers.
GLES3 has sampler objects, but they aren't separately bindable to the shaders.
Each sampled texture is exposed to the shader as a combined texture-sampler binding.
When building the pipeline layout, we linearize binding entries based on the groups
(uniform/storage buffers, uniform/storage textures), and record the mapping into
`BindGroupLayoutInfo`.
When a pipeline gets created, we track all the texture-sampler associations
from their static use in the shader.
So far, we support at most one sampler used with each texture. The linear index
of this sampler is stored per texture slot in the `SamplerBindMap` array.
The texture-sampler pairs can get invalidated in two places:
- when a new pipeline is set, we update the linear indices of associated samplers
- when a new bind group is set, we update both the textures and the samplers
We expect that the changes to sampler states between any two pipelines of the same layout
will be minimal, if any.
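A simplified sketch of this bookkeeping (the type name is real, but the slot count and the merge helper below are editorial assumptions):

// Hypothetical simplification: one optional linear sampler index per texture slot.
type SamplerBindMap = [Option<u8>; 16];

// Merge the incoming associations, touching only the slots that changed.
fn merge(current: &mut SamplerBindMap, incoming: &SamplerBindMap) {
    for (cur, inc) in current.iter_mut().zip(incoming) {
        if cur != inc {
            *cur = *inc; // this texture slot needs its sampler re-bound
        }
    }
}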
## Vertex data
Generally, vertex buffers are marked as dirty and lazily bound on draw.
GLES3 doesn't support "base instance" semantics. However, it's easy to emulate,
since we are forced to do late binding anyway: we just adjust the offsets
into the vertex data.
### Old path
In GLES-3.0 and WebGL2, the vertex buffer layout is provided
together with the actual buffer binding.
We invalidate the attributes on any vertex buffer change and re-bind them.
### New path
In GLES-3.1 and higher, the vertex buffer layout can be declared separately
from the vertex data itself. This mostly matches WebGPU; however, there is a catch:
`stride` needs to be specified with the data, not as part of the layout.
To address this, we invalidate the vertex buffers based on:
- whether or not `start_instance` is used
- whether the stride has changed
*/
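To make the two paths concrete, here is a minimal editorial sketch of the raw `glow` calls involved for a single `float4` attribute; the attribute location, binding index, stride, and offset values are hypothetical placeholders:

use glow::HasContext;

// Old path (GLES 3.0 / WebGL2): layout and data travel together, so any
// vertex buffer change forces the attribute pointer to be respecified.
unsafe fn bind_old(gl: &glow::Context, buffer: glow::Buffer, offset: i32, stride: i32) {
    gl.bind_buffer(glow::ARRAY_BUFFER, Some(buffer));
    gl.enable_vertex_attrib_array(0);
    gl.vertex_attrib_pointer_f32(0, 4, glow::FLOAT, true, stride, offset);
}

// New path (GLES 3.1+): the layout is declared once (e.g. at pipeline bind),
// while the data, together with its stride (the catch mentioned above),
// is bound separately and can change without touching the attribute format.
unsafe fn bind_new(gl: &glow::Context, buffer: glow::Buffer, offset: i32, stride: i32) {
    gl.enable_vertex_attrib_array(0);
    gl.vertex_attrib_format_f32(0, 4, glow::FLOAT, true, 0);
    gl.vertex_attrib_binding(0, 0); // attribute 0 sources binding slot 0
    gl.bind_vertex_buffer(0, Some(buffer), offset, stride);
}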
#[cfg(not(target_arch = "wasm32"))]
mod egl;
@ -60,6 +118,8 @@ bitflags::bitflags! {
const SHADER_TEXTURE_SHADOW_LOD = 0x0002;
/// Support memory barriers.
const MEMORY_BARRIERS = 0x0004;
/// Vertex buffer layouts separate from the data.
const VERTEX_BUFFER_LAYOUT = 0x0008;
}
}
@ -254,7 +314,7 @@ struct ImageBinding {
format: u32,
}
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug, Default, PartialEq)]
struct VertexBufferDesc {
step: wgt::InputStepMode,
stride: u32,
@ -534,10 +594,16 @@ enum Command {
SetDepthBias(wgt::DepthBiasState),
ConfigureDepthStencil(crate::FormatAspect),
SetVertexAttribute {
buffer: BufferBinding,
buffer: Option<glow::Buffer>,
buffer_desc: VertexBufferDesc,
attribute_desc: AttributeDesc,
},
UnsetVertexAttribute(u32),
SetVertexBuffer {
index: u32,
buffer: BufferBinding,
buffer_desc: VertexBufferDesc,
},
SetProgram(glow::Program),
SetPrimitive(PrimitiveState),
SetBlendConstant([f32; 4]),

View File

@ -590,31 +590,69 @@ impl super::Queue {
gl.stencil_op_separate(face, ops.fail, ops.depth_fail, ops.pass);
}
C::SetVertexAttribute {
buffer,
ref buffer_desc,
ref buffer,
attribute_desc: ref vat,
} => {
gl.bind_buffer(glow::ARRAY_BUFFER, Some(buffer.raw));
let offset = vat.offset as i32 + buffer.offset as i32;
match vat.format_desc.attrib_kind {
super::VertexAttribKind::Float => gl.vertex_attrib_pointer_f32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
true, // always normalized
buffer_desc.stride as i32,
offset,
),
super::VertexAttribKind::Integer => gl.vertex_attrib_pointer_i32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
buffer_desc.stride as i32,
offset,
),
}
gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32);
gl.bind_buffer(glow::ARRAY_BUFFER, buffer);
gl.enable_vertex_attrib_array(vat.location);
if buffer.is_none() {
match vat.format_desc.attrib_kind {
super::VertexAttribKind::Float => gl.vertex_attrib_format_f32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
true, // always normalized
vat.offset,
),
super::VertexAttribKind::Integer => gl.vertex_attrib_format_i32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
vat.offset,
),
}
//Note: there is apparently a bug on AMD 3500U:
// this call is ignored if the current array is disabled.
gl.vertex_attrib_binding(vat.location, vat.buffer_index);
} else {
match vat.format_desc.attrib_kind {
super::VertexAttribKind::Float => gl.vertex_attrib_pointer_f32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
true, // always normalized
buffer_desc.stride as i32,
vat.offset as i32,
),
super::VertexAttribKind::Integer => gl.vertex_attrib_pointer_i32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
buffer_desc.stride as i32,
vat.offset as i32,
),
}
gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32);
}
}
C::UnsetVertexAttribute(location) => {
gl.disable_vertex_attrib_array(location);
}
C::SetVertexBuffer {
index,
ref buffer,
ref buffer_desc,
} => {
gl.vertex_binding_divisor(index, buffer_desc.step as u32);
gl.bind_vertex_buffer(
index,
Some(buffer.raw),
buffer.offset as i32,
buffer_desc.stride as i32,
);
}
C::SetDepth(ref depth) => {
gl.depth_func(depth.function);

View File

@ -70,7 +70,8 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
println!("\tDownlevel Properties:");
let wgpu::DownlevelCapabilities {
shader_model,
flags
limits: _,
flags,
} = downlevel;
println!("\t\tShader Model: {:?}", shader_model);
for i in 0..(size_of::<wgpu::DownlevelFlags>() * 8) {

View File

@ -586,11 +586,24 @@ impl Default for Limits {
}
}
/// Represents the set of additional limits on an adapter,
/// which apply when running on downlevel backends.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DownlevelLimits {}
impl Default for DownlevelLimits {
fn default() -> Self {
DownlevelLimits {}
}
}
/// Lists various ways the underlying platform does not conform to the WebGPU standard.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DownlevelCapabilities {
/// Combined boolean flags.
pub flags: DownlevelFlags,
/// Additional limits
pub limits: DownlevelLimits,
/// Which collections of features shaders support. Defined in terms of D3D's shader models.
pub shader_model: ShaderModel,
}
@ -599,6 +612,7 @@ impl Default for DownlevelCapabilities {
fn default() -> Self {
Self {
flags: DownlevelFlags::COMPLIANT,
limits: DownlevelLimits::default(),
shader_model: ShaderModel::Sm5,
}
}
@ -609,8 +623,10 @@ impl DownlevelCapabilities {
///
/// If this returns false, some parts of the API will result in validation errors where they would not normally.
/// These parts can be determined by the values in this structure.
pub fn is_webgpu_compliant(self) -> bool {
self.flags.contains(DownlevelFlags::COMPLIANT) && self.shader_model >= ShaderModel::Sm5
pub fn is_webgpu_compliant(&self) -> bool {
self.flags.contains(DownlevelFlags::COMPLIANT)
&& self.limits == DownlevelLimits::default()
&& self.shader_model >= ShaderModel::Sm5
}
}
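As a usage sketch on the caller's side (editorial; assuming the `wgpu` adapter API of this era, where the accessor is `get_downlevel_properties`):

fn warn_if_downlevel(adapter: &wgpu::Adapter) {
    let caps = adapter.get_downlevel_properties();
    if !caps.is_webgpu_compliant() {
        // Parts of the API may now produce validation errors; the exact set
        // can be determined from the flags, limits, and shader model.
        log::warn!("running on a downlevel adapter: {:?}", caps);
    }
}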

View File

@ -66,6 +66,7 @@ pub fn lowest_reasonable_limits() -> Limits {
fn lowest_downlevel_properties() -> DownlevelCapabilities {
DownlevelCapabilities {
flags: wgt::DownlevelFlags::empty(),
limits: wgt::DownlevelLimits {},
shader_model: wgt::ShaderModel::Sm2,
}
}