diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs
index 66c0965fa..0acba4a77 100644
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@@ -1171,6 +1171,7 @@ impl Global {
                     .inputs
                     .extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots));
                 let vertex_state = &mut state.vertex.inputs[slot as usize];
+                //TODO: where are we checking that the offset is in bounds?
                 vertex_state.total_size = match size {
                     Some(s) => s.get(),
                     None => buffer.size - offset,
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index 56ea142f9..4d972ad68 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -2518,7 +2518,7 @@ impl Global {
         let (device_guard, _) = hub.devices.read(&mut token);
         let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?;

-        Ok(device.downlevel)
+        Ok(device.downlevel.clone())
     }

     pub fn device_create_buffer(
@@ -3640,7 +3640,7 @@ impl Global {
                 encoder,
                 dev_stored,
                 device.limits.clone(),
-                device.downlevel,
+                device.downlevel.clone(),
                 device.features,
                 #[cfg(feature = "trace")]
                 device.trace.is_some(),
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 6a16a6633..2d6cd1368 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -216,6 +216,7 @@ impl Adapter {
                 missing_flags,
                 DOWNLEVEL_WARNING_MESSAGE
             );
+            log::info!("{:#?}", caps.downlevel);
         }

         // Verify feature preconditions
@@ -257,7 +258,7 @@ impl Adapter {
                     ref_count: self.life_guard.add_ref(),
                 },
                 caps.alignments.clone(),
-                caps.downlevel,
+                caps.downlevel.clone(),
                 desc,
                 trace_path,
             )
@@ -658,7 +659,7 @@ impl Global {
         let (adapter_guard, _) = hub.adapters.read(&mut token);
         adapter_guard
             .get(adapter_id)
-            .map(|adapter| adapter.raw.capabilities.downlevel)
+            .map(|adapter| adapter.raw.capabilities.downlevel.clone())
             .map_err(|_| InvalidAdapter)
     }
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index 1a1717083..9230da3d4 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -261,6 +261,12 @@ impl super::Adapter {
             extensions.contains("GL_EXT_texture_shadow_lod"),
         );
         private_caps.set(super::PrivateCapability::MEMORY_BARRIERS, ver >= (3, 1));
+        private_caps.set(
+            super::PrivateCapability::VERTEX_BUFFER_LAYOUT,
+            ver >= (3, 1),
+        );
+
+        let downlevel_limits = wgt::DownlevelLimits {};

         Some(crate::ExposedAdapter {
             adapter: super::Adapter {
@@ -276,6 +282,7 @@ impl super::Adapter {
                 limits,
                 downlevel: wgt::DownlevelCapabilities {
                     flags: downlevel_flags,
+                    limits: downlevel_limits,
                     shader_model: wgt::ShaderModel::Sm5,
                 },
                 alignments: crate::Alignments {
diff --git a/wgpu-hal/src/gles/command.rs b/wgpu-hal/src/gles/command.rs
index 8ba5fbebb..859fc7e10 100644
--- a/wgpu-hal/src/gles/command.rs
+++ b/wgpu-hal/src/gles/command.rs
@@ -2,13 +2,6 @@ use super::{conv, Command as C};
 use arrayvec::ArrayVec;
 use std::{mem, ops::Range};

-bitflags::bitflags! {
-    #[derive(Default)]
-    struct Dirty: u32 {
-        const VERTEX_BUFFERS = 0x0001;
-    }
-}
-
 #[derive(Clone, Copy, Debug, Default)]
 struct TextureSlotDesc {
     tex_target: super::BindTarget,
@@ -32,7 +25,8 @@ pub(super) struct State {
     resolve_attachments: ArrayVec<[(u32, super::TextureView); crate::MAX_COLOR_TARGETS]>,
     invalidate_attachments: ArrayVec<[u32; crate::MAX_COLOR_TARGETS + 2]>,
     has_pass_label: bool,
-    dirty: Dirty,
+    instance_vbuf_mask: usize,
+    dirty_vbuf_mask: usize,
 }

 impl super::CommandBuffer {
@@ -75,21 +69,48 @@ impl super::CommandEncoder {
         }
     }

-    fn rebind_vertex_attributes(&mut self, first_instance: u32) {
-        for attribute in self.state.vertex_attributes.iter() {
-            let (buffer_desc, buffer) =
-                self.state.vertex_buffers[attribute.buffer_index as usize].clone();
-
-            let mut attribute_desc = attribute.clone();
-            if buffer_desc.step == wgt::InputStepMode::Instance {
-                attribute_desc.offset += buffer_desc.stride * first_instance;
+    fn rebind_vertex_data(&mut self, first_instance: u32) {
+        if self
+            .private_caps
+            .contains(super::PrivateCapability::VERTEX_BUFFER_LAYOUT)
+        {
+            for (index, &(ref vb_desc, ref vb)) in self.state.vertex_buffers.iter().enumerate() {
+                if self.state.dirty_vbuf_mask & (1 << index) == 0 {
+                    continue;
+                }
+                let instance_offset = match vb_desc.step {
+                    wgt::InputStepMode::Vertex => 0,
+                    wgt::InputStepMode::Instance => first_instance * vb_desc.stride,
+                };
+                self.cmd_buffer.commands.push(C::SetVertexBuffer {
+                    index: index as u32,
+                    buffer: super::BufferBinding {
+                        raw: vb.raw,
+                        offset: vb.offset + instance_offset as wgt::BufferAddress,
+                    },
+                    buffer_desc: vb_desc.clone(),
+                });
             }
+        } else {
+            for attribute in self.state.vertex_attributes.iter() {
+                if self.state.dirty_vbuf_mask & (1 << attribute.buffer_index) == 0 {
+                    continue;
+                }
+                let (buffer_desc, buffer) =
+                    self.state.vertex_buffers[attribute.buffer_index as usize].clone();

-            self.cmd_buffer.commands.push(C::SetVertexAttribute {
-                buffer_desc,
-                buffer,
-                attribute_desc,
-            });
+                let mut attribute_desc = attribute.clone();
+                attribute_desc.offset += buffer.offset as u32;
+                if buffer_desc.step == wgt::InputStepMode::Instance {
+                    attribute_desc.offset += buffer_desc.stride * first_instance;
+                }
+
+                self.cmd_buffer.commands.push(C::SetVertexAttribute {
+                    buffer: Some(buffer.raw),
+                    buffer_desc,
+                    attribute_desc,
+                });
+            }
         }
     }
@@ -111,11 +132,13 @@ impl super::CommandEncoder {
     }

     fn prepare_draw(&mut self, first_instance: u32) {
         if first_instance != 0 {
-            self.rebind_vertex_attributes(first_instance);
-            self.state.dirty.set(Dirty::VERTEX_BUFFERS, true);
-        } else if self.state.dirty.contains(Dirty::VERTEX_BUFFERS) {
-            self.rebind_vertex_attributes(0);
-            self.state.dirty.set(Dirty::VERTEX_BUFFERS, false);
+            self.state.dirty_vbuf_mask = self.state.instance_vbuf_mask;
+        }
+        if self.state.dirty_vbuf_mask != 0 {
+            self.rebind_vertex_data(first_instance);
+            if first_instance == 0 {
+                self.state.dirty_vbuf_mask = 0;
+            }
         }
     }
@@ -488,7 +511,8 @@ impl crate::CommandEncoder for super::CommandEncoder {
             self.cmd_buffer.commands.push(C::PopDebugGroup);
             self.state.has_pass_label = false;
         }
-        self.state.dirty = Dirty::empty();
+        self.state.instance_vbuf_mask = 0;
+        self.state.dirty_vbuf_mask = 0;
         self.state.color_targets.clear();
         self.state.vertex_attributes.clear();
         self.state.primitive = super::PrimitiveState::default();
@@ -591,25 +615,56 @@
     unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) {
         self.state.topology = conv::map_primitive_topology(pipeline.primitive.topology);
-        self.state.dirty |= Dirty::VERTEX_BUFFERS;
-        self.set_pipeline_inner(&pipeline.inner);
-
-        // set vertex state
-        self.state.vertex_attributes.clear();
-        for vat in pipeline.vertex_attributes.iter() {
-            self.state.vertex_attributes.push(vat.clone());
+        for index in self.state.vertex_attributes.len()..pipeline.vertex_attributes.len() {
+            self.cmd_buffer
+                .commands
+                .push(C::UnsetVertexAttribute(index as u32));
         }
-        for (&mut (ref mut state_desc, _), pipe_desc) in self
+
+        if self
+            .private_caps
+            .contains(super::PrivateCapability::VERTEX_BUFFER_LAYOUT)
+        {
+            for vat in pipeline.vertex_attributes.iter() {
+                let vb = &pipeline.vertex_buffers[vat.buffer_index as usize];
+                // set the layout
+                self.cmd_buffer.commands.push(C::SetVertexAttribute {
+                    buffer: None,
+                    buffer_desc: vb.clone(),
+                    attribute_desc: vat.clone(),
+                });
+            }
+        } else {
+            self.state.dirty_vbuf_mask = 0;
+            // copy vertex attributes
+            for vat in pipeline.vertex_attributes.iter() {
+                //Note: we can invalidate more carefully here.
+                self.state.dirty_vbuf_mask |= 1 << vat.buffer_index;
+                self.state.vertex_attributes.push(vat.clone());
+            }
+        }
+
+        self.state.instance_vbuf_mask = 0;
+        // copy vertex state
+        for (index, (&mut (ref mut state_desc, _), pipe_desc)) in self
             .state
             .vertex_buffers
             .iter_mut()
             .zip(pipeline.vertex_buffers.iter())
+            .enumerate()
         {
-            state_desc.step = pipe_desc.step;
-            state_desc.stride = pipe_desc.stride;
+            if pipe_desc.step == wgt::InputStepMode::Instance {
+                self.state.instance_vbuf_mask |= 1 << index;
+            }
+            if state_desc != pipe_desc {
+                self.state.dirty_vbuf_mask |= 1 << index;
+                *state_desc = pipe_desc.clone();
+            }
         }

+        self.set_pipeline_inner(&pipeline.inner);
+
         // set primitive state
         let prim_state = conv::map_primitive_state(&pipeline.primitive);
         if prim_state != self.state.primitive {
@@ -703,8 +758,8 @@ impl crate::CommandEncoder for super::CommandEncoder {
         index: u32,
         binding: crate::BufferBinding<'a, super::Api>,
     ) {
-        self.state.dirty |= Dirty::VERTEX_BUFFERS;
-        let vb = &mut self.state.vertex_buffers[index as usize].1;
+        self.state.dirty_vbuf_mask |= 1 << index;
+        let (_, ref mut vb) = self.state.vertex_buffers[index as usize];
         vb.raw = binding.buffer.raw;
         vb.offset = binding.offset;
     }
@@ -854,7 +909,6 @@ impl crate::CommandEncoder for super::CommandEncoder {
             self.cmd_buffer.commands.push(C::PopDebugGroup);
             self.state.has_pass_label = false;
         }
-        self.state.dirty = Dirty::empty();
     }

     unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) {
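Note: the `prepare_draw` rework above replaces the single `VERTEX_BUFFERS` dirty bit with two per-slot bitmasks. `dirty_vbuf_mask` tracks which vertex-buffer slots need (re)binding, while `instance_vbuf_mask` remembers which slots step per instance, so a non-zero `first_instance` can force just those slots to re-bind at shifted offsets. A minimal standalone sketch of the pattern (the `rebind` callback stands in for `rebind_vertex_data`; the patch assigns the mask directly, while `|=` here keeps any already-dirty slots):

```rust
struct VertexMasks {
    dirty_vbuf_mask: usize,    // slots whose binding must be re-issued
    instance_vbuf_mask: usize, // slots with per-instance step mode
}

impl VertexMasks {
    fn prepare_draw(&mut self, first_instance: u32, mut rebind: impl FnMut(usize, u32)) {
        if first_instance != 0 {
            // Instance-rate buffers must re-bind with a shifted offset.
            self.dirty_vbuf_mask |= self.instance_vbuf_mask;
        }
        if self.dirty_vbuf_mask != 0 {
            for index in 0..usize::BITS as usize {
                if self.dirty_vbuf_mask & (1 << index) != 0 {
                    rebind(index, first_instance);
                }
            }
            // Only a draw with `first_instance == 0` leaves the bindings in
            // canonical state; otherwise the slots stay dirty for the next draw.
            if first_instance == 0 {
                self.dirty_vbuf_mask = 0;
            }
        }
    }
}
```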
diff --git a/wgpu-hal/src/gles/mod.rs b/wgpu-hal/src/gles/mod.rs
index 7fc8d9ecc..4258e585b 100644
--- a/wgpu-hal/src/gles/mod.rs
+++ b/wgpu-hal/src/gles/mod.rs
@@ -1,3 +1,61 @@
+/*!
+# OpenGL ES3 API (aka GLES3).
+
+Designed to work on Linux and Android, with context provided by EGL.
+
+## Texture views
+
+GLES3 doesn't really have separate texture view objects. We have to remember the
+original texture and the sub-range into it. The problem, however, is that there is
+no way to expose a subset of array layers or mip levels of a sampled texture.
+
+## Binding model
+
+The binding model is very different from WebGPU's, especially with regard to samplers.
+GLES3 has sampler objects, but they aren't separately bindable to the shaders.
+Each sampled texture is exposed to the shader as a combined texture-sampler binding.
+
+When building the pipeline layout, we linearize binding entries based on the groups
+(uniform/storage buffers, uniform/storage textures), and record the mapping into
+`BindGroupLayoutInfo`.
+When a pipeline gets created, we track all the texture-sampler associations
+from their static use in the shader.
+We only support at most one sampler used with each texture so far. The linear index
+of this sampler is stored per texture slot in the `SamplerBindMap` array.
+
+The texture-sampler pairs get potentially invalidated in two places:
+  - when a new pipeline is set, we update the linear indices of associated samplers
+  - when a new bind group is set, we update both the textures and the samplers
+
+We expect that the changes to sampler states between any two pipelines of the same layout
+will be minimal, if any.
+
+## Vertex data
+
+Generally, vertex buffers are marked as dirty and lazily bound on draw.
+
+GLES3 doesn't support "base instance" semantics. However, it's easy to support,
+since we are forced to do late binding anyway. We just adjust the offsets
+into the vertex data.
+
+### Old path
+
+In GLES-3.0 and WebGL2, the vertex buffer layout is provided
+together with the actual buffer binding.
+We invalidate the attributes on any vertex buffer change, and re-bind them.
+
+### New path
+
+In GLES-3.1 and higher, the vertex buffer layout can be declared separately
+from the vertex data itself. This mostly matches WebGPU; however, there is a catch:
+`stride` needs to be specified with the data, not as a part of the layout.
+
+To address this, we invalidate the vertex buffers based on:
+  - whether `start_instance` is used, and
+  - whether the stride has changed
+
+*/
+
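To make the two documented paths concrete, here is a hypothetical side-by-side using `glow` (the GL bindings wgpu-hal builds on); the attribute location, binding slot, format, and stride values are made up for illustration:

```rust
use glow::HasContext;

// Old path (GLES 3.0 / WebGL2): layout and data travel together. Changing
// the buffer (or emulating `first_instance` via an offset) means
// re-specifying the attribute pointer against the bound ARRAY_BUFFER.
unsafe fn bind_old_path(gl: &glow::Context, vbo: glow::Buffer) {
    gl.bind_buffer(glow::ARRAY_BUFFER, Some(vbo));
    gl.enable_vertex_attrib_array(0);
    gl.vertex_attrib_pointer_f32(0, 4, glow::FLOAT, true, 16, 0);
    gl.vertex_attrib_divisor(0, 1); // instance-rate attribute
}

// New path (GLES 3.1+): the format is declared once against a binding slot;
// later only the (buffer, offset, stride) triple is rebound. Note that the
// stride still accompanies the data, which is why a stride change has to
// re-dirty the buffer slot.
unsafe fn bind_new_path(gl: &glow::Context, vbo: glow::Buffer) {
    gl.enable_vertex_attrib_array(0);
    gl.vertex_attrib_format_f32(0, 4, glow::FLOAT, true, 0);
    gl.vertex_attrib_binding(0, 0);
    gl.vertex_binding_divisor(0, 1); // instance-rate binding slot
    gl.bind_vertex_buffer(0, Some(vbo), 0, 16);
}
```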
 #[cfg(not(target_arch = "wasm32"))]
 mod egl;

@@ -60,6 +118,8 @@ bitflags::bitflags! {
         const SHADER_TEXTURE_SHADOW_LOD = 0x0002;
         /// Support memory barriers.
         const MEMORY_BARRIERS = 0x0004;
+        /// Vertex buffer layouts separate from the data.
+        const VERTEX_BUFFER_LAYOUT = 0x0008;
     }
 }

@@ -254,7 +314,7 @@ struct ImageBinding {
     format: u32,
 }

-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug, Default, PartialEq)]
 struct VertexBufferDesc {
     step: wgt::InputStepMode,
     stride: u32,
@@ -534,10 +594,16 @@ enum Command {
     SetDepthBias(wgt::DepthBiasState),
     ConfigureDepthStencil(crate::FormatAspect),
     SetVertexAttribute {
-        buffer: BufferBinding,
+        buffer: Option<glow::Buffer>,
         buffer_desc: VertexBufferDesc,
         attribute_desc: AttributeDesc,
     },
+    UnsetVertexAttribute(u32),
+    SetVertexBuffer {
+        index: u32,
+        buffer: BufferBinding,
+        buffer_desc: VertexBufferDesc,
+    },
     SetProgram(glow::Program),
     SetPrimitive(PrimitiveState),
     SetBlendConstant([f32; 4]),
diff --git a/wgpu-hal/src/gles/queue.rs b/wgpu-hal/src/gles/queue.rs
index 04e34d48c..65595a352 100644
--- a/wgpu-hal/src/gles/queue.rs
+++ b/wgpu-hal/src/gles/queue.rs
@@ -590,31 +590,69 @@ impl super::Queue {
                 gl.stencil_op_separate(face, ops.fail, ops.depth_fail, ops.pass);
             }
             C::SetVertexAttribute {
+                buffer,
                 ref buffer_desc,
-                ref buffer,
                 attribute_desc: ref vat,
             } => {
-                gl.bind_buffer(glow::ARRAY_BUFFER, Some(buffer.raw));
-                let offset = vat.offset as i32 + buffer.offset as i32;
-                match vat.format_desc.attrib_kind {
-                    super::VertexAttribKind::Float => gl.vertex_attrib_pointer_f32(
-                        vat.location,
-                        vat.format_desc.element_count,
-                        vat.format_desc.element_format,
-                        true, // always normalized
-                        buffer_desc.stride as i32,
-                        offset,
-                    ),
-                    super::VertexAttribKind::Integer => gl.vertex_attrib_pointer_i32(
-                        vat.location,
-                        vat.format_desc.element_count,
-                        vat.format_desc.element_format,
-                        buffer_desc.stride as i32,
-                        offset,
-                    ),
-                }
-                gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32);
+                gl.bind_buffer(glow::ARRAY_BUFFER, buffer);
                 gl.enable_vertex_attrib_array(vat.location);
+
+                if buffer.is_none() {
+                    match vat.format_desc.attrib_kind {
+                        super::VertexAttribKind::Float => gl.vertex_attrib_format_f32(
+                            vat.location,
+                            vat.format_desc.element_count,
+                            vat.format_desc.element_format,
+                            true, // always normalized
+                            vat.offset,
+                        ),
+                        super::VertexAttribKind::Integer => gl.vertex_attrib_format_i32(
+                            vat.location,
+                            vat.format_desc.element_count,
+                            vat.format_desc.element_format,
+                            vat.offset,
+                        ),
+                    }
+
+                    //Note: there is apparently a bug on AMD 3500U:
+                    // this call is ignored if the current array is disabled.
+                    gl.vertex_attrib_binding(vat.location, vat.buffer_index);
+                } else {
+                    match vat.format_desc.attrib_kind {
+                        super::VertexAttribKind::Float => gl.vertex_attrib_pointer_f32(
+                            vat.location,
+                            vat.format_desc.element_count,
+                            vat.format_desc.element_format,
+                            true, // always normalized
+                            buffer_desc.stride as i32,
+                            vat.offset as i32,
+                        ),
+                        super::VertexAttribKind::Integer => gl.vertex_attrib_pointer_i32(
+                            vat.location,
+                            vat.format_desc.element_count,
+                            vat.format_desc.element_format,
+                            buffer_desc.stride as i32,
+                            vat.offset as i32,
+                        ),
+                    }
+                    gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32);
+                }
+            }
+            C::UnsetVertexAttribute(location) => {
+                gl.disable_vertex_attrib_array(location);
+            }
+            C::SetVertexBuffer {
+                index,
+                ref buffer,
+                ref buffer_desc,
+            } => {
+                gl.vertex_binding_divisor(index, buffer_desc.step as u32);
+                gl.bind_vertex_buffer(
+                    index,
+                    Some(buffer.raw),
+                    buffer.offset as i32,
+                    buffer_desc.stride as i32,
+                );
             }
             C::SetDepth(ref depth) => {
                 gl.depth_func(depth.function);
diff --git a/wgpu-info/src/main.rs b/wgpu-info/src/main.rs
index 0f79cfd74..c1cdd4221 100644
--- a/wgpu-info/src/main.rs
+++ b/wgpu-info/src/main.rs
@@ -70,7 +70,8 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
     println!("\tDownlevel Properties:");
     let wgpu::DownlevelCapabilities {
         shader_model,
-        flags
+        limits: _,
+        flags,
     } = downlevel;
     println!("\t\tShader Model: {:?}", shader_model);
     for i in 0..(size_of::<wgpu::DownlevelFlags>() * 8) {
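The wgpu-types hunk below is also why the earlier wgpu-core changes swap `device.downlevel` for `device.downlevel.clone()`: once `DownlevelCapabilities` carries a `DownlevelLimits` field (empty today, but expected to grow non-`Copy` contents), it drops its `Copy` derive. A reduced illustration of the knock-on effect, with stand-in types:

```rust
#[derive(Clone, Debug)] // previously also `Copy`
struct DownlevelCapabilities {
    flags: u32, // stand-in for `DownlevelFlags`
}

struct Device {
    downlevel: DownlevelCapabilities,
}

impl Device {
    // With `Copy`, returning `self.downlevel` compiled as an implicit copy;
    // without it, moving out of `&self` is an error, so we clone explicitly.
    fn downlevel_properties(&self) -> DownlevelCapabilities {
        self.downlevel.clone()
    }
}
```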
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 94016d881..a732ef7aa 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -586,11 +586,24 @@ impl Default for Limits {
     }
 }

+/// Represents the set of additional limits on an adapter,
+/// which apply when running on downlevel backends.
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct DownlevelLimits {}
+
+impl Default for DownlevelLimits {
+    fn default() -> Self {
+        DownlevelLimits {}
+    }
+}
+
 /// Lists various ways the underlying platform does not conform to the WebGPU standard.
-#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct DownlevelCapabilities {
     /// Combined boolean flags.
     pub flags: DownlevelFlags,
+    /// Additional limits.
+    pub limits: DownlevelLimits,
     /// Which collections of features shaders support. Defined in terms of D3D's shader models.
     pub shader_model: ShaderModel,
 }
@@ -599,6 +612,7 @@ impl Default for DownlevelCapabilities {
     fn default() -> Self {
         Self {
             flags: DownlevelFlags::COMPLIANT,
+            limits: DownlevelLimits::default(),
             shader_model: ShaderModel::Sm5,
         }
     }
@@ -609,8 +623,10 @@ impl DownlevelCapabilities {
     ///
     /// If this returns false, some parts of the API will result in validation errors where they would not normally.
     /// These parts can be determined by the values in this structure.
-    pub fn is_webgpu_compliant(self) -> bool {
-        self.flags.contains(DownlevelFlags::COMPLIANT) && self.shader_model >= ShaderModel::Sm5
+    pub fn is_webgpu_compliant(&self) -> bool {
+        self.flags.contains(DownlevelFlags::COMPLIANT)
+            && self.limits == DownlevelLimits::default()
+            && self.shader_model >= ShaderModel::Sm5
     }
 }
diff --git a/wgpu/tests/common/mod.rs b/wgpu/tests/common/mod.rs
index 4b4422a5e..330484920 100644
--- a/wgpu/tests/common/mod.rs
+++ b/wgpu/tests/common/mod.rs
@@ -66,6 +66,7 @@ pub fn lowest_reasonable_limits() -> Limits {

 fn lowest_downlevel_properties() -> DownlevelCapabilities {
     DownlevelCapabilities {
         flags: wgt::DownlevelFlags::empty(),
+        limits: wgt::DownlevelLimits {},
         shader_model: wgt::ShaderModel::Sm2,
     }
 }
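For context, this is roughly how a client consumes the reworked capabilities (a sketch against the wgpu API of this era, where `get_downlevel_properties` was the adapter accessor; it was later renamed):

```rust
fn report_downlevel(adapter: &wgpu::Adapter) {
    let caps = adapter.get_downlevel_properties();
    if !caps.is_webgpu_compliant() {
        // On the GLES3 backend this mirrors the new `log::info!` output in
        // `wgpu-core/src/instance.rs`: the flags, limits, and shader model
        // that fall short of full WebGPU semantics.
        println!("downlevel capabilities: {:#?}", caps);
    }
}
```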