hal/gles: totally rework the vertex data binding

Author: Dzmitry Malyshau
Date: 2021-06-29 23:40:14 -04:00 (committed by Dzmitry Malyshau)
parent 40e2c33c6f
commit 4be8864b38
10 changed files with 256 additions and 71 deletions

View File

@ -1171,6 +1171,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
.inputs
.extend(iter::repeat(VertexBufferState::EMPTY).take(empty_slots));
let vertex_state = &mut state.vertex.inputs[slot as usize];
//TODO: where are we checking that the offset is in bounds?
vertex_state.total_size = match size {
Some(s) => s.get(),
None => buffer.size - offset,

View File

@ -2518,7 +2518,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (device_guard, _) = hub.devices.read(&mut token);
let device = device_guard.get(device_id).map_err(|_| InvalidDevice)?;
Ok(device.downlevel)
Ok(device.downlevel.clone())
}
pub fn device_create_buffer<A: HalApi>(
@ -3640,7 +3640,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
encoder,
dev_stored,
device.limits.clone(),
device.downlevel,
device.downlevel.clone(),
device.features,
#[cfg(feature = "trace")]
device.trace.is_some(),

View File

@ -216,6 +216,7 @@ impl<A: HalApi> Adapter<A> {
missing_flags,
DOWNLEVEL_WARNING_MESSAGE
);
log::info!("{:#?}", caps.downlevel);
}
// Verify feature preconditions
@ -257,7 +258,7 @@ impl<A: HalApi> Adapter<A> {
ref_count: self.life_guard.add_ref(),
},
caps.alignments.clone(),
caps.downlevel,
caps.downlevel.clone(),
desc,
trace_path,
)
@ -658,7 +659,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let (adapter_guard, _) = hub.adapters.read(&mut token);
adapter_guard
.get(adapter_id)
.map(|adapter| adapter.raw.capabilities.downlevel)
.map(|adapter| adapter.raw.capabilities.downlevel.clone())
.map_err(|_| InvalidAdapter)
}

View File

@ -261,6 +261,12 @@ impl super::Adapter {
extensions.contains("GL_EXT_texture_shadow_lod"),
);
private_caps.set(super::PrivateCapability::MEMORY_BARRIERS, ver >= (3, 1));
private_caps.set(
super::PrivateCapability::VERTEX_BUFFER_LAYOUT,
ver >= (3, 1),
);
let downlevel_limits = wgt::DownlevelLimits {};
Some(crate::ExposedAdapter {
adapter: super::Adapter {
@ -276,6 +282,7 @@ impl super::Adapter {
limits,
downlevel: wgt::DownlevelCapabilities {
flags: downlevel_flags,
limits: downlevel_limits,
shader_model: wgt::ShaderModel::Sm5,
},
alignments: crate::Alignments {

View File

@ -2,13 +2,6 @@ use super::{conv, Command as C};
use arrayvec::ArrayVec;
use std::{mem, ops::Range};
bitflags::bitflags! {
#[derive(Default)]
struct Dirty: u32 {
const VERTEX_BUFFERS = 0x0001;
}
}
#[derive(Clone, Copy, Debug, Default)]
struct TextureSlotDesc {
tex_target: super::BindTarget,
@ -32,7 +25,8 @@ pub(super) struct State {
resolve_attachments: ArrayVec<[(u32, super::TextureView); crate::MAX_COLOR_TARGETS]>,
invalidate_attachments: ArrayVec<[u32; crate::MAX_COLOR_TARGETS + 2]>,
has_pass_label: bool,
dirty: Dirty,
instance_vbuf_mask: usize,
dirty_vbuf_mask: usize,
}
impl super::CommandBuffer {
@ -75,21 +69,48 @@ impl super::CommandEncoder {
}
}
fn rebind_vertex_attributes(&mut self, first_instance: u32) {
for attribute in self.state.vertex_attributes.iter() {
let (buffer_desc, buffer) =
self.state.vertex_buffers[attribute.buffer_index as usize].clone();
let mut attribute_desc = attribute.clone();
if buffer_desc.step == wgt::InputStepMode::Instance {
attribute_desc.offset += buffer_desc.stride * first_instance;
fn rebind_vertex_data(&mut self, first_instance: u32) {
if self
.private_caps
.contains(super::PrivateCapability::VERTEX_BUFFER_LAYOUT)
{
for (index, &(ref vb_desc, ref vb)) in self.state.vertex_buffers.iter().enumerate() {
if self.state.dirty_vbuf_mask & (1 << index) == 0 {
continue;
}
let instance_offset = match vb_desc.step {
wgt::InputStepMode::Vertex => 0,
wgt::InputStepMode::Instance => first_instance * vb_desc.stride,
};
self.cmd_buffer.commands.push(C::SetVertexBuffer {
index: index as u32,
buffer: super::BufferBinding {
raw: vb.raw,
offset: vb.offset + instance_offset as wgt::BufferAddress,
},
buffer_desc: vb_desc.clone(),
});
}
} else {
for attribute in self.state.vertex_attributes.iter() {
if self.state.dirty_vbuf_mask & (1 << attribute.buffer_index) == 0 {
continue;
}
let (buffer_desc, buffer) =
self.state.vertex_buffers[attribute.buffer_index as usize].clone();
self.cmd_buffer.commands.push(C::SetVertexAttribute {
buffer_desc,
buffer,
attribute_desc,
});
let mut attribute_desc = attribute.clone();
attribute_desc.offset += buffer.offset as u32;
if buffer_desc.step == wgt::InputStepMode::Instance {
attribute_desc.offset += buffer_desc.stride * first_instance;
}
self.cmd_buffer.commands.push(C::SetVertexAttribute {
buffer: Some(buffer.raw),
buffer_desc,
attribute_desc,
});
}
}
}
@ -111,11 +132,13 @@ impl super::CommandEncoder {
fn prepare_draw(&mut self, first_instance: u32) {
if first_instance != 0 {
self.rebind_vertex_attributes(first_instance);
self.state.dirty.set(Dirty::VERTEX_BUFFERS, true);
} else if self.state.dirty.contains(Dirty::VERTEX_BUFFERS) {
self.rebind_vertex_attributes(0);
self.state.dirty.set(Dirty::VERTEX_BUFFERS, false);
self.state.dirty_vbuf_mask = self.state.instance_vbuf_mask;
}
if self.state.dirty_vbuf_mask != 0 {
self.rebind_vertex_data(first_instance);
if first_instance == 0 {
self.state.dirty_vbuf_mask = 0;
}
}
}
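For illustration, here is a hypothetical trace of this logic against a single instance-rate vertex buffer, showing why the dirty mask is only cleared when `first_instance` is zero (editorial sketch, not part of the commit):

// draw(.., first_instance: 3): instance_vbuf_mask is OR-ed into the dirty mask,
// and the buffers are rebound with `offset += 3 * stride`. The mask stays set,
// because the offsets currently bound are skewed by the base instance.
// draw(.., first_instance: 0): the mask is still set, so the buffers are rebound
// with their original offsets, and only then is the mask cleared.
// draw(.., first_instance: 0): the mask is clear; nothing needs rebinding.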
@ -488,7 +511,8 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.cmd_buffer.commands.push(C::PopDebugGroup);
self.state.has_pass_label = false;
}
self.state.dirty = Dirty::empty();
self.state.instance_vbuf_mask = 0;
self.state.dirty_vbuf_mask = 0;
self.state.color_targets.clear();
self.state.vertex_attributes.clear();
self.state.primitive = super::PrimitiveState::default();
@ -591,25 +615,56 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
unsafe fn set_render_pipeline(&mut self, pipeline: &super::RenderPipeline) {
self.state.topology = conv::map_primitive_topology(pipeline.primitive.topology);
self.state.dirty |= Dirty::VERTEX_BUFFERS;
self.set_pipeline_inner(&pipeline.inner);
// set vertex state
self.state.vertex_attributes.clear();
for vat in pipeline.vertex_attributes.iter() {
self.state.vertex_attributes.push(vat.clone());
for index in self.state.vertex_attributes.len()..pipeline.vertex_attributes.len() {
self.cmd_buffer
.commands
.push(C::UnsetVertexAttribute(index as u32));
}
for (&mut (ref mut state_desc, _), pipe_desc) in self
if self
.private_caps
.contains(super::PrivateCapability::VERTEX_BUFFER_LAYOUT)
{
for vat in pipeline.vertex_attributes.iter() {
let vb = &pipeline.vertex_buffers[vat.buffer_index as usize];
// set the layout
self.cmd_buffer.commands.push(C::SetVertexAttribute {
buffer: None,
buffer_desc: vb.clone(),
attribute_desc: vat.clone(),
});
}
} else {
self.state.dirty_vbuf_mask = 0;
// copy vertex attributes
for vat in pipeline.vertex_attributes.iter() {
//Note: we can invalidate more carefully here.
self.state.dirty_vbuf_mask |= 1 << vat.buffer_index;
self.state.vertex_attributes.push(vat.clone());
}
}
self.state.instance_vbuf_mask = 0;
// copy vertex state
for (index, (&mut (ref mut state_desc, _), pipe_desc)) in self
.state
.vertex_buffers
.iter_mut()
.zip(pipeline.vertex_buffers.iter())
.enumerate()
{
state_desc.step = pipe_desc.step;
state_desc.stride = pipe_desc.stride;
if pipe_desc.step == wgt::InputStepMode::Instance {
self.state.instance_vbuf_mask |= 1 << index;
}
if state_desc != pipe_desc {
self.state.dirty_vbuf_mask |= 1 << index;
*state_desc = pipe_desc.clone();
}
}
self.set_pipeline_inner(&pipeline.inner);
// set primitive state
let prim_state = conv::map_primitive_state(&pipeline.primitive);
if prim_state != self.state.primitive {
@ -703,8 +758,8 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
index: u32,
binding: crate::BufferBinding<'a, super::Api>,
) {
self.state.dirty |= Dirty::VERTEX_BUFFERS;
let vb = &mut self.state.vertex_buffers[index as usize].1;
self.state.dirty_vbuf_mask |= 1 << index;
let (_, ref mut vb) = self.state.vertex_buffers[index as usize];
vb.raw = binding.buffer.raw;
vb.offset = binding.offset;
}
@ -854,7 +909,6 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
self.cmd_buffer.commands.push(C::PopDebugGroup);
self.state.has_pass_label = false;
}
self.state.dirty = Dirty::empty();
}
unsafe fn set_compute_pipeline(&mut self, pipeline: &super::ComputePipeline) {

View File

@ -1,3 +1,61 @@
/*!
# OpenGL ES3 API (aka GLES3).
Designed to work on Linux and Android, with context provided by EGL.
## Texture views
GLES3 doesn't really have separate texture view objects. We have to remember the
original texture and the sub-range into it. The problem, however, is that there is
no way to expose a subset of array layers or mip levels of a sampled texture.
## Binding model
The binding model is very different from WebGPU's, especially with regard to samplers.
GLES3 has sampler objects, but they aren't separately bindable to the shaders.
Each sampled texture is exposed to the shader as a combined texture-sampler binding.
When building the pipeline layout, we linearize binding entries based on the groups
(uniform/storage buffers, uniform/storage textures), and record the mapping into
`BindGroupLayoutInfo`.
When a pipeline gets created, we track all the texture-sampler associations
from their static use in the shader.
So far, we support at most one sampler used with each texture. The linear index
of this sampler is stored per texture slot in the `SamplerBindMap` array.
The texture-sampler pairs can get invalidated in two places:
- when a new pipeline is set, we update the linear indices of associated samplers
- when a new bind group is set, we update both the textures and the samplers
We expect that the changes to sampler states between any two pipelines of the same layout
will be minimal, if any.
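A simplified sketch of this bookkeeping (the type name is real, but the slot count and the merge helper below are editorial assumptions):

// Hypothetical simplification: one optional linear sampler index per texture slot.
type SamplerBindMap = [Option<u8>; 16];

// Merge the incoming associations, touching only the slots that changed.
fn merge(current: &mut SamplerBindMap, incoming: &SamplerBindMap) {
    for (cur, inc) in current.iter_mut().zip(incoming) {
        if cur != inc {
            *cur = *inc; // this texture slot needs its sampler re-bound
        }
    }
}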
## Vertex data
Generally, vertex buffers are marked as dirty and lazily bound on draw.
GLES3 doesn't support "base instance" semantics. However, it's easy to emulate,
since we are forced to do late binding anyway: we just adjust the offsets
into the vertex data.
### Old path
In GLES-3.0 and WebGL2, the vertex buffer layout is provided
together with the actual buffer binding.
We invalidate the attributes on any vertex buffer change and re-bind them.
### New path
In GLES-3.1 and higher, the vertex buffer layout can be declared separately
from the vertex data itself. This mostly matches WebGPU; however, there is a catch:
`stride` needs to be specified with the data, not as part of the layout.
To address this, we invalidate the vertex buffers based on:
- whether or not `start_instance` is used
- whether the stride has changed
*/
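To make the two paths concrete, here is a minimal editorial sketch of the raw `glow` calls involved for a single `float4` attribute; the attribute location, binding index, stride, and offset values are hypothetical placeholders:

use glow::HasContext;

// Old path (GLES 3.0 / WebGL2): layout and data travel together, so any
// vertex buffer change forces the attribute pointer to be respecified.
unsafe fn bind_old(gl: &glow::Context, buffer: glow::Buffer, offset: i32, stride: i32) {
    gl.bind_buffer(glow::ARRAY_BUFFER, Some(buffer));
    gl.enable_vertex_attrib_array(0);
    gl.vertex_attrib_pointer_f32(0, 4, glow::FLOAT, true, stride, offset);
}

// New path (GLES 3.1+): the layout is declared once (e.g. at pipeline bind),
// while the data, together with its stride (the catch mentioned above),
// is bound separately and can change without touching the attribute format.
unsafe fn bind_new(gl: &glow::Context, buffer: glow::Buffer, offset: i32, stride: i32) {
    gl.enable_vertex_attrib_array(0);
    gl.vertex_attrib_format_f32(0, 4, glow::FLOAT, true, 0);
    gl.vertex_attrib_binding(0, 0); // attribute 0 sources binding slot 0
    gl.bind_vertex_buffer(0, Some(buffer), offset, stride);
}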
#[cfg(not(target_arch = "wasm32"))]
mod egl;
@ -60,6 +118,8 @@ bitflags::bitflags! {
const SHADER_TEXTURE_SHADOW_LOD = 0x0002;
/// Support memory barriers.
const MEMORY_BARRIERS = 0x0004;
/// Vertex buffer layouts separate from the data.
const VERTEX_BUFFER_LAYOUT = 0x0008;
}
}
@ -254,7 +314,7 @@ struct ImageBinding {
format: u32,
}
#[derive(Clone, Debug, Default)]
#[derive(Clone, Debug, Default, PartialEq)]
struct VertexBufferDesc {
step: wgt::InputStepMode,
stride: u32,
@ -534,10 +594,16 @@ enum Command {
SetDepthBias(wgt::DepthBiasState),
ConfigureDepthStencil(crate::FormatAspect),
SetVertexAttribute {
buffer: BufferBinding,
buffer: Option<glow::Buffer>,
buffer_desc: VertexBufferDesc,
attribute_desc: AttributeDesc,
},
UnsetVertexAttribute(u32),
SetVertexBuffer {
index: u32,
buffer: BufferBinding,
buffer_desc: VertexBufferDesc,
},
SetProgram(glow::Program),
SetPrimitive(PrimitiveState),
SetBlendConstant([f32; 4]),

View File

@ -590,31 +590,69 @@ impl super::Queue {
gl.stencil_op_separate(face, ops.fail, ops.depth_fail, ops.pass);
}
C::SetVertexAttribute {
buffer,
ref buffer_desc,
ref buffer,
attribute_desc: ref vat,
} => {
gl.bind_buffer(glow::ARRAY_BUFFER, Some(buffer.raw));
let offset = vat.offset as i32 + buffer.offset as i32;
match vat.format_desc.attrib_kind {
super::VertexAttribKind::Float => gl.vertex_attrib_pointer_f32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
true, // always normalized
buffer_desc.stride as i32,
offset,
),
super::VertexAttribKind::Integer => gl.vertex_attrib_pointer_i32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
buffer_desc.stride as i32,
offset,
),
}
gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32);
gl.bind_buffer(glow::ARRAY_BUFFER, buffer);
gl.enable_vertex_attrib_array(vat.location);
if buffer.is_none() {
match vat.format_desc.attrib_kind {
super::VertexAttribKind::Float => gl.vertex_attrib_format_f32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
true, // always normalized
vat.offset,
),
super::VertexAttribKind::Integer => gl.vertex_attrib_format_i32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
vat.offset,
),
}
//Note: there is apparently a bug on AMD 3500U:
// this call is ignored if the current array is disabled.
gl.vertex_attrib_binding(vat.location, vat.buffer_index);
} else {
match vat.format_desc.attrib_kind {
super::VertexAttribKind::Float => gl.vertex_attrib_pointer_f32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
true, // always normalized
buffer_desc.stride as i32,
vat.offset as i32,
),
super::VertexAttribKind::Integer => gl.vertex_attrib_pointer_i32(
vat.location,
vat.format_desc.element_count,
vat.format_desc.element_format,
buffer_desc.stride as i32,
vat.offset as i32,
),
}
gl.vertex_attrib_divisor(vat.location, buffer_desc.step as u32);
}
}
C::UnsetVertexAttribute(location) => {
gl.disable_vertex_attrib_array(location);
}
C::SetVertexBuffer {
index,
ref buffer,
ref buffer_desc,
} => {
gl.vertex_binding_divisor(index, buffer_desc.step as u32);
gl.bind_vertex_buffer(
index,
Some(buffer.raw),
buffer.offset as i32,
buffer_desc.stride as i32,
);
}
C::SetDepth(ref depth) => {
gl.depth_func(depth.function);

View File

@ -70,7 +70,8 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
println!("\tDownlevel Properties:");
let wgpu::DownlevelCapabilities {
shader_model,
flags
limits: _,
flags,
} = downlevel;
println!("\t\tShader Model: {:?}", shader_model);
for i in 0..(size_of::<wgpu::DownlevelFlags>() * 8) {

View File

@ -586,11 +586,24 @@ impl Default for Limits {
}
}
/// Represents the set of additional limits on an adapter,
/// which apply when running on downlevel backends.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DownlevelLimits {}
impl Default for DownlevelLimits {
fn default() -> Self {
DownlevelLimits {}
}
}
/// Lists various ways the underlying platform does not conform to the WebGPU standard.
#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct DownlevelCapabilities {
/// Combined boolean flags.
pub flags: DownlevelFlags,
/// Additional limits
pub limits: DownlevelLimits,
/// Which collections of features shaders support. Defined in terms of D3D's shader models.
pub shader_model: ShaderModel,
}
@ -599,6 +612,7 @@ impl Default for DownlevelCapabilities {
fn default() -> Self {
Self {
flags: DownlevelFlags::COMPLIANT,
limits: DownlevelLimits::default(),
shader_model: ShaderModel::Sm5,
}
}
@ -609,8 +623,10 @@ impl DownlevelCapabilities {
///
/// If this returns false, some parts of the API will result in validation errors where they would not normally.
/// These parts can be determined by the values in this structure.
pub fn is_webgpu_compliant(self) -> bool {
self.flags.contains(DownlevelFlags::COMPLIANT) && self.shader_model >= ShaderModel::Sm5
pub fn is_webgpu_compliant(&self) -> bool {
self.flags.contains(DownlevelFlags::COMPLIANT)
&& self.limits == DownlevelLimits::default()
&& self.shader_model >= ShaderModel::Sm5
}
}
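As a usage sketch on the caller's side (editorial; assuming the `wgpu` adapter API of this era, where the accessor is `get_downlevel_properties`):

fn warn_if_downlevel(adapter: &wgpu::Adapter) {
    let caps = adapter.get_downlevel_properties();
    if !caps.is_webgpu_compliant() {
        // Parts of the API may now produce validation errors; the exact set
        // can be determined from the flags, limits, and shader model.
        log::warn!("running on a downlevel adapter: {:?}", caps);
    }
}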

View File

@ -66,6 +66,7 @@ pub fn lowest_reasonable_limits() -> Limits {
fn lowest_downlevel_properties() -> DownlevelCapabilities {
DownlevelCapabilities {
flags: wgt::DownlevelFlags::empty(),
limits: wgt::DownlevelLimits {},
shader_model: wgt::ShaderModel::Sm2,
}
}