Add an experimental vertex pulling flag to Metal pipelines.

This proves a flag in msl::PipelineOptions that attempts to write all
Metal vertex entry points to use a vertex pulling technique. It does
this by:

1) Forcing the _buffer_sizes structure to be generated for all vertex
entry points. The structure has additional buffer_size members that
contain the byte sizes of the vertex buffers.
2) Adding new args to vertex entry points for the vertex id and/or
the instance id and for the bound buffers. If there is an existing
@builtin(vertex_index) or @builtin(instance_index) param, then no
duplicate arg is created.
3) Adding code at the beginning of the function for vertex entry points
to compare the vertex id or instance id against the lengths of all the
bound buffers, and force an early-exit if the bounds are violated.
4) Extracting the raw bytes from the vertex buffer(s) and unpacking
those bytes into the bound attributes with the expected types.
5) Replacing the varyings input and instead using the unpacked
attributes to fill any structs-as-args that are rebuilt in the entry
point.

A new naga test is added which exercises this flag and demonstrates the
effect of the transform. The msl generated by this test passes
validation.

Eventually this transformation will be the default, always-on behavior
for Metal pipelines, though the flag may remain so that naga
translation tests can be run with and without the tranformation.
This commit is contained in:
Brad Werth 2024-02-06 16:35:17 -08:00 committed by Teodor Tanasoaia
parent 480d4dbd73
commit 9b7a965667
25 changed files with 1540 additions and 71 deletions

View File

@ -114,6 +114,7 @@ pub fn op_webgpu_create_compute_pipeline(
entry_point: compute.entry_point.map(Cow::from), entry_point: compute.entry_point.map(Cow::from),
constants: Cow::Owned(compute.constants.unwrap_or_default()), constants: Cow::Owned(compute.constants.unwrap_or_default()),
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}, },
cache: None, cache: None,
}; };
@ -363,6 +364,7 @@ pub fn op_webgpu_create_render_pipeline(
constants: Cow::Owned(fragment.constants.unwrap_or_default()), constants: Cow::Owned(fragment.constants.unwrap_or_default()),
// Required to be true for WebGPU // Required to be true for WebGPU
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}, },
targets: Cow::Owned(fragment.targets), targets: Cow::Owned(fragment.targets),
}) })
@ -388,6 +390,7 @@ pub fn op_webgpu_create_render_pipeline(
constants: Cow::Owned(args.vertex.constants.unwrap_or_default()), constants: Cow::Owned(args.vertex.constants.unwrap_or_default()),
// Required to be true for WebGPU // Required to be true for WebGPU
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}, },
buffers: Cow::Owned(vertex_buffers), buffers: Cow::Owned(vertex_buffers),
}, },

View File

@ -79,6 +79,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md).
- Add and fix minimum Metal version checks for optional functionality. ([#2486](https://github.com/gfx-rs/naga/pull/2486)) **@teoxoy** - Add and fix minimum Metal version checks for optional functionality. ([#2486](https://github.com/gfx-rs/naga/pull/2486)) **@teoxoy**
- Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop** - Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop**
- Add experimental vertex pulling transform flag. ([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth**
#### GLSL-OUT #### GLSL-OUT

View File

@ -222,6 +222,113 @@ impl Default for Options {
} }
} }
/// Corresponds to [WebGPU `GPUVertexFormat`](
/// https://gpuweb.github.io/gpuweb/#enumdef-gpuvertexformat).
#[repr(u32)]
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
pub enum VertexFormat {
/// Two unsigned bytes (u8). `vec2<u32>` in shaders.
Uint8x2 = 0,
/// Four unsigned bytes (u8). `vec4<u32>` in shaders.
Uint8x4 = 1,
/// Two signed bytes (i8). `vec2<i32>` in shaders.
Sint8x2 = 2,
/// Four signed bytes (i8). `vec4<i32>` in shaders.
Sint8x4 = 3,
/// Two unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec2<f32>` in shaders.
Unorm8x2 = 4,
/// Four unsigned bytes (u8). [0, 255] converted to float [0, 1] `vec4<f32>` in shaders.
Unorm8x4 = 5,
/// Two signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec2<f32>` in shaders.
Snorm8x2 = 6,
/// Four signed bytes (i8). [-127, 127] converted to float [-1, 1] `vec4<f32>` in shaders.
Snorm8x4 = 7,
/// Two unsigned shorts (u16). `vec2<u32>` in shaders.
Uint16x2 = 8,
/// Four unsigned shorts (u16). `vec4<u32>` in shaders.
Uint16x4 = 9,
/// Two signed shorts (i16). `vec2<i32>` in shaders.
Sint16x2 = 10,
/// Four signed shorts (i16). `vec4<i32>` in shaders.
Sint16x4 = 11,
/// Two unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec2<f32>` in shaders.
Unorm16x2 = 12,
/// Four unsigned shorts (u16). [0, 65535] converted to float [0, 1] `vec4<f32>` in shaders.
Unorm16x4 = 13,
/// Two signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec2<f32>` in shaders.
Snorm16x2 = 14,
/// Four signed shorts (i16). [-32767, 32767] converted to float [-1, 1] `vec4<f32>` in shaders.
Snorm16x4 = 15,
/// Two half-precision floats (no Rust equiv). `vec2<f32>` in shaders.
Float16x2 = 16,
/// Four half-precision floats (no Rust equiv). `vec4<f32>` in shaders.
Float16x4 = 17,
/// One single-precision float (f32). `f32` in shaders.
Float32 = 18,
/// Two single-precision floats (f32). `vec2<f32>` in shaders.
Float32x2 = 19,
/// Three single-precision floats (f32). `vec3<f32>` in shaders.
Float32x3 = 20,
/// Four single-precision floats (f32). `vec4<f32>` in shaders.
Float32x4 = 21,
/// One unsigned int (u32). `u32` in shaders.
Uint32 = 22,
/// Two unsigned ints (u32). `vec2<u32>` in shaders.
Uint32x2 = 23,
/// Three unsigned ints (u32). `vec3<u32>` in shaders.
Uint32x3 = 24,
/// Four unsigned ints (u32). `vec4<u32>` in shaders.
Uint32x4 = 25,
/// One signed int (i32). `i32` in shaders.
Sint32 = 26,
/// Two signed ints (i32). `vec2<i32>` in shaders.
Sint32x2 = 27,
/// Three signed ints (i32). `vec3<i32>` in shaders.
Sint32x3 = 28,
/// Four signed ints (i32). `vec4<i32>` in shaders.
Sint32x4 = 29,
/// Three unsigned 10-bit integers and one 2-bit integer, packed into a 32-bit integer (u32). [0, 1024] converted to float [0, 1] `vec4<f32>` in shaders.
#[cfg_attr(feature = "serde", serde(rename = "unorm10-10-10-2"))]
Unorm10_10_10_2 = 34,
}
/// A mapping of vertex buffers and their attributes to shader
/// locations.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
pub struct AttributeMapping {
/// Shader location associated with this attribute
pub shader_location: u32,
/// Offset in bytes from start of vertex buffer structure
pub offset: u32,
/// Format code to help us unpack the attribute into the type
/// used by the shader. Codes correspond to a 0-based index of
/// <https://gpuweb.github.io/gpuweb/#enumdef-gpuvertexformat>.
/// The conversion process is described by
/// <https://gpuweb.github.io/gpuweb/#vertex-processing>.
pub format: VertexFormat,
}
/// A description of a vertex buffer with all the information we
/// need to address the attributes within it.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))]
#[cfg_attr(feature = "deserialize", derive(serde::Deserialize))]
pub struct VertexBufferMapping {
/// Shader location associated with this buffer
pub id: u32,
/// Size of the structure in bytes
pub stride: u32,
/// True if the buffer is indexed by vertex, false if indexed
/// by instance.
pub indexed_by_vertex: bool,
/// Vec of the attributes within the structure
pub attributes: Vec<AttributeMapping>,
}
/// A subset of options that are meant to be changed per pipeline. /// A subset of options that are meant to be changed per pipeline.
#[derive(Debug, Default, Clone)] #[derive(Debug, Default, Clone)]
#[cfg_attr(feature = "serialize", derive(serde::Serialize))] #[cfg_attr(feature = "serialize", derive(serde::Serialize))]
@ -234,6 +341,17 @@ pub struct PipelineOptions {
/// ///
/// Enable this for vertex shaders with point primitive topologies. /// Enable this for vertex shaders with point primitive topologies.
pub allow_and_force_point_size: bool, pub allow_and_force_point_size: bool,
/// If set, when generating the Metal vertex shader, transform it
/// to receive the vertex buffers, lengths, and vertex id as args,
/// and bounds-check the vertex id and use the index into the
/// vertex buffers to access attributes, rather than using Metal's
/// [[stage-in]] assembled attribute data.
pub vertex_pulling_transform: bool,
/// vertex_buffer_mappings are used during shader translation to
/// support vertex pulling.
pub vertex_buffer_mappings: Vec<VertexBufferMapping>,
} }
impl Options { impl Options {

File diff suppressed because it is too large Load Diff

View File

@ -27,5 +27,7 @@
), ),
msl_pipeline: ( msl_pipeline: (
allow_and_force_point_size: true, allow_and_force_point_size: true,
vertex_pulling_transform: false,
vertex_buffer_mappings: [],
), ),
) )

View File

@ -0,0 +1,31 @@
(
msl_pipeline: (
allow_and_force_point_size: false,
vertex_pulling_transform: true,
vertex_buffer_mappings: [(
id: 1,
stride: 20,
indexed_by_vertex: true,
attributes: [(
shader_location: 0, // position
offset: 0,
format: Float32, // too small, inflated to a vec4
),
(
shader_location: 1, // normal
offset: 4,
format: Float32x4, // too big, truncated to a vec3
)],
),
(
id: 2,
stride: 16,
indexed_by_vertex: false,
attributes: [(
shader_location: 2, // texcoord
offset: 0,
format: Float32x2,
)],
)],
),
)

View File

@ -0,0 +1,32 @@
struct VertexOutput {
@builtin(position) position: vec4<f32>,
@location(0) color: vec4<f32>,
@location(1) texcoord: vec2<f32>,
}
struct VertexInput {
@location(0) position: vec4<f32>,
@location(1) normal: vec3<f32>,
@location(2) texcoord: vec2<f32>,
}
@group(0) @binding(0) var<uniform> mvp_matrix: mat4x4<f32>;
@vertex
fn render_vertex(
v_in: VertexInput,
@builtin(vertex_index) v_existing_id: u32,
) -> VertexOutput
{
var v_out: VertexOutput;
v_out.position = v_in.position * mvp_matrix;
v_out.color = do_lighting(v_in.position,
v_in.normal);
v_out.texcoord = v_in.texcoord;
return v_out;
}
fn do_lighting(position: vec4<f32>, normal: vec3<f32>) -> vec4<f32> {
// blah blah blah
return vec4<f32>(0);
}

View File

@ -0,0 +1,76 @@
// language: metal1.0
#include <metal_stdlib>
#include <simd/simd.h>
using metal::uint;
struct _mslBufferSizes {
uint buffer_size1;
uint buffer_size2;
};
struct VertexOutput {
metal::float4 position;
metal::float4 color;
metal::float2 texcoord;
};
struct VertexInput {
metal::float4 position;
metal::float3 normal;
metal::float2 texcoord;
};
float unpackFloat32_(uint b0, uint b1, uint b2, uint b3) {
return as_type<float>(b3 << 24 | b2 << 16 | b1 << 8 | b0);
}
metal::float4 unpackFloat32x4_(uint b0, uint b1, uint b2, uint b3, uint b4, uint b5, uint b6, uint b7, uint b8, uint b9, uint b10, uint b11, uint b12, uint b13, uint b14, uint b15) {
return metal::float4(as_type<float>(b3 << 24 | b2 << 16 | b1 << 8 | b0), as_type<float>(b7 << 24 | b6 << 16 | b5 << 8 | b4), as_type<float>(b11 << 24 | b10 << 16 | b9 << 8 | b8), as_type<float>(b15 << 24 | b14 << 16 | b13 << 8 | b12));
}
metal::float2 unpackFloat32x2_(uint b0, uint b1, uint b2, uint b3, uint b4, uint b5, uint b6, uint b7) {
return metal::float2(as_type<float>(b3 << 24 | b2 << 16 | b1 << 8 | b0), as_type<float>(b7 << 24 | b6 << 16 | b5 << 8 | b4));
}
metal::float4 do_lighting(
metal::float4 position,
metal::float3 normal
) {
return metal::float4(0.0);
}
struct render_vertexOutput {
metal::float4 position [[position]];
metal::float4 color [[user(loc0), center_perspective]];
metal::float2 texcoord [[user(loc1), center_perspective]];
};
struct vb_1_type { metal::uchar data[20]; };
struct vb_2_type { metal::uchar data[16]; };
vertex render_vertexOutput render_vertex(
uint v_existing_id [[vertex_id]]
, constant metal::float4x4& mvp_matrix [[user(fake0)]]
, uint i_id [[instance_id]]
, const device vb_1_type* vb_1_in [[buffer(1)]]
, const device vb_2_type* vb_2_in [[buffer(2)]]
, constant _mslBufferSizes& _buffer_sizes [[user(fake0)]]
) {
metal::float4 position_1 = {};
metal::float3 normal_1 = {};
if (v_existing_id < (_buffer_sizes.buffer_size1 / 20)) {
const vb_1_type vb_1_elem = vb_1_in[v_existing_id];
position_1 = metal::float4(unpackFloat32_(vb_1_elem.data[0], vb_1_elem.data[1], vb_1_elem.data[2], vb_1_elem.data[3]), 0.0, 0.0, 1.0);
normal_1 = metal::float3(unpackFloat32x4_(vb_1_elem.data[4], vb_1_elem.data[5], vb_1_elem.data[6], vb_1_elem.data[7], vb_1_elem.data[8], vb_1_elem.data[9], vb_1_elem.data[10], vb_1_elem.data[11], vb_1_elem.data[12], vb_1_elem.data[13], vb_1_elem.data[14], vb_1_elem.data[15], vb_1_elem.data[16], vb_1_elem.data[17], vb_1_elem.data[18], vb_1_elem.data[19]));
}
metal::float2 texcoord = {};
if (i_id < (_buffer_sizes.buffer_size2 / 16)) {
const vb_2_type vb_2_elem = vb_2_in[i_id];
texcoord = unpackFloat32x2_(vb_2_elem.data[0], vb_2_elem.data[1], vb_2_elem.data[2], vb_2_elem.data[3], vb_2_elem.data[4], vb_2_elem.data[5], vb_2_elem.data[6], vb_2_elem.data[7]);
}
const VertexInput v_in = { position_1, normal_1, texcoord };
VertexOutput v_out = {};
metal::float4x4 _e6 = mvp_matrix;
v_out.position = v_in.position * _e6;
metal::float4 _e11 = do_lighting(v_in.position, v_in.normal);
v_out.color = _e11;
v_out.texcoord = v_in.texcoord;
VertexOutput _e14 = v_out;
const auto _tmp = _e14;
return render_vertexOutput { _tmp.position, _tmp.color, _tmp.texcoord };
}

View File

@ -890,6 +890,7 @@ fn convert_wgsl() {
"overrides-ray-query", "overrides-ray-query",
Targets::IR | Targets::SPIRV | Targets::METAL, Targets::IR | Targets::SPIRV | Targets::METAL,
), ),
("vertex-pulling-transform", Targets::METAL),
]; ];
for &(name, targets) in inputs.iter() { for &(name, targets) in inputs.iter() {

View File

@ -59,6 +59,7 @@
entry_point: None, entry_point: None,
constants: {}, constants: {},
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
), ),
), ),
), ),

View File

@ -32,6 +32,7 @@
entry_point: None, entry_point: None,
constants: {}, constants: {},
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
), ),
), ),
), ),

View File

@ -60,6 +60,7 @@
entry_point: None, entry_point: None,
constants: {}, constants: {},
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
), ),
buffers: [], buffers: [],
), ),
@ -69,6 +70,7 @@
entry_point: None, entry_point: None,
constants: {}, constants: {},
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
), ),
targets: [ targets: [
Some(( Some((

View File

@ -136,6 +136,7 @@
entry_point: None, entry_point: None,
constants: {}, constants: {},
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
), ),
), ),
), ),

View File

@ -137,6 +137,7 @@
entry_point: None, entry_point: None,
constants: {}, constants: {},
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
), ),
), ),
), ),

View File

@ -185,6 +185,7 @@ struct Test {
id_source: IdSource, id_source: IdSource,
draw_call_kind: DrawCallKind, draw_call_kind: DrawCallKind,
encoder_kind: EncoderKind, encoder_kind: EncoderKind,
vertex_pulling_transform: bool,
} }
impl Test { impl Test {
@ -298,6 +299,16 @@ async fn vertex_index_common(ctx: TestingContext) {
cache: None, cache: None,
}; };
let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = true;
let builtin_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = false;
pipeline_desc.vertex.entry_point = "vs_main_buffers"; pipeline_desc.vertex.entry_point = "vs_main_buffers";
pipeline_desc.vertex.buffers = &[ pipeline_desc.vertex.buffers = &[
wgpu::VertexBufferLayout { wgpu::VertexBufferLayout {
@ -312,6 +323,15 @@ async fn vertex_index_common(ctx: TestingContext) {
}, },
]; ];
let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = true;
let buffer_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = false;
let dummy = ctx let dummy = ctx
.device .device
@ -336,17 +356,20 @@ async fn vertex_index_common(ctx: TestingContext) {
) )
.create_view(&wgpu::TextureViewDescriptor::default()); .create_view(&wgpu::TextureViewDescriptor::default());
let mut tests = Vec::with_capacity(5 * 2 * 2); let mut tests = Vec::with_capacity(5 * 2 * 2 * 2);
for case in TestCase::ARRAY { for case in TestCase::ARRAY {
for id_source in IdSource::ARRAY { for id_source in IdSource::ARRAY {
for draw_call_kind in DrawCallKind::ARRAY { for draw_call_kind in DrawCallKind::ARRAY {
for encoder_kind in EncoderKind::ARRAY { for encoder_kind in EncoderKind::ARRAY {
tests.push(Test { for vertex_pulling_transform in [false, true] {
case, tests.push(Test {
id_source, case,
draw_call_kind, id_source,
encoder_kind, draw_call_kind,
}) encoder_kind,
vertex_pulling_transform,
})
}
} }
} }
} }
@ -357,8 +380,20 @@ async fn vertex_index_common(ctx: TestingContext) {
let mut failed = false; let mut failed = false;
for test in tests { for test in tests {
let pipeline = match test.id_source { let pipeline = match test.id_source {
IdSource::Buffers => &buffer_pipeline, IdSource::Buffers => {
IdSource::Builtins => &builtin_pipeline, if test.vertex_pulling_transform {
&buffer_pipeline_vpt
} else {
&buffer_pipeline
}
}
IdSource::Builtins => {
if test.vertex_pulling_transform {
&builtin_pipeline_vpt
} else {
&builtin_pipeline
}
}
}; };
let expected = test.expectation(&ctx); let expected = test.expectation(&ctx);

View File

@ -2737,6 +2737,7 @@ impl<A: HalApi> Device<A> {
entry_point: final_entry_point_name.as_ref(), entry_point: final_entry_point_name.as_ref(),
constants: desc.stage.constants.as_ref(), constants: desc.stage.constants.as_ref(),
zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
}, },
cache: cache.as_ref().and_then(|it| it.raw.as_ref()), cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
}; };
@ -3165,6 +3166,7 @@ impl<A: HalApi> Device<A> {
entry_point: &vertex_entry_point_name, entry_point: &vertex_entry_point_name,
constants: stage_desc.constants.as_ref(), constants: stage_desc.constants.as_ref(),
zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory,
vertex_pulling_transform: stage_desc.vertex_pulling_transform,
} }
}; };
@ -3228,6 +3230,7 @@ impl<A: HalApi> Device<A> {
zero_initialize_workgroup_memory: fragment_state zero_initialize_workgroup_memory: fragment_state
.stage .stage
.zero_initialize_workgroup_memory, .zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
}) })
} }
None => None, None => None,

View File

@ -166,6 +166,8 @@ pub struct ProgrammableStageDescriptor<'a> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided /// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications /// for cross-platform applications
pub zero_initialize_workgroup_memory: bool, pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
} }
/// Number of implicit bind groups derived at pipeline creation. /// Number of implicit bind groups derived at pipeline creation.

View File

@ -254,6 +254,7 @@ impl<A: hal::Api> Example<A> {
entry_point: "vs_main", entry_point: "vs_main",
constants: &constants, constants: &constants,
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}, },
vertex_buffers: &[], vertex_buffers: &[],
fragment_stage: Some(hal::ProgrammableStage { fragment_stage: Some(hal::ProgrammableStage {
@ -261,6 +262,7 @@ impl<A: hal::Api> Example<A> {
entry_point: "fs_main", entry_point: "fs_main",
constants: &constants, constants: &constants,
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}), }),
primitive: wgt::PrimitiveState { primitive: wgt::PrimitiveState {
topology: wgt::PrimitiveTopology::TriangleStrip, topology: wgt::PrimitiveTopology::TriangleStrip,

View File

@ -373,6 +373,7 @@ impl<A: hal::Api> Example<A> {
entry_point: "main", entry_point: "main",
constants: &Default::default(), constants: &Default::default(),
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}, },
cache: None, cache: None,
}) })

View File

@ -1714,6 +1714,8 @@ pub struct ProgrammableStage<'a, A: Api> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided /// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications /// for cross-platform applications
pub zero_initialize_workgroup_memory: bool, pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
} }
// Rust gets confused about the impl requirements for `A` // Rust gets confused about the impl requirements for `A`
@ -1724,6 +1726,7 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> {
entry_point: self.entry_point, entry_point: self.entry_point,
constants: self.constants, constants: self.constants,
zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
vertex_pulling_transform: self.vertex_pulling_transform,
} }
} }
} }

View File

@ -16,6 +16,7 @@ impl Default for super::CommandState {
raw_wg_size: metal::MTLSize::new(0, 0, 0), raw_wg_size: metal::MTLSize::new(0, 0, 0),
stage_infos: Default::default(), stage_infos: Default::default(),
storage_buffer_length_map: Default::default(), storage_buffer_length_map: Default::default(),
vertex_buffer_size_map: Default::default(),
work_group_memory_sizes: Vec::new(), work_group_memory_sizes: Vec::new(),
push_constants: Vec::new(), push_constants: Vec::new(),
pending_timer_queries: Vec::new(), pending_timer_queries: Vec::new(),
@ -137,6 +138,7 @@ impl super::CommandEncoder {
impl super::CommandState { impl super::CommandState {
fn reset(&mut self) { fn reset(&mut self) {
self.storage_buffer_length_map.clear(); self.storage_buffer_length_map.clear();
self.vertex_buffer_size_map.clear();
self.stage_infos.vs.clear(); self.stage_infos.vs.clear();
self.stage_infos.fs.clear(); self.stage_infos.fs.clear();
self.stage_infos.cs.clear(); self.stage_infos.cs.clear();
@ -160,6 +162,15 @@ impl super::CommandState {
.unwrap_or_default() .unwrap_or_default()
})); }));
// Extend with the sizes of the mapped vertex buffers, in the order
// they were added to the map.
result_sizes.extend(stage_info.vertex_buffer_mappings.iter().map(|vbm| {
self.vertex_buffer_size_map
.get(&(vbm.id as u64))
.map(|size| u32::try_from(size.get()).unwrap_or(u32::MAX))
.unwrap_or_default()
}));
if !result_sizes.is_empty() { if !result_sizes.is_empty() {
Some((slot as _, result_sizes)) Some((slot as _, result_sizes))
} else { } else {
@ -927,6 +938,27 @@ impl crate::CommandEncoder for super::CommandEncoder {
let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64; let buffer_index = self.shared.private_caps.max_vertex_buffers as u64 - 1 - index as u64;
let encoder = self.state.render.as_ref().unwrap(); let encoder = self.state.render.as_ref().unwrap();
encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset); encoder.set_vertex_buffer(buffer_index, Some(&binding.buffer.raw), binding.offset);
let buffer_size = binding.resolve_size();
if buffer_size > 0 {
self.state.vertex_buffer_size_map.insert(
buffer_index,
std::num::NonZeroU64::new(buffer_size).unwrap(),
);
} else {
self.state.vertex_buffer_size_map.remove(&buffer_index);
}
if let Some((index, sizes)) = self
.state
.make_sizes_buffer_update(naga::ShaderStage::Vertex, &mut self.temp.binding_sizes)
{
encoder.set_vertex_bytes(
index as _,
(sizes.len() * WORD_SIZE) as u64,
sizes.as_ptr() as _,
);
}
} }
unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) { unsafe fn set_viewport(&mut self, rect: &crate::Rect<f32>, depth_range: Range<f32>) {

View File

@ -59,10 +59,48 @@ fn create_depth_stencil_desc(state: &wgt::DepthStencilState) -> metal::DepthSten
desc desc
} }
const fn convert_vertex_format_to_naga(format: wgt::VertexFormat) -> naga::back::msl::VertexFormat {
match format {
wgt::VertexFormat::Uint8x2 => naga::back::msl::VertexFormat::Uint8x2,
wgt::VertexFormat::Uint8x4 => naga::back::msl::VertexFormat::Uint8x4,
wgt::VertexFormat::Sint8x2 => naga::back::msl::VertexFormat::Sint8x2,
wgt::VertexFormat::Sint8x4 => naga::back::msl::VertexFormat::Sint8x4,
wgt::VertexFormat::Unorm8x2 => naga::back::msl::VertexFormat::Unorm8x2,
wgt::VertexFormat::Unorm8x4 => naga::back::msl::VertexFormat::Unorm8x4,
wgt::VertexFormat::Snorm8x2 => naga::back::msl::VertexFormat::Snorm8x2,
wgt::VertexFormat::Snorm8x4 => naga::back::msl::VertexFormat::Snorm8x4,
wgt::VertexFormat::Uint16x2 => naga::back::msl::VertexFormat::Uint16x2,
wgt::VertexFormat::Uint16x4 => naga::back::msl::VertexFormat::Uint16x4,
wgt::VertexFormat::Sint16x2 => naga::back::msl::VertexFormat::Sint16x2,
wgt::VertexFormat::Sint16x4 => naga::back::msl::VertexFormat::Sint16x4,
wgt::VertexFormat::Unorm16x2 => naga::back::msl::VertexFormat::Unorm16x2,
wgt::VertexFormat::Unorm16x4 => naga::back::msl::VertexFormat::Unorm16x4,
wgt::VertexFormat::Snorm16x2 => naga::back::msl::VertexFormat::Snorm16x2,
wgt::VertexFormat::Snorm16x4 => naga::back::msl::VertexFormat::Snorm16x4,
wgt::VertexFormat::Float16x2 => naga::back::msl::VertexFormat::Float16x2,
wgt::VertexFormat::Float16x4 => naga::back::msl::VertexFormat::Float16x4,
wgt::VertexFormat::Float32 => naga::back::msl::VertexFormat::Float32,
wgt::VertexFormat::Float32x2 => naga::back::msl::VertexFormat::Float32x2,
wgt::VertexFormat::Float32x3 => naga::back::msl::VertexFormat::Float32x3,
wgt::VertexFormat::Float32x4 => naga::back::msl::VertexFormat::Float32x4,
wgt::VertexFormat::Uint32 => naga::back::msl::VertexFormat::Uint32,
wgt::VertexFormat::Uint32x2 => naga::back::msl::VertexFormat::Uint32x2,
wgt::VertexFormat::Uint32x3 => naga::back::msl::VertexFormat::Uint32x3,
wgt::VertexFormat::Uint32x4 => naga::back::msl::VertexFormat::Uint32x4,
wgt::VertexFormat::Sint32 => naga::back::msl::VertexFormat::Sint32,
wgt::VertexFormat::Sint32x2 => naga::back::msl::VertexFormat::Sint32x2,
wgt::VertexFormat::Sint32x3 => naga::back::msl::VertexFormat::Sint32x3,
wgt::VertexFormat::Sint32x4 => naga::back::msl::VertexFormat::Sint32x4,
wgt::VertexFormat::Unorm10_10_10_2 => naga::back::msl::VertexFormat::Unorm10_10_10_2,
_ => unimplemented!(),
}
}
impl super::Device { impl super::Device {
fn load_shader( fn load_shader(
&self, &self,
stage: &crate::ProgrammableStage<super::Api>, stage: &crate::ProgrammableStage<super::Api>,
vertex_buffer_mappings: &[naga::back::msl::VertexBufferMapping],
layout: &super::PipelineLayout, layout: &super::PipelineLayout,
primitive_class: metal::MTLPrimitiveTopologyClass, primitive_class: metal::MTLPrimitiveTopologyClass,
naga_stage: naga::ShaderStage, naga_stage: naga::ShaderStage,
@ -120,6 +158,8 @@ impl super::Device {
metal::MTLPrimitiveTopologyClass::Point => true, metal::MTLPrimitiveTopologyClass::Point => true,
_ => false, _ => false,
}, },
vertex_pulling_transform: stage.vertex_pulling_transform,
vertex_buffer_mappings: vertex_buffer_mappings.to_vec(),
}; };
let (source, info) = let (source, info) =
@ -548,7 +588,7 @@ impl crate::Device for super::Device {
pc_buffer: Option<super::ResourceIndex>, pc_buffer: Option<super::ResourceIndex>,
pc_limit: u32, pc_limit: u32,
sizes_buffer: Option<super::ResourceIndex>, sizes_buffer: Option<super::ResourceIndex>,
sizes_count: u8, need_sizes_buffer: bool,
resources: naga::back::msl::BindingMap, resources: naga::back::msl::BindingMap,
} }
@ -558,7 +598,7 @@ impl crate::Device for super::Device {
pc_buffer: None, pc_buffer: None,
pc_limit: 0, pc_limit: 0,
sizes_buffer: None, sizes_buffer: None,
sizes_count: 0, need_sizes_buffer: false,
resources: Default::default(), resources: Default::default(),
}); });
let mut bind_group_infos = arrayvec::ArrayVec::new(); let mut bind_group_infos = arrayvec::ArrayVec::new();
@ -603,7 +643,7 @@ impl crate::Device for super::Device {
{ {
for info in stage_data.iter_mut() { for info in stage_data.iter_mut() {
if entry.visibility.contains(map_naga_stage(info.stage)) { if entry.visibility.contains(map_naga_stage(info.stage)) {
info.sizes_count += 1; info.need_sizes_buffer = true;
} }
} }
} }
@ -661,11 +701,13 @@ impl crate::Device for super::Device {
// Finally, make sure we fit the limits // Finally, make sure we fit the limits
for info in stage_data.iter_mut() { for info in stage_data.iter_mut() {
// handle the sizes buffer assignment and shader overrides if info.need_sizes_buffer || info.stage == naga::ShaderStage::Vertex {
if info.sizes_count != 0 { // Set aside space for the sizes_buffer, which is required
// for variable-length buffers, or to support vertex pulling.
info.sizes_buffer = Some(info.counters.buffers); info.sizes_buffer = Some(info.counters.buffers);
info.counters.buffers += 1; info.counters.buffers += 1;
} }
if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage if info.counters.buffers > self.shared.private_caps.max_buffers_per_stage
|| info.counters.textures > self.shared.private_caps.max_textures_per_stage || info.counters.textures > self.shared.private_caps.max_textures_per_stage
|| info.counters.samplers > self.shared.private_caps.max_samplers_per_stage || info.counters.samplers > self.shared.private_caps.max_samplers_per_stage
@ -832,8 +874,38 @@ impl crate::Device for super::Device {
// Vertex shader // Vertex shader
let (vs_lib, vs_info) = { let (vs_lib, vs_info) = {
let mut vertex_buffer_mappings = Vec::<naga::back::msl::VertexBufferMapping>::new();
for (i, vbl) in desc.vertex_buffers.iter().enumerate() {
let mut attributes = Vec::<naga::back::msl::AttributeMapping>::new();
for attribute in vbl.attributes.iter() {
attributes.push(naga::back::msl::AttributeMapping {
shader_location: attribute.shader_location,
offset: attribute.offset as u32,
format: convert_vertex_format_to_naga(attribute.format),
});
}
vertex_buffer_mappings.push(naga::back::msl::VertexBufferMapping {
id: self.shared.private_caps.max_vertex_buffers - 1 - i as u32,
stride: if vbl.array_stride > 0 {
vbl.array_stride.try_into().unwrap()
} else {
vbl.attributes
.iter()
.map(|attribute| attribute.offset + attribute.format.size())
.max()
.unwrap_or(0)
.try_into()
.unwrap()
},
indexed_by_vertex: (vbl.step_mode == wgt::VertexStepMode::Vertex {}),
attributes,
});
}
let vs = self.load_shader( let vs = self.load_shader(
&desc.vertex_stage, &desc.vertex_stage,
&vertex_buffer_mappings,
desc.layout, desc.layout,
primitive_class, primitive_class,
naga::ShaderStage::Vertex, naga::ShaderStage::Vertex,
@ -851,6 +923,7 @@ impl crate::Device for super::Device {
push_constants: desc.layout.push_constants_infos.vs, push_constants: desc.layout.push_constants_infos.vs,
sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer, sizes_slot: desc.layout.per_stage_map.vs.sizes_buffer,
sized_bindings: vs.sized_bindings, sized_bindings: vs.sized_bindings,
vertex_buffer_mappings,
}; };
(vs.library, info) (vs.library, info)
@ -861,6 +934,7 @@ impl crate::Device for super::Device {
Some(ref stage) => { Some(ref stage) => {
let fs = self.load_shader( let fs = self.load_shader(
stage, stage,
&[],
desc.layout, desc.layout,
primitive_class, primitive_class,
naga::ShaderStage::Fragment, naga::ShaderStage::Fragment,
@ -878,6 +952,7 @@ impl crate::Device for super::Device {
push_constants: desc.layout.push_constants_infos.fs, push_constants: desc.layout.push_constants_infos.fs,
sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer, sizes_slot: desc.layout.per_stage_map.fs.sizes_buffer,
sized_bindings: fs.sized_bindings, sized_bindings: fs.sized_bindings,
vertex_buffer_mappings: vec![],
}; };
(Some(fs.library), Some(info)) (Some(fs.library), Some(info))
@ -1053,6 +1128,7 @@ impl crate::Device for super::Device {
let cs = self.load_shader( let cs = self.load_shader(
&desc.stage, &desc.stage,
&[],
desc.layout, desc.layout,
metal::MTLPrimitiveTopologyClass::Unspecified, metal::MTLPrimitiveTopologyClass::Unspecified,
naga::ShaderStage::Compute, naga::ShaderStage::Compute,
@ -1070,6 +1146,7 @@ impl crate::Device for super::Device {
push_constants: desc.layout.push_constants_infos.cs, push_constants: desc.layout.push_constants_infos.cs,
sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer, sizes_slot: desc.layout.per_stage_map.cs.sizes_buffer,
sized_bindings: cs.sized_bindings, sized_bindings: cs.sized_bindings,
vertex_buffer_mappings: vec![],
}; };
if let Some(name) = desc.label { if let Some(name) = desc.label {

View File

@ -466,6 +466,15 @@ impl Buffer {
} }
} }
impl crate::BufferBinding<'_, Api> {
fn resolve_size(&self) -> wgt::BufferAddress {
match self.size {
Some(size) => size.get(),
None => self.buffer.size - self.offset,
}
}
}
#[derive(Debug)] #[derive(Debug)]
pub struct Texture { pub struct Texture {
raw: metal::Texture, raw: metal::Texture,
@ -690,6 +699,9 @@ struct PipelineStageInfo {
/// ///
/// See `device::CompiledShader::sized_bindings` for more details. /// See `device::CompiledShader::sized_bindings` for more details.
sized_bindings: Vec<naga::ResourceBinding>, sized_bindings: Vec<naga::ResourceBinding>,
/// Info on all bound vertex buffers.
vertex_buffer_mappings: Vec<naga::back::msl::VertexBufferMapping>,
} }
impl PipelineStageInfo { impl PipelineStageInfo {
@ -697,6 +709,7 @@ impl PipelineStageInfo {
self.push_constants = None; self.push_constants = None;
self.sizes_slot = None; self.sizes_slot = None;
self.sized_bindings.clear(); self.sized_bindings.clear();
self.vertex_buffer_mappings.clear();
} }
fn assign_from(&mut self, other: &Self) { fn assign_from(&mut self, other: &Self) {
@ -704,6 +717,9 @@ impl PipelineStageInfo {
self.sizes_slot = other.sizes_slot; self.sizes_slot = other.sizes_slot;
self.sized_bindings.clear(); self.sized_bindings.clear();
self.sized_bindings.extend_from_slice(&other.sized_bindings); self.sized_bindings.extend_from_slice(&other.sized_bindings);
self.vertex_buffer_mappings.clear();
self.vertex_buffer_mappings
.extend_from_slice(&other.vertex_buffer_mappings);
} }
} }
@ -821,6 +837,8 @@ struct CommandState {
/// [`ResourceBinding`]: naga::ResourceBinding /// [`ResourceBinding`]: naga::ResourceBinding
storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>, storage_buffer_length_map: rustc_hash::FxHashMap<naga::ResourceBinding, wgt::BufferSize>,
vertex_buffer_size_map: rustc_hash::FxHashMap<u64, wgt::BufferSize>,
work_group_memory_sizes: Vec<u32>, work_group_memory_sizes: Vec<u32>,
push_constants: Vec<u32>, push_constants: Vec<u32>,

View File

@ -1189,6 +1189,10 @@ impl crate::Context for ContextWgpuCore {
.vertex .vertex
.compilation_options .compilation_options
.zero_initialize_workgroup_memory, .zero_initialize_workgroup_memory,
vertex_pulling_transform: desc
.vertex
.compilation_options
.vertex_pulling_transform,
}, },
buffers: Borrowed(&vertex_buffers), buffers: Borrowed(&vertex_buffers),
}, },
@ -1203,6 +1207,7 @@ impl crate::Context for ContextWgpuCore {
zero_initialize_workgroup_memory: frag zero_initialize_workgroup_memory: frag
.compilation_options .compilation_options
.zero_initialize_workgroup_memory, .zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
}, },
targets: Borrowed(frag.targets), targets: Borrowed(frag.targets),
}), }),
@ -1256,6 +1261,7 @@ impl crate::Context for ContextWgpuCore {
zero_initialize_workgroup_memory: desc zero_initialize_workgroup_memory: desc
.compilation_options .compilation_options
.zero_initialize_workgroup_memory, .zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
}, },
cache: desc.cache.map(|c| c.id.into()), cache: desc.cache.map(|c| c.id.into()),
}; };

View File

@ -1987,6 +1987,8 @@ pub struct PipelineCompilationOptions<'a> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided /// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications /// for cross-platform applications
pub zero_initialize_workgroup_memory: bool, pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
} }
impl<'a> Default for PipelineCompilationOptions<'a> { impl<'a> Default for PipelineCompilationOptions<'a> {
@ -2000,6 +2002,7 @@ impl<'a> Default for PipelineCompilationOptions<'a> {
Self { Self {
constants, constants,
zero_initialize_workgroup_memory: true, zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
} }
} }
} }