Remove vertex_pulling_transform from PipelineCompilationOptions.

This option was only honored by the Metal backend, and the transform is
now always applied there, so the option is going away. It remains
configurable for tests via the PipelineOptions struct, deserialized from
.ron files.

This also fixes some type problems with the unpack functions in
writer.rs. Metal's << operator promotes its operands to int, so the
result has to be cast back down to the real size before the as_type bit
conversion. The math for the snorm values is also corrected, in some
cases by using Metal's unpack_snorm2x16_to_float function, because a
bit-shifted ushort value cannot be bit-cast directly to half.
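
To illustrate the promotion issue, here is a minimal, hypothetical sketch in the
style of the writer.rs emitters. The function name and parameter types are
assumptions; only the metal::ushort(...) narrowing pattern is taken from this
commit.

use std::fmt::Write;

// Hypothetical emitter sketch. MSL's << promotes ushort operands to int, and
// as_type<half> requires a 16-bit source, so the shifted value is narrowed
// back to metal::ushort before the bit cast, mirroring the fix in this commit.
fn emit_unpack_float16x2(out: &mut String, name: &str) -> std::fmt::Result {
    writeln!(
        out,
        "metal::float2 {name}(metal::ushort b0, metal::ushort b1, \
         metal::ushort b2, metal::ushort b3) {{"
    )?;
    writeln!(
        out,
        "    return metal::float2(as_type<half>(metal::ushort(b1 << 8 | b0)), \
         as_type<half>(metal::ushort(b3 << 8 | b2)));"
    )?;
    writeln!(out, "}}")
}

fn main() {
    let mut out = String::new();
    emit_unpack_float16x2(&mut out, "unpackFloat16x2").unwrap();
    print!("{out}");
}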
Brad Werth 2024-06-04 10:11:03 -07:00 committed by Teodor Tanasoaia
parent bc7622f641
commit 6cd387412f
17 changed files with 756 additions and 105 deletions

View File

@ -112,7 +112,6 @@ pub fn op_webgpu_create_compute_pipeline(
entry_point: compute.entry_point.map(Cow::from),
constants: Cow::Owned(compute.constants.unwrap_or_default()),
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
},
cache: None,
};
@ -348,7 +347,6 @@ pub fn op_webgpu_create_render_pipeline(
constants: Cow::Owned(fragment.constants.unwrap_or_default()),
// Required to be true for WebGPU
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
},
targets: Cow::Owned(fragment.targets),
})
@ -374,7 +372,6 @@ pub fn op_webgpu_create_render_pipeline(
constants: Cow::Owned(args.vertex.constants.unwrap_or_default()),
// Required to be true for WebGPU
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
},
buffers: Cow::Owned(vertex_buffers),
},

View File

@ -81,6 +81,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md).
- Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop**
- Add experimental vertex pulling transform flag. ([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth**
- Fixup some generated MSL for vertex buffer unpack functions. ([#5829](https://github.com/gfx-rs/wgpu/pull/5829)) **@bradwerth**
- Make vertex pulling transform on by default. ([#5773](https://github.com/gfx-rs/wgpu/pull/5773)) **@bradwerth**
#### GLSL-OUT

View File

@ -354,7 +354,9 @@ pub struct PipelineOptions {
/// to receive the vertex buffers, lengths, and vertex id as args,
/// and bounds-check the vertex id and use the index into the
/// vertex buffers to access attributes, rather than using Metal's
/// [[stage-in]] assembled attribute data.
/// [[stage-in]] assembled attribute data. This is true by default,
/// but remains configurable for use by tests via deserialization
/// of this struct. There is no user-facing way to set this value.
pub vertex_pulling_transform: bool,
/// vertex_buffer_mappings are used during shader translation to
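
As the doc comment above notes, there is no user-facing way to set this flag;
tests reach it through this struct, normally by deserializing a .ron parameter
file. A hedged Rust sketch of setting it directly follows; only the
vertex_pulling_transform field is confirmed by this diff, and the msl-out
feature plus a Default impl for PipelineOptions are assumptions.

// Hedged sketch: building naga MSL pipeline options with the transform enabled,
// as a test might after deserializing its .ron options. Assumes naga is built
// with the msl-out feature and that PipelineOptions implements Default.
use naga::back::msl;

fn msl_test_options() -> msl::PipelineOptions {
    msl::PipelineOptions {
        vertex_pulling_transform: true, // the field documented above
        ..Default::default()
    }
}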

View File

@ -3953,8 +3953,8 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::float2((float(b0) - 128.0f) / 255.0f, \
(float(b1) - 128.0f) / 255.0f);",
"{}return metal::float2(metal::max(-1.0f, as_type<char>(b0) / 127.0f), \
metal::max(-1.0f, as_type<char>(b1) / 127.0f));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -3971,10 +3971,10 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::float4((float(b0) - 128.0f) / 255.0f, \
(float(b1) - 128.0f) / 255.0f, \
(float(b2) - 128.0f) / 255.0f, \
(float(b3) - 128.0f) / 255.0f);",
"{}return metal::float4(metal::max(-1.0f, as_type<char>(b0) / 127.0f), \
metal::max(-1.0f, as_type<char>(b1) / 127.0f), \
metal::max(-1.0f, as_type<char>(b2) / 127.0f), \
metal::max(-1.0f, as_type<char>(b3) / 127.0f));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4033,8 +4033,8 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::int2(as_type<metal::short>(b1 << 8 | b0), \
as_type<metal::short>(b3 << 8 | b2));",
"{}return metal::int2(as_type<short>(metal::ushort(b1 << 8 | b0)), \
as_type<short>(metal::ushort(b3 << 8 | b2)));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4055,10 +4055,10 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::int4(as_type<metal::short>(b1 << 8 | b0), \
as_type<metal::short>(b3 << 8 | b2), \
as_type<metal::short>(b5 << 8 | b4), \
as_type<metal::short>(b7 << 8 | b6));",
"{}return metal::int4(as_type<short>(metal::ushort(b1 << 8 | b0)), \
as_type<short>(metal::ushort(b3 << 8 | b2)), \
as_type<short>(metal::ushort(b5 << 8 | b4)), \
as_type<short>(metal::ushort(b7 << 8 | b6)));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4117,8 +4117,7 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::float2((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \
(float(b3 << 8 | b2) - 32767.0f) / 65535.0f);",
"{}return metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2);",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4139,10 +4138,8 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::float4((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \
(float(b3 << 8 | b2) - 32767.0f) / 65535.0f, \
(float(b5 << 8 | b4) - 32767.0f) / 65535.0f, \
(float(b7 << 8 | b6) - 32767.0f) / 65535.0f);",
"{}return metal::float4(metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2), \
metal::unpack_snorm2x16_to_float(b5 << 24 | b4 << 16 | b7 << 8 | b6));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4159,8 +4156,8 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::float2(as_type<metal::half>(b1 << 8 | b0), \
as_type<metal::half>(b3 << 8 | b2));",
"{}return metal::float2(as_type<half>(metal::ushort(b1 << 8 | b0)), \
as_type<half>(metal::ushort(b3 << 8 | b2)));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4170,7 +4167,7 @@ impl<W: Write> Writer<W> {
let name = self.namer.call("unpackFloat16x4");
writeln!(
self.out,
"metal::int4 {name}(metal::ushort b0, \
"metal::float4 {name}(metal::ushort b0, \
metal::ushort b1, \
metal::ushort b2, \
metal::ushort b3, \
@ -4181,10 +4178,10 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return metal::int4(as_type<metal::half>(b1 << 8 | b0), \
as_type<metal::half>(b3 << 8 | b2), \
as_type<metal::half>(b5 << 8 | b4), \
as_type<metal::half>(b7 << 8 | b6));",
"{}return metal::float4(as_type<half>(metal::ushort(b1 << 8 | b0)), \
as_type<half>(metal::ushort(b3 << 8 | b2)), \
as_type<half>(metal::ushort(b5 << 8 | b4)), \
as_type<half>(metal::ushort(b7 << 8 | b6)));",
back::INDENT
)?;
writeln!(self.out, "}}")?;
@ -4390,10 +4387,10 @@ impl<W: Write> Writer<W> {
let name = self.namer.call("unpackSint32");
writeln!(
self.out,
"metal::int {name}(uint b0, \
uint b1, \
uint b2, \
uint b3) {{"
"int {name}(uint b0, \
uint b1, \
uint b2, \
uint b3) {{"
)?;
writeln!(
self.out,
@ -4495,7 +4492,18 @@ impl<W: Write> Writer<W> {
)?;
writeln!(
self.out,
"{}return unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);",
// The following is correct for RGBA packing, but our format seems to
// match ABGR, which can be fed into the Metal builtin function
// unpack_unorm10a2_to_float.
/*
"{}uint v = (b3 << 24 | b2 << 16 | b1 << 8 | b0); \
uint r = (v & 0xFFC00000) >> 22; \
uint g = (v & 0x003FF000) >> 12; \
uint b = (v & 0x00000FFC) >> 2; \
uint a = (v & 0x00000003); \
return metal::float4(float(r) / 1023.0f, float(g) / 1023.0f, float(b) / 1023.0f, float(a) / 3.0f);",
*/
"{}return metal::unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);",
back::INDENT
)?;
writeln!(self.out, "}}")?;

View File

@ -42,6 +42,7 @@ mod subgroup_operations;
mod texture_bounds;
mod texture_view_creation;
mod transfer;
mod vertex_formats;
mod vertex_indices;
mod write_texture;
mod zero_init_texture_after_discard;

View File

@ -0,0 +1,316 @@
@group(0) @binding(0)
var<storage, read_write> checksums: array<f32>;
const index_uint = 0u;
const index_sint = 1u;
const index_unorm = 2u;
const index_snorm = 3u;
const index_float16 = 4u;
const index_float32 = 5u;
fn init_checksums() {
checksums[index_uint] = 0.0;
checksums[index_sint] = 0.0;
checksums[index_unorm] = 0.0;
checksums[index_snorm] = 0.0;
checksums[index_float16] = 0.0;
checksums[index_float32] = 0.0;
}
// Break down the 31 vertex formats specified at
// https://gpuweb.github.io/gpuweb/#vertex-formats into blocks
// of 8, to keep under the limits of max locations. Each
// AttributeBlockX structure will get a corresponding
// vertex_block_X function to process its attributes into
// values written to the checksums buffer.
struct AttributeBlock0 {
// 4-byte-aligned unorm formats
@location(0) unorm8x4: vec4<f32>,
@location(1) unorm16x2: vec2<f32>,
@location(2) unorm16x4: vec4<f32>,
// 4-byte-aligned snorm formats
@location(3) snorm8x4: vec4<f32>,
@location(4) snorm16x2: vec2<f32>,
@location(5) snorm16x4: vec4<f32>,
// 2-byte-aligned formats
@location(6) unorm8x2: vec2<f32>,
@location(7) snorm8x2: vec2<f32>,
}
@vertex
fn vertex_block_0(v_in: AttributeBlock0) -> @builtin(position) vec4<f32>
{
init_checksums();
// Accumulate all unorm into one checksum value.
var all_unorm: f32 = 0.0;
all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.x);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.y);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.x);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.y);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.z);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.w);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.x);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.y);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.x);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.y);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.z);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.w);
checksums[index_unorm] = f32(all_unorm);
// Accumulate all snorm into one checksum value.
var all_snorm: f32 = 0.0;
all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.x);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.y);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.x);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.y);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.z);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.w);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.x);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.y);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.x);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.y);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.z);
all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.w);
checksums[index_snorm] = f32(all_snorm);
return vec4(0.0);
}
struct AttributeBlock1 {
// 4-byte-aligned uint formats
@location(0) uint8x4: vec4<u32>,
@location(1) uint16x2: vec2<u32>,
@location(2) uint16x4: vec4<u32>,
// 4-byte-aligned sint formats
@location(3) sint8x4: vec4<i32>,
@location(4) sint16x2: vec2<i32>,
@location(5) sint16x4: vec4<i32>,
// 2-byte-aligned formats
@location(6) uint8x2: vec2<u32>,
@location(7) sint8x2: vec2<i32>,
}
@vertex
fn vertex_block_1(v_in: AttributeBlock1) -> @builtin(position) vec4<f32>
{
init_checksums();
// Accumulate all uint into one checksum value.
var all_uint: u32 = 0;
all_uint = accumulate_uint(all_uint, v_in.uint8x2.x);
all_uint = accumulate_uint(all_uint, v_in.uint8x2.y);
all_uint = accumulate_uint(all_uint, v_in.uint8x4.x);
all_uint = accumulate_uint(all_uint, v_in.uint8x4.y);
all_uint = accumulate_uint(all_uint, v_in.uint8x4.z);
all_uint = accumulate_uint(all_uint, v_in.uint8x4.w);
all_uint = accumulate_uint(all_uint, v_in.uint16x2.x);
all_uint = accumulate_uint(all_uint, v_in.uint16x2.y);
all_uint = accumulate_uint(all_uint, v_in.uint16x4.x);
all_uint = accumulate_uint(all_uint, v_in.uint16x4.y);
all_uint = accumulate_uint(all_uint, v_in.uint16x4.z);
all_uint = accumulate_uint(all_uint, v_in.uint16x4.w);
checksums[index_uint] = f32(all_uint);
// Accumulate all sint into one checksum value.
var all_sint: i32 = 0;
all_sint = accumulate_sint(all_sint, v_in.sint8x2.x);
all_sint = accumulate_sint(all_sint, v_in.sint8x2.y);
all_sint = accumulate_sint(all_sint, v_in.sint8x4.x);
all_sint = accumulate_sint(all_sint, v_in.sint8x4.y);
all_sint = accumulate_sint(all_sint, v_in.sint8x4.z);
all_sint = accumulate_sint(all_sint, v_in.sint8x4.w);
all_sint = accumulate_sint(all_sint, v_in.sint16x2.x);
all_sint = accumulate_sint(all_sint, v_in.sint16x2.y);
all_sint = accumulate_sint(all_sint, v_in.sint16x4.x);
all_sint = accumulate_sint(all_sint, v_in.sint16x4.y);
all_sint = accumulate_sint(all_sint, v_in.sint16x4.z);
all_sint = accumulate_sint(all_sint, v_in.sint16x4.w);
checksums[index_sint] = f32(all_sint);
return vec4(0.0);
}
struct AttributeBlock2 {
@location(0) uint32: u32,
@location(1) uint32x2: vec2<u32>,
@location(2) uint32x3: vec3<u32>,
@location(3) uint32x4: vec4<u32>,
}
@vertex
fn vertex_block_2(v_in: AttributeBlock2) -> @builtin(position) vec4<f32>
{
init_checksums();
// Accumulate all uint into one checksum value.
var all_uint: u32 = 0;
all_uint = accumulate_uint(all_uint, v_in.uint32);
all_uint = accumulate_uint(all_uint, v_in.uint32x2.x);
all_uint = accumulate_uint(all_uint, v_in.uint32x2.y);
all_uint = accumulate_uint(all_uint, v_in.uint32x3.x);
all_uint = accumulate_uint(all_uint, v_in.uint32x3.y);
all_uint = accumulate_uint(all_uint, v_in.uint32x3.z);
all_uint = accumulate_uint(all_uint, v_in.uint32x4.x);
all_uint = accumulate_uint(all_uint, v_in.uint32x4.y);
all_uint = accumulate_uint(all_uint, v_in.uint32x4.z);
all_uint = accumulate_uint(all_uint, v_in.uint32x4.w);
checksums[index_uint] = f32(all_uint);
return vec4(0.0);
}
struct AttributeBlock3 {
@location(0) sint32: i32,
@location(1) sint32x2: vec2<i32>,
@location(2) sint32x3: vec3<i32>,
@location(3) sint32x4: vec4<i32>,
}
@vertex
fn vertex_block_3(v_in: AttributeBlock3) -> @builtin(position) vec4<f32>
{
init_checksums();
// Accumulate all sint into one checksum value.
var all_sint: i32 = 0;
all_sint = accumulate_sint(all_sint, v_in.sint32);
all_sint = accumulate_sint(all_sint, v_in.sint32x2.x);
all_sint = accumulate_sint(all_sint, v_in.sint32x2.y);
all_sint = accumulate_sint(all_sint, v_in.sint32x3.x);
all_sint = accumulate_sint(all_sint, v_in.sint32x3.y);
all_sint = accumulate_sint(all_sint, v_in.sint32x3.z);
all_sint = accumulate_sint(all_sint, v_in.sint32x4.x);
all_sint = accumulate_sint(all_sint, v_in.sint32x4.y);
all_sint = accumulate_sint(all_sint, v_in.sint32x4.z);
all_sint = accumulate_sint(all_sint, v_in.sint32x4.w);
checksums[index_sint] = f32(all_sint);
return vec4(0.0);
}
struct AttributeBlock4{
@location(0) float32: f32,
@location(1) float32x2: vec2<f32>,
@location(2) float32x3: vec3<f32>,
@location(3) float32x4: vec4<f32>,
@location(4) float16x2: vec2<f32>,
@location(5) float16x4: vec4<f32>,
}
@vertex
fn vertex_block_4(v_in: AttributeBlock4) -> @builtin(position) vec4<f32>
{
init_checksums();
// Accumulate all float32 into one checksum value.
var all_float32: f32 = 0.0;
all_float32 = accumulate_float32(all_float32, v_in.float32);
all_float32 = accumulate_float32(all_float32, v_in.float32x2.x);
all_float32 = accumulate_float32(all_float32, v_in.float32x2.y);
all_float32 = accumulate_float32(all_float32, v_in.float32x3.x);
all_float32 = accumulate_float32(all_float32, v_in.float32x3.y);
all_float32 = accumulate_float32(all_float32, v_in.float32x3.z);
all_float32 = accumulate_float32(all_float32, v_in.float32x4.x);
all_float32 = accumulate_float32(all_float32, v_in.float32x4.y);
all_float32 = accumulate_float32(all_float32, v_in.float32x4.z);
all_float32 = accumulate_float32(all_float32, v_in.float32x4.w);
checksums[index_float32] = f32(all_float32);
// Accumulate all float16 into one checksum value.
var all_float16: f32 = 0.0;
all_float16 = accumulate_float16(all_float16, v_in.float16x2.x);
all_float16 = accumulate_float16(all_float16, v_in.float16x2.y);
all_float16 = accumulate_float16(all_float16, v_in.float16x4.x);
all_float16 = accumulate_float16(all_float16, v_in.float16x4.y);
all_float16 = accumulate_float16(all_float16, v_in.float16x4.z);
all_float16 = accumulate_float16(all_float16, v_in.float16x4.w);
checksums[index_float16] = f32(all_float16);
return vec4(0.0);
}
struct AttributeBlock5{
@location(0) unorm10_10_10_2: vec4<f32>,
}
@vertex
fn vertex_block_5(v_in: AttributeBlock5) -> @builtin(position) vec4<f32>
{
init_checksums();
// Accumulate all unorm into one checksum value.
var all_unorm: f32 = 0.0;
all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.x);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.y);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.z);
all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.w);
checksums[index_unorm] = f32(all_unorm);
return vec4(0.0);
}
fn accumulate_uint(accum: u32, val: u32) -> u32 {
return accum + val;
}
fn accumulate_sint(accum: i32, val: i32) -> i32 {
return accum + val;
}
fn accumulate_unorm(accum: f32, val: f32) -> f32 {
return accum + val;
}
fn accumulate_snorm(accum: f32, val: f32) -> f32 {
return accum + val;
}
fn accumulate_float16(accum: f32, val: f32) -> f32 {
return accum + val;
}
fn accumulate_float32(accum: f32, val: f32) -> f32 {
return accum + val;
}
@fragment
fn fragment_main() -> @location(0) vec4<f32> {
return vec4<f32>(0.0);
}

View File

@ -0,0 +1,388 @@
//! Tests that vertex formats pass through to vertex shaders accurately.
use std::num::NonZeroU64;
use wgpu::util::{BufferInitDescriptor, DeviceExt};
use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext};
#[derive(Debug, Copy, Clone)]
enum TestCase {
UnormsAndSnorms,
UintsAndSintsSmall,
UintsBig,
SintsBig,
Floats,
Unorm1010102,
}
struct Test<'a> {
case: TestCase,
entry_point: &'a str,
attributes: &'a [wgt::VertexAttribute],
input: &'a [u8],
checksums: &'a [f32],
}
async fn vertex_formats_all(ctx: TestingContext) {
let attributes_block_0 = &wgpu::vertex_attr_array![
0 => Unorm8x4,
1 => Unorm16x2,
2 => Unorm16x4,
3 => Snorm8x4,
4 => Snorm16x2,
5 => Snorm16x4,
6 => Unorm8x2,
7 => Snorm8x2,
];
let attributes_block_1 = &wgpu::vertex_attr_array![
0 => Uint8x4,
1 => Uint16x2,
2 => Uint16x4,
3 => Sint8x4,
4 => Sint16x2,
5 => Sint16x4,
6 => Uint8x2,
7 => Sint8x2,
];
let attributes_block_2 = &wgpu::vertex_attr_array![
0 => Uint32,
1 => Uint32x2,
2 => Uint32x3,
3 => Uint32x4,
];
let attributes_block_3 = &wgpu::vertex_attr_array![
0 => Sint32,
1 => Sint32x2,
2 => Sint32x3,
3 => Sint32x4,
];
let attributes_block_4 = &wgpu::vertex_attr_array![
0 => Float32,
1 => Float32x2,
2 => Float32x3,
3 => Float32x4,
4 => Float16x2,
5 => Float16x4,
];
let tests = vec![
Test {
case: TestCase::UnormsAndSnorms,
entry_point: "vertex_block_0",
attributes: attributes_block_0,
input: &[
128u8, 128u8, 128u8, 128u8, // Unorm8x4 (0.5, 0.5, 0.5, 0.5)
0u8, 128u8, 0u8, 128u8, // Unorm16x2 (0.5, 0.5)
0u8, 64u8, 0u8, 64u8, 0u8, 64u8, 0u8,
64u8, // Unorm16x4 (0.25, 0.25, 0.25, 0.25)
127u8, 127u8, 127u8, 127u8, // Snorm8x4 (1, 1, 1, 1)
0u8, 128u8, 0u8, 128u8, // Snorm16x2 (-1, -1)
255u8, 127u8, 255u8, 127u8, 255u8, 127u8, 255u8,
127u8, // Snorm16x4 (1, 1, 1, 1)
255u8, 255u8, // Unorm8x2 (1, 1)
128u8, 128u8, // Snorm8x2 (-1, -1)
],
checksums: &[0.0, 0.0, 6.0, 4.0, 0.0, 0.0],
},
Test {
case: TestCase::UintsAndSintsSmall,
entry_point: "vertex_block_1",
attributes: attributes_block_1,
input: &[
4u8, 8u8, 16u8, 32u8, // Uint8x4 (4, 8, 16, 32)
64u8, 0u8, 128u8, 0u8, // Uint16x2 (64, 128)
0u8, 1u8, 0u8, 2u8, 0u8, 4u8, 0u8, 8u8, // Uint16x4 (256, 512, 1024, 2048)
127u8, 127u8, 2u8, 0u8, // Sint8x4 (127, 127, 2, 0)
255u8, 255u8, 1u8, 0u8, // Sint16x2 (-1, 1)
128u8, 255u8, 128u8, 255u8, 0u8, 1u8, 240u8,
255u8, // Sint16x4 (-128, -128, 256, -16)
1u8, 2u8, // Uint8x2 (1, 2)
128u8, 128u8, // Sint8x2 (-128, -128)
],
checksums: &[4095.0, -16.0, 0.0, 0.0, 0.0, 0.0],
},
Test {
case: TestCase::UintsBig,
entry_point: "vertex_block_2",
attributes: attributes_block_2,
input: &[
1u8, 0u8, 0u8, 0u8, // Uint32 (1)
2u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, // Uint32x2 (2, 4)
8u8, 0u8, 0u8, 0u8, 16u8, 0u8, 0u8, 0u8, 32u8, 0u8, 0u8,
0u8, // Uint32x3 (8, 16, 32)
64u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8, 0u8, 0u8, 2u8, 0u8,
0u8, // Uint32x4 (64, 128, 256, 512)
],
checksums: &[1023.0, 0.0, 0.0, 0.0, 0.0, 0.0],
},
Test {
case: TestCase::SintsBig,
entry_point: "vertex_block_3",
attributes: attributes_block_3,
input: &[
128u8, 255u8, 255u8, 255u8, // Sint32 (-128)
120u8, 0u8, 0u8, 0u8, 8u8, 0u8, 0u8, 0u8, // Sint32x2 (120, 8)
252u8, 255u8, 255u8, 255u8, 2u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8,
0u8, // Sint32x3 (-4, 2, 2)
24u8, 252u8, 255u8, 255u8, 88u8, 2u8, 0u8, 0u8, 44u8, 1u8, 0u8, 0u8, 99u8, 0u8,
0u8, 0u8, // Sint32x4 (-1000, 600, 300, 99)
],
checksums: &[0.0, -1.0, 0.0, 0.0, 0.0, 0.0],
},
Test {
case: TestCase::Floats,
entry_point: "vertex_block_4",
attributes: attributes_block_4,
input: &[
0u8, 0u8, 0u8, 63u8, // Float32 (0.5)
0u8, 0u8, 0u8, 191u8, 0u8, 0u8, 128u8, 64u8, // Float32x2 (-0.5, 4.0)
0u8, 0u8, 0u8, 192u8, 0u8, 0u8, 204u8, 194u8, 0u8, 0u8, 200u8,
66u8, // Float32x3 (-2.0, -102.0, 100.0)
0u8, 0u8, 92u8, 66u8, 0u8, 0u8, 72u8, 194u8, 0u8, 0u8, 32u8, 65u8, 0u8, 0u8, 128u8,
63u8, // Float32x4 (55.0, -50.0, 10.0, 1.0)
0u8, 60u8, 72u8, 53u8, // Float16x2 (1.0, 0.33)
72u8, 57u8, 0u8, 192u8, 0u8, 188u8, 0u8,
184u8, // Float16x4 (0.66, -2.0, -1.0, -0.5)
],
checksums: &[0.0, 0.0, 0.0, 0.0, -1.5, 16.0],
},
];
vertex_formats_common(ctx, &tests).await;
}
async fn vertex_formats_10_10_10_2(ctx: TestingContext) {
let attributes_block_5 = &wgpu::vertex_attr_array![
0 => Unorm10_10_10_2,
];
let tests = vec![Test {
case: TestCase::Unorm1010102,
entry_point: "vertex_block_5",
attributes: attributes_block_5,
input: &[
// We are aiming for rgba of (0.5, 0.5, 0.5, 0.66)
// Packing AA BB BBBB BBBB GGGG GGGG GG RR RRRR RRRR
// Binary 10 10 0000 0000 1000 0000 00 10 0000 0000
// Hex A0 08 02 00
// Decimal 160 8 2 0
// unorm 0.66 0.5 0.5 0.5 = 2.16
0u8, 2u8, 8u8, 160u8, // Unorm10_10_10_2
],
checksums: &[0.0, 0.0, 2.16, 0.0, 0.0, 0.0],
}];
vertex_formats_common(ctx, &tests).await;
}
async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) {
let shader = ctx
.device
.create_shader_module(wgpu::include_wgsl!("draw.vert.wgsl"));
let bgl = ctx
.device
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
label: None,
entries: &[wgpu::BindGroupLayoutEntry {
binding: 0,
ty: wgpu::BindingType::Buffer {
ty: wgpu::BufferBindingType::Storage { read_only: false },
has_dynamic_offset: false,
min_binding_size: NonZeroU64::new(4),
},
visibility: wgpu::ShaderStages::VERTEX,
count: None,
}],
});
let ppl = ctx
.device
.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
label: None,
bind_group_layouts: &[&bgl],
push_constant_ranges: &[],
});
let dummy = ctx
.device
.create_texture_with_data(
&ctx.queue,
&wgpu::TextureDescriptor {
label: Some("dummy"),
size: wgpu::Extent3d {
width: 1,
height: 1,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
format: wgpu::TextureFormat::Rgba8Unorm,
usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_DST,
view_formats: &[],
},
wgpu::util::TextureDataOrder::LayerMajor,
&[0, 0, 0, 1],
)
.create_view(&wgpu::TextureViewDescriptor::default());
let mut failed = false;
for test in tests {
let buffer_input = ctx.device.create_buffer_init(&BufferInitDescriptor {
label: None,
contents: bytemuck::cast_slice(test.input),
usage: wgpu::BufferUsages::VERTEX,
});
let pipeline_desc = wgpu::RenderPipelineDescriptor {
label: None,
layout: Some(&ppl),
vertex: wgpu::VertexState {
buffers: &[wgpu::VertexBufferLayout {
array_stride: 0, // Calculate, please!
step_mode: wgpu::VertexStepMode::Vertex,
attributes: test.attributes,
}],
module: &shader,
entry_point: test.entry_point,
compilation_options: Default::default(),
},
primitive: wgpu::PrimitiveState::default(),
depth_stencil: None,
multisample: wgpu::MultisampleState::default(),
fragment: Some(wgpu::FragmentState {
module: &shader,
entry_point: "fragment_main",
compilation_options: Default::default(),
targets: &[Some(wgpu::ColorTargetState {
format: wgpu::TextureFormat::Rgba8Unorm,
blend: None,
write_mask: wgpu::ColorWrites::ALL,
})],
}),
multiview: None,
cache: None,
};
let pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
let expected = test.checksums;
let buffer_size = (std::mem::size_of_val(&expected[0]) * expected.len()) as u64;
let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: buffer_size,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});
let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: buffer_size,
usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::STORAGE,
mapped_at_creation: false,
});
let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &bgl,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: gpu_buffer.as_entire_binding(),
}],
});
let mut encoder1 = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
let mut rpass = encoder1.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
ops: wgpu::Operations::default(),
resolve_target: None,
view: &dummy,
})],
depth_stencil_attachment: None,
timestamp_writes: None,
occlusion_query_set: None,
});
rpass.set_vertex_buffer(0, buffer_input.slice(..));
rpass.set_pipeline(&pipeline);
rpass.set_bind_group(0, &bg, &[]);
// Draw three vertices and no instance, which is enough to generate the
// checksums.
rpass.draw(0..3, 0..1);
drop(rpass);
let mut encoder2 = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
encoder2.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size);
// See https://github.com/gfx-rs/wgpu/issues/4732 for why this is split between two submissions
// with a hard wait in between.
ctx.queue.submit([encoder1.finish()]);
ctx.async_poll(wgpu::Maintain::wait())
.await
.panic_on_timeout();
ctx.queue.submit([encoder2.finish()]);
let slice = cpu_buffer.slice(..);
slice.map_async(wgpu::MapMode::Read, |_| ());
ctx.async_poll(wgpu::Maintain::wait())
.await
.panic_on_timeout();
let data: Vec<f32> = bytemuck::cast_slice(&slice.get_mapped_range()).to_vec();
let case_name = format!("Case {:?}", test.case);
// Calculate the difference between data and expected. Since the data is
// a bunch of float checksums, we allow a fairly large epsilon, which helps
// with the accumulation of float rounding errors.
const EPSILON: f32 = 0.01;
let mut deltas = data.iter().zip(expected.iter()).map(|(d, e)| (d - e).abs());
if deltas.any(|x| x > EPSILON) {
eprintln!(
"Failed: Got: {:?} Expected: {:?} - {case_name}",
data, expected,
);
failed = true;
continue;
}
eprintln!("Passed: {case_name}");
}
assert!(!failed);
}
#[gpu_test]
static VERTEX_FORMATS_ALL: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.test_features_limits()
.features(wgpu::Features::VERTEX_WRITABLE_STORAGE),
)
.run_async(vertex_formats_all);
// Some backends can handle Unorm10_10_10_2, but GL backends seem to throw this error:
// Validation Error: GL_INVALID_ENUM in glVertexAttribFormat(type = GL_UNSIGNED_INT_10_10_10_2)
#[gpu_test]
static VERTEX_FORMATS_10_10_10_2: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(
TestParameters::default()
.expect_fail(FailureCase::backend(wgpu::Backends::GL))
.test_features_limits()
.features(wgpu::Features::VERTEX_WRITABLE_STORAGE),
)
.run_async(vertex_formats_10_10_10_2);

View File

@ -166,7 +166,6 @@ struct Test {
id_source: IdSource,
draw_call_kind: DrawCallKind,
encoder_kind: EncoderKind,
vertex_pulling_transform: bool,
}
impl Test {
@ -280,15 +279,6 @@ async fn vertex_index_common(ctx: TestingContext) {
cache: None,
};
let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = true;
let builtin_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = false;
pipeline_desc.vertex.entry_point = "vs_main_buffers";
pipeline_desc.vertex.buffers = &[
@ -304,15 +294,6 @@ async fn vertex_index_common(ctx: TestingContext) {
},
];
let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = true;
let buffer_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc);
pipeline_desc
.vertex
.compilation_options
.vertex_pulling_transform = false;
let dummy = ctx
.device
@ -341,18 +322,12 @@ async fn vertex_index_common(ctx: TestingContext) {
.cartesian_product(IdSource::iter())
.cartesian_product(DrawCallKind::iter())
.cartesian_product(EncoderKind::iter())
.cartesian_product([false, true])
.map(
|((((case, id_source), draw_call_kind), encoder_kind), vertex_pulling_transform)| {
Test {
case,
id_source,
draw_call_kind,
encoder_kind,
vertex_pulling_transform,
}
},
)
.map(|(((case, id_source), draw_call_kind), encoder_kind)| Test {
case,
id_source,
draw_call_kind,
encoder_kind,
})
.collect::<Vec<_>>();
let features = ctx.adapter.features();
@ -360,20 +335,8 @@ async fn vertex_index_common(ctx: TestingContext) {
let mut failed = false;
for test in tests {
let pipeline = match test.id_source {
IdSource::Buffers => {
if test.vertex_pulling_transform {
&buffer_pipeline_vpt
} else {
&buffer_pipeline
}
}
IdSource::Builtins => {
if test.vertex_pulling_transform {
&builtin_pipeline_vpt
} else {
&builtin_pipeline
}
}
IdSource::Buffers => &buffer_pipeline,
IdSource::Builtins => &builtin_pipeline,
};
let expected = test.expectation(&ctx);

View File

@ -1484,7 +1484,6 @@ impl Global {
.vertex
.stage
.zero_initialize_workgroup_memory,
vertex_pulling_transform: desc.vertex.stage.vertex_pulling_transform,
};
ResolvedVertexState {
stage,
@ -1511,7 +1510,6 @@ impl Global {
.vertex
.stage
.zero_initialize_workgroup_memory,
vertex_pulling_transform: state.stage.vertex_pulling_transform,
};
Some(ResolvedFragmentState {
stage,
@ -1720,7 +1718,6 @@ impl Global {
entry_point: desc.stage.entry_point.clone(),
constants: desc.stage.constants.clone(),
zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
vertex_pulling_transform: desc.stage.vertex_pulling_transform,
};
let desc = ResolvedComputePipelineDescriptor {

View File

@ -2694,7 +2694,6 @@ impl<A: HalApi> Device<A> {
entry_point: final_entry_point_name.as_ref(),
constants: desc.stage.constants.as_ref(),
zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
},
cache: cache.as_ref().and_then(|it| it.raw.as_ref()),
};
@ -3114,7 +3113,6 @@ impl<A: HalApi> Device<A> {
entry_point: &vertex_entry_point_name,
constants: stage_desc.constants.as_ref(),
zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory,
vertex_pulling_transform: stage_desc.vertex_pulling_transform,
}
};
@ -3171,7 +3169,6 @@ impl<A: HalApi> Device<A> {
zero_initialize_workgroup_memory: fragment_state
.stage
.zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
})
}
None => None,

View File

@ -147,8 +147,6 @@ pub struct ProgrammableStageDescriptor<'a> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications
pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
}
/// Describes a programmable pipeline stage.
@ -176,8 +174,6 @@ pub struct ResolvedProgrammableStageDescriptor<'a, A: HalApi> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications
pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
}
/// Number of implicit bind groups derived at pipeline creation.

View File

@ -257,7 +257,6 @@ impl<A: hal::Api> Example<A> {
entry_point: "vs_main",
constants: &constants,
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
},
vertex_buffers: &[],
fragment_stage: Some(hal::ProgrammableStage {
@ -265,7 +264,6 @@ impl<A: hal::Api> Example<A> {
entry_point: "fs_main",
constants: &constants,
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}),
primitive: wgt::PrimitiveState {
topology: wgt::PrimitiveTopology::TriangleStrip,

View File

@ -379,7 +379,6 @@ impl<A: hal::Api> Example<A> {
entry_point: "main",
constants: &Default::default(),
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
},
cache: None,
})

View File

@ -1870,8 +1870,6 @@ pub struct ProgrammableStage<'a, A: Api> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications
pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
}
// Rust gets confused about the impl requirements for `A`
@ -1882,7 +1880,6 @@ impl<A: Api> Clone for ProgrammableStage<'_, A> {
entry_point: self.entry_point,
constants: self.constants,
zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory,
vertex_pulling_transform: self.vertex_pulling_transform,
}
}
}

View File

@ -158,7 +158,7 @@ impl super::Device {
metal::MTLPrimitiveTopologyClass::Point => true,
_ => false,
},
vertex_pulling_transform: stage.vertex_pulling_transform,
vertex_pulling_transform: true,
vertex_buffer_mappings: vertex_buffer_mappings.to_vec(),
};

View File

@ -1180,10 +1180,6 @@ impl crate::Context for ContextWgpuCore {
.vertex
.compilation_options
.zero_initialize_workgroup_memory,
vertex_pulling_transform: desc
.vertex
.compilation_options
.vertex_pulling_transform,
},
buffers: Borrowed(&vertex_buffers),
},
@ -1198,7 +1194,6 @@ impl crate::Context for ContextWgpuCore {
zero_initialize_workgroup_memory: frag
.compilation_options
.zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
},
targets: Borrowed(frag.targets),
}),
@ -1244,7 +1239,6 @@ impl crate::Context for ContextWgpuCore {
zero_initialize_workgroup_memory: desc
.compilation_options
.zero_initialize_workgroup_memory,
vertex_pulling_transform: false,
},
cache: desc.cache.map(|c| c.id.into()),
};

View File

@ -2059,8 +2059,6 @@ pub struct PipelineCompilationOptions<'a> {
/// This is required by the WebGPU spec, but may have overhead which can be avoided
/// for cross-platform applications
pub zero_initialize_workgroup_memory: bool,
/// Should the pipeline attempt to transform vertex shaders to use vertex pulling.
pub vertex_pulling_transform: bool,
}
impl<'a> Default for PipelineCompilationOptions<'a> {
@ -2074,7 +2072,6 @@ impl<'a> Default for PipelineCompilationOptions<'a> {
Self {
constants,
zero_initialize_workgroup_memory: true,
vertex_pulling_transform: false,
}
}
}