Add Benchmarks (#5694)

This commit is contained in:
Connor Fitzgerald 2024-05-16 09:05:41 -04:00 committed by GitHub
parent 3a798859cd
commit eeb1a9d7b7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
25 changed files with 1674 additions and 380 deletions

View File

@ -3,7 +3,17 @@
[profile.default]
slow-timeout = { period = "45s", terminate-after = 2 }
# Use two threads for tests with "2_threads" in their name
# Use two threads for tests with "2 threads" in their name
[[profile.default.overrides]]
filter = 'test(~2_threads)'
filter = 'test(~2_threads) | test(~2 threads)'
threads-required = 2
# Use four threads for tests with "4 threads" in their name
[[profile.default.overrides]]
filter = 'test(~4_threads) | test(~4 threads)'
threads-required = 4
# Use eight threads for tests with "8 threads" in their name
[[profile.default.overrides]]
filter = 'test(~8_threads) | test(~8 threads)'
threads-required = 8

View File

@ -1,6 +1,8 @@
[bans]
multiple-versions = "deny"
skip-tree = [
# We never enable loom in any of our dependencies but it causes dupes
{ name = "loom", version = "0.7.2" },
{ name = "windows-sys", version = "0.45" },
{ name = "winit", version = "0.27" },
{ name = "winit", version = "0.29" },

View File

@ -226,7 +226,7 @@ jobs:
cargo clippy --target ${{ matrix.target }} --no-default-features
# Check with all features.
cargo clippy --target ${{ matrix.target }} --tests --all-features
cargo clippy --target ${{ matrix.target }} --tests --benches --all-features
# build docs
cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps

203
Cargo.lock generated
View File

@ -1511,6 +1511,20 @@ dependencies = [
"slab",
]
[[package]]
name = "generator"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb"
dependencies = [
"cc",
"cfg-if",
"libc",
"log",
"rustversion",
"windows 0.54.0",
]
[[package]]
name = "gethostname"
version = "0.4.3"
@ -1672,7 +1686,7 @@ dependencies = [
"presser",
"thiserror",
"winapi",
"windows",
"windows 0.52.0",
]
[[package]]
@ -2047,6 +2061,19 @@ version = "0.4.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
[[package]]
name = "loom"
version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca"
dependencies = [
"cfg-if",
"generator",
"scoped-tls",
"tracing",
"tracing-subscriber",
]
[[package]]
name = "malloc_buf"
version = "0.0.6"
@ -2056,6 +2083,15 @@ dependencies = [
"libc",
]
[[package]]
name = "matchers"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
dependencies = [
"regex-automata 0.1.10",
]
[[package]]
name = "memchr"
version = "2.7.2"
@ -2141,11 +2177,9 @@ version = "0.20.0"
dependencies = [
"arbitrary",
"arrayvec 0.7.4",
"bincode",
"bit-set",
"bitflags 2.5.0",
"codespan-reporting",
"criterion",
"diff",
"env_logger",
"hexf-parse",
@ -2326,6 +2360,16 @@ dependencies = [
"rand_xorshift",
]
[[package]]
name = "nu-ansi-term"
version = "0.46.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
dependencies = [
"overload",
"winapi",
]
[[package]]
name = "num-bigint"
version = "0.4.5"
@ -2513,6 +2557,12 @@ version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
[[package]]
name = "overload"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "owned_ttf_parser"
version = "0.21.0"
@ -2892,8 +2942,17 @@ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
dependencies = [
"aho-corasick",
"memchr",
"regex-automata",
"regex-syntax",
"regex-automata 0.4.6",
"regex-syntax 0.8.3",
]
[[package]]
name = "regex-automata"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
dependencies = [
"regex-syntax 0.6.29",
]
[[package]]
@ -2904,9 +2963,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
"regex-syntax 0.8.3",
]
[[package]]
name = "regex-syntax"
version = "0.6.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
[[package]]
name = "regex-syntax"
version = "0.8.3"
@ -3138,6 +3203,15 @@ dependencies = [
"pkg-config",
]
[[package]]
name = "sharded-slab"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
dependencies = [
"lazy_static",
]
[[package]]
name = "shared_library"
version = "0.1.9"
@ -3410,6 +3484,16 @@ dependencies = [
"winapi",
]
[[package]]
name = "thread_local"
version = "1.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
dependencies = [
"cfg-if",
"once_cell",
]
[[package]]
name = "threadpool"
version = "1.8.1"
@ -3567,6 +3651,59 @@ name = "tracing-core"
version = "0.1.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
dependencies = [
"once_cell",
"valuable",
]
[[package]]
name = "tracing-log"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
dependencies = [
"log",
"once_cell",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
dependencies = [
"matchers",
"nu-ansi-term",
"once_cell",
"regex",
"sharded-slab",
"smallvec",
"thread_local",
"tracing",
"tracing-core",
"tracing-log",
]
[[package]]
name = "tracy-client"
version = "0.17.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59fb931a64ff88984f86d3e9bcd1ae8843aa7fe44dd0f8097527bc172351741d"
dependencies = [
"loom",
"once_cell",
"tracy-client-sys",
]
[[package]]
name = "tracy-client-sys"
version = "0.22.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9d104d610dfa9dd154535102cc9c6164ae1fa37842bc2d9e83f9ac82b0ae0882"
dependencies = [
"cc",
]
[[package]]
name = "ttf-parser"
@ -3716,6 +3853,12 @@ dependencies = [
"which",
]
[[package]]
name = "valuable"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
[[package]]
name = "vec_map"
version = "0.8.2"
@ -4077,6 +4220,23 @@ dependencies = [
"wgpu-types",
]
[[package]]
name = "wgpu-benchmark"
version = "0.20.0"
dependencies = [
"bincode",
"bytemuck",
"criterion",
"naga",
"nanorand",
"once_cell",
"pollster",
"profiling",
"rayon",
"tracy-client",
"wgpu",
]
[[package]]
name = "wgpu-core"
version = "0.20.0"
@ -4304,7 +4464,17 @@ version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
dependencies = [
"windows-core",
"windows-core 0.52.0",
"windows-targets 0.52.5",
]
[[package]]
name = "windows"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
dependencies = [
"windows-core 0.54.0",
"windows-targets 0.52.5",
]
@ -4317,6 +4487,25 @@ dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "windows-core"
version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
dependencies = [
"windows-result",
"windows-targets 0.52.5",
]
[[package]]
name = "windows-result"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "749f0da9cc72d82e600d8d2e44cadd0b9eedb9038f71a1c58556ac1c5791813b"
dependencies = [
"windows-targets 0.52.5",
]
[[package]]
name = "windows-sys"
version = "0.36.1"

View File

@ -5,8 +5,9 @@ members = [
"deno_webgpu",
# default members
"benches",
"d3d12",
"examples/",
"examples",
"naga-cli",
"naga",
"naga/fuzz",
@ -22,8 +23,9 @@ members = [
]
exclude = []
default-members = [
"benches",
"d3d12",
"examples/",
"examples",
"naga-cli",
"naga",
"naga/fuzz",
@ -70,11 +72,13 @@ version = "0.20.0"
[workspace.dependencies]
anyhow = "1.0.23"
arrayvec = "0.7"
bincode = "1"
bit-vec = "0.6"
bitflags = "2"
bytemuck = { version = "1.14", features = ["derive"] }
cfg_aliases = "0.1"
cfg-if = "1"
criterion = "0.5"
codespan-reporting = "0.11"
ctor = "0.2"
document-features = "0.2.8"
@ -109,6 +113,7 @@ png = "0.17.11"
pollster = "0.3"
profiling = { version = "1", default-features = false }
raw-window-handle = "0.6"
rayon = "1"
renderdoc-sys = "1.1.0"
ron = "0.8"
rustc-hash = "1.1.0"
@ -116,6 +121,7 @@ serde = "1"
serde_json = "1.0.116"
smallvec = "1"
static_assertions = "1.1.0"
tracy-client = "0.17"
thiserror = "1"
wgpu = { version = "0.20.0", path = "./wgpu" }
wgpu-core = { version = "0.20.0", path = "./wgpu-core" }
@ -187,6 +193,10 @@ termcolor = "1.4.1"
#js-sys = { path = "../wasm-bindgen/crates/js-sys" }
#wasm-bindgen = { path = "../wasm-bindgen" }
[profile.release]
lto = "thin"
debug = true
# Speed up image comparison even in debug builds
[profile.dev.package."nv-flip-sys"]
opt-level = 3

46
benches/Cargo.toml Normal file
View File

@ -0,0 +1,46 @@
[package]
name = "wgpu-benchmark"
version.workspace = true
authors.workspace = true
edition.workspace = true
description = "wgpu benchmarking suite"
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
license.workspace = true
autobenches = false
publish = false
[[bench]]
name = "root"
harness = false
path = "benches/root.rs"
[features]
# Uncomment these features to enable tracy and superluminal profiling.
# tracy = ["dep:tracy-client", "profiling/profile-with-tracy"]
# superluminal = ["profiling/profile-with-superluminal"]
[dependencies]
bincode.workspace = true
bytemuck.workspace = true
criterion.workspace = true
naga = { workspace = true, features = [
"deserialize",
"serialize",
"wgsl-in",
"spv-in",
"glsl-in",
"spv-out",
"msl-out",
"hlsl-out",
"glsl-out",
"wgsl-out",
] }
nanorand.workspace = true
once_cell.workspace = true
pollster.workspace = true
profiling.workspace = true
rayon.workspace = true
tracy-client = { workspace = true, optional = true }
wgpu.workspace = true

95
benches/README.md Normal file
View File

@ -0,0 +1,95 @@
Collection of CPU benchmarks for `wgpu`.
These benchmarks are designed as a first line of defence against performance regressions and generally approximate the performance for users.
They all do very little GPU work and are testing the CPU performance of the API.
Criterion will give you the end-to-end performance of the benchmark, but you can also use a profiler to get more detailed information about where time is being spent.
## Usage
```sh
# Run all benchmarks
cargo bench -p wgpu-benchmark
# Run a specific benchmark that contains "filter" in its name
cargo bench -p wgpu-benchmark -- "filter"
```
## Benchmarks
#### `Renderpass`
This benchmark measures the performance of recording and submitting a render pass with a large
number of draw calls and resources, emulating an intense, more traditional graphics application.
By default it measures 10k draw calls, with 90k total resources.
Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting
the render pass into multiple passes over multiple command buffers.
#### `Resource Creation`
This benchmark measures the performance of creating large resources. By default it makes buffers that are 256MB. It tests this over a range of thread counts.
#### `Shader Compilation`
This benchmark measures the performance of naga parsing, validating, and generating shaders.
## Comparing Against a Baseline
To compare the current benchmarks against a baseline, you can use the `--save-baseline` and `--baseline` flags.
For example, to compare v0.20 against trunk, you could run the following:
```sh
git checkout v0.20
# Run the baseline benchmarks
cargo bench -p wgpu-benchmark -- --save-baseline "v0.20"
git checkout trunk
# Run the current benchmarks
cargo bench -p wgpu-benchmark -- --baseline "v0.20"
```
You can use this for any bits of code you want to compare.
## Integration with Profilers
The benchmarks can be run with a profiler to get more detailed information about where time is being spent.
Integrations are available for `tracy` and `superluminal`. Due to some implementation details,
you need to uncomment the corresponding features in the `Cargo.toml` before these integrations can be used.
#### Tracy
Tracy is available prebuilt for Windows on [github](https://github.com/wolfpld/tracy/releases/latest/).
```sh
# Once this is running, you can connect to it with the Tracy Profiler
cargo bench -p wgpu-benchmark --features tracy
```
#### Superluminal
Superluminal is a paid product for Windows available [here](https://superluminal.eu/).
```sh
# This command will build the benchmarks, and display the path to the executable
cargo bench -p wgpu-benchmark --features superluminal -- -h
# Have Superluminal run the following command (replacing with the path to the executable)
./target/release/deps/root-2c45d61b38a65438.exe --bench "filter"
```
#### `perf` and others
You can follow the same pattern as above to run the benchmarks with other profilers.
For example, the command line tool `perf` can be used to profile the benchmarks.
```sh
# This command will build the benchmarks, and display the path to the executable
cargo bench -p wgpu-benchmark -- -h
# Run the benchmarks with perf
perf record ./target/release/deps/root-2c45d61b38a65438 --bench "filter"
```

View File

@ -0,0 +1,26 @@
// Bindless variant of the renderpass benchmark shader: all textures live in a
// single binding_array and are selected per draw via the instance index.
@group(0) @binding(0)
var tex: binding_array<texture_2d<f32>>;

struct VertexOutput {
    @builtin(position) position: vec4f,
    // Flat interpolation so the fragment stage sees the exact instance id.
    @location(0) @interpolate(flat) instance_index: u32,
}

// Emits a degenerate triangle at the origin; only the instance index matters.
@vertex
fn vs_main(@builtin(instance_index) instance_index: u32) -> VertexOutput {
    return VertexOutput(
        vec4f(0.0, 0.0, 0.0, 1.0),
        instance_index
    );
}

// Loads this instance's 7 textures (the stride 7 must match TEXTURES_PER_DRAW
// on the Rust side) and sums them so the loads cannot be optimized away.
@fragment
fn fs_main(vs_in: VertexOutput) -> @location(0) vec4f {
    return textureLoad(tex[7 * vs_in.instance_index + 0], vec2u(0), 0) +
        textureLoad(tex[7 * vs_in.instance_index + 1], vec2u(0), 0) +
        textureLoad(tex[7 * vs_in.instance_index + 2], vec2u(0), 0) +
        textureLoad(tex[7 * vs_in.instance_index + 3], vec2u(0), 0) +
        textureLoad(tex[7 * vs_in.instance_index + 4], vec2u(0), 0) +
        textureLoad(tex[7 * vs_in.instance_index + 5], vec2u(0), 0) +
        textureLoad(tex[7 * vs_in.instance_index + 6], vec2u(0), 0);
}

View File

@ -0,0 +1,573 @@
use std::{
num::NonZeroU32,
time::{Duration, Instant},
};
use criterion::{criterion_group, Criterion, Throughput};
use nanorand::{Rng, WyRand};
use once_cell::sync::Lazy;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use crate::DeviceState;
// Total number of draw calls recorded per full benchmark iteration.
const DRAW_COUNT: usize = 10_000;
// Must match the number of textures in the renderpass.wgsl shader
const TEXTURES_PER_DRAW: usize = 7;
// Number of distinct vertex buffers bound for each draw call.
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
// Total vertex buffers created up front (a dedicated set per draw).
const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW;
// Total textures created up front (a dedicated set per draw).
const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW;
/// All GPU resources needed by the renderpass benchmarks, created once and
/// shared (lazily) across every benchmark in this module.
struct RenderpassState {
    // Instance/adapter/device/queue shared by all passes.
    device_state: DeviceState,
    // Pipeline used by the classic (bind-group-per-draw) passes.
    pipeline: wgpu::RenderPipeline,
    // One bind group per draw call (DRAW_COUNT total).
    bind_groups: Vec<wgpu::BindGroup>,
    // VERTEX_BUFFER_COUNT tiny vertex buffers, VERTEX_BUFFERS_PER_DRAW per draw.
    vertex_buffers: Vec<wgpu::Buffer>,
    // One tiny index buffer per draw call.
    index_buffers: Vec<wgpu::Buffer>,
    // 1x1 color attachment every pass renders into.
    render_target: wgpu::TextureView,
    // Bindless resources
    // `None` when the adapter lacks bindless support (see `supports_bindless`).
    bindless_bind_group: Option<wgpu::BindGroup>,
    bindless_pipeline: Option<wgpu::RenderPipeline>,
}
impl RenderpassState {
    /// Create and prepare all the resources needed for the renderpass benchmark.
    fn new() -> Self {
        let device_state = DeviceState::new();

        // Bindless needs texture arrays + non-uniform indexing, and enough
        // sampled-texture slots to bind every texture in one bind group.
        let supports_bindless = device_state.device.features().contains(
            wgpu::Features::TEXTURE_BINDING_ARRAY
                | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
        ) && device_state
            .device
            .limits()
            .max_sampled_textures_per_shader_stage
            >= TEXTURE_COUNT as _;

        // Performance gets considerably worse if the resources are shuffled.
        //
        // This more closely matches the real-world use case where resources have no
        // well defined usage order.
        let mut random = WyRand::new_seed(0x8BADF00D);

        // One texture binding per slot used by the fragment shader.
        let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DRAW);
        for i in 0..TEXTURES_PER_DRAW {
            bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry {
                binding: i as u32,
                visibility: wgpu::ShaderStages::FRAGMENT,
                ty: wgpu::BindingType::Texture {
                    sample_type: wgpu::TextureSampleType::Float { filterable: true },
                    view_dimension: wgpu::TextureViewDimension::D2,
                    multisampled: false,
                },
                count: None,
            });
        }
        let bind_group_layout =
            device_state
                .device
                .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                    label: None,
                    entries: &bind_group_layout_entries,
                });

        // Create a unique 1x1 texture (and view) for every texture slot of every draw.
        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
        for i in 0..TEXTURE_COUNT {
            let texture = device_state
                .device
                .create_texture(&wgpu::TextureDescriptor {
                    label: Some(&format!("Texture {i}")),
                    size: wgpu::Extent3d {
                        width: 1,
                        height: 1,
                        depth_or_array_layers: 1,
                    },
                    mip_level_count: 1,
                    sample_count: 1,
                    dimension: wgpu::TextureDimension::D2,
                    format: wgpu::TextureFormat::Rgba8UnormSrgb,
                    usage: wgpu::TextureUsages::TEXTURE_BINDING,
                    view_formats: &[],
                });
            texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor {
                label: Some(&format!("Texture View {i}")),
                ..Default::default()
            }));
        }
        random.shuffle(&mut texture_views);
        // Borrowed view list for the bindless TextureViewArray binding below.
        let texture_view_refs: Vec<_> = texture_views.iter().collect();

        // One bind group per draw call, each referencing its own set of textures.
        let mut bind_groups = Vec::with_capacity(DRAW_COUNT);
        for draw_idx in 0..DRAW_COUNT {
            let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW);
            for tex_idx in 0..TEXTURES_PER_DRAW {
                entries.push(wgpu::BindGroupEntry {
                    binding: tex_idx as u32,
                    resource: wgpu::BindingResource::TextureView(
                        &texture_views[draw_idx * TEXTURES_PER_DRAW + tex_idx],
                    ),
                });
            }
            bind_groups.push(
                device_state
                    .device
                    .create_bind_group(&wgpu::BindGroupDescriptor {
                        label: None,
                        layout: &bind_group_layout,
                        entries: &entries,
                    }),
            );
        }
        random.shuffle(&mut bind_groups);

        let sm = device_state
            .device
            .create_shader_module(wgpu::include_wgsl!("renderpass.wgsl"));
        let pipeline_layout =
            device_state
                .device
                .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                    label: None,
                    bind_group_layouts: &[&bind_group_layout],
                    push_constant_ranges: &[],
                });

        // Tiny per-draw vertex/index buffers; their contents are never written,
        // only the cost of binding them is measured.
        let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT);
        for _ in 0..VERTEX_BUFFER_COUNT {
            vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                label: None,
                size: 3 * 16,
                usage: wgpu::BufferUsages::VERTEX,
                mapped_at_creation: false,
            }));
        }
        random.shuffle(&mut vertex_buffers);
        let mut index_buffers = Vec::with_capacity(DRAW_COUNT);
        for _ in 0..DRAW_COUNT {
            index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                label: None,
                size: 3 * 4,
                usage: wgpu::BufferUsages::INDEX,
                mapped_at_creation: false,
            }));
        }
        random.shuffle(&mut index_buffers);

        // One Float32x4 attribute per vertex-buffer slot; attributes are kept
        // alive in their own Vec so the layouts below can borrow them.
        let mut vertex_buffer_attributes = Vec::with_capacity(VERTEX_BUFFERS_PER_DRAW);
        for i in 0..VERTEX_BUFFERS_PER_DRAW {
            vertex_buffer_attributes.push(wgpu::vertex_attr_array![i as u32 => Float32x4]);
        }
        let mut vertex_buffer_layouts = Vec::with_capacity(VERTEX_BUFFERS_PER_DRAW);
        for attributes in &vertex_buffer_attributes {
            vertex_buffer_layouts.push(wgpu::VertexBufferLayout {
                array_stride: 16,
                step_mode: wgpu::VertexStepMode::Vertex,
                attributes,
            });
        }

        let pipeline =
            device_state
                .device
                .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
                    label: None,
                    layout: Some(&pipeline_layout),
                    vertex: wgpu::VertexState {
                        module: &sm,
                        entry_point: "vs_main",
                        buffers: &vertex_buffer_layouts,
                        compilation_options: wgpu::PipelineCompilationOptions::default(),
                    },
                    primitive: wgpu::PrimitiveState {
                        topology: wgpu::PrimitiveTopology::TriangleList,
                        strip_index_format: None,
                        front_face: wgpu::FrontFace::Cw,
                        cull_mode: Some(wgpu::Face::Back),
                        polygon_mode: wgpu::PolygonMode::Fill,
                        unclipped_depth: false,
                        conservative: false,
                    },
                    depth_stencil: None,
                    multisample: wgpu::MultisampleState::default(),
                    fragment: Some(wgpu::FragmentState {
                        module: &sm,
                        entry_point: "fs_main",
                        targets: &[Some(wgpu::ColorTargetState {
                            format: wgpu::TextureFormat::Rgba8UnormSrgb,
                            blend: None,
                            write_mask: wgpu::ColorWrites::ALL,
                        })],
                        compilation_options: wgpu::PipelineCompilationOptions::default(),
                    }),
                    multiview: None,
                });

        // 1x1 color target shared by every pass.
        let render_target = device_state
            .device
            .create_texture(&wgpu::TextureDescriptor {
                label: Some("Render Target"),
                size: wgpu::Extent3d {
                    width: 1,
                    height: 1,
                    depth_or_array_layers: 1,
                },
                mip_level_count: 1,
                sample_count: 1,
                dimension: wgpu::TextureDimension::D2,
                format: wgpu::TextureFormat::Rgba8UnormSrgb,
                usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
                view_formats: &[],
            })
            .create_view(&wgpu::TextureViewDescriptor::default());

        // Optional bindless variant: one bind group holding *all* textures,
        // indexed by instance id in the shader.
        let mut bindless_bind_group = None;
        let mut bindless_pipeline = None;
        if supports_bindless {
            let bindless_bind_group_layout =
                device_state
                    .device
                    .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                        label: None,
                        entries: &[wgpu::BindGroupLayoutEntry {
                            binding: 0,
                            visibility: wgpu::ShaderStages::FRAGMENT,
                            ty: wgpu::BindingType::Texture {
                                sample_type: wgpu::TextureSampleType::Float { filterable: true },
                                view_dimension: wgpu::TextureViewDimension::D2,
                                multisampled: false,
                            },
                            count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
                        }],
                    });
            bindless_bind_group = Some(device_state.device.create_bind_group(
                &wgpu::BindGroupDescriptor {
                    label: None,
                    layout: &bindless_bind_group_layout,
                    entries: &[wgpu::BindGroupEntry {
                        binding: 0,
                        resource: wgpu::BindingResource::TextureViewArray(&texture_view_refs),
                    }],
                },
            ));
            let bindless_shader_module = device_state
                .device
                .create_shader_module(wgpu::include_wgsl!("renderpass-bindless.wgsl"));
            let bindless_pipeline_layout =
                device_state
                    .device
                    .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                        label: None,
                        bind_group_layouts: &[&bindless_bind_group_layout],
                        push_constant_ranges: &[],
                    });
            bindless_pipeline = Some(device_state.device.create_render_pipeline(
                &wgpu::RenderPipelineDescriptor {
                    label: None,
                    layout: Some(&bindless_pipeline_layout),
                    vertex: wgpu::VertexState {
                        module: &bindless_shader_module,
                        entry_point: "vs_main",
                        buffers: &vertex_buffer_layouts,
                        compilation_options: wgpu::PipelineCompilationOptions::default(),
                    },
                    primitive: wgpu::PrimitiveState {
                        topology: wgpu::PrimitiveTopology::TriangleList,
                        strip_index_format: None,
                        front_face: wgpu::FrontFace::Cw,
                        cull_mode: Some(wgpu::Face::Back),
                        polygon_mode: wgpu::PolygonMode::Fill,
                        unclipped_depth: false,
                        conservative: false,
                    },
                    depth_stencil: None,
                    multisample: wgpu::MultisampleState::default(),
                    fragment: Some(wgpu::FragmentState {
                        module: &bindless_shader_module,
                        entry_point: "fs_main",
                        targets: &[Some(wgpu::ColorTargetState {
                            format: wgpu::TextureFormat::Rgba8UnormSrgb,
                            blend: None,
                            write_mask: wgpu::ColorWrites::ALL,
                        })],
                        compilation_options: wgpu::PipelineCompilationOptions::default(),
                    }),
                    multiview: None,
                },
            ));
        }

        Self {
            device_state,
            pipeline,
            bind_groups,
            vertex_buffers,
            index_buffers,
            render_target,
            bindless_bind_group,
            bindless_pipeline,
        }
    }

    /// Record one command buffer containing the `pass_number`-th slice of the
    /// draws, i.e. `DRAW_COUNT / total_passes` indexed draws with full
    /// per-draw state setting (pipeline, bind group, vertex/index buffers).
    fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
        profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}"));

        let draws_per_pass = DRAW_COUNT / total_passes;

        let mut encoder = self
            .device_state
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

        let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
            label: None,
            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                view: &self.render_target,
                resolve_target: None,
                ops: wgpu::Operations {
                    load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
                    store: wgpu::StoreOp::Store,
                },
            })],
            occlusion_query_set: None,
            timestamp_writes: None,
            depth_stencil_attachment: None,
        });

        let start_idx = pass_number * draws_per_pass;
        let end_idx = start_idx + draws_per_pass;
        for draw_idx in start_idx..end_idx {
            // NOTE(review): the pipeline is re-set on every draw — presumably
            // intentional so the benchmark includes per-draw state-setting cost.
            render_pass.set_pipeline(&self.pipeline);
            render_pass.set_bind_group(0, &self.bind_groups[draw_idx], &[]);
            for i in 0..VERTEX_BUFFERS_PER_DRAW {
                render_pass.set_vertex_buffer(
                    i as u32,
                    self.vertex_buffers[draw_idx * VERTEX_BUFFERS_PER_DRAW + i].slice(..),
                );
            }
            render_pass.set_index_buffer(
                self.index_buffers[draw_idx].slice(..),
                wgpu::IndexFormat::Uint32,
            );
            render_pass.draw_indexed(0..3, 0, 0..1);
        }

        // End the pass before finishing the encoder.
        drop(render_pass);
        encoder.finish()
    }

    /// Record a single command buffer issuing all `DRAW_COUNT` draws through the
    /// bindless pipeline, selecting textures by instance index. Callers must
    /// have verified bindless support (the `Option`s are unwrapped here).
    fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
        profiling::scope!("Bindless Renderpass");

        let mut encoder = self
            .device_state
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });

        let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
            label: None,
            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                view: &self.render_target,
                resolve_target: None,
                ops: wgpu::Operations {
                    load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
                    store: wgpu::StoreOp::Store,
                },
            })],
            occlusion_query_set: None,
            timestamp_writes: None,
            depth_stencil_attachment: None,
        });

        render_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
        render_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
        // NOTE(review): every slot binds vertex_buffers[0] — presumably only the
        // act of binding matters here, not which buffer is bound; confirm intent.
        for i in 0..VERTEX_BUFFERS_PER_DRAW {
            render_pass.set_vertex_buffer(i as u32, self.vertex_buffers[0].slice(..));
        }
        render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32);

        for draw_idx in 0..DRAW_COUNT {
            // The instance range carries the draw index into the shader.
            render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1);
        }

        drop(render_pass);
        encoder.finish()
    }
}
/// Run all renderpass benchmarks: single-threaded recording (record time and
/// submit time), multi-threaded recording, bindless recording, and an
/// empty-submit baseline.
fn run_bench(ctx: &mut Criterion) {
    // Resource setup is deferred until the first benchmark actually runs.
    let state = Lazy::new(RenderpassState::new);

    // Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses
    let mut group = ctx.benchmark_group("Renderpass: Single Threaded");
    group.throughput(Throughput::Elements(DRAW_COUNT as _));
    for time_submit in [false, true] {
        for rpasses in [1, 2, 4, 8] {
            let draws_per_pass = DRAW_COUNT / rpasses;
            let label = if time_submit {
                "Submit Time"
            } else {
                "Renderpass Time"
            };
            group.bench_function(
                &format!("{rpasses} renderpasses x {draws_per_pass} draws ({label})"),
                |b| {
                    Lazy::force(&state);
                    b.iter_custom(|iters| {
                        profiling::scope!("benchmark invocation");
                        // This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
                        if state.device_state.adapter_info.name.contains("Paravirtual") {
                            return Duration::from_secs_f32(1.0);
                        }
                        let mut duration = Duration::ZERO;
                        for _ in 0..iters {
                            profiling::scope!("benchmark iteration");
                            // `start` is reset below when only submit time is measured.
                            let mut start = Instant::now();
                            let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses);
                            for i in 0..rpasses {
                                buffers.push(state.run_subpass(i, rpasses));
                            }
                            if time_submit {
                                start = Instant::now();
                            } else {
                                duration += start.elapsed();
                            }
                            state.device_state.queue.submit(buffers);
                            if time_submit {
                                duration += start.elapsed();
                            }
                            // Wait for the GPU so work doesn't pile up across iterations.
                            state.device_state.device.poll(wgpu::Maintain::Wait);
                        }
                        duration
                    })
                },
            );
        }
    }
    group.finish();

    // Test 10k draw calls split up over 2, 4, and 8 threads.
    let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
    group.throughput(Throughput::Elements(DRAW_COUNT as _));
    for threads in [2, 4, 8] {
        let draws_per_pass = DRAW_COUNT / threads;
        group.bench_function(
            &format!("{threads} threads x {draws_per_pass} draws"),
            |b| {
                Lazy::force(&state);
                b.iter_custom(|iters| {
                    profiling::scope!("benchmark invocation");
                    // This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
                    if state.device_state.adapter_info.name.contains("Paravirtual") {
                        return Duration::from_secs_f32(1.0);
                    }
                    let mut duration = Duration::ZERO;
                    for _ in 0..iters {
                        profiling::scope!("benchmark iteration");
                        let start = Instant::now();
                        // Each rayon worker records one slice of the draws.
                        let buffers = (0..threads)
                            .into_par_iter()
                            .map(|i| state.run_subpass(i, threads))
                            .collect::<Vec<_>>();
                        duration += start.elapsed();
                        state.device_state.queue.submit(buffers);
                        state.device_state.device.poll(wgpu::Maintain::Wait);
                    }
                    duration
                })
            },
        );
    }
    group.finish();

    // Test all 10k draw calls recorded in a single bindless renderpass.
    let mut group = ctx.benchmark_group("Renderpass: Bindless");
    group.throughput(Throughput::Elements(DRAW_COUNT as _));
    group.bench_function(&format!("{DRAW_COUNT} draws"), |b| {
        Lazy::force(&state);
        b.iter_custom(|iters| {
            profiling::scope!("benchmark invocation");
            // Need bindless to run this benchmark
            if state.bindless_bind_group.is_none() {
                return Duration::from_secs_f32(1.0);
            }
            let mut duration = Duration::ZERO;
            for _ in 0..iters {
                profiling::scope!("benchmark iteration");
                let start = Instant::now();
                let buffer = state.run_bindless_pass();
                duration += start.elapsed();
                state.device_state.queue.submit([buffer]);
                state.device_state.device.poll(wgpu::Maintain::Wait);
            }
            duration
        })
    });
    group.finish();

    // Baseline: cost of an empty submit while all these resources are alive.
    ctx.bench_function(
        &format!(
            "Renderpass: Empty Submit with {} Resources",
            TEXTURE_COUNT + VERTEX_BUFFER_COUNT
        ),
        |b| {
            Lazy::force(&state);
            b.iter(|| state.device_state.queue.submit([]));
        },
    );
}
// Register the renderpass benchmarks with a 10-second measurement window.
criterion_group! {
    name = renderpass;
    config = Criterion::default().measurement_time(Duration::from_secs(10));
    targets = run_bench,
}

View File

@ -0,0 +1,36 @@
// Classic (bind-group-per-draw) renderpass benchmark shader: 7 individually
// bound textures, matching TEXTURES_PER_DRAW on the Rust side.
@group(0) @binding(0)
var tex_1: texture_2d<f32>;
@group(0) @binding(1)
var tex_2: texture_2d<f32>;
@group(0) @binding(2)
var tex_3: texture_2d<f32>;
@group(0) @binding(3)
var tex_4: texture_2d<f32>;
@group(0) @binding(4)
var tex_5: texture_2d<f32>;
@group(0) @binding(5)
var tex_6: texture_2d<f32>;
@group(0) @binding(6)
var tex_7: texture_2d<f32>;

// Emits a degenerate triangle at the origin; the vertex work is irrelevant.
@vertex
fn vs_main() -> @builtin(position) vec4f {
    return vec4f(0.0, 0.0, 0.0, 1.0);
}

// Loads all 7 bound textures and sums them so none of the bindings is
// optimized away.
@fragment
fn fs_main() -> @location(0) vec4f {
    return textureLoad(tex_1, vec2u(0), 0) +
        textureLoad(tex_2, vec2u(0), 0) +
        textureLoad(tex_3, vec2u(0), 0) +
        textureLoad(tex_4, vec2u(0), 0) +
        textureLoad(tex_5, vec2u(0), 0) +
        textureLoad(tex_6, vec2u(0), 0) +
        textureLoad(tex_7, vec2u(0), 0);
}

View File

@ -0,0 +1,71 @@
use std::time::{Duration, Instant};
use criterion::{criterion_group, Criterion, Throughput};
use once_cell::sync::Lazy;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use crate::DeviceState;
/// Benchmark creating large (256 MiB) buffers, split across 1, 2, 4, and 8
/// threads. Only creation time is measured; teardown happens off the clock.
fn run_bench(ctx: &mut Criterion) {
    // Device is created lazily, on first benchmark run.
    let state = Lazy::new(DeviceState::new);

    // Total buffers created per iteration, divided evenly among the threads.
    const RESOURCES_TO_CREATE: usize = 8;

    let mut group = ctx.benchmark_group("Resource Creation: Large Buffer");
    group.throughput(Throughput::Elements(RESOURCES_TO_CREATE as _));
    for threads in [1, 2, 4, 8] {
        let resources_per_thread = RESOURCES_TO_CREATE / threads;
        group.bench_function(
            &format!("{threads} threads x {resources_per_thread} resource"),
            |b| {
                Lazy::force(&state);
                b.iter_custom(|iters| {
                    profiling::scope!("benchmark invocation");
                    let mut duration = Duration::ZERO;
                    for _ in 0..iters {
                        profiling::scope!("benchmark iteration");
                        // We can't create too many resources at once, so we do it 8 resources at a time.
                        let start = Instant::now();
                        // Each rayon worker creates its share of the buffers.
                        let buffers = (0..threads)
                            .into_par_iter()
                            .map(|_| {
                                (0..resources_per_thread)
                                    .map(|_| {
                                        state.device.create_buffer(&wgpu::BufferDescriptor {
                                            label: None,
                                            size: 256 * 1024 * 1024,
                                            usage: wgpu::BufferUsages::COPY_DST,
                                            mapped_at_creation: false,
                                        })
                                    })
                                    .collect::<Vec<_>>()
                            })
                            .collect::<Vec<_>>();
                        duration += start.elapsed();
                        // Drop the buffers and flush/wait outside the timed
                        // region so memory doesn't accumulate between iterations.
                        drop(buffers);
                        state.queue.submit([]);
                        state.device.poll(wgpu::Maintain::Wait);
                    }
                    duration
                })
            },
        );
    }
    group.finish();
}
// Register the resource-creation benchmarks with a 10-second measurement window.
criterion_group! {
    name = resource_creation;
    config = Criterion::default().measurement_time(Duration::from_secs(10));
    targets = run_bench,
}

65
benches/benches/root.rs Normal file
View File

@ -0,0 +1,65 @@
use criterion::criterion_main;
use pollster::block_on;
mod renderpass;
mod resource_creation;
mod shader;
/// Shared GPU state used by the benchmarks: one adapter/device/queue set,
/// created once and reused.
struct DeviceState {
    // Kept so benchmarks can report which adapter they ran on.
    adapter_info: wgpu::AdapterInfo,
    device: wgpu::Device,
    queue: wgpu::Queue,
}
impl DeviceState {
    /// Creates the instance, adapter, device and queue used by every
    /// benchmark.
    ///
    /// Backend and adapter selection can be overridden through the standard
    /// wgpu environment variables; otherwise all backends are considered
    /// (Metal only, on macOS). Panics if no suitable adapter/device is found.
    fn new() -> Self {
        // Start the tracy profiler client as early as possible when enabled.
        #[cfg(feature = "tracy")]
        tracy_client::Client::start();

        let base_backend = if cfg!(target_os = "macos") {
            // We don't want to use Molten-VK on Mac.
            wgpu::Backends::METAL
        } else {
            wgpu::Backends::all()
        };

        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
            backends: wgpu::util::backend_bits_from_env().unwrap_or(base_backend),
            flags: wgpu::InstanceFlags::empty(),
            dx12_shader_compiler: wgpu::util::dx12_shader_compiler_from_env()
                .unwrap_or(wgpu::Dx12Compiler::Fxc),
            gles_minor_version: wgpu::Gles3MinorVersion::Automatic,
        });

        let adapter = block_on(wgpu::util::initialize_adapter_from_env_or_default(
            &instance, None,
        ))
        .unwrap();

        let adapter_info = adapter.get_info();

        // Print the adapter so benchmark results can be attributed to the
        // hardware they ran on.
        eprintln!("{:?}", adapter_info);

        // Request everything the adapter supports so individual benchmarks
        // don't need to negotiate features/limits themselves.
        let (device, queue) = block_on(adapter.request_device(
            &wgpu::DeviceDescriptor {
                required_features: adapter.features(),
                required_limits: adapter.limits(),
                label: Some("RenderPass Device"),
            },
            None,
        ))
        .unwrap();

        Self {
            adapter_info,
            device,
            queue,
        }
    }
}
// Benchmark binary entry point: runs every registered benchmark group.
criterion_main!(
    renderpass::renderpass,
    resource_creation::resource_creation,
    shader::shader
);

355
benches/benches/shader.rs Normal file
View File

@ -0,0 +1,355 @@
use criterion::*;
use std::{fs, path::PathBuf};
/// A single shader input file, progressively populated by the staged
/// `Inputs` passes (`load` -> `load_utf8` -> `parse` -> `validate`).
/// `None`/empty fields simply have not been produced yet.
struct Input {
    filename: String,
    // On-disk size in bytes, captured from directory metadata at discovery.
    size: u64,
    // Raw file contents; empty until `Inputs::load` runs.
    data: Vec<u8>,
    // UTF-8 copy of `data`; `Some` after `Inputs::load_utf8`.
    string: Option<String>,
    // Parsed IR; `Some` after `Inputs::parse`.
    module: Option<naga::Module>,
    // Validation result; `Some` after `Inputs::validate` (failures removed).
    module_info: Option<naga::valid::ModuleInfo>,
}
/// A collection of shader input files discovered from one directory.
struct Inputs {
    inner: Vec<Input>,
}
impl Inputs {
    /// Discovers all files in `folder` (relative to this crate's manifest
    /// dir) whose extension matches `extension`. Contents are not read yet;
    /// only the filename and on-disk size are recorded. Unreadable directory
    /// entries are skipped with a message on stderr.
    fn from_dir(folder: &str, extension: &str) -> Self {
        let mut inputs = Vec::new();

        let read_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
            .join(folder)
            .read_dir()
            .unwrap();
        for file_entry in read_dir {
            match file_entry {
                Ok(entry) => match entry.path().extension() {
                    Some(ostr) if ostr == extension => {
                        let path = entry.path();
                        inputs.push(Input {
                            filename: path.to_string_lossy().into_owned(),
                            size: entry.metadata().unwrap().len(),
                            string: None,
                            data: vec![],
                            module: None,
                            module_info: None,
                        });
                    }
                    _ => continue,
                },
                Err(e) => {
                    eprintln!("Skipping file: {:?}", e);
                    continue;
                }
            }
        }

        Self { inner: inputs }
    }

    /// Total on-disk size of all inputs, used for criterion throughput.
    fn bytes(&self) -> u64 {
        self.inner.iter().map(|input| input.size).sum()
    }

    /// Reads raw contents for any input not yet loaded; idempotent.
    /// A file that fails to read yields empty data (and would be retried
    /// on the next call, since "empty" doubles as "not loaded").
    fn load(&mut self) {
        for input in &mut self.inner {
            if !input.data.is_empty() {
                continue;
            }

            input.data = fs::read(&input.filename).unwrap_or_default();
        }
    }

    /// Decodes loaded bytes into UTF-8 strings (loading first if needed);
    /// idempotent. Panics on invalid UTF-8.
    fn load_utf8(&mut self) {
        self.load();

        for input in &mut self.inner {
            if input.string.is_some() {
                continue;
            }

            input.string = Some(std::str::from_utf8(&input.data).unwrap().to_string());
        }
    }

    /// Parses every input as WGSL (loading UTF-8 first if needed);
    /// idempotent. Panics on parse errors.
    fn parse(&mut self) {
        self.load_utf8();

        let mut parser = naga::front::wgsl::Frontend::new();

        for input in &mut self.inner {
            if input.module.is_some() {
                continue;
            }

            input.module = Some(parser.parse(input.string.as_ref().unwrap()).unwrap());
        }
    }

    /// Validates every parsed module (parsing first if needed) and removes
    /// inputs that fail validation, so backend benchmarks only ever see
    /// modules they can actually write out.
    fn validate(&mut self) {
        self.parse();

        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            // Note, this is empty, to let all backends work.
            naga::valid::Capabilities::empty(),
        );

        for input in &mut self.inner {
            if input.module_info.is_some() {
                continue;
            }

            input.module_info = validator.validate(input.module.as_ref().unwrap()).ok();
        }

        self.inner.retain(|input| input.module_info.is_some());
    }
}
/// Parses every loaded GLSL input for the given shader stage, panicking on
/// any parse failure. Inputs must already have their UTF-8 strings loaded
/// (see `Inputs::load_utf8`).
fn parse_glsl(stage: naga::ShaderStage, inputs: &Inputs) {
    let options = naga::front::glsl::Options {
        stage,
        defines: Default::default(),
    };
    let mut frontend = naga::front::glsl::Frontend::default();
    inputs.inner.iter().for_each(|input| {
        let source = input.string.as_deref().unwrap();
        frontend.parse(&options, source).unwrap();
    });
}
/// Benchmarks naga's shader frontends (WGSL, SPIR-V, GLSL) plus bincode
/// decoding of pre-serialized modules, using the shaders under
/// `naga/tests/in` as the corpus.
fn frontends(c: &mut Criterion) {
    let mut group = c.benchmark_group("front");

    let mut inputs_wgsl = Inputs::from_dir("../naga/tests/in", "wgsl");
    group.throughput(Throughput::Bytes(inputs_wgsl.bytes()));
    group.bench_function("shader: naga module bincode decode", |b| {
        inputs_wgsl.parse();

        // Serialize once up front; only deserialization is measured.
        let inputs_bin = inputs_wgsl
            .inner
            .iter()
            .map(|input| bincode::serialize(&input.module.as_ref().unwrap()).unwrap())
            .collect::<Vec<_>>();

        b.iter(move || {
            for input in inputs_bin.iter() {
                bincode::deserialize::<naga::Module>(input).unwrap();
            }
        });
    });
    group.bench_function("shader: wgsl-in", |b| {
        inputs_wgsl.load_utf8();
        let mut frontend = naga::front::wgsl::Frontend::new();
        b.iter(|| {
            for input in &inputs_wgsl.inner {
                frontend.parse(input.string.as_ref().unwrap()).unwrap();
            }
        });
    });

    let mut inputs_spirv = Inputs::from_dir("../naga/tests/in/spv", "spv");
    group.throughput(Throughput::Bytes(inputs_spirv.bytes()));
    group.bench_function("shader: spv-in", |b| {
        inputs_spirv.load();
        b.iter(|| {
            let options = naga::front::spv::Options::default();
            for input in &inputs_spirv.inner {
                // Safely reinterpret the raw bytes as a u32 word stream.
                let spv = bytemuck::cast_slice(&input.data);
                let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options);
                parser.parse().unwrap();
            }
        });
    });

    let mut inputs_vertex = Inputs::from_dir("../naga/tests/in/glsl", "vert");
    let mut inputs_fragment = Inputs::from_dir("../naga/tests/in/glsl", "frag");
    // let mut inputs_compute = Inputs::from_dir("../naga/tests/in/glsl", "comp");
    group.throughput(Throughput::Bytes(
        inputs_vertex.bytes() + inputs_fragment.bytes(), // + inputs_compute.bytes()
    ));
    group.bench_function("shader: glsl-in", |b| {
        inputs_vertex.load();
        inputs_vertex.load_utf8();
        inputs_fragment.load_utf8();
        // inputs_compute.load_utf8();
        b.iter(|| parse_glsl(naga::ShaderStage::Vertex, &inputs_vertex));
        // Fix: the fragment corpus was previously parsed with the Vertex
        // stage, so the fragment path of the frontend was never exercised.
        b.iter(|| parse_glsl(naga::ShaderStage::Fragment, &inputs_fragment));
        // TODO: This one hangs for some reason
        // b.iter(move || parse_glsl(naga::ShaderStage::Compute, &inputs_compute));
    });
}
/// Benchmarks naga validation over the WGSL corpus, with every validation
/// flag and capability enabled (including subgroup support).
fn validation(c: &mut Criterion) {
    let mut inputs = Inputs::from_dir("../naga/tests/in", "wgsl");

    let mut group = c.benchmark_group("validate");
    group.throughput(Throughput::Bytes(inputs.bytes()));
    group.bench_function("shader: validation", |b| {
        // Load and parse outside the timed loop; only validation is measured.
        inputs.load();
        inputs.load_utf8();
        inputs.parse();

        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        validator
            .subgroup_stages(naga::valid::ShaderStages::all())
            .subgroup_operations(naga::valid::SubgroupOperationSet::all());

        b.iter(|| {
            for input in &inputs.inner {
                validator.validate(input.module.as_ref().unwrap()).unwrap();
            }
        });
    });
    group.finish();
}
/// Benchmarks every naga backend (WGSL, SPIR-V, MSL, HLSL, GLSL) over the
/// validated WGSL corpus. Writer errors are deliberately ignored so a
/// shader unsupported by one backend doesn't abort the whole run.
fn backends(c: &mut Criterion) {
    let mut inputs = Inputs::from_dir("../naga/tests/in", "wgsl");

    let mut group = c.benchmark_group("back");

    // While normally this would be done inside the bench_function callback, we need to
    // run this to properly know the size of the inputs, as any that fail validation
    // will be removed.
    inputs.validate();

    group.throughput(Throughput::Bytes(inputs.bytes()));
    group.bench_function("shader: wgsl-out", |b| {
        b.iter(|| {
            let mut string = String::new();
            let flags = naga::back::wgsl::WriterFlags::empty();
            for input in &inputs.inner {
                let mut writer = naga::back::wgsl::Writer::new(&mut string, flags);
                let _ = writer.write(
                    input.module.as_ref().unwrap(),
                    input.module_info.as_ref().unwrap(),
                );
                // Reuse the output buffer between shaders.
                string.clear();
            }
        });
    });

    group.bench_function("shader: spv-out", |b| {
        b.iter(|| {
            let mut data = Vec::new();
            let options = naga::back::spv::Options::default();
            for input in &inputs.inner {
                let mut writer = naga::back::spv::Writer::new(&options).unwrap();
                let _ = writer.write(
                    input.module.as_ref().unwrap(),
                    input.module_info.as_ref().unwrap(),
                    None,
                    &None,
                    &mut data,
                );
                data.clear();
            }
        });
    });
    group.bench_function("shader: spv-out multiple entrypoints", |b| {
        b.iter(|| {
            let mut data = Vec::new();
            let options = naga::back::spv::Options::default();
            for input in &inputs.inner {
                let mut writer = naga::back::spv::Writer::new(&options).unwrap();
                let module = input.module.as_ref().unwrap();
                // One write per entry point, mirroring pipeline creation.
                for ep in module.entry_points.iter() {
                    let pipeline_options = naga::back::spv::PipelineOptions {
                        shader_stage: ep.stage,
                        entry_point: ep.name.clone(),
                    };
                    let _ = writer.write(
                        input.module.as_ref().unwrap(),
                        input.module_info.as_ref().unwrap(),
                        Some(&pipeline_options),
                        &None,
                        &mut data,
                    );
                    data.clear();
                }
            }
        });
    });

    group.bench_function("shader: msl-out", |b| {
        b.iter(|| {
            let mut string = String::new();
            let options = naga::back::msl::Options::default();
            for input in &inputs.inner {
                let pipeline_options = naga::back::msl::PipelineOptions::default();
                let mut writer = naga::back::msl::Writer::new(&mut string);
                let _ = writer.write(
                    input.module.as_ref().unwrap(),
                    input.module_info.as_ref().unwrap(),
                    &options,
                    &pipeline_options,
                );
                string.clear();
            }
        });
    });

    group.bench_function("shader: hlsl-out", |b| {
        b.iter(|| {
            let options = naga::back::hlsl::Options::default();
            let mut string = String::new();
            for input in &inputs.inner {
                let mut writer = naga::back::hlsl::Writer::new(&mut string, &options);
                let _ = writer.write(
                    input.module.as_ref().unwrap(),
                    input.module_info.as_ref().unwrap(),
                ); // may fail on unimplemented things
                string.clear();
            }
        });
    });

    group.bench_function("shader: glsl-out multiple entrypoints", |b| {
        b.iter(|| {
            let mut string = String::new();
            let options = naga::back::glsl::Options {
                version: naga::back::glsl::Version::new_gles(320),
                writer_flags: naga::back::glsl::WriterFlags::empty(),
                binding_map: Default::default(),
                zero_initialize_workgroup_memory: true,
            };
            for input in &inputs.inner {
                let module = input.module.as_ref().unwrap();
                let info = input.module_info.as_ref().unwrap();
                // The GLSL backend writes one entry point at a time.
                for ep in module.entry_points.iter() {
                    let pipeline_options = naga::back::glsl::PipelineOptions {
                        shader_stage: ep.stage,
                        entry_point: ep.name.clone(),
                        multiview: None,
                    };

                    // might be `Err` if missing features
                    if let Ok(mut writer) = naga::back::glsl::Writer::new(
                        &mut string,
                        module,
                        info,
                        &options,
                        &pipeline_options,
                        naga::proc::BoundsCheckPolicies::default(),
                    ) {
                        let _ = writer.write(); // might be `Err` if unsupported
                    }

                    string.clear();
                }
            }
        });
    });
}
// Register all shader benchmarks under the `shader` group.
criterion_group!(shader, frontends, validation, backends);

View File

@ -35,10 +35,6 @@ wgsl-out = []
hlsl-out = []
compact = []
[[bench]]
name = "criterion"
harness = false
[dependencies]
arbitrary = { version = "1.3", features = ["derive"], optional = true }
bitflags = "2.5"
@ -60,11 +56,7 @@ hexf-parse = { version = "0.2.1", optional = true }
unicode-xid = { version = "0.2.3", optional = true }
arrayvec.workspace = true
[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
criterion = { version = "0.5", features = [] }
[dev-dependencies]
bincode = "1"
diff = "0.1"
env_logger = "0.11"
# This _cannot_ have a version specified. If it does, crates.io will look

View File

@ -1,273 +0,0 @@
#![cfg(not(target_arch = "wasm32"))]
#![allow(clippy::needless_borrowed_reference)]
use criterion::*;
use std::{fs, path::PathBuf, slice};
/// Reads every file with the given extension from `folder` (relative to the
/// crate root) and returns their raw contents. Unreadable directory entries
/// are skipped with a warning; unreadable files yield empty slices.
fn gather_inputs(folder: &str, extension: &str) -> Vec<Box<[u8]>> {
    let dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(folder);
    let mut inputs = Vec::new();
    for file_entry in dir.read_dir().unwrap() {
        let entry = match file_entry {
            Ok(entry) => entry,
            Err(e) => {
                log::warn!("Skipping file: {:?}", e);
                continue;
            }
        };
        let path = entry.path();
        if path.extension().map_or(false, |ext| ext == extension) {
            let contents = fs::read(&path).unwrap_or_default();
            inputs.push(contents.into_boxed_slice());
        }
    }
    inputs
}
/// Parses every raw GLSL input for the given shader stage, panicking on
/// invalid UTF-8 or any parse failure.
fn parse_glsl(stage: naga::ShaderStage, inputs: &[Box<[u8]>]) {
    let options = naga::front::glsl::Options {
        stage,
        defines: Default::default(),
    };
    let mut frontend = naga::front::glsl::Frontend::default();
    for bytes in inputs {
        let source = std::str::from_utf8(bytes).unwrap();
        frontend.parse(&options, source).unwrap();
    }
}
/// Benchmarks the shader frontends enabled by the crate's features over the
/// corpora in `tests/in`.
fn frontends(c: &mut Criterion) {
    let mut group = c.benchmark_group("front");
    #[cfg(all(feature = "wgsl-in", feature = "serialize", feature = "deserialize"))]
    group.bench_function("bin", |b| {
        // Parse and serialize once up front; only deserialization is timed.
        let inputs_wgsl = gather_inputs("tests/in", "wgsl");
        let mut frontend = naga::front::wgsl::Frontend::new();
        let inputs_bin = inputs_wgsl
            .iter()
            .map(|input| {
                let string = std::str::from_utf8(input).unwrap();
                let module = frontend.parse(string).unwrap();
                bincode::serialize(&module).unwrap()
            })
            .collect::<Vec<_>>();

        b.iter(move || {
            for input in inputs_bin.iter() {
                bincode::deserialize::<naga::Module>(input).unwrap();
            }
        });
    });
    #[cfg(feature = "wgsl-in")]
    group.bench_function("wgsl", |b| {
        let inputs_wgsl = gather_inputs("tests/in", "wgsl");
        let inputs = inputs_wgsl
            .iter()
            .map(|input| std::str::from_utf8(input).unwrap())
            .collect::<Vec<_>>();
        let mut frontend = naga::front::wgsl::Frontend::new();
        b.iter(move || {
            for &input in inputs.iter() {
                frontend.parse(input).unwrap();
            }
        });
    });
    #[cfg(feature = "spv-in")]
    group.bench_function("spv", |b| {
        let inputs = gather_inputs("tests/in/spv", "spv");
        b.iter(move || {
            let options = naga::front::spv::Options::default();
            for input in inputs.iter() {
                // NOTE(review): reinterprets raw bytes as u32 words via
                // `from_raw_parts`; this assumes 4-byte alignment of the
                // boxed slice, which is not guaranteed — confirm, or use a
                // checked cast (e.g. bytemuck) instead.
                let spv =
                    unsafe { slice::from_raw_parts(input.as_ptr() as *const u32, input.len() / 4) };
                let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options);
                parser.parse().unwrap();
            }
        });
    });
    #[cfg(feature = "glsl-in")]
    group.bench_function("glsl", |b| {
        let vert = gather_inputs("tests/in/glsl", "vert");
        b.iter(move || parse_glsl(naga::ShaderStage::Vertex, &vert));
        let frag = gather_inputs("tests/in/glsl", "frag");
        // NOTE(review): the fragment corpus is parsed with the Vertex stage —
        // looks like a copy/paste slip; confirm Fragment was intended.
        b.iter(move || parse_glsl(naga::ShaderStage::Vertex, &frag));
        //TODO: hangs for some reason!
        //let comp = gather_inputs("tests/in/glsl", "comp");
        //b.iter(move || parse_glsl(naga::ShaderStage::Compute, &comp));
    });
}
/// Parses the full WGSL corpus into naga modules. Panics on invalid UTF-8
/// or on any parse failure.
#[cfg(feature = "wgsl-in")]
fn gather_modules() -> Vec<naga::Module> {
    let inputs = gather_inputs("tests/in", "wgsl");
    let mut frontend = naga::front::wgsl::Frontend::new();
    inputs
        .iter()
        .map(|input| {
            let string = std::str::from_utf8(input).unwrap();
            frontend.parse(string).unwrap()
        })
        .collect()
}
/// Fallback when the WGSL frontend is disabled: no modules to benchmark.
#[cfg(not(feature = "wgsl-in"))]
fn gather_modules() -> Vec<naga::Module> {
    Vec::new()
}
/// Benchmarks module validation with full checks ("safe") and with every
/// validation flag disabled ("unsafe"), to compare against the fixed cost
/// of a validation pass.
fn validation(c: &mut Criterion) {
    let inputs = gather_modules();
    let mut group = c.benchmark_group("valid");
    group.bench_function("safe", |b| {
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        b.iter(|| {
            for input in inputs.iter() {
                validator.validate(input).unwrap();
            }
        });
    });
    group.bench_function("unsafe", |b| {
        // Same corpus, but with all validation flags turned off.
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::empty(),
            naga::valid::Capabilities::all(),
        );
        b.iter(|| {
            for input in inputs.iter() {
                validator.validate(input).unwrap();
            }
        });
    });
}
/// Benchmarks each feature-enabled backend over pre-validated modules.
/// Modules that fail validation (empty flags, default capabilities) are
/// silently dropped from the corpus.
fn backends(c: &mut Criterion) {
    let inputs = {
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::empty(),
            naga::valid::Capabilities::default(),
        );
        let input_modules = gather_modules();
        input_modules
            .into_iter()
            .flat_map(|module| validator.validate(&module).ok().map(|info| (module, info)))
            .collect::<Vec<_>>()
    };

    let mut group = c.benchmark_group("back");
    #[cfg(feature = "wgsl-out")]
    group.bench_function("wgsl", |b| {
        b.iter(|| {
            let mut string = String::new();
            let flags = naga::back::wgsl::WriterFlags::empty();
            for &(ref module, ref info) in inputs.iter() {
                let mut writer = naga::back::wgsl::Writer::new(&mut string, flags);
                writer.write(module, info).unwrap();
                // Reuse the output buffer between shaders.
                string.clear();
            }
        });
    });

    #[cfg(feature = "spv-out")]
    group.bench_function("spv", |b| {
        b.iter(|| {
            let mut data = Vec::new();
            let options = naga::back::spv::Options::default();
            for &(ref module, ref info) in inputs.iter() {
                let mut writer = naga::back::spv::Writer::new(&options).unwrap();
                writer.write(module, info, None, &None, &mut data).unwrap();
                data.clear();
            }
        });
    });
    #[cfg(feature = "spv-out")]
    group.bench_function("spv-separate", |b| {
        b.iter(|| {
            let mut data = Vec::new();
            let options = naga::back::spv::Options::default();
            for &(ref module, ref info) in inputs.iter() {
                let mut writer = naga::back::spv::Writer::new(&options).unwrap();
                // One write per entry point, mirroring pipeline creation.
                for ep in module.entry_points.iter() {
                    let pipeline_options = naga::back::spv::PipelineOptions {
                        shader_stage: ep.stage,
                        entry_point: ep.name.clone(),
                    };
                    writer
                        .write(module, info, Some(&pipeline_options), &None, &mut data)
                        .unwrap();
                    data.clear();
                }
            }
        });
    });

    #[cfg(feature = "msl-out")]
    group.bench_function("msl", |b| {
        b.iter(|| {
            let mut string = String::new();
            let options = naga::back::msl::Options::default();
            for &(ref module, ref info) in inputs.iter() {
                let pipeline_options = naga::back::msl::PipelineOptions::default();
                let mut writer = naga::back::msl::Writer::new(&mut string);
                writer
                    .write(module, info, &options, &pipeline_options)
                    .unwrap();
                string.clear();
            }
        });
    });

    #[cfg(feature = "hlsl-out")]
    group.bench_function("hlsl", |b| {
        b.iter(|| {
            let options = naga::back::hlsl::Options::default();
            let mut string = String::new();
            for &(ref module, ref info) in inputs.iter() {
                let mut writer = naga::back::hlsl::Writer::new(&mut string, &options);
                let _ = writer.write(module, info); // may fail on unimplemented things
                string.clear();
            }
        });
    });

    #[cfg(feature = "glsl-out")]
    group.bench_function("glsl-separate", |b| {
        b.iter(|| {
            let mut string = String::new();
            let options = naga::back::glsl::Options {
                version: naga::back::glsl::Version::new_gles(320),
                writer_flags: naga::back::glsl::WriterFlags::empty(),
                binding_map: Default::default(),
                zero_initialize_workgroup_memory: true,
            };
            for &(ref module, ref info) in inputs.iter() {
                // The GLSL backend writes one entry point at a time.
                for ep in module.entry_points.iter() {
                    let pipeline_options = naga::back::glsl::PipelineOptions {
                        shader_stage: ep.stage,
                        entry_point: ep.name.clone(),
                        multiview: None,
                    };
                    // might be `Err` if missing features
                    if let Ok(mut writer) = naga::back::glsl::Writer::new(
                        &mut string,
                        module,
                        info,
                        &options,
                        &pipeline_options,
                        naga::proc::BoundsCheckPolicies::default(),
                    ) {
                        let _ = writer.write(); // might be `Err` if unsupported
                    }
                    string.clear();
                }
            }
        });
    });
}
// Register all benchmark functions and generate the benchmark entry point.
criterion_group!(criterion, frontends, validation, backends,);
criterion_main!(criterion);

View File

@ -21,23 +21,27 @@ features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"]
[[bin]]
name = "spv_parser"
path = "fuzz_targets/spv_parser.rs"
bench = false
test = false
doc = false
[[bin]]
name = "wgsl_parser"
path = "fuzz_targets/wgsl_parser.rs"
bench = false
test = false
doc = false
[[bin]]
name = "glsl_parser"
path = "fuzz_targets/glsl_parser.rs"
bench = false
test = false
doc = false
[[bin]]
name = "ir"
path = "fuzz_targets/ir.rs"
bench = false
test = false
doc = false

View File

@ -1044,7 +1044,12 @@ impl<'a, W: Write> super::Writer<'a, W> {
crate::Expression::GlobalVariable(var_handle) => {
&module.global_variables[var_handle]
}
ref other => unreachable!("Array length of base {:?}", other),
ref other => {
return Err(super::Error::Unimplemented(format!(
"Array length of base {:?}",
other
)))
}
};
let storage_access = match global_var.space {
crate::AddressSpace::Storage { access } => access,

View File

@ -172,6 +172,8 @@ impl<A: HalApi> BakedCommands<A> {
device_tracker: &mut Tracker<A>,
snatch_guard: &SnatchGuard<'_>,
) -> Result<(), DestroyedBufferError> {
profiling::scope!("initialize_buffer_memory");
// Gather init ranges for each buffer so we can collapse them.
// It is not possible to do this at an earlier point since previously
// executed command buffer change the resource init state.
@ -276,6 +278,8 @@ impl<A: HalApi> BakedCommands<A> {
device: &Device<A>,
snatch_guard: &SnatchGuard<'_>,
) -> Result<(), DestroyedTextureError> {
profiling::scope!("initialize_texture_memory");
let mut ranges: Vec<TextureInitRange> = Vec::new();
for texture_use in self.texture_memory_actions.drain_init_actions() {
let mut initialization_status = texture_use.texture.initialization_status.write();

View File

@ -32,7 +32,9 @@ pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
// value is enough for a 16k texture with float4 format.
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;
const CLEANUP_WAIT_MS: u32 = 5000;
// If a submission is not completed within this time, we go off into UB land.
// See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this.
const CLEANUP_WAIT_MS: u32 = 60000;
const IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL: &str = "Implicit BindGroupLayout in the Error State";
const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";

View File

@ -1186,6 +1186,8 @@ impl Global {
// finish all the command buffers first
for &cmb_id in command_buffer_ids {
profiling::scope!("process command buffer");
// we reset the used surface textures every time we use
// it, so make sure to set_size on it.
used_surface_textures.set_size(device.tracker_indices.textures.size());
@ -1222,59 +1224,73 @@ impl Global {
continue;
}
// optimize the tracked states
// cmdbuf.trackers.optimize();
{
profiling::scope!("update submission ids");
let cmd_buf_data = cmdbuf.data.lock();
let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers;
// update submission IDs
for buffer in cmd_buf_trackers.buffers.used_resources() {
if buffer.raw.get(&snatch_guard).is_none() {
return Err(QueueSubmitError::DestroyedBuffer(
buffer.info.id(),
));
}
buffer.info.use_at(submit_index);
match *buffer.map_state.lock() {
BufferMapState::Idle => (),
_ => {
return Err(QueueSubmitError::BufferStillMapped(
{
profiling::scope!("buffers");
for buffer in cmd_buf_trackers.buffers.used_resources() {
if buffer.raw.get(&snatch_guard).is_none() {
return Err(QueueSubmitError::DestroyedBuffer(
buffer.info.id(),
))
}
}
}
for texture in cmd_buf_trackers.textures.used_resources() {
let should_extend = match texture.inner.get(&snatch_guard) {
None => {
return Err(QueueSubmitError::DestroyedTexture(
texture.info.id(),
));
}
Some(TextureInner::Native { .. }) => false,
Some(TextureInner::Surface { ref raw, .. }) => {
if raw.is_some() {
submit_surface_textures_owned.push(texture.clone());
}
buffer.info.use_at(submit_index);
true
match *buffer.map_state.lock() {
BufferMapState::Idle => (),
_ => {
return Err(QueueSubmitError::BufferStillMapped(
buffer.info.id(),
))
}
}
};
texture.info.use_at(submit_index);
if should_extend {
unsafe {
used_surface_textures
.merge_single(&texture, None, hal::TextureUses::PRESENT)
.unwrap();
};
}
}
for texture_view in cmd_buf_trackers.views.used_resources() {
texture_view.info.use_at(submit_index);
{
profiling::scope!("textures");
for texture in cmd_buf_trackers.textures.used_resources() {
let should_extend = match texture.inner.get(&snatch_guard) {
None => {
return Err(QueueSubmitError::DestroyedTexture(
texture.info.id(),
));
}
Some(TextureInner::Native { .. }) => false,
Some(TextureInner::Surface { ref raw, .. }) => {
if raw.is_some() {
submit_surface_textures_owned.push(texture.clone());
}
true
}
};
texture.info.use_at(submit_index);
if should_extend {
unsafe {
used_surface_textures
.merge_single(
&texture,
None,
hal::TextureUses::PRESENT,
)
.unwrap();
};
}
}
}
{
profiling::scope!("views");
for texture_view in cmd_buf_trackers.views.used_resources() {
texture_view.info.use_at(submit_index);
}
}
{
profiling::scope!("bind groups (+ referenced views/samplers)");
for bg in cmd_buf_trackers.bind_groups.used_resources() {
bg.info.use_at(submit_index);
// We need to update the submission indices for the contained
@ -1288,36 +1304,51 @@ impl Global {
}
}
}
// assert!(cmd_buf_trackers.samplers.is_empty());
for compute_pipeline in
cmd_buf_trackers.compute_pipelines.used_resources()
{
compute_pipeline.info.use_at(submit_index);
profiling::scope!("compute pipelines");
for compute_pipeline in
cmd_buf_trackers.compute_pipelines.used_resources()
{
compute_pipeline.info.use_at(submit_index);
}
}
for render_pipeline in
cmd_buf_trackers.render_pipelines.used_resources()
{
render_pipeline.info.use_at(submit_index);
}
for query_set in cmd_buf_trackers.query_sets.used_resources() {
query_set.info.use_at(submit_index);
}
for bundle in cmd_buf_trackers.bundles.used_resources() {
bundle.info.use_at(submit_index);
// We need to update the submission indices for the contained
// state-less (!) resources as well, excluding the bind groups.
// They don't get deleted too early if the bundle goes out of scope.
profiling::scope!("render pipelines");
for render_pipeline in
bundle.used.render_pipelines.read().used_resources()
cmd_buf_trackers.render_pipelines.used_resources()
{
render_pipeline.info.use_at(submit_index);
}
for query_set in bundle.used.query_sets.read().used_resources() {
}
{
profiling::scope!("query sets");
for query_set in cmd_buf_trackers.query_sets.used_resources() {
query_set.info.use_at(submit_index);
}
}
{
profiling::scope!(
"render bundles (+ referenced pipelines/query sets)"
);
for bundle in cmd_buf_trackers.bundles.used_resources() {
bundle.info.use_at(submit_index);
// We need to update the submission indices for the contained
// state-less (!) resources as well, excluding the bind groups.
// They don't get deleted too early if the bundle goes out of scope.
for render_pipeline in
bundle.used.render_pipelines.read().used_resources()
{
render_pipeline.info.use_at(submit_index);
}
for query_set in bundle.used.query_sets.read().used_resources()
{
query_set.info.use_at(submit_index);
}
}
}
}
let mut baked = cmdbuf.from_arc_into_baked();
// execute resource transitions
unsafe {
baked
@ -1385,6 +1416,13 @@ impl Global {
raw: baked.encoder,
cmd_buffers: baked.list,
});
{
// This involves actually decrementing the ref count of all command buffer
// resources, so can be _very_ expensive.
profiling::scope!("drop command buffer trackers");
drop(baked.trackers);
}
}
log::trace!("Device after submission {}", submit_index);

View File

@ -84,9 +84,6 @@ naga-ir = ["dep:naga"]
## to the validation carried out at public APIs in all builds.
strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"]
## Log all API entry points at info instead of trace level.
api_log_info = ["wgc/api_log_info"]
## Enables serialization via `serde` on common wgpu types.
serde = ["dep:serde", "wgc/serde"]

View File

@ -13,11 +13,21 @@ Usage: xtask <COMMAND>
Commands:
run-wasm
Build and run web examples
--release Build in release mode
--no-serve Just build the generated files, don't serve them
test
Run tests
--llvm-cov Run tests with LLVM code coverage using the llvm-cov tool
--list List all of the tests and their executables without running them
--retries Number of times to retry failing tests
vendor-web-sys
Re-vendor the WebGPU web-sys bindings.
--no-cleanup Don't clean up temporary checkout of wasm-bindgen
One of:
--path-to-checkout Path to a local checkout of wasm-bindgen to generate bindings from.

View File

@ -5,7 +5,7 @@ use xshell::Shell;
use crate::util::{check_all_programs, Program};
pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> Result<(), anyhow::Error> {
pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
let no_serve = args.contains("--no-serve");
let release = args.contains("--release");

View File

@ -4,6 +4,12 @@ use xshell::Shell;
pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
let llvm_cov = args.contains("--llvm-cov");
let list = args.contains("--list");
let retries = args
.opt_value_from_str("--retries")?
.unwrap_or(0_u32)
.to_string();
// These needs to match the command in "run wgpu-info" in `.github/workflows/ci.yml`
let llvm_cov_flags: &[_] = if llvm_cov {
&["llvm-cov", "--no-cfg-coverage", "--no-report"]
@ -13,18 +19,30 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
let llvm_cov_nextest_flags: &[_] = if llvm_cov {
&["llvm-cov", "--no-cfg-coverage", "--no-report", "nextest"]
} else {
&["nextest", "run"]
if list {
&["nextest", "list"]
} else {
&["nextest", "run"]
}
};
log::info!("Generating .gpuconfig file based on gpus on the system");
xshell::cmd!(
shell,
"cargo {llvm_cov_flags...} run --bin wgpu-info -- --json -o .gpuconfig"
)
.quiet()
.run()
.context("Failed to run wgpu-info to generate .gpuconfig")?;
shell
.cmd("cargo")
.args(llvm_cov_flags)
.args([
"run",
"--bin",
"wgpu-info",
"--",
"--json",
"-o",
".gpuconfig",
])
.quiet()
.run()
.context("Failed to run wgpu-info to generate .gpuconfig")?;
let gpu_count = shell
.read_file(".gpuconfig")
@ -39,16 +57,34 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
if gpu_count == 1 { "" } else { "s" }
);
if list {
log::info!("Listing tests");
shell
.cmd("cargo")
.args(llvm_cov_nextest_flags)
.args(["-v", "--benches", "--tests", "--all-features"])
.args(args.finish())
.run()
.context("Failed to list tests")?;
return Ok(());
}
log::info!("Running cargo tests");
xshell::cmd!(
shell,
"cargo {llvm_cov_nextest_flags...} --all-features --no-fail-fast --retries 2"
)
.args(args.finish())
.quiet()
.run()
.context("Tests failed")?;
shell
.cmd("cargo")
.args(llvm_cov_nextest_flags)
.args([
"--benches",
"--tests",
"--no-fail-fast",
"--all-features",
"--retries",
&retries,
])
.args(args.finish())
.quiet()
.run()
.context("Tests failed")?;
log::info!("Finished tests");

View File

@ -1,15 +1,15 @@
use std::{io, process::Command};
pub(crate) struct Program {
pub binary_name: &'static str,
pub crate_name: &'static str,
pub binary_name: &'static str,
}
pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
let mut failed = Vec::new();
for Program {
binary_name,
let mut failed_crates = Vec::new();
for &Program {
crate_name,
binary_name,
} in programs
{
let mut cmd = Command::new(binary_name);
@ -21,7 +21,7 @@ pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
}
Err(e) if matches!(e.kind(), io::ErrorKind::NotFound) => {
log::error!("Checking for {binary_name} in PATH: ❌");
failed.push(*crate_name);
failed_crates.push(crate_name);
}
Err(e) => {
log::error!("Checking for {binary_name} in PATH: ❌");
@ -30,12 +30,13 @@ pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
}
}
if !failed.is_empty() {
if !failed_crates.is_empty() {
log::error!(
"Please install them with: cargo install {}",
failed.join(" ")
failed_crates.join(" ")
);
anyhow::bail!("Missing programs in PATH");
anyhow::bail!("Missing required programs");
}
Ok(())