From eeb1a9d7b751da1fd14768809ff55f15b9056504 Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 16 May 2024 09:05:41 -0400 Subject: [PATCH] Add Benchmarks (#5694) --- .config/nextest.toml | 14 +- .deny.toml | 2 + .github/workflows/ci.yml | 2 +- Cargo.lock | 203 +++++++- Cargo.toml | 14 +- benches/Cargo.toml | 46 ++ benches/README.md | 95 ++++ benches/benches/renderpass-bindless.wgsl | 26 + benches/benches/renderpass.rs | 573 +++++++++++++++++++++++ benches/benches/renderpass.wgsl | 36 ++ benches/benches/resource_creation.rs | 71 +++ benches/benches/root.rs | 65 +++ benches/benches/shader.rs | 355 ++++++++++++++ naga/Cargo.toml | 8 - naga/benches/criterion.rs | 273 ----------- naga/fuzz/Cargo.toml | 4 + naga/src/back/hlsl/help.rs | 7 +- wgpu-core/src/command/memory_init.rs | 4 + wgpu-core/src/device/mod.rs | 4 +- wgpu-core/src/device/queue.rs | 152 +++--- wgpu/Cargo.toml | 3 - xtask/src/main.rs | 10 + xtask/src/run_wasm.rs | 2 +- xtask/src/test.rs | 68 ++- xtask/src/util.rs | 17 +- 25 files changed, 1674 insertions(+), 380 deletions(-) create mode 100644 benches/Cargo.toml create mode 100644 benches/README.md create mode 100644 benches/benches/renderpass-bindless.wgsl create mode 100644 benches/benches/renderpass.rs create mode 100644 benches/benches/renderpass.wgsl create mode 100644 benches/benches/resource_creation.rs create mode 100644 benches/benches/root.rs create mode 100644 benches/benches/shader.rs delete mode 100644 naga/benches/criterion.rs diff --git a/.config/nextest.toml b/.config/nextest.toml index b8dbfe952..3d5a23b65 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,17 @@ [profile.default] slow-timeout = { period = "45s", terminate-after = 2 } -# Use two threads for tests with "2_threads" in their name +# Use two threads for tests with "2 threads" in their name [[profile.default.overrides]] -filter = 'test(~2_threads)' +filter = 'test(~2_threads) | test(~2 threads)' threads-required = 2 + +# Use four threads for tests with "4 threads" in their name +[[profile.default.overrides]] +filter = 'test(~4_threads) | test(~4 threads)' +threads-required = 4 + +# Use eight threads for tests with "8 threads" in their name +[[profile.default.overrides]] +filter = 'test(~8_threads) | test(~8 threads)' +threads-required = 8 diff --git a/.deny.toml b/.deny.toml index 7e000d6f8..8448c81e8 100644 --- a/.deny.toml +++ b/.deny.toml @@ -1,6 +1,8 @@ [bans] multiple-versions = "deny" skip-tree = [ + # We never enable loom in any of our dependencies but it causes dupes + { name = "loom", version = "0.7.2" }, { name = "windows-sys", version = "0.45" }, { name = "winit", version = "0.27" }, { name = "winit", version = "0.29" }, diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 55020c173..e2723f2ce 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -226,7 +226,7 @@ jobs: cargo clippy --target ${{ matrix.target }} --no-default-features # Check with all features. 
- cargo clippy --target ${{ matrix.target }} --tests --all-features + cargo clippy --target ${{ matrix.target }} --tests --benches --all-features # build docs cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps diff --git a/Cargo.lock b/Cargo.lock index 83bdcc7c5..1e1cdd65d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1511,6 +1511,20 @@ dependencies = [ "slab", ] +[[package]] +name = "generator" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb" +dependencies = [ + "cc", + "cfg-if", + "libc", + "log", + "rustversion", + "windows 0.54.0", +] + [[package]] name = "gethostname" version = "0.4.3" @@ -1672,7 +1686,7 @@ dependencies = [ "presser", "thiserror", "winapi", - "windows", + "windows 0.52.0", ] [[package]] @@ -2047,6 +2061,19 @@ version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" +[[package]] +name = "loom" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" +dependencies = [ + "cfg-if", + "generator", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + [[package]] name = "malloc_buf" version = "0.0.6" @@ -2056,6 +2083,15 @@ dependencies = [ "libc", ] +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "memchr" version = "2.7.2" @@ -2141,11 +2177,9 @@ version = "0.20.0" dependencies = [ "arbitrary", "arrayvec 0.7.4", - "bincode", "bit-set", "bitflags 2.5.0", "codespan-reporting", - "criterion", "diff", "env_logger", "hexf-parse", @@ -2326,6 +2360,16 @@ dependencies = [ "rand_xorshift", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-bigint" version = "0.4.5" @@ -2513,6 +2557,12 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "owned_ttf_parser" version = "0.21.0" @@ -2892,8 +2942,17 @@ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.6", + "regex-syntax 0.8.3", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -2904,9 +2963,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.3", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.3" @@ -3138,6 +3203,15 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shared_library" version = "0.1.9" @@ -3410,6 +3484,16 @@ dependencies = [ "winapi", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "threadpool" version = "1.8.1" @@ -3567,6 +3651,59 @@ name = "tracing-core" version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "tracy-client" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59fb931a64ff88984f86d3e9bcd1ae8843aa7fe44dd0f8097527bc172351741d" +dependencies = [ + "loom", + "once_cell", + "tracy-client-sys", +] + +[[package]] +name = "tracy-client-sys" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d104d610dfa9dd154535102cc9c6164ae1fa37842bc2d9e83f9ac82b0ae0882" +dependencies = [ + "cc", +] [[package]] name = "ttf-parser" @@ -3716,6 +3853,12 @@ dependencies = [ "which", ] +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "vec_map" version = "0.8.2" @@ -4077,6 +4220,23 @@ dependencies = [ "wgpu-types", ] +[[package]] +name = "wgpu-benchmark" +version = "0.20.0" +dependencies = [ + "bincode", + "bytemuck", + "criterion", + "naga", + "nanorand", + "once_cell", + "pollster", + "profiling", + "rayon", + "tracy-client", + "wgpu", +] + [[package]] name = "wgpu-core" version = "0.20.0" @@ -4304,7 +4464,17 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" dependencies = [ - "windows-core", + "windows-core 0.52.0", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49" +dependencies = [ + "windows-core 0.54.0", "windows-targets 0.52.5", ] @@ -4317,6 +4487,25 @@ dependencies = [ 
"windows-targets 0.52.5", ] +[[package]] +name = "windows-core" +version = "0.54.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65" +dependencies = [ + "windows-result", + "windows-targets 0.52.5", +] + +[[package]] +name = "windows-result" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "749f0da9cc72d82e600d8d2e44cadd0b9eedb9038f71a1c58556ac1c5791813b" +dependencies = [ + "windows-targets 0.52.5", +] + [[package]] name = "windows-sys" version = "0.36.1" diff --git a/Cargo.toml b/Cargo.toml index bfcc19e7f..7d142df64 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,8 +5,9 @@ members = [ "deno_webgpu", # default members + "benches", "d3d12", - "examples/", + "examples", "naga-cli", "naga", "naga/fuzz", @@ -22,8 +23,9 @@ members = [ ] exclude = [] default-members = [ + "benches", "d3d12", - "examples/", + "examples", "naga-cli", "naga", "naga/fuzz", @@ -70,11 +72,13 @@ version = "0.20.0" [workspace.dependencies] anyhow = "1.0.23" arrayvec = "0.7" +bincode = "1" bit-vec = "0.6" bitflags = "2" bytemuck = { version = "1.14", features = ["derive"] } cfg_aliases = "0.1" cfg-if = "1" +criterion = "0.5" codespan-reporting = "0.11" ctor = "0.2" document-features = "0.2.8" @@ -109,6 +113,7 @@ png = "0.17.11" pollster = "0.3" profiling = { version = "1", default-features = false } raw-window-handle = "0.6" +rayon = "1" renderdoc-sys = "1.1.0" ron = "0.8" rustc-hash = "1.1.0" @@ -116,6 +121,7 @@ serde = "1" serde_json = "1.0.116" smallvec = "1" static_assertions = "1.1.0" +tracy-client = "0.17" thiserror = "1" wgpu = { version = "0.20.0", path = "./wgpu" } wgpu-core = { version = "0.20.0", path = "./wgpu-core" } @@ -187,6 +193,10 @@ termcolor = "1.4.1" #js-sys = { path = "../wasm-bindgen/crates/js-sys" } #wasm-bindgen = { path = "../wasm-bindgen" } +[profile.release] +lto = "thin" +debug = true + # Speed up image comparison even in debug builds [profile.dev.package."nv-flip-sys"] opt-level = 3 diff --git a/benches/Cargo.toml b/benches/Cargo.toml new file mode 100644 index 000000000..65ac0eefd --- /dev/null +++ b/benches/Cargo.toml @@ -0,0 +1,46 @@ +[package] +name = "wgpu-benchmark" +version.workspace = true +authors.workspace = true +edition.workspace = true +description = "wgpu benchmarking suite" +homepage.workspace = true +repository.workspace = true +keywords.workspace = true +license.workspace = true +autobenches = false +publish = false + +[[bench]] +name = "root" +harness = false +path = "benches/root.rs" + +[features] +# Uncomment these features to enable tracy and superluminal profiling. +# tracy = ["dep:tracy-client", "profiling/profile-with-tracy"] +# superluminal = ["profiling/profile-with-superluminal"] + +[dependencies] +bincode.workspace = true +bytemuck.workspace = true +criterion.workspace = true +naga = { workspace = true, features = [ + "deserialize", + "serialize", + "wgsl-in", + "spv-in", + "glsl-in", + "spv-out", + "msl-out", + "hlsl-out", + "glsl-out", + "wgsl-out", +] } +nanorand.workspace = true +once_cell.workspace = true +pollster.workspace = true +profiling.workspace = true +rayon.workspace = true +tracy-client = { workspace = true, optional = true } +wgpu.workspace = true diff --git a/benches/README.md b/benches/README.md new file mode 100644 index 000000000..3f20cbba7 --- /dev/null +++ b/benches/README.md @@ -0,0 +1,95 @@ +Collection of CPU benchmarks for `wgpu`. 
+
+These benchmarks are designed as a first line of defence against performance regressions and generally approximate the performance users will see.
+They all do very little GPU work and test the CPU performance of the API.
+
+Criterion will give you the end-to-end performance of the benchmark, but you can also use a profiler to get more detailed information about where time is being spent.
+
+## Usage
+
+```sh
+# Run all benchmarks
+cargo bench -p wgpu-benchmark
+# Run a specific benchmark that contains "filter" in its name
+cargo bench -p wgpu-benchmark -- "filter"
+```
+
+## Benchmarks
+
+#### `Renderpass`
+
+This benchmark measures the performance of recording and submitting a render pass with a large
+number of draw calls and resources, emulating an intense, more traditional graphics application.
+By default it measures 10k draw calls, with 90k total resources.
+
+Within this benchmark, both single-threaded and multi-threaded recording are tested, as well as splitting
+the render pass into multiple passes over multiple command buffers.
+
+#### `Resource Creation`
+
+This benchmark measures the performance of creating large resources. By default it creates buffers that are 256MB each. It tests this over a range of thread counts.
+
+#### `Shader Compilation`
+
+This benchmark measures the performance of naga parsing, validating, and generating shaders.
+
+## Comparing Against a Baseline
+
+To compare the current benchmarks against a baseline, you can use the `--save-baseline` and `--baseline` flags.
+
+For example, to compare v0.20 against trunk, you could run the following:
+
+```sh
+git checkout v0.20
+
+# Run the baseline benchmarks
+cargo bench -p wgpu-benchmark -- --save-baseline "v0.20"
+
+git checkout trunk
+
+# Run the current benchmarks
+cargo bench -p wgpu-benchmark -- --baseline "v0.20"
+```
+
+You can use this workflow for any changes you want to compare.
+
+## Integration with Profilers
+
+The benchmarks can be run with a profiler to get more detailed information about where time is being spent.
+Integrations are available for `tracy` and `superluminal`. Due to some implementation details,
+you need to uncomment these features in `Cargo.toml` before they can be used.
+
+#### Tracy
+
+Tracy is available prebuilt for Windows on [GitHub](https://github.com/wolfpld/tracy/releases/latest/).
+
+```sh
+# Once this is running, you can connect to it with the Tracy Profiler
+cargo bench -p wgpu-benchmark --features tracy
+```
+
+#### Superluminal
+
+Superluminal is a paid product for Windows, available [here](https://superluminal.eu/).
+
+```sh
+# This command will build the benchmarks, and display the path to the executable
+cargo bench -p wgpu-benchmark --features superluminal -- -h
+
+# Have Superluminal run the following command (replacing with the path to the executable)
+./target/release/deps/root-2c45d61b38a65438.exe --bench "filter"
+```
+
+#### `perf` and others
+
+You can follow the same pattern as above to run the benchmarks with other profilers.
+For example, the command-line tool `perf` can be used to profile the benchmarks.
+ +```sh +# This command will build the benchmarks, and display the path to the executable +cargo bench -p wgpu-benchmark -- -h + +# Run the benchmarks with perf +perf record ./target/release/deps/root-2c45d61b38a65438 --bench "filter" +``` + diff --git a/benches/benches/renderpass-bindless.wgsl b/benches/benches/renderpass-bindless.wgsl new file mode 100644 index 000000000..0277ef63b --- /dev/null +++ b/benches/benches/renderpass-bindless.wgsl @@ -0,0 +1,26 @@ +@group(0) @binding(0) +var tex: binding_array>; + +struct VertexOutput { + @builtin(position) position: vec4f, + @location(0) @interpolate(flat) instance_index: u32, +} + +@vertex +fn vs_main(@builtin(instance_index) instance_index: u32) -> VertexOutput { + return VertexOutput( + vec4f(0.0, 0.0, 0.0, 1.0), + instance_index + ); +} + +@fragment +fn fs_main(vs_in: VertexOutput) -> @location(0) vec4f { + return textureLoad(tex[7 * vs_in.instance_index + 0], vec2u(0), 0) + + textureLoad(tex[7 * vs_in.instance_index + 1], vec2u(0), 0) + + textureLoad(tex[7 * vs_in.instance_index + 2], vec2u(0), 0) + + textureLoad(tex[7 * vs_in.instance_index + 3], vec2u(0), 0) + + textureLoad(tex[7 * vs_in.instance_index + 4], vec2u(0), 0) + + textureLoad(tex[7 * vs_in.instance_index + 5], vec2u(0), 0) + + textureLoad(tex[7 * vs_in.instance_index + 6], vec2u(0), 0); +} diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs new file mode 100644 index 000000000..30543839a --- /dev/null +++ b/benches/benches/renderpass.rs @@ -0,0 +1,573 @@ +use std::{ + num::NonZeroU32, + time::{Duration, Instant}, +}; + +use criterion::{criterion_group, Criterion, Throughput}; +use nanorand::{Rng, WyRand}; +use once_cell::sync::Lazy; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +use crate::DeviceState; + +const DRAW_COUNT: usize = 10_000; +// Must match the number of textures in the renderpass.wgsl shader +const TEXTURES_PER_DRAW: usize = 7; +const VERTEX_BUFFERS_PER_DRAW: usize = 2; +const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW; + +const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW; + +struct RenderpassState { + device_state: DeviceState, + pipeline: wgpu::RenderPipeline, + bind_groups: Vec, + vertex_buffers: Vec, + index_buffers: Vec, + render_target: wgpu::TextureView, + + // Bindless resources + bindless_bind_group: Option, + bindless_pipeline: Option, +} + +impl RenderpassState { + /// Create and prepare all the resources needed for the renderpass benchmark. + fn new() -> Self { + let device_state = DeviceState::new(); + + let supports_bindless = device_state.device.features().contains( + wgpu::Features::TEXTURE_BINDING_ARRAY + | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING, + ) && device_state + .device + .limits() + .max_sampled_textures_per_shader_stage + >= TEXTURE_COUNT as _; + + // Performance gets considerably worse if the resources are shuffled. + // + // This more closely matches the real-world use case where resources have no + // well defined usage order. 
+ let mut random = WyRand::new_seed(0x8BADF00D); + + let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DRAW); + for i in 0..TEXTURES_PER_DRAW { + bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }); + } + + let bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &bind_group_layout_entries, + }); + + let mut texture_views = Vec::with_capacity(TEXTURE_COUNT); + for i in 0..TEXTURE_COUNT { + let texture = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some(&format!("Texture {i}")), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8UnormSrgb, + usage: wgpu::TextureUsages::TEXTURE_BINDING, + view_formats: &[], + }); + texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor { + label: Some(&format!("Texture View {i}")), + ..Default::default() + })); + } + random.shuffle(&mut texture_views); + + let texture_view_refs: Vec<_> = texture_views.iter().collect(); + + let mut bind_groups = Vec::with_capacity(DRAW_COUNT); + for draw_idx in 0..DRAW_COUNT { + let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW); + for tex_idx in 0..TEXTURES_PER_DRAW { + entries.push(wgpu::BindGroupEntry { + binding: tex_idx as u32, + resource: wgpu::BindingResource::TextureView( + &texture_views[draw_idx * TEXTURES_PER_DRAW + tex_idx], + ), + }); + } + + bind_groups.push( + device_state + .device + .create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bind_group_layout, + entries: &entries, + }), + ); + } + random.shuffle(&mut bind_groups); + + let sm = device_state + .device + .create_shader_module(wgpu::include_wgsl!("renderpass.wgsl")); + + let pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bind_group_layout], + push_constant_ranges: &[], + }); + + let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT); + for _ in 0..VERTEX_BUFFER_COUNT { + vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 3 * 16, + usage: wgpu::BufferUsages::VERTEX, + mapped_at_creation: false, + })); + } + random.shuffle(&mut vertex_buffers); + + let mut index_buffers = Vec::with_capacity(DRAW_COUNT); + for _ in 0..DRAW_COUNT { + index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 3 * 4, + usage: wgpu::BufferUsages::INDEX, + mapped_at_creation: false, + })); + } + random.shuffle(&mut index_buffers); + + let mut vertex_buffer_attributes = Vec::with_capacity(VERTEX_BUFFERS_PER_DRAW); + for i in 0..VERTEX_BUFFERS_PER_DRAW { + vertex_buffer_attributes.push(wgpu::vertex_attr_array![i as u32 => Float32x4]); + } + + let mut vertex_buffer_layouts = Vec::with_capacity(VERTEX_BUFFERS_PER_DRAW); + for attributes in &vertex_buffer_attributes { + vertex_buffer_layouts.push(wgpu::VertexBufferLayout { + array_stride: 16, + step_mode: wgpu::VertexStepMode::Vertex, + attributes, + }); + } + + let pipeline = + device_state + .device + 
.create_render_pipeline(&wgpu::RenderPipelineDescriptor { + label: None, + layout: Some(&pipeline_layout), + vertex: wgpu::VertexState { + module: &sm, + entry_point: "vs_main", + buffers: &vertex_buffer_layouts, + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + strip_index_format: None, + front_face: wgpu::FrontFace::Cw, + cull_mode: Some(wgpu::Face::Back), + polygon_mode: wgpu::PolygonMode::Fill, + unclipped_depth: false, + conservative: false, + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &sm, + entry_point: "fs_main", + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8UnormSrgb, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + multiview: None, + }); + + let render_target = device_state + .device + .create_texture(&wgpu::TextureDescriptor { + label: Some("Render Target"), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8UnormSrgb, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT, + view_formats: &[], + }) + .create_view(&wgpu::TextureViewDescriptor::default()); + + let mut bindless_bind_group = None; + let mut bindless_pipeline = None; + + if supports_bindless { + let bindless_bind_group_layout = + device_state + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + visibility: wgpu::ShaderStages::FRAGMENT, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()), + }], + }); + + bindless_bind_group = Some(device_state.device.create_bind_group( + &wgpu::BindGroupDescriptor { + label: None, + layout: &bindless_bind_group_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureViewArray(&texture_view_refs), + }], + }, + )); + + let bindless_shader_module = device_state + .device + .create_shader_module(wgpu::include_wgsl!("renderpass-bindless.wgsl")); + + let bindless_pipeline_layout = + device_state + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bindless_bind_group_layout], + push_constant_ranges: &[], + }); + + bindless_pipeline = Some(device_state.device.create_render_pipeline( + &wgpu::RenderPipelineDescriptor { + label: None, + layout: Some(&bindless_pipeline_layout), + vertex: wgpu::VertexState { + module: &bindless_shader_module, + entry_point: "vs_main", + buffers: &vertex_buffer_layouts, + compilation_options: wgpu::PipelineCompilationOptions::default(), + }, + primitive: wgpu::PrimitiveState { + topology: wgpu::PrimitiveTopology::TriangleList, + strip_index_format: None, + front_face: wgpu::FrontFace::Cw, + cull_mode: Some(wgpu::Face::Back), + polygon_mode: wgpu::PolygonMode::Fill, + unclipped_depth: false, + conservative: false, + }, + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &bindless_shader_module, + entry_point: "fs_main", + targets: &[Some(wgpu::ColorTargetState { + 
format: wgpu::TextureFormat::Rgba8UnormSrgb, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + compilation_options: wgpu::PipelineCompilationOptions::default(), + }), + multiview: None, + }, + )); + } + + Self { + device_state, + pipeline, + bind_groups, + vertex_buffers, + index_buffers, + render_target, + + bindless_bind_group, + bindless_pipeline, + } + } + + fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer { + profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}")); + + let draws_per_pass = DRAW_COUNT / total_passes; + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &self.render_target, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), + store: wgpu::StoreOp::Store, + }, + })], + occlusion_query_set: None, + timestamp_writes: None, + depth_stencil_attachment: None, + }); + + let start_idx = pass_number * draws_per_pass; + let end_idx = start_idx + draws_per_pass; + for draw_idx in start_idx..end_idx { + render_pass.set_pipeline(&self.pipeline); + render_pass.set_bind_group(0, &self.bind_groups[draw_idx], &[]); + for i in 0..VERTEX_BUFFERS_PER_DRAW { + render_pass.set_vertex_buffer( + i as u32, + self.vertex_buffers[draw_idx * VERTEX_BUFFERS_PER_DRAW + i].slice(..), + ); + } + render_pass.set_index_buffer( + self.index_buffers[draw_idx].slice(..), + wgpu::IndexFormat::Uint32, + ); + render_pass.draw_indexed(0..3, 0, 0..1); + } + + drop(render_pass); + + encoder.finish() + } + + fn run_bindless_pass(&self) -> wgpu::CommandBuffer { + profiling::scope!("Bindless Renderpass"); + + let mut encoder = self + .device_state + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &self.render_target, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::BLACK), + store: wgpu::StoreOp::Store, + }, + })], + occlusion_query_set: None, + timestamp_writes: None, + depth_stencil_attachment: None, + }); + + render_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap()); + render_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]); + for i in 0..VERTEX_BUFFERS_PER_DRAW { + render_pass.set_vertex_buffer(i as u32, self.vertex_buffers[0].slice(..)); + } + render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32); + + for draw_idx in 0..DRAW_COUNT { + render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1); + } + + drop(render_pass); + + encoder.finish() + } +} + +fn run_bench(ctx: &mut Criterion) { + let state = Lazy::new(RenderpassState::new); + + // Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses + let mut group = ctx.benchmark_group("Renderpass: Single Threaded"); + group.throughput(Throughput::Elements(DRAW_COUNT as _)); + + for time_submit in [false, true] { + for rpasses in [1, 2, 4, 8] { + let draws_per_pass = DRAW_COUNT / rpasses; + + let label = if time_submit { + "Submit Time" + } else { + "Renderpass Time" + }; + + group.bench_function( + &format!("{rpasses} renderpasses x {draws_per_pass} draws ({label})"), + |b| { + 
Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let mut start = Instant::now(); + + let mut buffers: Vec = Vec::with_capacity(rpasses); + for i in 0..rpasses { + buffers.push(state.run_subpass(i, rpasses)); + } + + if time_submit { + start = Instant::now(); + } else { + duration += start.elapsed(); + } + + state.device_state.queue.submit(buffers); + + if time_submit { + duration += start.elapsed(); + } + + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + } + group.finish(); + + // Test 10k draw calls split up over 2, 4, and 8 threads. + let mut group = ctx.benchmark_group("Renderpass: Multi Threaded"); + group.throughput(Throughput::Elements(DRAW_COUNT as _)); + + for threads in [2, 4, 8] { + let draws_per_pass = DRAW_COUNT / threads; + group.bench_function( + &format!("{threads} threads x {draws_per_pass} draws"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // This benchmark hangs on Apple Paravirtualized GPUs. No idea why. + if state.device_state.adapter_info.name.contains("Paravirtual") { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffers = (0..threads) + .into_par_iter() + .map(|i| state.run_subpass(i, threads)) + .collect::>(); + + duration += start.elapsed(); + + state.device_state.queue.submit(buffers); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + group.finish(); + + // Test 10k draw calls split up over 1, 2, 4, and 8 threads. + let mut group = ctx.benchmark_group("Renderpass: Bindless"); + group.throughput(Throughput::Elements(DRAW_COUNT as _)); + + group.bench_function(&format!("{DRAW_COUNT} draws"), |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + // Need bindless to run this benchmark + if state.bindless_bind_group.is_none() { + return Duration::from_secs_f32(1.0); + } + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + let start = Instant::now(); + + let buffer = state.run_bindless_pass(); + + duration += start.elapsed(); + + state.device_state.queue.submit([buffer]); + state.device_state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }); + group.finish(); + + ctx.bench_function( + &format!( + "Renderpass: Empty Submit with {} Resources", + TEXTURE_COUNT + VERTEX_BUFFER_COUNT + ), + |b| { + Lazy::force(&state); + + b.iter(|| state.device_state.queue.submit([])); + }, + ); +} + +criterion_group! 
{ + name = renderpass; + config = Criterion::default().measurement_time(Duration::from_secs(10)); + targets = run_bench, +} diff --git a/benches/benches/renderpass.wgsl b/benches/benches/renderpass.wgsl new file mode 100644 index 000000000..948fd6e2f --- /dev/null +++ b/benches/benches/renderpass.wgsl @@ -0,0 +1,36 @@ +@group(0) @binding(0) +var tex_1: texture_2d; + +@group(0) @binding(1) +var tex_2: texture_2d; + +@group(0) @binding(2) +var tex_3: texture_2d; + +@group(0) @binding(3) +var tex_4: texture_2d; + +@group(0) @binding(4) +var tex_5: texture_2d; + +@group(0) @binding(5) +var tex_6: texture_2d; + +@group(0) @binding(6) +var tex_7: texture_2d; + +@vertex +fn vs_main() -> @builtin(position) vec4f { + return vec4f(0.0, 0.0, 0.0, 1.0); +} + +@fragment +fn fs_main() -> @location(0) vec4f { + return textureLoad(tex_1, vec2u(0), 0) + + textureLoad(tex_2, vec2u(0), 0) + + textureLoad(tex_3, vec2u(0), 0) + + textureLoad(tex_4, vec2u(0), 0) + + textureLoad(tex_5, vec2u(0), 0) + + textureLoad(tex_6, vec2u(0), 0) + + textureLoad(tex_7, vec2u(0), 0); +} diff --git a/benches/benches/resource_creation.rs b/benches/benches/resource_creation.rs new file mode 100644 index 000000000..c23f132bb --- /dev/null +++ b/benches/benches/resource_creation.rs @@ -0,0 +1,71 @@ +use std::time::{Duration, Instant}; + +use criterion::{criterion_group, Criterion, Throughput}; +use once_cell::sync::Lazy; +use rayon::iter::{IntoParallelIterator, ParallelIterator}; + +use crate::DeviceState; + +fn run_bench(ctx: &mut Criterion) { + let state = Lazy::new(DeviceState::new); + + const RESOURCES_TO_CREATE: usize = 8; + + let mut group = ctx.benchmark_group("Resource Creation: Large Buffer"); + group.throughput(Throughput::Elements(RESOURCES_TO_CREATE as _)); + + for threads in [1, 2, 4, 8] { + let resources_per_thread = RESOURCES_TO_CREATE / threads; + group.bench_function( + &format!("{threads} threads x {resources_per_thread} resource"), + |b| { + Lazy::force(&state); + + b.iter_custom(|iters| { + profiling::scope!("benchmark invocation"); + + let mut duration = Duration::ZERO; + + for _ in 0..iters { + profiling::scope!("benchmark iteration"); + + // We can't create too many resources at once, so we do it 8 resources at a time. + let start = Instant::now(); + + let buffers = (0..threads) + .into_par_iter() + .map(|_| { + (0..resources_per_thread) + .map(|_| { + state.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: 256 * 1024 * 1024, + usage: wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }) + }) + .collect::>() + }) + .collect::>(); + + duration += start.elapsed(); + + drop(buffers); + + state.queue.submit([]); + state.device.poll(wgpu::Maintain::Wait); + } + + duration + }) + }, + ); + } + group.finish(); +} + +criterion_group! { + name = resource_creation; + config = Criterion::default().measurement_time(Duration::from_secs(10)); + targets = run_bench, +} diff --git a/benches/benches/root.rs b/benches/benches/root.rs new file mode 100644 index 000000000..98563f839 --- /dev/null +++ b/benches/benches/root.rs @@ -0,0 +1,65 @@ +use criterion::criterion_main; +use pollster::block_on; + +mod renderpass; +mod resource_creation; +mod shader; + +struct DeviceState { + adapter_info: wgpu::AdapterInfo, + device: wgpu::Device, + queue: wgpu::Queue, +} + +impl DeviceState { + fn new() -> Self { + #[cfg(feature = "tracy")] + tracy_client::Client::start(); + + let base_backend = if cfg!(target_os = "macos") { + // We don't want to use Molten-VK on Mac. 
+ wgpu::Backends::METAL + } else { + wgpu::Backends::all() + }; + + let instance = wgpu::Instance::new(wgpu::InstanceDescriptor { + backends: wgpu::util::backend_bits_from_env().unwrap_or(base_backend), + flags: wgpu::InstanceFlags::empty(), + dx12_shader_compiler: wgpu::util::dx12_shader_compiler_from_env() + .unwrap_or(wgpu::Dx12Compiler::Fxc), + gles_minor_version: wgpu::Gles3MinorVersion::Automatic, + }); + + let adapter = block_on(wgpu::util::initialize_adapter_from_env_or_default( + &instance, None, + )) + .unwrap(); + + let adapter_info = adapter.get_info(); + + eprintln!("{:?}", adapter_info); + + let (device, queue) = block_on(adapter.request_device( + &wgpu::DeviceDescriptor { + required_features: adapter.features(), + required_limits: adapter.limits(), + label: Some("RenderPass Device"), + }, + None, + )) + .unwrap(); + + Self { + adapter_info, + device, + queue, + } + } +} + +criterion_main!( + renderpass::renderpass, + resource_creation::resource_creation, + shader::shader +); diff --git a/benches/benches/shader.rs b/benches/benches/shader.rs new file mode 100644 index 000000000..6d20b6029 --- /dev/null +++ b/benches/benches/shader.rs @@ -0,0 +1,355 @@ +use criterion::*; +use std::{fs, path::PathBuf}; + +struct Input { + filename: String, + size: u64, + data: Vec, + string: Option, + module: Option, + module_info: Option, +} + +struct Inputs { + inner: Vec, +} + +impl Inputs { + fn from_dir(folder: &str, extension: &str) -> Self { + let mut inputs = Vec::new(); + let read_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join(folder) + .read_dir() + .unwrap(); + + for file_entry in read_dir { + match file_entry { + Ok(entry) => match entry.path().extension() { + Some(ostr) if ostr == extension => { + let path = entry.path(); + + inputs.push(Input { + filename: path.to_string_lossy().into_owned(), + size: entry.metadata().unwrap().len(), + string: None, + data: vec![], + module: None, + module_info: None, + }); + } + _ => continue, + }, + Err(e) => { + eprintln!("Skipping file: {:?}", e); + continue; + } + } + } + + Self { inner: inputs } + } + + fn bytes(&self) -> u64 { + self.inner.iter().map(|input| input.size).sum() + } + + fn load(&mut self) { + for input in &mut self.inner { + if !input.data.is_empty() { + continue; + } + + input.data = fs::read(&input.filename).unwrap_or_default(); + } + } + + fn load_utf8(&mut self) { + self.load(); + + for input in &mut self.inner { + if input.string.is_some() { + continue; + } + + input.string = Some(std::str::from_utf8(&input.data).unwrap().to_string()); + } + } + + fn parse(&mut self) { + self.load_utf8(); + + let mut parser = naga::front::wgsl::Frontend::new(); + for input in &mut self.inner { + if input.module.is_some() { + continue; + } + + input.module = Some(parser.parse(input.string.as_ref().unwrap()).unwrap()); + } + } + + fn validate(&mut self) { + self.parse(); + + let mut validator = naga::valid::Validator::new( + naga::valid::ValidationFlags::all(), + // Note, this is empty, to let all backends work. 
+ naga::valid::Capabilities::empty(), + ); + + for input in &mut self.inner { + if input.module_info.is_some() { + continue; + } + + input.module_info = validator.validate(input.module.as_ref().unwrap()).ok(); + } + + self.inner.retain(|input| input.module_info.is_some()); + } +} + +fn parse_glsl(stage: naga::ShaderStage, inputs: &Inputs) { + let mut parser = naga::front::glsl::Frontend::default(); + let options = naga::front::glsl::Options { + stage, + defines: Default::default(), + }; + for input in &inputs.inner { + parser + .parse(&options, input.string.as_deref().unwrap()) + .unwrap(); + } +} + +fn frontends(c: &mut Criterion) { + let mut group = c.benchmark_group("front"); + + let mut inputs_wgsl = Inputs::from_dir("../naga/tests/in", "wgsl"); + group.throughput(Throughput::Bytes(inputs_wgsl.bytes())); + group.bench_function("shader: naga module bincode decode", |b| { + inputs_wgsl.parse(); + + let inputs_bin = inputs_wgsl + .inner + .iter() + .map(|input| bincode::serialize(&input.module.as_ref().unwrap()).unwrap()) + .collect::>(); + + b.iter(move || { + for input in inputs_bin.iter() { + bincode::deserialize::(input).unwrap(); + } + }); + }); + + group.bench_function("shader: wgsl-in", |b| { + inputs_wgsl.load_utf8(); + + let mut frontend = naga::front::wgsl::Frontend::new(); + b.iter(|| { + for input in &inputs_wgsl.inner { + frontend.parse(input.string.as_ref().unwrap()).unwrap(); + } + }); + }); + + let mut inputs_spirv = Inputs::from_dir("../naga/tests/in/spv", "spv"); + group.throughput(Throughput::Bytes(inputs_spirv.bytes())); + group.bench_function("shader: spv-in", |b| { + inputs_spirv.load(); + + b.iter(|| { + let options = naga::front::spv::Options::default(); + for input in &inputs_spirv.inner { + let spv = bytemuck::cast_slice(&input.data); + let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options); + parser.parse().unwrap(); + } + }); + }); + + let mut inputs_vertex = Inputs::from_dir("../naga/tests/in/glsl", "vert"); + let mut inputs_fragment = Inputs::from_dir("../naga/tests/in/glsl", "frag"); + // let mut inputs_compute = Inputs::from_dir("../naga/tests/in/glsl", "comp"); + group.throughput(Throughput::Bytes( + inputs_vertex.bytes() + inputs_fragment.bytes(), // + inputs_compute.bytes() + )); + group.bench_function("shader: glsl-in", |b| { + inputs_vertex.load(); + inputs_vertex.load_utf8(); + inputs_fragment.load_utf8(); + // inputs_compute.load_utf8(); + + b.iter(|| parse_glsl(naga::ShaderStage::Vertex, &inputs_vertex)); + b.iter(|| parse_glsl(naga::ShaderStage::Vertex, &inputs_fragment)); + // TODO: This one hangs for some reason + // b.iter(move || parse_glsl(naga::ShaderStage::Compute, &inputs_compute)); + }); +} + +fn validation(c: &mut Criterion) { + let mut inputs = Inputs::from_dir("../naga/tests/in", "wgsl"); + + let mut group = c.benchmark_group("validate"); + group.throughput(Throughput::Bytes(inputs.bytes())); + group.bench_function("shader: validation", |b| { + inputs.load(); + inputs.load_utf8(); + inputs.parse(); + + let mut validator = naga::valid::Validator::new( + naga::valid::ValidationFlags::all(), + naga::valid::Capabilities::all(), + ); + validator + .subgroup_stages(naga::valid::ShaderStages::all()) + .subgroup_operations(naga::valid::SubgroupOperationSet::all()); + b.iter(|| { + for input in &inputs.inner { + validator.validate(input.module.as_ref().unwrap()).unwrap(); + } + }); + }); + group.finish(); +} + +fn backends(c: &mut Criterion) { + let mut inputs = Inputs::from_dir("../naga/tests/in", "wgsl"); + + let mut group 
= c.benchmark_group("back"); + // While normally this would be done inside the bench_function callback, we need to + // run this to properly know the size of the inputs, as any that fail validation + // will be removed. + inputs.validate(); + + group.throughput(Throughput::Bytes(inputs.bytes())); + group.bench_function("shader: wgsl-out", |b| { + b.iter(|| { + let mut string = String::new(); + let flags = naga::back::wgsl::WriterFlags::empty(); + for input in &inputs.inner { + let mut writer = naga::back::wgsl::Writer::new(&mut string, flags); + let _ = writer.write( + input.module.as_ref().unwrap(), + input.module_info.as_ref().unwrap(), + ); + string.clear(); + } + }); + }); + + group.bench_function("shader: spv-out", |b| { + b.iter(|| { + let mut data = Vec::new(); + let options = naga::back::spv::Options::default(); + for input in &inputs.inner { + let mut writer = naga::back::spv::Writer::new(&options).unwrap(); + let _ = writer.write( + input.module.as_ref().unwrap(), + input.module_info.as_ref().unwrap(), + None, + &None, + &mut data, + ); + data.clear(); + } + }); + }); + group.bench_function("shader: spv-out multiple entrypoints", |b| { + b.iter(|| { + let mut data = Vec::new(); + let options = naga::back::spv::Options::default(); + for input in &inputs.inner { + let mut writer = naga::back::spv::Writer::new(&options).unwrap(); + let module = input.module.as_ref().unwrap(); + for ep in module.entry_points.iter() { + let pipeline_options = naga::back::spv::PipelineOptions { + shader_stage: ep.stage, + entry_point: ep.name.clone(), + }; + let _ = writer.write( + input.module.as_ref().unwrap(), + input.module_info.as_ref().unwrap(), + Some(&pipeline_options), + &None, + &mut data, + ); + data.clear(); + } + } + }); + }); + + group.bench_function("shader: msl-out", |b| { + b.iter(|| { + let mut string = String::new(); + let options = naga::back::msl::Options::default(); + for input in &inputs.inner { + let pipeline_options = naga::back::msl::PipelineOptions::default(); + let mut writer = naga::back::msl::Writer::new(&mut string); + let _ = writer.write( + input.module.as_ref().unwrap(), + input.module_info.as_ref().unwrap(), + &options, + &pipeline_options, + ); + string.clear(); + } + }); + }); + + group.bench_function("shader: hlsl-out", |b| { + b.iter(|| { + let options = naga::back::hlsl::Options::default(); + let mut string = String::new(); + for input in &inputs.inner { + let mut writer = naga::back::hlsl::Writer::new(&mut string, &options); + let _ = writer.write( + input.module.as_ref().unwrap(), + input.module_info.as_ref().unwrap(), + ); // may fail on unimplemented things + string.clear(); + } + }); + }); + + group.bench_function("shader: glsl-out multiple entrypoints", |b| { + b.iter(|| { + let mut string = String::new(); + let options = naga::back::glsl::Options { + version: naga::back::glsl::Version::new_gles(320), + writer_flags: naga::back::glsl::WriterFlags::empty(), + binding_map: Default::default(), + zero_initialize_workgroup_memory: true, + }; + for input in &inputs.inner { + let module = input.module.as_ref().unwrap(); + let info = input.module_info.as_ref().unwrap(); + for ep in module.entry_points.iter() { + let pipeline_options = naga::back::glsl::PipelineOptions { + shader_stage: ep.stage, + entry_point: ep.name.clone(), + multiview: None, + }; + + // might be `Err` if missing features + if let Ok(mut writer) = naga::back::glsl::Writer::new( + &mut string, + module, + info, + &options, + &pipeline_options, + naga::proc::BoundsCheckPolicies::default(), + ) { + 
let _ = writer.write(); // might be `Err` if unsupported + } + + string.clear(); + } + } + }); + }); +} + +criterion_group!(shader, frontends, validation, backends); diff --git a/naga/Cargo.toml b/naga/Cargo.toml index 3041a6009..22e172d47 100644 --- a/naga/Cargo.toml +++ b/naga/Cargo.toml @@ -35,10 +35,6 @@ wgsl-out = [] hlsl-out = [] compact = [] -[[bench]] -name = "criterion" -harness = false - [dependencies] arbitrary = { version = "1.3", features = ["derive"], optional = true } bitflags = "2.5" @@ -60,11 +56,7 @@ hexf-parse = { version = "0.2.1", optional = true } unicode-xid = { version = "0.2.3", optional = true } arrayvec.workspace = true -[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies] -criterion = { version = "0.5", features = [] } - [dev-dependencies] -bincode = "1" diff = "0.1" env_logger = "0.11" # This _cannot_ have a version specified. If it does, crates.io will look diff --git a/naga/benches/criterion.rs b/naga/benches/criterion.rs deleted file mode 100644 index e57c58a84..000000000 --- a/naga/benches/criterion.rs +++ /dev/null @@ -1,273 +0,0 @@ -#![cfg(not(target_arch = "wasm32"))] -#![allow(clippy::needless_borrowed_reference)] - -use criterion::*; -use std::{fs, path::PathBuf, slice}; - -fn gather_inputs(folder: &str, extension: &str) -> Vec> { - let mut list = Vec::new(); - let read_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .join(folder) - .read_dir() - .unwrap(); - for file_entry in read_dir { - match file_entry { - Ok(entry) => match entry.path().extension() { - Some(ostr) if ostr == extension => { - let input = fs::read(entry.path()).unwrap_or_default(); - list.push(input.into_boxed_slice()); - } - _ => continue, - }, - Err(e) => { - log::warn!("Skipping file: {:?}", e); - continue; - } - } - } - list -} - -fn parse_glsl(stage: naga::ShaderStage, inputs: &[Box<[u8]>]) { - let mut parser = naga::front::glsl::Frontend::default(); - let options = naga::front::glsl::Options { - stage, - defines: Default::default(), - }; - for input in inputs.iter() { - let string = std::str::from_utf8(input).unwrap(); - parser.parse(&options, string).unwrap(); - } -} - -fn frontends(c: &mut Criterion) { - let mut group = c.benchmark_group("front"); - #[cfg(all(feature = "wgsl-in", feature = "serialize", feature = "deserialize"))] - group.bench_function("bin", |b| { - let inputs_wgsl = gather_inputs("tests/in", "wgsl"); - let mut frontend = naga::front::wgsl::Frontend::new(); - let inputs_bin = inputs_wgsl - .iter() - .map(|input| { - let string = std::str::from_utf8(input).unwrap(); - let module = frontend.parse(string).unwrap(); - bincode::serialize(&module).unwrap() - }) - .collect::>(); - b.iter(move || { - for input in inputs_bin.iter() { - bincode::deserialize::(input).unwrap(); - } - }); - }); - #[cfg(feature = "wgsl-in")] - group.bench_function("wgsl", |b| { - let inputs_wgsl = gather_inputs("tests/in", "wgsl"); - let inputs = inputs_wgsl - .iter() - .map(|input| std::str::from_utf8(input).unwrap()) - .collect::>(); - let mut frontend = naga::front::wgsl::Frontend::new(); - b.iter(move || { - for &input in inputs.iter() { - frontend.parse(input).unwrap(); - } - }); - }); - #[cfg(feature = "spv-in")] - group.bench_function("spv", |b| { - let inputs = gather_inputs("tests/in/spv", "spv"); - b.iter(move || { - let options = naga::front::spv::Options::default(); - for input in inputs.iter() { - let spv = - unsafe { slice::from_raw_parts(input.as_ptr() as *const u32, input.len() / 4) }; - let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options); 
- parser.parse().unwrap(); - } - }); - }); - #[cfg(feature = "glsl-in")] - group.bench_function("glsl", |b| { - let vert = gather_inputs("tests/in/glsl", "vert"); - b.iter(move || parse_glsl(naga::ShaderStage::Vertex, &vert)); - let frag = gather_inputs("tests/in/glsl", "frag"); - b.iter(move || parse_glsl(naga::ShaderStage::Vertex, &frag)); - //TODO: hangs for some reason! - //let comp = gather_inputs("tests/in/glsl", "comp"); - //b.iter(move || parse_glsl(naga::ShaderStage::Compute, &comp)); - }); -} - -#[cfg(feature = "wgsl-in")] -fn gather_modules() -> Vec { - let inputs = gather_inputs("tests/in", "wgsl"); - let mut frontend = naga::front::wgsl::Frontend::new(); - inputs - .iter() - .map(|input| { - let string = std::str::from_utf8(input).unwrap(); - frontend.parse(string).unwrap() - }) - .collect() -} -#[cfg(not(feature = "wgsl-in"))] -fn gather_modules() -> Vec { - Vec::new() -} - -fn validation(c: &mut Criterion) { - let inputs = gather_modules(); - let mut group = c.benchmark_group("valid"); - group.bench_function("safe", |b| { - let mut validator = naga::valid::Validator::new( - naga::valid::ValidationFlags::all(), - naga::valid::Capabilities::all(), - ); - b.iter(|| { - for input in inputs.iter() { - validator.validate(input).unwrap(); - } - }); - }); - group.bench_function("unsafe", |b| { - let mut validator = naga::valid::Validator::new( - naga::valid::ValidationFlags::empty(), - naga::valid::Capabilities::all(), - ); - b.iter(|| { - for input in inputs.iter() { - validator.validate(input).unwrap(); - } - }); - }); -} - -fn backends(c: &mut Criterion) { - let inputs = { - let mut validator = naga::valid::Validator::new( - naga::valid::ValidationFlags::empty(), - naga::valid::Capabilities::default(), - ); - let input_modules = gather_modules(); - input_modules - .into_iter() - .flat_map(|module| validator.validate(&module).ok().map(|info| (module, info))) - .collect::>() - }; - - let mut group = c.benchmark_group("back"); - #[cfg(feature = "wgsl-out")] - group.bench_function("wgsl", |b| { - b.iter(|| { - let mut string = String::new(); - let flags = naga::back::wgsl::WriterFlags::empty(); - for &(ref module, ref info) in inputs.iter() { - let mut writer = naga::back::wgsl::Writer::new(&mut string, flags); - writer.write(module, info).unwrap(); - string.clear(); - } - }); - }); - - #[cfg(feature = "spv-out")] - group.bench_function("spv", |b| { - b.iter(|| { - let mut data = Vec::new(); - let options = naga::back::spv::Options::default(); - for &(ref module, ref info) in inputs.iter() { - let mut writer = naga::back::spv::Writer::new(&options).unwrap(); - writer.write(module, info, None, &None, &mut data).unwrap(); - data.clear(); - } - }); - }); - #[cfg(feature = "spv-out")] - group.bench_function("spv-separate", |b| { - b.iter(|| { - let mut data = Vec::new(); - let options = naga::back::spv::Options::default(); - for &(ref module, ref info) in inputs.iter() { - let mut writer = naga::back::spv::Writer::new(&options).unwrap(); - for ep in module.entry_points.iter() { - let pipeline_options = naga::back::spv::PipelineOptions { - shader_stage: ep.stage, - entry_point: ep.name.clone(), - }; - writer - .write(module, info, Some(&pipeline_options), &None, &mut data) - .unwrap(); - data.clear(); - } - } - }); - }); - - #[cfg(feature = "msl-out")] - group.bench_function("msl", |b| { - b.iter(|| { - let mut string = String::new(); - let options = naga::back::msl::Options::default(); - for &(ref module, ref info) in inputs.iter() { - let pipeline_options = 
naga::back::msl::PipelineOptions::default(); - let mut writer = naga::back::msl::Writer::new(&mut string); - writer - .write(module, info, &options, &pipeline_options) - .unwrap(); - string.clear(); - } - }); - }); - - #[cfg(feature = "hlsl-out")] - group.bench_function("hlsl", |b| { - b.iter(|| { - let options = naga::back::hlsl::Options::default(); - let mut string = String::new(); - for &(ref module, ref info) in inputs.iter() { - let mut writer = naga::back::hlsl::Writer::new(&mut string, &options); - let _ = writer.write(module, info); // may fail on unimplemented things - string.clear(); - } - }); - }); - - #[cfg(feature = "glsl-out")] - group.bench_function("glsl-separate", |b| { - b.iter(|| { - let mut string = String::new(); - let options = naga::back::glsl::Options { - version: naga::back::glsl::Version::new_gles(320), - writer_flags: naga::back::glsl::WriterFlags::empty(), - binding_map: Default::default(), - zero_initialize_workgroup_memory: true, - }; - for &(ref module, ref info) in inputs.iter() { - for ep in module.entry_points.iter() { - let pipeline_options = naga::back::glsl::PipelineOptions { - shader_stage: ep.stage, - entry_point: ep.name.clone(), - multiview: None, - }; - - // might be `Err` if missing features - if let Ok(mut writer) = naga::back::glsl::Writer::new( - &mut string, - module, - info, - &options, - &pipeline_options, - naga::proc::BoundsCheckPolicies::default(), - ) { - let _ = writer.write(); // might be `Err` if unsupported - } - - string.clear(); - } - } - }); - }); -} - -criterion_group!(criterion, frontends, validation, backends,); -criterion_main!(criterion); diff --git a/naga/fuzz/Cargo.toml b/naga/fuzz/Cargo.toml index 3e46af0c5..196919e44 100644 --- a/naga/fuzz/Cargo.toml +++ b/naga/fuzz/Cargo.toml @@ -21,23 +21,27 @@ features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"] [[bin]] name = "spv_parser" path = "fuzz_targets/spv_parser.rs" +bench = false test = false doc = false [[bin]] name = "wgsl_parser" path = "fuzz_targets/wgsl_parser.rs" +bench = false test = false doc = false [[bin]] name = "glsl_parser" path = "fuzz_targets/glsl_parser.rs" +bench = false test = false doc = false [[bin]] name = "ir" path = "fuzz_targets/ir.rs" +bench = false test = false doc = false diff --git a/naga/src/back/hlsl/help.rs b/naga/src/back/hlsl/help.rs index d3bb1ce7f..e6b0b3d61 100644 --- a/naga/src/back/hlsl/help.rs +++ b/naga/src/back/hlsl/help.rs @@ -1044,7 +1044,12 @@ impl<'a, W: Write> super::Writer<'a, W> { crate::Expression::GlobalVariable(var_handle) => { &module.global_variables[var_handle] } - ref other => unreachable!("Array length of base {:?}", other), + ref other => { + return Err(super::Error::Unimplemented(format!( + "Array length of base {:?}", + other + ))) + } }; let storage_access = match global_var.space { crate::AddressSpace::Storage { access } => access, diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs index 54bdedb79..338cdf8f2 100644 --- a/wgpu-core/src/command/memory_init.rs +++ b/wgpu-core/src/command/memory_init.rs @@ -172,6 +172,8 @@ impl BakedCommands { device_tracker: &mut Tracker, snatch_guard: &SnatchGuard<'_>, ) -> Result<(), DestroyedBufferError> { + profiling::scope!("initialize_buffer_memory"); + // Gather init ranges for each buffer so we can collapse them. // It is not possible to do this at an earlier point since previously // executed command buffer change the resource init state. 
@@ -276,6 +278,8 @@ impl BakedCommands {
         device: &Device,
         snatch_guard: &SnatchGuard<'_>,
     ) -> Result<(), DestroyedTextureError> {
+        profiling::scope!("initialize_texture_memory");
+
         let mut ranges: Vec<TextureInitRange> = Vec::new();
         for texture_use in self.texture_memory_actions.drain_init_actions() {
             let mut initialization_status = texture_use.texture.initialization_status.write();
diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs
index e9da11b7a..854ebfd76 100644
--- a/wgpu-core/src/device/mod.rs
+++ b/wgpu-core/src/device/mod.rs
@@ -32,7 +32,9 @@ pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
 // value is enough for a 16k texture with float4 format.
 pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;
 
-const CLEANUP_WAIT_MS: u32 = 5000;
+// If a submission is not completed within this time, we go off into UB land.
+// See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this.
+const CLEANUP_WAIT_MS: u32 = 60000;
 
 const IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL: &str = "Implicit BindGroupLayout in the Error State";
 const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";
diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index f7beff894..168b36843 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -1186,6 +1186,8 @@ impl Global {
 
             // finish all the command buffers first
             for &cmb_id in command_buffer_ids {
+                profiling::scope!("process command buffer");
+
                 // we reset the used surface textures every time we use
                 // it, so make sure to set_size on it.
                 used_surface_textures.set_size(device.tracker_indices.textures.size());
@@ -1222,59 +1224,73 @@ impl Global {
                     continue;
                 }
 
-                // optimize the tracked states
-                // cmdbuf.trackers.optimize();
                 {
+                    profiling::scope!("update submission ids");
+
                     let cmd_buf_data = cmdbuf.data.lock();
                     let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers;
 
                     // update submission IDs
-                    for buffer in cmd_buf_trackers.buffers.used_resources() {
-                        if buffer.raw.get(&snatch_guard).is_none() {
-                            return Err(QueueSubmitError::DestroyedBuffer(
-                                buffer.info.id(),
-                            ));
-                        }
-                        buffer.info.use_at(submit_index);
-
-                        match *buffer.map_state.lock() {
-                            BufferMapState::Idle => (),
-                            _ => {
-                                return Err(QueueSubmitError::BufferStillMapped(
+                    {
+                        profiling::scope!("buffers");
+                        for buffer in cmd_buf_trackers.buffers.used_resources() {
+                            if buffer.raw.get(&snatch_guard).is_none() {
+                                return Err(QueueSubmitError::DestroyedBuffer(
                                     buffer.info.id(),
-                                ))
-                            }
-                        }
-                    }
-                    for texture in cmd_buf_trackers.textures.used_resources() {
-                        let should_extend = match texture.inner.get(&snatch_guard) {
-                            None => {
-                                return Err(QueueSubmitError::DestroyedTexture(
-                                    texture.info.id(),
                                 ));
                             }
-                            Some(TextureInner::Native { .. }) => false,
-                            Some(TextureInner::Surface { ref raw, .. }) => {
-                                if raw.is_some() {
-                                    submit_surface_textures_owned.push(texture.clone());
-                                }
+                            buffer.info.use_at(submit_index);
 
-                                true
+                            match *buffer.map_state.lock() {
+                                BufferMapState::Idle => (),
+                                _ => {
+                                    return Err(QueueSubmitError::BufferStillMapped(
+                                        buffer.info.id(),
+                                    ))
+                                }
                             }
-                        };
-                        texture.info.use_at(submit_index);
-                        if should_extend {
-                            unsafe {
-                                used_surface_textures
-                                    .merge_single(&texture, None, hal::TextureUses::PRESENT)
-                                    .unwrap();
-                            };
                         }
                     }
-                    for texture_view in cmd_buf_trackers.views.used_resources() {
-                        texture_view.info.use_at(submit_index);
+                    {
+                        profiling::scope!("textures");
+                        for texture in cmd_buf_trackers.textures.used_resources() {
+                            let should_extend = match texture.inner.get(&snatch_guard) {
+                                None => {
+                                    return Err(QueueSubmitError::DestroyedTexture(
+                                        texture.info.id(),
+                                    ));
+                                }
+                                Some(TextureInner::Native { .. }) => false,
+                                Some(TextureInner::Surface { ref raw, .. }) => {
+                                    if raw.is_some() {
+                                        submit_surface_textures_owned.push(texture.clone());
+                                    }
+
+                                    true
+                                }
+                            };
+                            texture.info.use_at(submit_index);
+                            if should_extend {
+                                unsafe {
+                                    used_surface_textures
+                                        .merge_single(
+                                            &texture,
+                                            None,
+                                            hal::TextureUses::PRESENT,
+                                        )
+                                        .unwrap();
+                                };
+                            }
+                        }
                     }
                     {
+                        profiling::scope!("views");
+                        for texture_view in cmd_buf_trackers.views.used_resources() {
+                            texture_view.info.use_at(submit_index);
+                        }
+                    }
+                    {
+                        profiling::scope!("bind groups (+ referenced views/samplers)");
                         for bg in cmd_buf_trackers.bind_groups.used_resources() {
                             bg.info.use_at(submit_index);
                             // We need to update the submission indices for the contained
@@ -1288,36 +1304,51 @@ impl Global {
                             }
                         }
                     }
-                    // assert!(cmd_buf_trackers.samplers.is_empty());
-                    for compute_pipeline in
-                        cmd_buf_trackers.compute_pipelines.used_resources()
                     {
-                        compute_pipeline.info.use_at(submit_index);
+                        profiling::scope!("compute pipelines");
+                        for compute_pipeline in
+                            cmd_buf_trackers.compute_pipelines.used_resources()
+                        {
+                            compute_pipeline.info.use_at(submit_index);
+                        }
                     }
-                    for render_pipeline in
-                        cmd_buf_trackers.render_pipelines.used_resources()
                     {
-                        render_pipeline.info.use_at(submit_index);
-                    }
-                    for query_set in cmd_buf_trackers.query_sets.used_resources() {
-                        query_set.info.use_at(submit_index);
-                    }
-                    for bundle in cmd_buf_trackers.bundles.used_resources() {
-                        bundle.info.use_at(submit_index);
-                        // We need to update the submission indices for the contained
-                        // state-less (!) resources as well, excluding the bind groups.
-                        // They don't get deleted too early if the bundle goes out of scope.
+                        profiling::scope!("render pipelines");
                         for render_pipeline in
-                            bundle.used.render_pipelines.read().used_resources()
+                            cmd_buf_trackers.render_pipelines.used_resources()
                         {
                             render_pipeline.info.use_at(submit_index);
                         }
-                        for query_set in bundle.used.query_sets.read().used_resources() {
+                    }
+                    {
+                        profiling::scope!("query sets");
+                        for query_set in cmd_buf_trackers.query_sets.used_resources() {
                             query_set.info.use_at(submit_index);
                         }
                     }
+                    {
+                        profiling::scope!(
+                            "render bundles (+ referenced pipelines/query sets)"
+                        );
+                        for bundle in cmd_buf_trackers.bundles.used_resources() {
+                            bundle.info.use_at(submit_index);
+                            // We need to update the submission indices for the contained
+                            // state-less (!) resources as well, excluding the bind groups.
+                            // They don't get deleted too early if the bundle goes out of scope.
+                            for render_pipeline in
+                                bundle.used.render_pipelines.read().used_resources()
+                            {
+                                render_pipeline.info.use_at(submit_index);
+                            }
+                            for query_set in bundle.used.query_sets.read().used_resources()
+                            {
+                                query_set.info.use_at(submit_index);
+                            }
+                        }
+                    }
                 }
 
                 let mut baked = cmdbuf.from_arc_into_baked();
+
                 // execute resource transitions
                 unsafe {
                     baked
@@ -1385,6 +1416,13 @@ impl Global {
                     raw: baked.encoder,
                     cmd_buffers: baked.list,
                 });
+
+                {
+                    // This involves actually decrementing the ref count of all command buffer
+                    // resources, so can be _very_ expensive.
+                    profiling::scope!("drop command buffer trackers");
+                    drop(baked.trackers);
+                }
             }
 
             log::trace!("Device after submission {}", submit_index);
diff --git a/wgpu/Cargo.toml b/wgpu/Cargo.toml
index 9d52f54d0..81927f0a6 100644
--- a/wgpu/Cargo.toml
+++ b/wgpu/Cargo.toml
@@ -84,9 +84,6 @@ naga-ir = ["dep:naga"]
 ## to the validation carried out at public APIs in all builds.
 strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"]
 
-## Log all API entry points at info instead of trace level.
-api_log_info = ["wgc/api_log_info"]
-
 ## Enables serialization via `serde` on common wgpu types.
 serde = ["dep:serde", "wgc/serde"]
 
diff --git a/xtask/src/main.rs b/xtask/src/main.rs
index 3f6eb622b..f173fe969 100644
--- a/xtask/src/main.rs
+++ b/xtask/src/main.rs
@@ -13,11 +13,21 @@ Usage: xtask <COMMAND>
 Commands:
   run-wasm
+    Build and run web examples
+
     --release    Build in release mode
     --no-serve   Just build the generated files, don't serve them
+
   test
+    Run tests
+
+    --llvm-cov   Run tests with LLVM code coverage using the llvm-cov tool
+    --list       List all of the tests and their executables without running them
+    --retries    Number of times to retry failing tests
+
   vendor-web-sys
+    Re-vendor the WebGPU web-sys bindings.
+
     --no-cleanup  Don't clean up temporary checkout of wasm-bindgen
     One of:
       --path-to-checkout  Path to a local checkout of wasm-bindgen to generate bindings from.
diff --git a/xtask/src/run_wasm.rs b/xtask/src/run_wasm.rs
index 33351e670..e575b0578 100644
--- a/xtask/src/run_wasm.rs
+++ b/xtask/src/run_wasm.rs
@@ -5,7 +5,7 @@ use xshell::Shell;
 
 use crate::util::{check_all_programs, Program};
 
-pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> Result<(), anyhow::Error> {
+pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
     let no_serve = args.contains("--no-serve");
     let release = args.contains("--release");
 
diff --git a/xtask/src/test.rs b/xtask/src/test.rs
index 70278df47..c5b378da1 100644
--- a/xtask/src/test.rs
+++ b/xtask/src/test.rs
@@ -4,6 +4,12 @@ use xshell::Shell;
 
 pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
     let llvm_cov = args.contains("--llvm-cov");
+    let list = args.contains("--list");
+    let retries = args
+        .opt_value_from_str("--retries")?
+        .unwrap_or(0_u32)
+        .to_string();
+
     // These needs to match the command in "run wgpu-info" in `.github/workflows/ci.yml`
     let llvm_cov_flags: &[_] = if llvm_cov {
         &["llvm-cov", "--no-cfg-coverage", "--no-report"]
     } else {
@@ -13,18 +19,30 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
     let llvm_cov_nextest_flags: &[_] = if llvm_cov {
         &["llvm-cov", "--no-cfg-coverage", "--no-report", "nextest"]
     } else {
-        &["nextest", "run"]
+        if list {
+            &["nextest", "list"]
+        } else {
+            &["nextest", "run"]
+        }
     };
 
     log::info!("Generating .gpuconfig file based on gpus on the system");
-    xshell::cmd!(
-        shell,
-        "cargo {llvm_cov_flags...} run --bin wgpu-info -- --json -o .gpuconfig"
-    )
-    .quiet()
-    .run()
-    .context("Failed to run wgpu-info to generate .gpuconfig")?;
+    shell
+        .cmd("cargo")
+        .args(llvm_cov_flags)
+        .args([
+            "run",
+            "--bin",
+            "wgpu-info",
+            "--",
+            "--json",
+            "-o",
+            ".gpuconfig",
+        ])
+        .quiet()
+        .run()
+        .context("Failed to run wgpu-info to generate .gpuconfig")?;
 
     let gpu_count = shell
         .read_file(".gpuconfig")
@@ -39,16 +57,34 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
         if gpu_count == 1 { "" } else { "s" }
     );
 
+    if list {
+        log::info!("Listing tests");
+        shell
+            .cmd("cargo")
+            .args(llvm_cov_nextest_flags)
+            .args(["-v", "--benches", "--tests", "--all-features"])
+            .args(args.finish())
+            .run()
+            .context("Failed to list tests")?;
+
+        return Ok(());
+    }
+
     log::info!("Running cargo tests");
-    xshell::cmd!(
-        shell,
-        "cargo {llvm_cov_nextest_flags...} --all-features --no-fail-fast --retries 2"
-    )
-    .args(args.finish())
-    .quiet()
-    .run()
-    .context("Tests failed")?;
+    shell
+        .cmd("cargo")
+        .args(llvm_cov_nextest_flags)
+        .args([
+            "--benches",
+            "--tests",
+            "--no-fail-fast",
+            "--all-features",
+            "--retries",
+            &retries,
+        ])
+        .args(args.finish())
+        .quiet()
+        .run()
+        .context("Tests failed")?;
 
     log::info!("Finished tests");
 
diff --git a/xtask/src/util.rs b/xtask/src/util.rs
index 85f4444c4..186426971 100644
--- a/xtask/src/util.rs
+++ b/xtask/src/util.rs
@@ -1,15 +1,15 @@
 use std::{io, process::Command};
 
 pub(crate) struct Program {
-    pub binary_name: &'static str,
     pub crate_name: &'static str,
+    pub binary_name: &'static str,
 }
 
 pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
-    let mut failed = Vec::new();
-    for Program {
-        binary_name,
+    let mut failed_crates = Vec::new();
+    for &Program {
         crate_name,
+        binary_name,
     } in programs
     {
         let mut cmd = Command::new(binary_name);
@@ -21,7 +21,7 @@ pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
             }
             Err(e) if matches!(e.kind(), io::ErrorKind::NotFound) => {
                 log::error!("Checking for {binary_name} in PATH: ❌");
-                failed.push(*crate_name);
+                failed_crates.push(crate_name);
             }
             Err(e) => {
                 log::error!("Checking for {binary_name} in PATH: ❌");
@@ -30,12 +30,13 @@ pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
         }
     }
 
-    if !failed.is_empty() {
+    if !failed_crates.is_empty() {
         log::error!(
             "Please install them with: cargo install {}",
-            failed.join(" ")
+            failed_crates.join(" ")
        );
-        anyhow::bail!("Missing programs in PATH");
+
+        anyhow::bail!("Missing required programs");
     }
 
     Ok(())
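
A minimal sketch (not part of the patch above) of how the reworked `xtask test` command assembles its cargo/nextest invocation from the new `--list`, `--llvm-cov`, and `--retries` flags. The helper name `nextest_args` is hypothetical; the flag values mirror the changed `xtask/src/test.rs` shown above.

    // Hypothetical helper (not in the patch): mirrors how `run_tests` builds the
    // argument list it passes to `shell.cmd("cargo")` in xtask/src/test.rs.
    fn nextest_args(list: bool, llvm_cov: bool, retries: u32) -> Vec<String> {
        let mut args = Vec::new();
        if llvm_cov {
            // Coverage runs are wrapped by cargo-llvm-cov before reaching nextest.
            args.extend(["llvm-cov", "--no-cfg-coverage", "--no-report"].map(String::from));
        }
        if list {
            // `xtask test --list` only enumerates tests and benches.
            args.extend(
                ["nextest", "list", "-v", "--benches", "--tests", "--all-features"]
                    .map(String::from),
            );
        } else {
            args.extend(
                ["nextest", "run", "--benches", "--tests", "--no-fail-fast", "--all-features"]
                    .map(String::from),
            );
            args.push("--retries".to_string());
            args.push(retries.to_string());
        }
        args
    }

For example, `nextest_args(false, false, 0)` yields roughly the default `cargo nextest run --benches --tests --no-fail-fast --all-features --retries 0` invocation that `run_tests` issues when no extra flags are given.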