mirror of
https://github.com/gfx-rs/wgpu.git
synced 2024-11-23 23:34:29 +00:00
Add Benchmarks (#5694)
This commit is contained in:
parent
3a798859cd
commit
eeb1a9d7b7
@ -3,7 +3,17 @@
|
||||
[profile.default]
|
||||
slow-timeout = { period = "45s", terminate-after = 2 }
|
||||
|
||||
# Use two threads for tests with "2_threads" in their name
|
||||
# Use two threads for tests with "2 threads" in their name
|
||||
[[profile.default.overrides]]
|
||||
filter = 'test(~2_threads)'
|
||||
filter = 'test(~2_threads) | test(~2 threads)'
|
||||
threads-required = 2
|
||||
|
||||
# Use four threads for tests with "4 threads" in their name
|
||||
[[profile.default.overrides]]
|
||||
filter = 'test(~4_threads) | test(~4 threads)'
|
||||
threads-required = 4
|
||||
|
||||
# Use eight threads for tests with "8 threads" in their name
|
||||
[[profile.default.overrides]]
|
||||
filter = 'test(~8_threads) | test(~8 threads)'
|
||||
threads-required = 8
|
||||
|
@ -1,6 +1,8 @@
|
||||
[bans]
|
||||
multiple-versions = "deny"
|
||||
skip-tree = [
|
||||
# We never enable loom in any of our dependencies but it causes dupes
|
||||
{ name = "loom", version = "0.7.2" },
|
||||
{ name = "windows-sys", version = "0.45" },
|
||||
{ name = "winit", version = "0.27" },
|
||||
{ name = "winit", version = "0.29" },
|
||||
|
2
.github/workflows/ci.yml
vendored
2
.github/workflows/ci.yml
vendored
@ -226,7 +226,7 @@ jobs:
|
||||
cargo clippy --target ${{ matrix.target }} --no-default-features
|
||||
|
||||
# Check with all features.
|
||||
cargo clippy --target ${{ matrix.target }} --tests --all-features
|
||||
cargo clippy --target ${{ matrix.target }} --tests --benches --all-features
|
||||
|
||||
# build docs
|
||||
cargo +${{ env.DOCS_RUST_VERSION }} doc --target ${{ matrix.target }} --all-features --no-deps
|
||||
|
203
Cargo.lock
generated
203
Cargo.lock
generated
@ -1511,6 +1511,20 @@ dependencies = [
|
||||
"slab",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "generator"
|
||||
version = "0.8.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "186014d53bc231d0090ef8d6f03e0920c54d85a5ed22f4f2f74315ec56cf83fb"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cfg-if",
|
||||
"libc",
|
||||
"log",
|
||||
"rustversion",
|
||||
"windows 0.54.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "gethostname"
|
||||
version = "0.4.3"
|
||||
@ -1672,7 +1686,7 @@ dependencies = [
|
||||
"presser",
|
||||
"thiserror",
|
||||
"winapi",
|
||||
"windows",
|
||||
"windows 0.52.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2047,6 +2061,19 @@ version = "0.4.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c"
|
||||
|
||||
[[package]]
|
||||
name = "loom"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"generator",
|
||||
"scoped-tls",
|
||||
"tracing",
|
||||
"tracing-subscriber",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "malloc_buf"
|
||||
version = "0.0.6"
|
||||
@ -2056,6 +2083,15 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "matchers"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558"
|
||||
dependencies = [
|
||||
"regex-automata 0.1.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.7.2"
|
||||
@ -2141,11 +2177,9 @@ version = "0.20.0"
|
||||
dependencies = [
|
||||
"arbitrary",
|
||||
"arrayvec 0.7.4",
|
||||
"bincode",
|
||||
"bit-set",
|
||||
"bitflags 2.5.0",
|
||||
"codespan-reporting",
|
||||
"criterion",
|
||||
"diff",
|
||||
"env_logger",
|
||||
"hexf-parse",
|
||||
@ -2326,6 +2360,16 @@ dependencies = [
|
||||
"rand_xorshift",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "nu-ansi-term"
|
||||
version = "0.46.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84"
|
||||
dependencies = [
|
||||
"overload",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num-bigint"
|
||||
version = "0.4.5"
|
||||
@ -2513,6 +2557,12 @@ version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4030760ffd992bef45b0ae3f10ce1aba99e33464c90d14dd7c039884963ddc7a"
|
||||
|
||||
[[package]]
|
||||
name = "overload"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
|
||||
|
||||
[[package]]
|
||||
name = "owned_ttf_parser"
|
||||
version = "0.21.0"
|
||||
@ -2892,8 +2942,17 @@ checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
"regex-automata 0.4.6",
|
||||
"regex-syntax 0.8.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
dependencies = [
|
||||
"regex-syntax 0.6.29",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -2904,9 +2963,15 @@ checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
"regex-syntax 0.8.3",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.29"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.8.3"
|
||||
@ -3138,6 +3203,15 @@ dependencies = [
|
||||
"pkg-config",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "sharded-slab"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "shared_library"
|
||||
version = "0.1.9"
|
||||
@ -3410,6 +3484,16 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "threadpool"
|
||||
version = "1.8.1"
|
||||
@ -3567,6 +3651,59 @@ name = "tracing-core"
|
||||
version = "0.1.32"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"valuable",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-log"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3"
|
||||
dependencies = [
|
||||
"log",
|
||||
"once_cell",
|
||||
"tracing-core",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracing-subscriber"
|
||||
version = "0.3.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b"
|
||||
dependencies = [
|
||||
"matchers",
|
||||
"nu-ansi-term",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"sharded-slab",
|
||||
"smallvec",
|
||||
"thread_local",
|
||||
"tracing",
|
||||
"tracing-core",
|
||||
"tracing-log",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracy-client"
|
||||
version = "0.17.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb931a64ff88984f86d3e9bcd1ae8843aa7fe44dd0f8097527bc172351741d"
|
||||
dependencies = [
|
||||
"loom",
|
||||
"once_cell",
|
||||
"tracy-client-sys",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tracy-client-sys"
|
||||
version = "0.22.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9d104d610dfa9dd154535102cc9c6164ae1fa37842bc2d9e83f9ac82b0ae0882"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ttf-parser"
|
||||
@ -3716,6 +3853,12 @@ dependencies = [
|
||||
"which",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "valuable"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.2"
|
||||
@ -4077,6 +4220,23 @@ dependencies = [
|
||||
"wgpu-types",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wgpu-benchmark"
|
||||
version = "0.20.0"
|
||||
dependencies = [
|
||||
"bincode",
|
||||
"bytemuck",
|
||||
"criterion",
|
||||
"naga",
|
||||
"nanorand",
|
||||
"once_cell",
|
||||
"pollster",
|
||||
"profiling",
|
||||
"rayon",
|
||||
"tracy-client",
|
||||
"wgpu",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "wgpu-core"
|
||||
version = "0.20.0"
|
||||
@ -4304,7 +4464,17 @@ version = "0.52.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be"
|
||||
dependencies = [
|
||||
"windows-core",
|
||||
"windows-core 0.52.0",
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows"
|
||||
version = "0.54.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9252e5725dbed82865af151df558e754e4a3c2c30818359eb17465f1346a1b49"
|
||||
dependencies = [
|
||||
"windows-core 0.54.0",
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
@ -4317,6 +4487,25 @@ dependencies = [
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-core"
|
||||
version = "0.54.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12661b9c89351d684a50a8a643ce5f608e20243b9fb84687800163429f161d65"
|
||||
dependencies = [
|
||||
"windows-result",
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-result"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "749f0da9cc72d82e600d8d2e44cadd0b9eedb9038f71a1c58556ac1c5791813b"
|
||||
dependencies = [
|
||||
"windows-targets 0.52.5",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "windows-sys"
|
||||
version = "0.36.1"
|
||||
|
14
Cargo.toml
14
Cargo.toml
@ -5,8 +5,9 @@ members = [
|
||||
"deno_webgpu",
|
||||
|
||||
# default members
|
||||
"benches",
|
||||
"d3d12",
|
||||
"examples/",
|
||||
"examples",
|
||||
"naga-cli",
|
||||
"naga",
|
||||
"naga/fuzz",
|
||||
@ -22,8 +23,9 @@ members = [
|
||||
]
|
||||
exclude = []
|
||||
default-members = [
|
||||
"benches",
|
||||
"d3d12",
|
||||
"examples/",
|
||||
"examples",
|
||||
"naga-cli",
|
||||
"naga",
|
||||
"naga/fuzz",
|
||||
@ -70,11 +72,13 @@ version = "0.20.0"
|
||||
[workspace.dependencies]
|
||||
anyhow = "1.0.23"
|
||||
arrayvec = "0.7"
|
||||
bincode = "1"
|
||||
bit-vec = "0.6"
|
||||
bitflags = "2"
|
||||
bytemuck = { version = "1.14", features = ["derive"] }
|
||||
cfg_aliases = "0.1"
|
||||
cfg-if = "1"
|
||||
criterion = "0.5"
|
||||
codespan-reporting = "0.11"
|
||||
ctor = "0.2"
|
||||
document-features = "0.2.8"
|
||||
@ -109,6 +113,7 @@ png = "0.17.11"
|
||||
pollster = "0.3"
|
||||
profiling = { version = "1", default-features = false }
|
||||
raw-window-handle = "0.6"
|
||||
rayon = "1"
|
||||
renderdoc-sys = "1.1.0"
|
||||
ron = "0.8"
|
||||
rustc-hash = "1.1.0"
|
||||
@ -116,6 +121,7 @@ serde = "1"
|
||||
serde_json = "1.0.116"
|
||||
smallvec = "1"
|
||||
static_assertions = "1.1.0"
|
||||
tracy-client = "0.17"
|
||||
thiserror = "1"
|
||||
wgpu = { version = "0.20.0", path = "./wgpu" }
|
||||
wgpu-core = { version = "0.20.0", path = "./wgpu-core" }
|
||||
@ -187,6 +193,10 @@ termcolor = "1.4.1"
|
||||
#js-sys = { path = "../wasm-bindgen/crates/js-sys" }
|
||||
#wasm-bindgen = { path = "../wasm-bindgen" }
|
||||
|
||||
[profile.release]
|
||||
lto = "thin"
|
||||
debug = true
|
||||
|
||||
# Speed up image comparison even in debug builds
|
||||
[profile.dev.package."nv-flip-sys"]
|
||||
opt-level = 3
|
||||
|
46
benches/Cargo.toml
Normal file
46
benches/Cargo.toml
Normal file
@ -0,0 +1,46 @@
|
||||
[package]
|
||||
name = "wgpu-benchmark"
|
||||
version.workspace = true
|
||||
authors.workspace = true
|
||||
edition.workspace = true
|
||||
description = "wgpu benchmarking suite"
|
||||
homepage.workspace = true
|
||||
repository.workspace = true
|
||||
keywords.workspace = true
|
||||
license.workspace = true
|
||||
autobenches = false
|
||||
publish = false
|
||||
|
||||
[[bench]]
|
||||
name = "root"
|
||||
harness = false
|
||||
path = "benches/root.rs"
|
||||
|
||||
[features]
|
||||
# Uncomment these features to enable tracy and superluminal profiling.
|
||||
# tracy = ["dep:tracy-client", "profiling/profile-with-tracy"]
|
||||
# superluminal = ["profiling/profile-with-superluminal"]
|
||||
|
||||
[dependencies]
|
||||
bincode.workspace = true
|
||||
bytemuck.workspace = true
|
||||
criterion.workspace = true
|
||||
naga = { workspace = true, features = [
|
||||
"deserialize",
|
||||
"serialize",
|
||||
"wgsl-in",
|
||||
"spv-in",
|
||||
"glsl-in",
|
||||
"spv-out",
|
||||
"msl-out",
|
||||
"hlsl-out",
|
||||
"glsl-out",
|
||||
"wgsl-out",
|
||||
] }
|
||||
nanorand.workspace = true
|
||||
once_cell.workspace = true
|
||||
pollster.workspace = true
|
||||
profiling.workspace = true
|
||||
rayon.workspace = true
|
||||
tracy-client = { workspace = true, optional = true }
|
||||
wgpu.workspace = true
|
95
benches/README.md
Normal file
95
benches/README.md
Normal file
@ -0,0 +1,95 @@
|
||||
Collection of CPU benchmarks for `wgpu`.
|
||||
|
||||
These benchmarks are designed as a first line of defence against performance regressions and generally approximate the performance for users.
|
||||
They all do very little GPU work and are testing the CPU performance of the API.
|
||||
|
||||
Criterion will give you the end-to-end performance of the benchmark, but you can also use a profiler to get more detailed information about where time is being spent.
|
||||
|
||||
## Usage
|
||||
|
||||
```sh
|
||||
# Run all benchmarks
|
||||
cargo bench -p wgpu-benchmark
|
||||
# Run only the benchmarks that contain "filter" in their name
|
||||
cargo bench -p wgpu-benchmark -- "filter"
|
||||
```
|
||||
|
||||
## Benchmarks
|
||||
|
||||
#### `Renderpass`
|
||||
|
||||
This benchmark measures the performance of recording and submitting a render pass with a large
|
||||
number of draw calls and resources, emulating an intense, more traditional graphics application.
|
||||
By default it measures 10k draw calls, with 90k total resources.
|
||||
|
||||
Within this benchmark, both single threaded and multi-threaded recording are tested, as well as splitting
|
||||
the render pass into multiple passes over multiple command buffers.
|
||||
|
||||
#### `Resource Creation`
|
||||
|
||||
This benchmark measures the performance of creating large resources. By default it makes buffers that are 256MB. It tests this over a range of thread counts.
|
||||
|
||||
#### `Shader Compilation`
|
||||
|
||||
This benchmark measures the performance of naga parsing, validating, and generating shaders.
|
||||
|
||||
## Comparing Against a Baseline
|
||||
|
||||
To compare the current benchmarks against a baseline, you can use the `--save-baseline` and `--baseline` flags.
|
||||
|
||||
For example, to compare v0.20 against trunk, you could run the following:
|
||||
|
||||
```sh
|
||||
git checkout v0.20
|
||||
|
||||
# Run the baseline benchmarks
|
||||
cargo bench -p wgpu-benchmark -- --save-baseline "v0.20"
|
||||
|
||||
git checkout trunk
|
||||
|
||||
# Run the current benchmarks
|
||||
cargo bench -p wgpu-benchmark -- --baseline "v0.20"
|
||||
```
|
||||
|
||||
You can use this for any bits of code you want to compare.
|
||||
|
||||
## Integration with Profilers
|
||||
|
||||
The benchmarks can be run with a profiler to get more detailed information about where time is being spent.
|
||||
Integrations are available for `tracy` and `superluminal`. Due to some implementation details,
|
||||
you need to uncomment the corresponding feature lines in `benches/Cargo.toml` before the `tracy` or `superluminal` feature can be enabled.
|
||||
|
||||
#### Tracy
|
||||
|
||||
Tracy is available prebuilt for Windows on [GitHub](https://github.com/wolfpld/tracy/releases/latest/).
|
||||
|
||||
```sh
|
||||
# Once this is running, you can connect to it with the Tracy Profiler
|
||||
cargo bench -p wgpu-benchmark --features tracy
|
||||
```
|
||||
|
||||
#### Superluminal
|
||||
|
||||
Superluminal is a paid product for Windows, available [here](https://superluminal.eu/).
|
||||
|
||||
```sh
|
||||
# This command will build the benchmarks, and display the path to the executable
|
||||
cargo bench -p wgpu-benchmark --features superluminal -- -h
|
||||
|
||||
# Have Superluminal run the following command (replacing with the path to the executable)
|
||||
./target/release/deps/root-2c45d61b38a65438.exe --bench "filter"
|
||||
```
|
||||
|
||||
#### `perf` and others
|
||||
|
||||
You can follow the same pattern as above to run the benchmarks with other profilers.
|
||||
For example, the command line tool `perf` can be used to profile the benchmarks.
|
||||
|
||||
```sh
|
||||
# This command will build the benchmarks, and display the path to the executable
|
||||
cargo bench -p wgpu-benchmark -- -h
|
||||
|
||||
# Run the benchmarks with perf
|
||||
perf record ./target/release/deps/root-2c45d61b38a65438 --bench "filter"
|
||||
```
|
||||
|
26
benches/benches/renderpass-bindless.wgsl
Normal file
26
benches/benches/renderpass-bindless.wgsl
Normal file
@ -0,0 +1,26 @@
|
||||
// Array of 2D float textures bound at group 0, binding 0.
// No element count is declared here; the fragment shader indexes it dynamically.
@group(0) @binding(0)
|
||||
var tex: binding_array<texture_2d<f32>>;
|
||||
|
||||
// Data passed from the vertex stage to the fragment stage.
struct VertexOutput {
|
||||
// Clip-space position of the vertex.
@builtin(position) position: vec4f,
|
||||
// Instance index forwarded unmodified; flat-interpolated because it is an integer.
@location(0) @interpolate(flat) instance_index: u32,
|
||||
}
|
||||
|
||||
// Vertex entry point: emits a fixed position for every vertex and forwards the
// instance index to the fragment stage. No vertex attributes are read.
@vertex
|
||||
fn vs_main(@builtin(instance_index) instance_index: u32) -> VertexOutput {
|
||||
return VertexOutput(
|
||||
vec4f(0.0, 0.0, 0.0, 1.0),
|
||||
instance_index
|
||||
);
|
||||
}
|
||||
|
||||
// Fragment entry point: sums texel (0, 0) of mip level 0 from seven consecutive
// entries of `tex`, starting at index `7 * instance_index`.
// NOTE(review): the literal 7 appears to correspond to TEXTURES_PER_DRAW in
// renderpass.rs; keep the two in sync.
@fragment
|
||||
fn fs_main(vs_in: VertexOutput) -> @location(0) vec4f {
|
||||
return textureLoad(tex[7 * vs_in.instance_index + 0], vec2u(0), 0) +
|
||||
textureLoad(tex[7 * vs_in.instance_index + 1], vec2u(0), 0) +
|
||||
textureLoad(tex[7 * vs_in.instance_index + 2], vec2u(0), 0) +
|
||||
textureLoad(tex[7 * vs_in.instance_index + 3], vec2u(0), 0) +
|
||||
textureLoad(tex[7 * vs_in.instance_index + 4], vec2u(0), 0) +
|
||||
textureLoad(tex[7 * vs_in.instance_index + 5], vec2u(0), 0) +
|
||||
textureLoad(tex[7 * vs_in.instance_index + 6], vec2u(0), 0);
|
||||
}
|
573
benches/benches/renderpass.rs
Normal file
573
benches/benches/renderpass.rs
Normal file
@ -0,0 +1,573 @@
|
||||
use std::{
|
||||
num::NonZeroU32,
|
||||
time::{Duration, Instant},
|
||||
};
|
||||
|
||||
use criterion::{criterion_group, Criterion, Throughput};
|
||||
use nanorand::{Rng, WyRand};
|
||||
use once_cell::sync::Lazy;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
use crate::DeviceState;
|
||||
|
||||
// Number of draw calls recorded per benchmark iteration.
const DRAW_COUNT: usize = 10_000;
|
||||
// Must match the number of textures in the renderpass.wgsl shader, and the
// per-instance stride of 7 hard-coded in renderpass-bindless.wgsl.
|
||||
||||
const TEXTURES_PER_DRAW: usize = 7;
|
||||
// Number of vertex buffer slots bound for each draw.
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
|
||||
// Total number of vertex buffers created up front (one set per draw).
const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW;
|
||||
|
||||
// Total number of textures created up front (one set per draw).
const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW;
|
||||
|
||||
/// All GPU resources used by the renderpass benchmark, created once by
/// `RenderpassState::new` and reused for every measured iteration.
struct RenderpassState {
|
||||
/// Shared device state; its `device` is used to create every resource and encoder.
device_state: DeviceState,
|
||||
/// Pipeline used by the regular (one bind group per draw) path.
pipeline: wgpu::RenderPipeline,
|
||||
/// One bind group per draw (`DRAW_COUNT` entries), in shuffled order.
bind_groups: Vec<wgpu::BindGroup>,
|
||||
/// `VERTEX_BUFFER_COUNT` vertex buffers, in shuffled order.
vertex_buffers: Vec<wgpu::Buffer>,
|
||||
/// One index buffer per draw (`DRAW_COUNT` entries), in shuffled order.
index_buffers: Vec<wgpu::Buffer>,
|
||||
/// View of the 1x1 texture every pass renders into.
render_target: wgpu::TextureView,
|
||||
|
||||
// Bindless resources
|
||||
||||
/// Single bind group holding every texture view; `None` when the device
/// lacks the features or limits the bindless path requires.
bindless_bind_group: Option<wgpu::BindGroup>,
|
||||
/// Pipeline for the bindless path; `None` under the same conditions.
bindless_pipeline: Option<wgpu::RenderPipeline>,
|
||||
}
|
||||
|
||||
impl RenderpassState {
|
||||
/// Create and prepare all the resources needed for the renderpass benchmark.
|
||||
fn new() -> Self {
|
||||
let device_state = DeviceState::new();
|
||||
|
||||
let supports_bindless = device_state.device.features().contains(
|
||||
wgpu::Features::TEXTURE_BINDING_ARRAY
|
||||
| wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
|
||||
) && device_state
|
||||
.device
|
||||
.limits()
|
||||
.max_sampled_textures_per_shader_stage
|
||||
>= TEXTURE_COUNT as _;
|
||||
|
||||
// Performance gets considerably worse if the resources are shuffled.
|
||||
//
|
||||
// This more closely matches the real-world use case where resources have no
|
||||
// well defined usage order.
|
||||
let mut random = WyRand::new_seed(0x8BADF00D);
|
||||
|
||||
let mut bind_group_layout_entries = Vec::with_capacity(TEXTURES_PER_DRAW);
|
||||
for i in 0..TEXTURES_PER_DRAW {
|
||||
bind_group_layout_entries.push(wgpu::BindGroupLayoutEntry {
|
||||
binding: i as u32,
|
||||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||||
ty: wgpu::BindingType::Texture {
|
||||
sample_type: wgpu::TextureSampleType::Float { filterable: true },
|
||||
view_dimension: wgpu::TextureViewDimension::D2,
|
||||
multisampled: false,
|
||||
},
|
||||
count: None,
|
||||
});
|
||||
}
|
||||
|
||||
let bind_group_layout =
|
||||
device_state
|
||||
.device
|
||||
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||
label: None,
|
||||
entries: &bind_group_layout_entries,
|
||||
});
|
||||
|
||||
let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
|
||||
for i in 0..TEXTURE_COUNT {
|
||||
let texture = device_state
|
||||
.device
|
||||
.create_texture(&wgpu::TextureDescriptor {
|
||||
label: Some(&format!("Texture {i}")),
|
||||
size: wgpu::Extent3d {
|
||||
width: 1,
|
||||
height: 1,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: wgpu::TextureFormat::Rgba8UnormSrgb,
|
||||
usage: wgpu::TextureUsages::TEXTURE_BINDING,
|
||||
view_formats: &[],
|
||||
});
|
||||
texture_views.push(texture.create_view(&wgpu::TextureViewDescriptor {
|
||||
label: Some(&format!("Texture View {i}")),
|
||||
..Default::default()
|
||||
}));
|
||||
}
|
||||
random.shuffle(&mut texture_views);
|
||||
|
||||
let texture_view_refs: Vec<_> = texture_views.iter().collect();
|
||||
|
||||
let mut bind_groups = Vec::with_capacity(DRAW_COUNT);
|
||||
for draw_idx in 0..DRAW_COUNT {
|
||||
let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW);
|
||||
for tex_idx in 0..TEXTURES_PER_DRAW {
|
||||
entries.push(wgpu::BindGroupEntry {
|
||||
binding: tex_idx as u32,
|
||||
resource: wgpu::BindingResource::TextureView(
|
||||
&texture_views[draw_idx * TEXTURES_PER_DRAW + tex_idx],
|
||||
),
|
||||
});
|
||||
}
|
||||
|
||||
bind_groups.push(
|
||||
device_state
|
||||
.device
|
||||
.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||
label: None,
|
||||
layout: &bind_group_layout,
|
||||
entries: &entries,
|
||||
}),
|
||||
);
|
||||
}
|
||||
random.shuffle(&mut bind_groups);
|
||||
|
||||
let sm = device_state
|
||||
.device
|
||||
.create_shader_module(wgpu::include_wgsl!("renderpass.wgsl"));
|
||||
|
||||
let pipeline_layout =
|
||||
device_state
|
||||
.device
|
||||
.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
|
||||
label: None,
|
||||
bind_group_layouts: &[&bind_group_layout],
|
||||
push_constant_ranges: &[],
|
||||
});
|
||||
|
||||
let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT);
|
||||
for _ in 0..VERTEX_BUFFER_COUNT {
|
||||
vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: None,
|
||||
size: 3 * 16,
|
||||
usage: wgpu::BufferUsages::VERTEX,
|
||||
mapped_at_creation: false,
|
||||
}));
|
||||
}
|
||||
random.shuffle(&mut vertex_buffers);
|
||||
|
||||
let mut index_buffers = Vec::with_capacity(DRAW_COUNT);
|
||||
for _ in 0..DRAW_COUNT {
|
||||
index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: None,
|
||||
size: 3 * 4,
|
||||
usage: wgpu::BufferUsages::INDEX,
|
||||
mapped_at_creation: false,
|
||||
}));
|
||||
}
|
||||
random.shuffle(&mut index_buffers);
|
||||
|
||||
let mut vertex_buffer_attributes = Vec::with_capacity(VERTEX_BUFFERS_PER_DRAW);
|
||||
for i in 0..VERTEX_BUFFERS_PER_DRAW {
|
||||
vertex_buffer_attributes.push(wgpu::vertex_attr_array![i as u32 => Float32x4]);
|
||||
}
|
||||
|
||||
let mut vertex_buffer_layouts = Vec::with_capacity(VERTEX_BUFFERS_PER_DRAW);
|
||||
for attributes in &vertex_buffer_attributes {
|
||||
vertex_buffer_layouts.push(wgpu::VertexBufferLayout {
|
||||
array_stride: 16,
|
||||
step_mode: wgpu::VertexStepMode::Vertex,
|
||||
attributes,
|
||||
});
|
||||
}
|
||||
|
||||
let pipeline =
|
||||
device_state
|
||||
.device
|
||||
.create_render_pipeline(&wgpu::RenderPipelineDescriptor {
|
||||
label: None,
|
||||
layout: Some(&pipeline_layout),
|
||||
vertex: wgpu::VertexState {
|
||||
module: &sm,
|
||||
entry_point: "vs_main",
|
||||
buffers: &vertex_buffer_layouts,
|
||||
compilation_options: wgpu::PipelineCompilationOptions::default(),
|
||||
},
|
||||
primitive: wgpu::PrimitiveState {
|
||||
topology: wgpu::PrimitiveTopology::TriangleList,
|
||||
strip_index_format: None,
|
||||
front_face: wgpu::FrontFace::Cw,
|
||||
cull_mode: Some(wgpu::Face::Back),
|
||||
polygon_mode: wgpu::PolygonMode::Fill,
|
||||
unclipped_depth: false,
|
||||
conservative: false,
|
||||
},
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: &sm,
|
||||
entry_point: "fs_main",
|
||||
targets: &[Some(wgpu::ColorTargetState {
|
||||
format: wgpu::TextureFormat::Rgba8UnormSrgb,
|
||||
blend: None,
|
||||
write_mask: wgpu::ColorWrites::ALL,
|
||||
})],
|
||||
compilation_options: wgpu::PipelineCompilationOptions::default(),
|
||||
}),
|
||||
multiview: None,
|
||||
});
|
||||
|
||||
let render_target = device_state
|
||||
.device
|
||||
.create_texture(&wgpu::TextureDescriptor {
|
||||
label: Some("Render Target"),
|
||||
size: wgpu::Extent3d {
|
||||
width: 1,
|
||||
height: 1,
|
||||
depth_or_array_layers: 1,
|
||||
},
|
||||
mip_level_count: 1,
|
||||
sample_count: 1,
|
||||
dimension: wgpu::TextureDimension::D2,
|
||||
format: wgpu::TextureFormat::Rgba8UnormSrgb,
|
||||
usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
|
||||
view_formats: &[],
|
||||
})
|
||||
.create_view(&wgpu::TextureViewDescriptor::default());
|
||||
|
||||
let mut bindless_bind_group = None;
|
||||
let mut bindless_pipeline = None;
|
||||
|
||||
if supports_bindless {
|
||||
let bindless_bind_group_layout =
|
||||
device_state
|
||||
.device
|
||||
.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
|
||||
label: None,
|
||||
entries: &[wgpu::BindGroupLayoutEntry {
|
||||
binding: 0,
|
||||
visibility: wgpu::ShaderStages::FRAGMENT,
|
||||
ty: wgpu::BindingType::Texture {
|
||||
sample_type: wgpu::TextureSampleType::Float { filterable: true },
|
||||
view_dimension: wgpu::TextureViewDimension::D2,
|
||||
multisampled: false,
|
||||
},
|
||||
count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
|
||||
}],
|
||||
});
|
||||
|
||||
bindless_bind_group = Some(device_state.device.create_bind_group(
|
||||
&wgpu::BindGroupDescriptor {
|
||||
label: None,
|
||||
layout: &bindless_bind_group_layout,
|
||||
entries: &[wgpu::BindGroupEntry {
|
||||
binding: 0,
|
||||
resource: wgpu::BindingResource::TextureViewArray(&texture_view_refs),
|
||||
}],
|
||||
},
|
||||
));
|
||||
|
||||
let bindless_shader_module = device_state
|
||||
.device
|
||||
.create_shader_module(wgpu::include_wgsl!("renderpass-bindless.wgsl"));
|
||||
|
||||
let bindless_pipeline_layout =
|
||||
device_state
|
||||
.device
|
||||
.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
|
||||
label: None,
|
||||
bind_group_layouts: &[&bindless_bind_group_layout],
|
||||
push_constant_ranges: &[],
|
||||
});
|
||||
|
||||
bindless_pipeline = Some(device_state.device.create_render_pipeline(
|
||||
&wgpu::RenderPipelineDescriptor {
|
||||
label: None,
|
||||
layout: Some(&bindless_pipeline_layout),
|
||||
vertex: wgpu::VertexState {
|
||||
module: &bindless_shader_module,
|
||||
entry_point: "vs_main",
|
||||
buffers: &vertex_buffer_layouts,
|
||||
compilation_options: wgpu::PipelineCompilationOptions::default(),
|
||||
},
|
||||
primitive: wgpu::PrimitiveState {
|
||||
topology: wgpu::PrimitiveTopology::TriangleList,
|
||||
strip_index_format: None,
|
||||
front_face: wgpu::FrontFace::Cw,
|
||||
cull_mode: Some(wgpu::Face::Back),
|
||||
polygon_mode: wgpu::PolygonMode::Fill,
|
||||
unclipped_depth: false,
|
||||
conservative: false,
|
||||
},
|
||||
depth_stencil: None,
|
||||
multisample: wgpu::MultisampleState::default(),
|
||||
fragment: Some(wgpu::FragmentState {
|
||||
module: &bindless_shader_module,
|
||||
entry_point: "fs_main",
|
||||
targets: &[Some(wgpu::ColorTargetState {
|
||||
format: wgpu::TextureFormat::Rgba8UnormSrgb,
|
||||
blend: None,
|
||||
write_mask: wgpu::ColorWrites::ALL,
|
||||
})],
|
||||
compilation_options: wgpu::PipelineCompilationOptions::default(),
|
||||
}),
|
||||
multiview: None,
|
||||
},
|
||||
));
|
||||
}
|
||||
|
||||
Self {
|
||||
device_state,
|
||||
pipeline,
|
||||
bind_groups,
|
||||
vertex_buffers,
|
||||
index_buffers,
|
||||
render_target,
|
||||
|
||||
bindless_bind_group,
|
||||
bindless_pipeline,
|
||||
}
|
||||
}
|
||||
|
||||
fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
|
||||
profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}"));
|
||||
|
||||
let draws_per_pass = DRAW_COUNT / total_passes;
|
||||
|
||||
let mut encoder = self
|
||||
.device_state
|
||||
.device
|
||||
.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
|
||||
|
||||
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
|
||||
label: None,
|
||||
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
|
||||
view: &self.render_target,
|
||||
resolve_target: None,
|
||||
ops: wgpu::Operations {
|
||||
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
|
||||
store: wgpu::StoreOp::Store,
|
||||
},
|
||||
})],
|
||||
occlusion_query_set: None,
|
||||
timestamp_writes: None,
|
||||
depth_stencil_attachment: None,
|
||||
});
|
||||
|
||||
let start_idx = pass_number * draws_per_pass;
|
||||
let end_idx = start_idx + draws_per_pass;
|
||||
for draw_idx in start_idx..end_idx {
|
||||
render_pass.set_pipeline(&self.pipeline);
|
||||
render_pass.set_bind_group(0, &self.bind_groups[draw_idx], &[]);
|
||||
for i in 0..VERTEX_BUFFERS_PER_DRAW {
|
||||
render_pass.set_vertex_buffer(
|
||||
i as u32,
|
||||
self.vertex_buffers[draw_idx * VERTEX_BUFFERS_PER_DRAW + i].slice(..),
|
||||
);
|
||||
}
|
||||
render_pass.set_index_buffer(
|
||||
self.index_buffers[draw_idx].slice(..),
|
||||
wgpu::IndexFormat::Uint32,
|
||||
);
|
||||
render_pass.draw_indexed(0..3, 0, 0..1);
|
||||
}
|
||||
|
||||
drop(render_pass);
|
||||
|
||||
encoder.finish()
|
||||
}
|
||||
|
||||
fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
|
||||
profiling::scope!("Bindless Renderpass");
|
||||
|
||||
let mut encoder = self
|
||||
.device_state
|
||||
.device
|
||||
.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
|
||||
|
||||
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
|
||||
label: None,
|
||||
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
|
||||
view: &self.render_target,
|
||||
resolve_target: None,
|
||||
ops: wgpu::Operations {
|
||||
load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
|
||||
store: wgpu::StoreOp::Store,
|
||||
},
|
||||
})],
|
||||
occlusion_query_set: None,
|
||||
timestamp_writes: None,
|
||||
depth_stencil_attachment: None,
|
||||
});
|
||||
|
||||
render_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
|
||||
render_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
|
||||
for i in 0..VERTEX_BUFFERS_PER_DRAW {
|
||||
render_pass.set_vertex_buffer(i as u32, self.vertex_buffers[0].slice(..));
|
||||
}
|
||||
render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32);
|
||||
|
||||
for draw_idx in 0..DRAW_COUNT {
|
||||
render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1);
|
||||
}
|
||||
|
||||
drop(render_pass);
|
||||
|
||||
encoder.finish()
|
||||
}
|
||||
}
|
||||
|
||||
fn run_bench(ctx: &mut Criterion) {
|
||||
let state = Lazy::new(RenderpassState::new);
|
||||
|
||||
// Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses
|
||||
let mut group = ctx.benchmark_group("Renderpass: Single Threaded");
|
||||
group.throughput(Throughput::Elements(DRAW_COUNT as _));
|
||||
|
||||
for time_submit in [false, true] {
|
||||
for rpasses in [1, 2, 4, 8] {
|
||||
let draws_per_pass = DRAW_COUNT / rpasses;
|
||||
|
||||
let label = if time_submit {
|
||||
"Submit Time"
|
||||
} else {
|
||||
"Renderpass Time"
|
||||
};
|
||||
|
||||
group.bench_function(
|
||||
&format!("{rpasses} renderpasses x {draws_per_pass} draws ({label})"),
|
||||
|b| {
|
||||
Lazy::force(&state);
|
||||
|
||||
b.iter_custom(|iters| {
|
||||
profiling::scope!("benchmark invocation");
|
||||
|
||||
// This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
|
||||
if state.device_state.adapter_info.name.contains("Paravirtual") {
|
||||
return Duration::from_secs_f32(1.0);
|
||||
}
|
||||
|
||||
let mut duration = Duration::ZERO;
|
||||
|
||||
for _ in 0..iters {
|
||||
profiling::scope!("benchmark iteration");
|
||||
|
||||
let mut start = Instant::now();
|
||||
|
||||
let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses);
|
||||
for i in 0..rpasses {
|
||||
buffers.push(state.run_subpass(i, rpasses));
|
||||
}
|
||||
|
||||
if time_submit {
|
||||
start = Instant::now();
|
||||
} else {
|
||||
duration += start.elapsed();
|
||||
}
|
||||
|
||||
state.device_state.queue.submit(buffers);
|
||||
|
||||
if time_submit {
|
||||
duration += start.elapsed();
|
||||
}
|
||||
|
||||
state.device_state.device.poll(wgpu::Maintain::Wait);
|
||||
}
|
||||
|
||||
duration
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
group.finish();
|
||||
|
||||
// Test 10k draw calls split up over 2, 4, and 8 threads.
|
||||
let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
|
||||
group.throughput(Throughput::Elements(DRAW_COUNT as _));
|
||||
|
||||
for threads in [2, 4, 8] {
|
||||
let draws_per_pass = DRAW_COUNT / threads;
|
||||
group.bench_function(
|
||||
&format!("{threads} threads x {draws_per_pass} draws"),
|
||||
|b| {
|
||||
Lazy::force(&state);
|
||||
|
||||
b.iter_custom(|iters| {
|
||||
profiling::scope!("benchmark invocation");
|
||||
|
||||
// This benchmark hangs on Apple Paravirtualized GPUs. No idea why.
|
||||
if state.device_state.adapter_info.name.contains("Paravirtual") {
|
||||
return Duration::from_secs_f32(1.0);
|
||||
}
|
||||
|
||||
let mut duration = Duration::ZERO;
|
||||
|
||||
for _ in 0..iters {
|
||||
profiling::scope!("benchmark iteration");
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let buffers = (0..threads)
|
||||
.into_par_iter()
|
||||
.map(|i| state.run_subpass(i, threads))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
duration += start.elapsed();
|
||||
|
||||
state.device_state.queue.submit(buffers);
|
||||
state.device_state.device.poll(wgpu::Maintain::Wait);
|
||||
}
|
||||
|
||||
duration
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
|
||||
// Test 10k draw calls split up over 1, 2, 4, and 8 threads.
|
||||
let mut group = ctx.benchmark_group("Renderpass: Bindless");
|
||||
group.throughput(Throughput::Elements(DRAW_COUNT as _));
|
||||
|
||||
group.bench_function(&format!("{DRAW_COUNT} draws"), |b| {
|
||||
Lazy::force(&state);
|
||||
|
||||
b.iter_custom(|iters| {
|
||||
profiling::scope!("benchmark invocation");
|
||||
|
||||
// Need bindless to run this benchmark
|
||||
if state.bindless_bind_group.is_none() {
|
||||
return Duration::from_secs_f32(1.0);
|
||||
}
|
||||
|
||||
let mut duration = Duration::ZERO;
|
||||
|
||||
for _ in 0..iters {
|
||||
profiling::scope!("benchmark iteration");
|
||||
|
||||
let start = Instant::now();
|
||||
|
||||
let buffer = state.run_bindless_pass();
|
||||
|
||||
duration += start.elapsed();
|
||||
|
||||
state.device_state.queue.submit([buffer]);
|
||||
state.device_state.device.poll(wgpu::Maintain::Wait);
|
||||
}
|
||||
|
||||
duration
|
||||
})
|
||||
});
|
||||
group.finish();
|
||||
|
||||
ctx.bench_function(
|
||||
&format!(
|
||||
"Renderpass: Empty Submit with {} Resources",
|
||||
TEXTURE_COUNT + VERTEX_BUFFER_COUNT
|
||||
),
|
||||
|b| {
|
||||
Lazy::force(&state);
|
||||
|
||||
b.iter(|| state.device_state.queue.submit([]));
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// Exposes `run_bench` as the `renderpass` criterion group, measuring each
// benchmark for ten seconds.
criterion_group!(
    name = renderpass;
    config = Criterion::default().measurement_time(Duration::from_secs(10));
    targets = run_bench
);
|
36
benches/benches/renderpass.wgsl
Normal file
36
benches/benches/renderpass.wgsl
Normal file
@ -0,0 +1,36 @@
|
||||
// Seven 2D float textures, all in bind group 0 at bindings 0 through 6.
@group(0) @binding(0) var tex_1: texture_2d<f32>;
@group(0) @binding(1) var tex_2: texture_2d<f32>;
@group(0) @binding(2) var tex_3: texture_2d<f32>;
@group(0) @binding(3) var tex_4: texture_2d<f32>;
@group(0) @binding(4) var tex_5: texture_2d<f32>;
@group(0) @binding(5) var tex_6: texture_2d<f32>;
@group(0) @binding(6) var tex_7: texture_2d<f32>;

// Emits the same clip-space position (the origin) for every vertex.
@vertex
fn vs_main() -> @builtin(position) vec4f {
    return vec4f(vec3f(0.0), 1.0);
}

// Sums texel (0, 0) of mip level 0 from each of the seven textures, in
// binding order, so that every binding is referenced by the shader.
@fragment
fn fs_main() -> @location(0) vec4f {
    let texel = vec2u(0);

    var color = textureLoad(tex_1, texel, 0);
    color += textureLoad(tex_2, texel, 0);
    color += textureLoad(tex_3, texel, 0);
    color += textureLoad(tex_4, texel, 0);
    color += textureLoad(tex_5, texel, 0);
    color += textureLoad(tex_6, texel, 0);
    color += textureLoad(tex_7, texel, 0);
    return color;
}
|
71
benches/benches/resource_creation.rs
Normal file
71
benches/benches/resource_creation.rs
Normal file
@ -0,0 +1,71 @@
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use criterion::{criterion_group, Criterion, Throughput};
|
||||
use once_cell::sync::Lazy;
|
||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
||||
|
||||
use crate::DeviceState;
|
||||
|
||||
fn run_bench(ctx: &mut Criterion) {
|
||||
let state = Lazy::new(DeviceState::new);
|
||||
|
||||
const RESOURCES_TO_CREATE: usize = 8;
|
||||
|
||||
let mut group = ctx.benchmark_group("Resource Creation: Large Buffer");
|
||||
group.throughput(Throughput::Elements(RESOURCES_TO_CREATE as _));
|
||||
|
||||
for threads in [1, 2, 4, 8] {
|
||||
let resources_per_thread = RESOURCES_TO_CREATE / threads;
|
||||
group.bench_function(
|
||||
&format!("{threads} threads x {resources_per_thread} resource"),
|
||||
|b| {
|
||||
Lazy::force(&state);
|
||||
|
||||
b.iter_custom(|iters| {
|
||||
profiling::scope!("benchmark invocation");
|
||||
|
||||
let mut duration = Duration::ZERO;
|
||||
|
||||
for _ in 0..iters {
|
||||
profiling::scope!("benchmark iteration");
|
||||
|
||||
// We can't create too many resources at once, so we do it 8 resources at a time.
|
||||
let start = Instant::now();
|
||||
|
||||
let buffers = (0..threads)
|
||||
.into_par_iter()
|
||||
.map(|_| {
|
||||
(0..resources_per_thread)
|
||||
.map(|_| {
|
||||
state.device.create_buffer(&wgpu::BufferDescriptor {
|
||||
label: None,
|
||||
size: 256 * 1024 * 1024,
|
||||
usage: wgpu::BufferUsages::COPY_DST,
|
||||
mapped_at_creation: false,
|
||||
})
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
duration += start.elapsed();
|
||||
|
||||
drop(buffers);
|
||||
|
||||
state.queue.submit([]);
|
||||
state.device.poll(wgpu::Maintain::Wait);
|
||||
}
|
||||
|
||||
duration
|
||||
})
|
||||
},
|
||||
);
|
||||
}
|
||||
group.finish();
|
||||
}
|
||||
|
||||
// Exposes `run_bench` as the `resource_creation` criterion group, measuring
// each benchmark for ten seconds.
criterion_group!(
    name = resource_creation;
    config = Criterion::default().measurement_time(Duration::from_secs(10));
    targets = run_bench
);
|
65
benches/benches/root.rs
Normal file
65
benches/benches/root.rs
Normal file
@ -0,0 +1,65 @@
|
||||
use criterion::criterion_main;
|
||||
use pollster::block_on;
|
||||
|
||||
mod renderpass;
|
||||
mod resource_creation;
|
||||
mod shader;
|
||||
|
||||
struct DeviceState {
|
||||
adapter_info: wgpu::AdapterInfo,
|
||||
device: wgpu::Device,
|
||||
queue: wgpu::Queue,
|
||||
}
|
||||
|
||||
impl DeviceState {
|
||||
fn new() -> Self {
|
||||
#[cfg(feature = "tracy")]
|
||||
tracy_client::Client::start();
|
||||
|
||||
let base_backend = if cfg!(target_os = "macos") {
|
||||
// We don't want to use Molten-VK on Mac.
|
||||
wgpu::Backends::METAL
|
||||
} else {
|
||||
wgpu::Backends::all()
|
||||
};
|
||||
|
||||
let instance = wgpu::Instance::new(wgpu::InstanceDescriptor {
|
||||
backends: wgpu::util::backend_bits_from_env().unwrap_or(base_backend),
|
||||
flags: wgpu::InstanceFlags::empty(),
|
||||
dx12_shader_compiler: wgpu::util::dx12_shader_compiler_from_env()
|
||||
.unwrap_or(wgpu::Dx12Compiler::Fxc),
|
||||
gles_minor_version: wgpu::Gles3MinorVersion::Automatic,
|
||||
});
|
||||
|
||||
let adapter = block_on(wgpu::util::initialize_adapter_from_env_or_default(
|
||||
&instance, None,
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
let adapter_info = adapter.get_info();
|
||||
|
||||
eprintln!("{:?}", adapter_info);
|
||||
|
||||
let (device, queue) = block_on(adapter.request_device(
|
||||
&wgpu::DeviceDescriptor {
|
||||
required_features: adapter.features(),
|
||||
required_limits: adapter.limits(),
|
||||
label: Some("RenderPass Device"),
|
||||
},
|
||||
None,
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
Self {
|
||||
adapter_info,
|
||||
device,
|
||||
queue,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Entry point of the benchmark binary: runs the criterion groups exported by
// the three benchmark modules, in this order.
criterion_main!(renderpass::renderpass, resource_creation::resource_creation, shader::shader);
|
355
benches/benches/shader.rs
Normal file
355
benches/benches/shader.rs
Normal file
@ -0,0 +1,355 @@
|
||||
use criterion::*;
|
||||
use std::{fs, path::PathBuf};
|
||||
|
||||
struct Input {
|
||||
filename: String,
|
||||
size: u64,
|
||||
data: Vec<u8>,
|
||||
string: Option<String>,
|
||||
module: Option<naga::Module>,
|
||||
module_info: Option<naga::valid::ModuleInfo>,
|
||||
}
|
||||
|
||||
struct Inputs {
|
||||
inner: Vec<Input>,
|
||||
}
|
||||
|
||||
impl Inputs {
|
||||
fn from_dir(folder: &str, extension: &str) -> Self {
|
||||
let mut inputs = Vec::new();
|
||||
let read_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join(folder)
|
||||
.read_dir()
|
||||
.unwrap();
|
||||
|
||||
for file_entry in read_dir {
|
||||
match file_entry {
|
||||
Ok(entry) => match entry.path().extension() {
|
||||
Some(ostr) if ostr == extension => {
|
||||
let path = entry.path();
|
||||
|
||||
inputs.push(Input {
|
||||
filename: path.to_string_lossy().into_owned(),
|
||||
size: entry.metadata().unwrap().len(),
|
||||
string: None,
|
||||
data: vec![],
|
||||
module: None,
|
||||
module_info: None,
|
||||
});
|
||||
}
|
||||
_ => continue,
|
||||
},
|
||||
Err(e) => {
|
||||
eprintln!("Skipping file: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Self { inner: inputs }
|
||||
}
|
||||
|
||||
fn bytes(&self) -> u64 {
|
||||
self.inner.iter().map(|input| input.size).sum()
|
||||
}
|
||||
|
||||
fn load(&mut self) {
|
||||
for input in &mut self.inner {
|
||||
if !input.data.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
input.data = fs::read(&input.filename).unwrap_or_default();
|
||||
}
|
||||
}
|
||||
|
||||
fn load_utf8(&mut self) {
|
||||
self.load();
|
||||
|
||||
for input in &mut self.inner {
|
||||
if input.string.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
input.string = Some(std::str::from_utf8(&input.data).unwrap().to_string());
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(&mut self) {
|
||||
self.load_utf8();
|
||||
|
||||
let mut parser = naga::front::wgsl::Frontend::new();
|
||||
for input in &mut self.inner {
|
||||
if input.module.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
input.module = Some(parser.parse(input.string.as_ref().unwrap()).unwrap());
|
||||
}
|
||||
}
|
||||
|
||||
fn validate(&mut self) {
|
||||
self.parse();
|
||||
|
||||
let mut validator = naga::valid::Validator::new(
|
||||
naga::valid::ValidationFlags::all(),
|
||||
// Note, this is empty, to let all backends work.
|
||||
naga::valid::Capabilities::empty(),
|
||||
);
|
||||
|
||||
for input in &mut self.inner {
|
||||
if input.module_info.is_some() {
|
||||
continue;
|
||||
}
|
||||
|
||||
input.module_info = validator.validate(input.module.as_ref().unwrap()).ok();
|
||||
}
|
||||
|
||||
self.inner.retain(|input| input.module_info.is_some());
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses every input with the GLSL frontend as a shader of the given `stage`.
///
/// # Panics
///
/// Panics if an input has not been decoded to a string yet or fails to parse.
fn parse_glsl(stage: naga::ShaderStage, inputs: &Inputs) {
    let options = naga::front::glsl::Options {
        stage,
        defines: Default::default(),
    };
    // One frontend instance is reused for all inputs.
    let mut parser = naga::front::glsl::Frontend::default();

    inputs.inner.iter().for_each(|input| {
        let source = input.string.as_deref().unwrap();
        parser.parse(&options, source).unwrap();
    });
}
|
||||
|
||||
fn frontends(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("front");
|
||||
|
||||
let mut inputs_wgsl = Inputs::from_dir("../naga/tests/in", "wgsl");
|
||||
group.throughput(Throughput::Bytes(inputs_wgsl.bytes()));
|
||||
group.bench_function("shader: naga module bincode decode", |b| {
|
||||
inputs_wgsl.parse();
|
||||
|
||||
let inputs_bin = inputs_wgsl
|
||||
.inner
|
||||
.iter()
|
||||
.map(|input| bincode::serialize(&input.module.as_ref().unwrap()).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
b.iter(move || {
|
||||
for input in inputs_bin.iter() {
|
||||
bincode::deserialize::<naga::Module>(input).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("shader: wgsl-in", |b| {
|
||||
inputs_wgsl.load_utf8();
|
||||
|
||||
let mut frontend = naga::front::wgsl::Frontend::new();
|
||||
b.iter(|| {
|
||||
for input in &inputs_wgsl.inner {
|
||||
frontend.parse(input.string.as_ref().unwrap()).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
let mut inputs_spirv = Inputs::from_dir("../naga/tests/in/spv", "spv");
|
||||
group.throughput(Throughput::Bytes(inputs_spirv.bytes()));
|
||||
group.bench_function("shader: spv-in", |b| {
|
||||
inputs_spirv.load();
|
||||
|
||||
b.iter(|| {
|
||||
let options = naga::front::spv::Options::default();
|
||||
for input in &inputs_spirv.inner {
|
||||
let spv = bytemuck::cast_slice(&input.data);
|
||||
let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options);
|
||||
parser.parse().unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
let mut inputs_vertex = Inputs::from_dir("../naga/tests/in/glsl", "vert");
|
||||
let mut inputs_fragment = Inputs::from_dir("../naga/tests/in/glsl", "frag");
|
||||
// let mut inputs_compute = Inputs::from_dir("../naga/tests/in/glsl", "comp");
|
||||
group.throughput(Throughput::Bytes(
|
||||
inputs_vertex.bytes() + inputs_fragment.bytes(), // + inputs_compute.bytes()
|
||||
));
|
||||
group.bench_function("shader: glsl-in", |b| {
|
||||
inputs_vertex.load();
|
||||
inputs_vertex.load_utf8();
|
||||
inputs_fragment.load_utf8();
|
||||
// inputs_compute.load_utf8();
|
||||
|
||||
b.iter(|| parse_glsl(naga::ShaderStage::Vertex, &inputs_vertex));
|
||||
b.iter(|| parse_glsl(naga::ShaderStage::Vertex, &inputs_fragment));
|
||||
// TODO: This one hangs for some reason
|
||||
// b.iter(move || parse_glsl(naga::ShaderStage::Compute, &inputs_compute));
|
||||
});
|
||||
}
|
||||
|
||||
fn validation(c: &mut Criterion) {
|
||||
let mut inputs = Inputs::from_dir("../naga/tests/in", "wgsl");
|
||||
|
||||
let mut group = c.benchmark_group("validate");
|
||||
group.throughput(Throughput::Bytes(inputs.bytes()));
|
||||
group.bench_function("shader: validation", |b| {
|
||||
inputs.load();
|
||||
inputs.load_utf8();
|
||||
inputs.parse();
|
||||
|
||||
let mut validator = naga::valid::Validator::new(
|
||||
naga::valid::ValidationFlags::all(),
|
||||
naga::valid::Capabilities::all(),
|
||||
);
|
||||
validator
|
||||
.subgroup_stages(naga::valid::ShaderStages::all())
|
||||
.subgroup_operations(naga::valid::SubgroupOperationSet::all());
|
||||
b.iter(|| {
|
||||
for input in &inputs.inner {
|
||||
validator.validate(input.module.as_ref().unwrap()).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.finish();
|
||||
}
|
||||
|
||||
fn backends(c: &mut Criterion) {
|
||||
let mut inputs = Inputs::from_dir("../naga/tests/in", "wgsl");
|
||||
|
||||
let mut group = c.benchmark_group("back");
|
||||
// While normally this would be done inside the bench_function callback, we need to
|
||||
// run this to properly know the size of the inputs, as any that fail validation
|
||||
// will be removed.
|
||||
inputs.validate();
|
||||
|
||||
group.throughput(Throughput::Bytes(inputs.bytes()));
|
||||
group.bench_function("shader: wgsl-out", |b| {
|
||||
b.iter(|| {
|
||||
let mut string = String::new();
|
||||
let flags = naga::back::wgsl::WriterFlags::empty();
|
||||
for input in &inputs.inner {
|
||||
let mut writer = naga::back::wgsl::Writer::new(&mut string, flags);
|
||||
let _ = writer.write(
|
||||
input.module.as_ref().unwrap(),
|
||||
input.module_info.as_ref().unwrap(),
|
||||
);
|
||||
string.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("shader: spv-out", |b| {
|
||||
b.iter(|| {
|
||||
let mut data = Vec::new();
|
||||
let options = naga::back::spv::Options::default();
|
||||
for input in &inputs.inner {
|
||||
let mut writer = naga::back::spv::Writer::new(&options).unwrap();
|
||||
let _ = writer.write(
|
||||
input.module.as_ref().unwrap(),
|
||||
input.module_info.as_ref().unwrap(),
|
||||
None,
|
||||
&None,
|
||||
&mut data,
|
||||
);
|
||||
data.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("shader: spv-out multiple entrypoints", |b| {
|
||||
b.iter(|| {
|
||||
let mut data = Vec::new();
|
||||
let options = naga::back::spv::Options::default();
|
||||
for input in &inputs.inner {
|
||||
let mut writer = naga::back::spv::Writer::new(&options).unwrap();
|
||||
let module = input.module.as_ref().unwrap();
|
||||
for ep in module.entry_points.iter() {
|
||||
let pipeline_options = naga::back::spv::PipelineOptions {
|
||||
shader_stage: ep.stage,
|
||||
entry_point: ep.name.clone(),
|
||||
};
|
||||
let _ = writer.write(
|
||||
input.module.as_ref().unwrap(),
|
||||
input.module_info.as_ref().unwrap(),
|
||||
Some(&pipeline_options),
|
||||
&None,
|
||||
&mut data,
|
||||
);
|
||||
data.clear();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("shader: msl-out", |b| {
|
||||
b.iter(|| {
|
||||
let mut string = String::new();
|
||||
let options = naga::back::msl::Options::default();
|
||||
for input in &inputs.inner {
|
||||
let pipeline_options = naga::back::msl::PipelineOptions::default();
|
||||
let mut writer = naga::back::msl::Writer::new(&mut string);
|
||||
let _ = writer.write(
|
||||
input.module.as_ref().unwrap(),
|
||||
input.module_info.as_ref().unwrap(),
|
||||
&options,
|
||||
&pipeline_options,
|
||||
);
|
||||
string.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("shader: hlsl-out", |b| {
|
||||
b.iter(|| {
|
||||
let options = naga::back::hlsl::Options::default();
|
||||
let mut string = String::new();
|
||||
for input in &inputs.inner {
|
||||
let mut writer = naga::back::hlsl::Writer::new(&mut string, &options);
|
||||
let _ = writer.write(
|
||||
input.module.as_ref().unwrap(),
|
||||
input.module_info.as_ref().unwrap(),
|
||||
); // may fail on unimplemented things
|
||||
string.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
group.bench_function("shader: glsl-out multiple entrypoints", |b| {
|
||||
b.iter(|| {
|
||||
let mut string = String::new();
|
||||
let options = naga::back::glsl::Options {
|
||||
version: naga::back::glsl::Version::new_gles(320),
|
||||
writer_flags: naga::back::glsl::WriterFlags::empty(),
|
||||
binding_map: Default::default(),
|
||||
zero_initialize_workgroup_memory: true,
|
||||
};
|
||||
for input in &inputs.inner {
|
||||
let module = input.module.as_ref().unwrap();
|
||||
let info = input.module_info.as_ref().unwrap();
|
||||
for ep in module.entry_points.iter() {
|
||||
let pipeline_options = naga::back::glsl::PipelineOptions {
|
||||
shader_stage: ep.stage,
|
||||
entry_point: ep.name.clone(),
|
||||
multiview: None,
|
||||
};
|
||||
|
||||
// might be `Err` if missing features
|
||||
if let Ok(mut writer) = naga::back::glsl::Writer::new(
|
||||
&mut string,
|
||||
module,
|
||||
info,
|
||||
&options,
|
||||
&pipeline_options,
|
||||
naga::proc::BoundsCheckPolicies::default(),
|
||||
) {
|
||||
let _ = writer.write(); // might be `Err` if unsupported
|
||||
}
|
||||
|
||||
string.clear();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Exposes the three shader benchmark functions as the `shader` criterion
// group, using criterion's default configuration.
criterion_group! {
    name = shader;
    config = Criterion::default();
    targets = frontends, validation, backends
}
|
@ -35,10 +35,6 @@ wgsl-out = []
|
||||
hlsl-out = []
|
||||
compact = []
|
||||
|
||||
[[bench]]
|
||||
name = "criterion"
|
||||
harness = false
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3", features = ["derive"], optional = true }
|
||||
bitflags = "2.5"
|
||||
@ -60,11 +56,7 @@ hexf-parse = { version = "0.2.1", optional = true }
|
||||
unicode-xid = { version = "0.2.3", optional = true }
|
||||
arrayvec.workspace = true
|
||||
|
||||
[target.'cfg(not(target_arch = "wasm32"))'.dev-dependencies]
|
||||
criterion = { version = "0.5", features = [] }
|
||||
|
||||
[dev-dependencies]
|
||||
bincode = "1"
|
||||
diff = "0.1"
|
||||
env_logger = "0.11"
|
||||
# This _cannot_ have a version specified. If it does, crates.io will look
|
||||
|
@ -1,273 +0,0 @@
|
||||
#![cfg(not(target_arch = "wasm32"))]
|
||||
#![allow(clippy::needless_borrowed_reference)]
|
||||
|
||||
use criterion::*;
|
||||
use std::{fs, path::PathBuf, slice};
|
||||
|
||||
fn gather_inputs(folder: &str, extension: &str) -> Vec<Box<[u8]>> {
|
||||
let mut list = Vec::new();
|
||||
let read_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
|
||||
.join(folder)
|
||||
.read_dir()
|
||||
.unwrap();
|
||||
for file_entry in read_dir {
|
||||
match file_entry {
|
||||
Ok(entry) => match entry.path().extension() {
|
||||
Some(ostr) if ostr == extension => {
|
||||
let input = fs::read(entry.path()).unwrap_or_default();
|
||||
list.push(input.into_boxed_slice());
|
||||
}
|
||||
_ => continue,
|
||||
},
|
||||
Err(e) => {
|
||||
log::warn!("Skipping file: {:?}", e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
list
|
||||
}
|
||||
|
||||
/// Decodes every input as UTF-8 and parses it with the GLSL frontend as a
/// shader of the given `stage`.
///
/// # Panics
///
/// Panics if an input is not valid UTF-8 or fails to parse.
fn parse_glsl(stage: naga::ShaderStage, inputs: &[Box<[u8]>]) {
    let options = naga::front::glsl::Options {
        stage,
        defines: Default::default(),
    };
    // One frontend instance is reused for all inputs.
    let mut parser = naga::front::glsl::Frontend::default();

    for bytes in inputs {
        let source = std::str::from_utf8(bytes).unwrap();
        parser.parse(&options, source).unwrap();
    }
}
|
||||
|
||||
fn frontends(c: &mut Criterion) {
|
||||
let mut group = c.benchmark_group("front");
|
||||
#[cfg(all(feature = "wgsl-in", feature = "serialize", feature = "deserialize"))]
|
||||
group.bench_function("bin", |b| {
|
||||
let inputs_wgsl = gather_inputs("tests/in", "wgsl");
|
||||
let mut frontend = naga::front::wgsl::Frontend::new();
|
||||
let inputs_bin = inputs_wgsl
|
||||
.iter()
|
||||
.map(|input| {
|
||||
let string = std::str::from_utf8(input).unwrap();
|
||||
let module = frontend.parse(string).unwrap();
|
||||
bincode::serialize(&module).unwrap()
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
b.iter(move || {
|
||||
for input in inputs_bin.iter() {
|
||||
bincode::deserialize::<naga::Module>(input).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
#[cfg(feature = "wgsl-in")]
|
||||
group.bench_function("wgsl", |b| {
|
||||
let inputs_wgsl = gather_inputs("tests/in", "wgsl");
|
||||
let inputs = inputs_wgsl
|
||||
.iter()
|
||||
.map(|input| std::str::from_utf8(input).unwrap())
|
||||
.collect::<Vec<_>>();
|
||||
let mut frontend = naga::front::wgsl::Frontend::new();
|
||||
b.iter(move || {
|
||||
for &input in inputs.iter() {
|
||||
frontend.parse(input).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
#[cfg(feature = "spv-in")]
|
||||
group.bench_function("spv", |b| {
|
||||
let inputs = gather_inputs("tests/in/spv", "spv");
|
||||
b.iter(move || {
|
||||
let options = naga::front::spv::Options::default();
|
||||
for input in inputs.iter() {
|
||||
let spv =
|
||||
unsafe { slice::from_raw_parts(input.as_ptr() as *const u32, input.len() / 4) };
|
||||
let parser = naga::front::spv::Frontend::new(spv.iter().cloned(), &options);
|
||||
parser.parse().unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
#[cfg(feature = "glsl-in")]
|
||||
group.bench_function("glsl", |b| {
|
||||
let vert = gather_inputs("tests/in/glsl", "vert");
|
||||
b.iter(move || parse_glsl(naga::ShaderStage::Vertex, &vert));
|
||||
let frag = gather_inputs("tests/in/glsl", "frag");
|
||||
b.iter(move || parse_glsl(naga::ShaderStage::Vertex, &frag));
|
||||
//TODO: hangs for some reason!
|
||||
//let comp = gather_inputs("tests/in/glsl", "comp");
|
||||
//b.iter(move || parse_glsl(naga::ShaderStage::Compute, &comp));
|
||||
});
|
||||
}
|
||||
|
||||
#[cfg(feature = "wgsl-in")]
|
||||
fn gather_modules() -> Vec<naga::Module> {
|
||||
let inputs = gather_inputs("tests/in", "wgsl");
|
||||
let mut frontend = naga::front::wgsl::Frontend::new();
|
||||
inputs
|
||||
.iter()
|
||||
.map(|input| {
|
||||
let string = std::str::from_utf8(input).unwrap();
|
||||
frontend.parse(string).unwrap()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
#[cfg(not(feature = "wgsl-in"))]
|
||||
fn gather_modules() -> Vec<naga::Module> {
|
||||
Vec::new()
|
||||
}
|
||||
|
||||
fn validation(c: &mut Criterion) {
|
||||
let inputs = gather_modules();
|
||||
let mut group = c.benchmark_group("valid");
|
||||
group.bench_function("safe", |b| {
|
||||
let mut validator = naga::valid::Validator::new(
|
||||
naga::valid::ValidationFlags::all(),
|
||||
naga::valid::Capabilities::all(),
|
||||
);
|
||||
b.iter(|| {
|
||||
for input in inputs.iter() {
|
||||
validator.validate(input).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
group.bench_function("unsafe", |b| {
|
||||
let mut validator = naga::valid::Validator::new(
|
||||
naga::valid::ValidationFlags::empty(),
|
||||
naga::valid::Capabilities::all(),
|
||||
);
|
||||
b.iter(|| {
|
||||
for input in inputs.iter() {
|
||||
validator.validate(input).unwrap();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
fn backends(c: &mut Criterion) {
|
||||
let inputs = {
|
||||
let mut validator = naga::valid::Validator::new(
|
||||
naga::valid::ValidationFlags::empty(),
|
||||
naga::valid::Capabilities::default(),
|
||||
);
|
||||
let input_modules = gather_modules();
|
||||
input_modules
|
||||
.into_iter()
|
||||
.flat_map(|module| validator.validate(&module).ok().map(|info| (module, info)))
|
||||
.collect::<Vec<_>>()
|
||||
};
|
||||
|
||||
let mut group = c.benchmark_group("back");
|
||||
#[cfg(feature = "wgsl-out")]
|
||||
group.bench_function("wgsl", |b| {
|
||||
b.iter(|| {
|
||||
let mut string = String::new();
|
||||
let flags = naga::back::wgsl::WriterFlags::empty();
|
||||
for &(ref module, ref info) in inputs.iter() {
|
||||
let mut writer = naga::back::wgsl::Writer::new(&mut string, flags);
|
||||
writer.write(module, info).unwrap();
|
||||
string.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
#[cfg(feature = "spv-out")]
|
||||
group.bench_function("spv", |b| {
|
||||
b.iter(|| {
|
||||
let mut data = Vec::new();
|
||||
let options = naga::back::spv::Options::default();
|
||||
for &(ref module, ref info) in inputs.iter() {
|
||||
let mut writer = naga::back::spv::Writer::new(&options).unwrap();
|
||||
writer.write(module, info, None, &None, &mut data).unwrap();
|
||||
data.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
#[cfg(feature = "spv-out")]
|
||||
group.bench_function("spv-separate", |b| {
|
||||
b.iter(|| {
|
||||
let mut data = Vec::new();
|
||||
let options = naga::back::spv::Options::default();
|
||||
for &(ref module, ref info) in inputs.iter() {
|
||||
let mut writer = naga::back::spv::Writer::new(&options).unwrap();
|
||||
for ep in module.entry_points.iter() {
|
||||
let pipeline_options = naga::back::spv::PipelineOptions {
|
||||
shader_stage: ep.stage,
|
||||
entry_point: ep.name.clone(),
|
||||
};
|
||||
writer
|
||||
.write(module, info, Some(&pipeline_options), &None, &mut data)
|
||||
.unwrap();
|
||||
data.clear();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
#[cfg(feature = "msl-out")]
|
||||
group.bench_function("msl", |b| {
|
||||
b.iter(|| {
|
||||
let mut string = String::new();
|
||||
let options = naga::back::msl::Options::default();
|
||||
for &(ref module, ref info) in inputs.iter() {
|
||||
let pipeline_options = naga::back::msl::PipelineOptions::default();
|
||||
let mut writer = naga::back::msl::Writer::new(&mut string);
|
||||
writer
|
||||
.write(module, info, &options, &pipeline_options)
|
||||
.unwrap();
|
||||
string.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
#[cfg(feature = "hlsl-out")]
|
||||
group.bench_function("hlsl", |b| {
|
||||
b.iter(|| {
|
||||
let options = naga::back::hlsl::Options::default();
|
||||
let mut string = String::new();
|
||||
for &(ref module, ref info) in inputs.iter() {
|
||||
let mut writer = naga::back::hlsl::Writer::new(&mut string, &options);
|
||||
let _ = writer.write(module, info); // may fail on unimplemented things
|
||||
string.clear();
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
#[cfg(feature = "glsl-out")]
|
||||
group.bench_function("glsl-separate", |b| {
|
||||
b.iter(|| {
|
||||
let mut string = String::new();
|
||||
let options = naga::back::glsl::Options {
|
||||
version: naga::back::glsl::Version::new_gles(320),
|
||||
writer_flags: naga::back::glsl::WriterFlags::empty(),
|
||||
binding_map: Default::default(),
|
||||
zero_initialize_workgroup_memory: true,
|
||||
};
|
||||
for &(ref module, ref info) in inputs.iter() {
|
||||
for ep in module.entry_points.iter() {
|
||||
let pipeline_options = naga::back::glsl::PipelineOptions {
|
||||
shader_stage: ep.stage,
|
||||
entry_point: ep.name.clone(),
|
||||
multiview: None,
|
||||
};
|
||||
|
||||
// might be `Err` if missing features
|
||||
if let Ok(mut writer) = naga::back::glsl::Writer::new(
|
||||
&mut string,
|
||||
module,
|
||||
info,
|
||||
&options,
|
||||
&pipeline_options,
|
||||
naga::proc::BoundsCheckPolicies::default(),
|
||||
) {
|
||||
let _ = writer.write(); // might be `Err` if unsupported
|
||||
}
|
||||
|
||||
string.clear();
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
criterion_group!(criterion, frontends, validation, backends,);
|
||||
criterion_main!(criterion);
|
@ -21,23 +21,27 @@ features = ["arbitrary", "spv-in", "wgsl-in", "glsl-in"]
|
||||
[[bin]]
|
||||
name = "spv_parser"
|
||||
path = "fuzz_targets/spv_parser.rs"
|
||||
bench = false
|
||||
test = false
|
||||
doc = false
|
||||
|
||||
[[bin]]
|
||||
name = "wgsl_parser"
|
||||
path = "fuzz_targets/wgsl_parser.rs"
|
||||
bench = false
|
||||
test = false
|
||||
doc = false
|
||||
|
||||
[[bin]]
|
||||
name = "glsl_parser"
|
||||
path = "fuzz_targets/glsl_parser.rs"
|
||||
bench = false
|
||||
test = false
|
||||
doc = false
|
||||
|
||||
[[bin]]
|
||||
name = "ir"
|
||||
path = "fuzz_targets/ir.rs"
|
||||
bench = false
|
||||
test = false
|
||||
doc = false
|
||||
|
@ -1044,7 +1044,12 @@ impl<'a, W: Write> super::Writer<'a, W> {
|
||||
crate::Expression::GlobalVariable(var_handle) => {
|
||||
&module.global_variables[var_handle]
|
||||
}
|
||||
ref other => unreachable!("Array length of base {:?}", other),
|
||||
ref other => {
|
||||
return Err(super::Error::Unimplemented(format!(
|
||||
"Array length of base {:?}",
|
||||
other
|
||||
)))
|
||||
}
|
||||
};
|
||||
let storage_access = match global_var.space {
|
||||
crate::AddressSpace::Storage { access } => access,
|
||||
|
@ -172,6 +172,8 @@ impl<A: HalApi> BakedCommands<A> {
|
||||
device_tracker: &mut Tracker<A>,
|
||||
snatch_guard: &SnatchGuard<'_>,
|
||||
) -> Result<(), DestroyedBufferError> {
|
||||
profiling::scope!("initialize_buffer_memory");
|
||||
|
||||
// Gather init ranges for each buffer so we can collapse them.
|
||||
// It is not possible to do this at an earlier point since previously
|
||||
// executed command buffers change the resource init state.
|
||||
@ -276,6 +278,8 @@ impl<A: HalApi> BakedCommands<A> {
|
||||
device: &Device<A>,
|
||||
snatch_guard: &SnatchGuard<'_>,
|
||||
) -> Result<(), DestroyedTextureError> {
|
||||
profiling::scope!("initialize_texture_memory");
|
||||
|
||||
let mut ranges: Vec<TextureInitRange> = Vec::new();
|
||||
for texture_use in self.texture_memory_actions.drain_init_actions() {
|
||||
let mut initialization_status = texture_use.texture.initialization_status.write();
|
||||
|
@ -32,7 +32,9 @@ pub const SHADER_STAGE_COUNT: usize = hal::MAX_CONCURRENT_SHADER_STAGES;
|
||||
// value is enough for a 16k texture with float4 format.
|
||||
pub(crate) const ZERO_BUFFER_SIZE: BufferAddress = 512 << 10;
|
||||
|
||||
const CLEANUP_WAIT_MS: u32 = 5000;
|
||||
// If a submission is not completed within this time, we go off into UB land.
|
||||
// See https://github.com/gfx-rs/wgpu/issues/4589. 60s to reduce the chances of this.
|
||||
const CLEANUP_WAIT_MS: u32 = 60000;
|
||||
|
||||
const IMPLICIT_BIND_GROUP_LAYOUT_ERROR_LABEL: &str = "Implicit BindGroupLayout in the Error State";
|
||||
const ENTRYPOINT_FAILURE_ERROR: &str = "The given EntryPoint is Invalid";
|
||||
|
@ -1186,6 +1186,8 @@ impl Global {
|
||||
|
||||
// finish all the command buffers first
|
||||
for &cmb_id in command_buffer_ids {
|
||||
profiling::scope!("process command buffer");
|
||||
|
||||
// we reset the used surface textures every time we use
|
||||
// it, so make sure to set_size on it.
|
||||
used_surface_textures.set_size(device.tracker_indices.textures.size());
|
||||
@ -1222,13 +1224,15 @@ impl Global {
|
||||
continue;
|
||||
}
|
||||
|
||||
// optimize the tracked states
|
||||
// cmdbuf.trackers.optimize();
|
||||
{
|
||||
profiling::scope!("update submission ids");
|
||||
|
||||
let cmd_buf_data = cmdbuf.data.lock();
|
||||
let cmd_buf_trackers = &cmd_buf_data.as_ref().unwrap().trackers;
|
||||
|
||||
// update submission IDs
|
||||
{
|
||||
profiling::scope!("buffers");
|
||||
for buffer in cmd_buf_trackers.buffers.used_resources() {
|
||||
if buffer.raw.get(&snatch_guard).is_none() {
|
||||
return Err(QueueSubmitError::DestroyedBuffer(
|
||||
@ -1246,6 +1250,9 @@ impl Global {
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
profiling::scope!("textures");
|
||||
for texture in cmd_buf_trackers.textures.used_resources() {
|
||||
let should_extend = match texture.inner.get(&snatch_guard) {
|
||||
None => {
|
||||
@ -1266,15 +1273,24 @@ impl Global {
|
||||
if should_extend {
|
||||
unsafe {
|
||||
used_surface_textures
|
||||
.merge_single(&texture, None, hal::TextureUses::PRESENT)
|
||||
.merge_single(
|
||||
&texture,
|
||||
None,
|
||||
hal::TextureUses::PRESENT,
|
||||
)
|
||||
.unwrap();
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
{
|
||||
profiling::scope!("views");
|
||||
for texture_view in cmd_buf_trackers.views.used_resources() {
|
||||
texture_view.info.use_at(submit_index);
|
||||
}
|
||||
}
|
||||
{
|
||||
profiling::scope!("bind groups (+ referenced views/samplers)");
|
||||
for bg in cmd_buf_trackers.bind_groups.used_resources() {
|
||||
bg.info.use_at(submit_index);
|
||||
// We need to update the submission indices for the contained
|
||||
@ -1288,20 +1304,32 @@ impl Global {
|
||||
}
|
||||
}
|
||||
}
|
||||
// assert!(cmd_buf_trackers.samplers.is_empty());
|
||||
{
|
||||
profiling::scope!("compute pipelines");
|
||||
for compute_pipeline in
|
||||
cmd_buf_trackers.compute_pipelines.used_resources()
|
||||
{
|
||||
compute_pipeline.info.use_at(submit_index);
|
||||
}
|
||||
}
|
||||
{
|
||||
profiling::scope!("render pipelines");
|
||||
for render_pipeline in
|
||||
cmd_buf_trackers.render_pipelines.used_resources()
|
||||
{
|
||||
render_pipeline.info.use_at(submit_index);
|
||||
}
|
||||
}
|
||||
{
|
||||
profiling::scope!("query sets");
|
||||
for query_set in cmd_buf_trackers.query_sets.used_resources() {
|
||||
query_set.info.use_at(submit_index);
|
||||
}
|
||||
}
|
||||
{
|
||||
profiling::scope!(
|
||||
"render bundles (+ referenced pipelines/query sets)"
|
||||
);
|
||||
for bundle in cmd_buf_trackers.bundles.used_resources() {
|
||||
bundle.info.use_at(submit_index);
|
||||
// We need to update the submission indices for the contained
|
||||
@ -1312,12 +1340,15 @@ impl Global {
|
||||
{
|
||||
render_pipeline.info.use_at(submit_index);
|
||||
}
|
||||
for query_set in bundle.used.query_sets.read().used_resources() {
|
||||
for query_set in bundle.used.query_sets.read().used_resources()
|
||||
{
|
||||
query_set.info.use_at(submit_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mut baked = cmdbuf.from_arc_into_baked();
|
||||
|
||||
// execute resource transitions
|
||||
unsafe {
|
||||
baked
|
||||
@ -1385,6 +1416,13 @@ impl Global {
|
||||
raw: baked.encoder,
|
||||
cmd_buffers: baked.list,
|
||||
});
|
||||
|
||||
{
|
||||
// This involves actually decrementing the ref count of all command buffer
|
||||
// resources, so can be _very_ expensive.
|
||||
profiling::scope!("drop command buffer trackers");
|
||||
drop(baked.trackers);
|
||||
}
|
||||
}
|
||||
|
||||
log::trace!("Device after submission {}", submit_index);
|
||||
|
@ -84,9 +84,6 @@ naga-ir = ["dep:naga"]
|
||||
## to the validation carried out at public APIs in all builds.
|
||||
strict_asserts = ["wgc?/strict_asserts", "wgt/strict_asserts"]
|
||||
|
||||
## Log all API entry points at info instead of trace level.
|
||||
api_log_info = ["wgc/api_log_info"]
|
||||
|
||||
## Enables serialization via `serde` on common wgpu types.
|
||||
serde = ["dep:serde", "wgc/serde"]
|
||||
|
||||
|
@ -13,11 +13,21 @@ Usage: xtask <COMMAND>
|
||||
|
||||
Commands:
|
||||
run-wasm
|
||||
Build and run web examples
|
||||
|
||||
--release Build in release mode
|
||||
--no-serve Just build the generated files, don't serve them
|
||||
|
||||
test
|
||||
Run tests
|
||||
|
||||
--llvm-cov Run tests with LLVM code coverage using the llvm-cov tool
|
||||
--list List all of the tests and their executables without running them
|
||||
--retries Number of times to retry failing tests
|
||||
|
||||
vendor-web-sys
|
||||
Re-vendor the WebGPU web-sys bindings.
|
||||
|
||||
--no-cleanup Don't clean up temporary checkout of wasm-bindgen
|
||||
One of:
|
||||
--path-to-checkout Path to a local checkout of wasm-bindgen to generate bindings from.
|
||||
|
@ -5,7 +5,7 @@ use xshell::Shell;
|
||||
|
||||
use crate::util::{check_all_programs, Program};
|
||||
|
||||
pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> Result<(), anyhow::Error> {
|
||||
pub(crate) fn run_wasm(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
|
||||
let no_serve = args.contains("--no-serve");
|
||||
let release = args.contains("--release");
|
||||
|
||||
|
@ -4,6 +4,12 @@ use xshell::Shell;
|
||||
|
||||
pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
|
||||
let llvm_cov = args.contains("--llvm-cov");
|
||||
let list = args.contains("--list");
|
||||
let retries = args
|
||||
.opt_value_from_str("--retries")?
|
||||
.unwrap_or(0_u32)
|
||||
.to_string();
|
||||
|
||||
// These need to match the command in "run wgpu-info" in `.github/workflows/ci.yml`
|
||||
let llvm_cov_flags: &[_] = if llvm_cov {
|
||||
&["llvm-cov", "--no-cfg-coverage", "--no-report"]
|
||||
@ -12,16 +18,28 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
|
||||
};
|
||||
let llvm_cov_nextest_flags: &[_] = if llvm_cov {
|
||||
&["llvm-cov", "--no-cfg-coverage", "--no-report", "nextest"]
|
||||
} else {
|
||||
if list {
|
||||
&["nextest", "list"]
|
||||
} else {
|
||||
&["nextest", "run"]
|
||||
}
|
||||
};
|
||||
|
||||
log::info!("Generating .gpuconfig file based on gpus on the system");
|
||||
|
||||
xshell::cmd!(
|
||||
shell,
|
||||
"cargo {llvm_cov_flags...} run --bin wgpu-info -- --json -o .gpuconfig"
|
||||
)
|
||||
shell
|
||||
.cmd("cargo")
|
||||
.args(llvm_cov_flags)
|
||||
.args([
|
||||
"run",
|
||||
"--bin",
|
||||
"wgpu-info",
|
||||
"--",
|
||||
"--json",
|
||||
"-o",
|
||||
".gpuconfig",
|
||||
])
|
||||
.quiet()
|
||||
.run()
|
||||
.context("Failed to run wgpu-info to generate .gpuconfig")?;
|
||||
@ -39,12 +57,30 @@ pub fn run_tests(shell: Shell, mut args: Arguments) -> anyhow::Result<()> {
|
||||
if gpu_count == 1 { "" } else { "s" }
|
||||
);
|
||||
|
||||
if list {
|
||||
log::info!("Listing tests");
|
||||
shell
|
||||
.cmd("cargo")
|
||||
.args(llvm_cov_nextest_flags)
|
||||
.args(["-v", "--benches", "--tests", "--all-features"])
|
||||
.args(args.finish())
|
||||
.run()
|
||||
.context("Failed to list tests")?;
|
||||
return Ok(());
|
||||
}
|
||||
log::info!("Running cargo tests");
|
||||
|
||||
xshell::cmd!(
|
||||
shell,
|
||||
"cargo {llvm_cov_nextest_flags...} --all-features --no-fail-fast --retries 2"
|
||||
)
|
||||
shell
|
||||
.cmd("cargo")
|
||||
.args(llvm_cov_nextest_flags)
|
||||
.args([
|
||||
"--benches",
|
||||
"--tests",
|
||||
"--no-fail-fast",
|
||||
"--all-features",
|
||||
"--retries",
|
||||
&retries,
|
||||
])
|
||||
.args(args.finish())
|
||||
.quiet()
|
||||
.run()
|
||||
|
@ -1,15 +1,15 @@
|
||||
use std::{io, process::Command};
|
||||
|
||||
pub(crate) struct Program {
|
||||
pub binary_name: &'static str,
|
||||
pub crate_name: &'static str,
|
||||
pub binary_name: &'static str,
|
||||
}
|
||||
|
||||
pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
|
||||
let mut failed = Vec::new();
|
||||
for Program {
|
||||
binary_name,
|
||||
let mut failed_crates = Vec::new();
|
||||
for &Program {
|
||||
crate_name,
|
||||
binary_name,
|
||||
} in programs
|
||||
{
|
||||
let mut cmd = Command::new(binary_name);
|
||||
@ -21,7 +21,7 @@ pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
|
||||
}
|
||||
Err(e) if matches!(e.kind(), io::ErrorKind::NotFound) => {
|
||||
log::error!("Checking for {binary_name} in PATH: ❌");
|
||||
failed.push(*crate_name);
|
||||
failed_crates.push(crate_name);
|
||||
}
|
||||
Err(e) => {
|
||||
log::error!("Checking for {binary_name} in PATH: ❌");
|
||||
@ -30,12 +30,13 @@ pub(crate) fn check_all_programs(programs: &[Program]) -> anyhow::Result<()> {
|
||||
}
|
||||
}
|
||||
|
||||
if !failed.is_empty() {
|
||||
if !failed_crates.is_empty() {
|
||||
log::error!(
|
||||
"Please install them with: cargo install {}",
|
||||
failed.join(" ")
|
||||
failed_crates.join(" ")
|
||||
);
|
||||
anyhow::bail!("Missing programs in PATH");
|
||||
|
||||
anyhow::bail!("Missing required programs");
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
Loading…
Reference in New Issue
Block a user