mirror of
https://github.com/gfx-rs/wgpu.git
synced 2024-11-25 00:03:29 +00:00
Reduce the number of iterations in benchmarks to a small number when running on CI
This commit is contained in:
parent
7446790354
commit
86507f33cd
1
.github/workflows/ci.yml
vendored
1
.github/workflows/ci.yml
vendored
@ -59,6 +59,7 @@ env:
|
|||||||
RUSTDOCFLAGS: -D warnings
|
RUSTDOCFLAGS: -D warnings
|
||||||
WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
|
WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
|
||||||
CACHE_SUFFIX: c # cache busting
|
CACHE_SUFFIX: c # cache busting
|
||||||
|
WGPU_TESTING: true
|
||||||
|
|
||||||
# We distinguish the following kinds of builds:
|
# We distinguish the following kinds of builds:
|
||||||
# - native: build for the same target as we compile on
|
# - native: build for the same target as we compile on
|
||||||
|
@ -10,24 +10,36 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
|||||||
|
|
||||||
use crate::DeviceState;
|
use crate::DeviceState;
|
||||||
|
|
||||||
const DISPATCH_COUNT: usize = 10_000;
|
/// Returns the number of compute dispatches the benchmark should issue.
///
/// Yields `8` when the `WGPU_TESTING` environment variable is set (the CI
/// workflow exports it), and `10_000` otherwise.
fn dispatch_count() -> usize {
    // On CI we only want to run a very lightweight version of the benchmark
    // to ensure that it does not break.
    //
    // Only the *presence* of the variable matters, so query it with `var_os`:
    // `var` reports an error for values that are not valid Unicode, which
    // would make a set-but-oddly-encoded variable look unset.
    if std::env::var_os("WGPU_TESTING").is_some() {
        8
    } else {
        10_000
    }
}
|
||||||
|
|
||||||
// Currently bindless is _much_ slower than with regular resources,
|
// Currently bindless is _much_ slower than with regular resources,
|
||||||
// since wgpu needs to issue barriers for all resources between each dispatch for all read/write textures & buffers.
|
// since wgpu needs to issue barriers for all resources between each dispatch for all read/write textures & buffers.
|
||||||
// This is in fact so slow that it makes the benchmark unusable when we use the same amount of
|
// This is in fact so slow that it makes the benchmark unusable when we use the same amount of
|
||||||
// resources as the regular benchmark.
|
// resources as the regular benchmark.
|
||||||
// For details see https://github.com/gfx-rs/wgpu/issues/5766
|
// For details see https://github.com/gfx-rs/wgpu/issues/5766
|
||||||
const DISPATCH_COUNT_BINDLESS: usize = 1_000;
|
/// Returns the number of dispatches the bindless benchmark should issue.
///
/// Yields `8` when the `WGPU_TESTING` environment variable is set (the CI
/// workflow exports it), and `1_000` otherwise.
fn dispatch_count_bindless() -> usize {
    // On CI we only want to run a very lightweight version of the benchmark
    // to ensure that it does not break.
    //
    // Only the *presence* of the variable matters, so query it with `var_os`:
    // `var` reports an error for values that are not valid Unicode, which
    // would make a set-but-oddly-encoded variable look unset.
    if std::env::var_os("WGPU_TESTING").is_some() {
        8
    } else {
        1_000
    }
}
|
||||||
|
|
||||||
// Must match the number of textures in the computepass.wgsl shader
|
// Must match the number of textures in the computepass.wgsl shader
|
||||||
const TEXTURES_PER_DISPATCH: usize = 2;
|
const TEXTURES_PER_DISPATCH: usize = 2;
|
||||||
const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
|
const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
|
||||||
const STORAGE_BUFFERS_PER_DISPATCH: usize = 2;
|
const STORAGE_BUFFERS_PER_DISPATCH: usize = 2;
|
||||||
|
|
||||||
const TEXTURE_COUNT: usize = DISPATCH_COUNT * TEXTURES_PER_DISPATCH;
|
|
||||||
const STORAGE_TEXTURE_COUNT: usize = DISPATCH_COUNT * STORAGE_TEXTURES_PER_DISPATCH;
|
|
||||||
const STORAGE_BUFFER_COUNT: usize = DISPATCH_COUNT * STORAGE_BUFFERS_PER_DISPATCH;
|
|
||||||
|
|
||||||
const BUFFER_SIZE: u64 = 16;
|
const BUFFER_SIZE: u64 = 16;
|
||||||
|
|
||||||
struct ComputepassState {
|
struct ComputepassState {
|
||||||
@ -45,6 +57,12 @@ impl ComputepassState {
|
|||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
let device_state = DeviceState::new();
|
let device_state = DeviceState::new();
|
||||||
|
|
||||||
|
let dispatch_count = dispatch_count();
|
||||||
|
let dispatch_count_bindless = dispatch_count_bindless();
|
||||||
|
let texture_count = dispatch_count * TEXTURES_PER_DISPATCH;
|
||||||
|
let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH;
|
||||||
|
let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH;
|
||||||
|
|
||||||
let supports_bindless = device_state.device.features().contains(
|
let supports_bindless = device_state.device.features().contains(
|
||||||
wgpu::Features::BUFFER_BINDING_ARRAY
|
wgpu::Features::BUFFER_BINDING_ARRAY
|
||||||
| wgpu::Features::TEXTURE_BINDING_ARRAY
|
| wgpu::Features::TEXTURE_BINDING_ARRAY
|
||||||
@ -106,8 +124,8 @@ impl ComputepassState {
|
|||||||
entries: &bind_group_layout_entries,
|
entries: &bind_group_layout_entries,
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
|
let mut texture_views = Vec::with_capacity(texture_count);
|
||||||
for i in 0..TEXTURE_COUNT {
|
for i in 0..texture_count {
|
||||||
let texture = device_state
|
let texture = device_state
|
||||||
.device
|
.device
|
||||||
.create_texture(&wgpu::TextureDescriptor {
|
.create_texture(&wgpu::TextureDescriptor {
|
||||||
@ -132,8 +150,8 @@ impl ComputepassState {
|
|||||||
random.shuffle(&mut texture_views);
|
random.shuffle(&mut texture_views);
|
||||||
let texture_view_refs: Vec<_> = texture_views.iter().collect();
|
let texture_view_refs: Vec<_> = texture_views.iter().collect();
|
||||||
|
|
||||||
let mut storage_texture_views = Vec::with_capacity(STORAGE_TEXTURE_COUNT);
|
let mut storage_texture_views = Vec::with_capacity(storage_texture_count);
|
||||||
for i in 0..TEXTURE_COUNT {
|
for i in 0..storage_texture_count {
|
||||||
let texture = device_state
|
let texture = device_state
|
||||||
.device
|
.device
|
||||||
.create_texture(&wgpu::TextureDescriptor {
|
.create_texture(&wgpu::TextureDescriptor {
|
||||||
@ -158,8 +176,8 @@ impl ComputepassState {
|
|||||||
random.shuffle(&mut storage_texture_views);
|
random.shuffle(&mut storage_texture_views);
|
||||||
let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect();
|
let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect();
|
||||||
|
|
||||||
let mut storage_buffers = Vec::with_capacity(STORAGE_BUFFER_COUNT);
|
let mut storage_buffers = Vec::with_capacity(storage_buffer_count);
|
||||||
for i in 0..STORAGE_BUFFER_COUNT {
|
for i in 0..storage_buffer_count {
|
||||||
storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
label: Some(&format!("Buffer {i}")),
|
label: Some(&format!("Buffer {i}")),
|
||||||
size: BUFFER_SIZE,
|
size: BUFFER_SIZE,
|
||||||
@ -173,8 +191,8 @@ impl ComputepassState {
|
|||||||
.map(|b| b.as_entire_buffer_binding())
|
.map(|b| b.as_entire_buffer_binding())
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let mut bind_groups = Vec::with_capacity(DISPATCH_COUNT);
|
let mut bind_groups = Vec::with_capacity(dispatch_count);
|
||||||
for dispatch_idx in 0..DISPATCH_COUNT {
|
for dispatch_idx in 0..dispatch_count {
|
||||||
let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH);
|
let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH);
|
||||||
for tex_idx in 0..TEXTURES_PER_DISPATCH {
|
for tex_idx in 0..TEXTURES_PER_DISPATCH {
|
||||||
entries.push(wgpu::BindGroupEntry {
|
entries.push(wgpu::BindGroupEntry {
|
||||||
@ -258,7 +276,7 @@ impl ComputepassState {
|
|||||||
view_dimension: wgpu::TextureViewDimension::D2,
|
view_dimension: wgpu::TextureViewDimension::D2,
|
||||||
multisampled: false,
|
multisampled: false,
|
||||||
},
|
},
|
||||||
count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
|
count: Some(NonZeroU32::new(texture_count as u32).unwrap()),
|
||||||
},
|
},
|
||||||
wgpu::BindGroupLayoutEntry {
|
wgpu::BindGroupLayoutEntry {
|
||||||
binding: 1,
|
binding: 1,
|
||||||
@ -268,7 +286,7 @@ impl ComputepassState {
|
|||||||
format: wgpu::TextureFormat::R32Float,
|
format: wgpu::TextureFormat::R32Float,
|
||||||
view_dimension: wgpu::TextureViewDimension::D2,
|
view_dimension: wgpu::TextureViewDimension::D2,
|
||||||
},
|
},
|
||||||
count: Some(NonZeroU32::new(STORAGE_TEXTURE_COUNT as u32).unwrap()),
|
count: Some(NonZeroU32::new(storage_texture_count as u32).unwrap()),
|
||||||
},
|
},
|
||||||
wgpu::BindGroupLayoutEntry {
|
wgpu::BindGroupLayoutEntry {
|
||||||
binding: 2,
|
binding: 2,
|
||||||
@ -278,7 +296,7 @@ impl ComputepassState {
|
|||||||
has_dynamic_offset: false,
|
has_dynamic_offset: false,
|
||||||
min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE),
|
min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE),
|
||||||
},
|
},
|
||||||
count: Some(NonZeroU32::new(STORAGE_BUFFER_COUNT as u32).unwrap()),
|
count: Some(NonZeroU32::new(storage_buffer_count as u32).unwrap()),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
});
|
});
|
||||||
@ -293,19 +311,19 @@ impl ComputepassState {
|
|||||||
wgpu::BindGroupEntry {
|
wgpu::BindGroupEntry {
|
||||||
binding: 0,
|
binding: 0,
|
||||||
resource: wgpu::BindingResource::TextureViewArray(
|
resource: wgpu::BindingResource::TextureViewArray(
|
||||||
&texture_view_refs[..DISPATCH_COUNT_BINDLESS],
|
&texture_view_refs[..dispatch_count_bindless],
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
wgpu::BindGroupEntry {
|
wgpu::BindGroupEntry {
|
||||||
binding: 1,
|
binding: 1,
|
||||||
resource: wgpu::BindingResource::TextureViewArray(
|
resource: wgpu::BindingResource::TextureViewArray(
|
||||||
&storage_texture_view_refs[..DISPATCH_COUNT_BINDLESS],
|
&storage_texture_view_refs[..dispatch_count_bindless],
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
wgpu::BindGroupEntry {
|
wgpu::BindGroupEntry {
|
||||||
binding: 2,
|
binding: 2,
|
||||||
resource: wgpu::BindingResource::BufferArray(
|
resource: wgpu::BindingResource::BufferArray(
|
||||||
&storage_buffer_bindings[..DISPATCH_COUNT_BINDLESS],
|
&storage_buffer_bindings[..dispatch_count_bindless],
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
@ -354,7 +372,8 @@ impl ComputepassState {
|
|||||||
fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
|
fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
|
||||||
profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}"));
|
profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}"));
|
||||||
|
|
||||||
let dispatch_per_pass = DISPATCH_COUNT / total_passes;
|
let dispatch_count = dispatch_count();
|
||||||
|
let dispatch_per_pass = dispatch_count / total_passes;
|
||||||
|
|
||||||
let mut encoder = self
|
let mut encoder = self
|
||||||
.device_state
|
.device_state
|
||||||
@ -379,7 +398,7 @@ impl ComputepassState {
|
|||||||
encoder.finish()
|
encoder.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
|
fn run_bindless_pass(&self, dispatch_count_bindless: usize) -> wgpu::CommandBuffer {
|
||||||
profiling::scope!("Bindless Computepass");
|
profiling::scope!("Bindless Computepass");
|
||||||
|
|
||||||
let mut encoder = self
|
let mut encoder = self
|
||||||
@ -394,7 +413,7 @@ impl ComputepassState {
|
|||||||
|
|
||||||
compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
|
compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
|
||||||
compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
|
compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
|
||||||
for _ in 0..DISPATCH_COUNT_BINDLESS {
|
for _ in 0..dispatch_count_bindless {
|
||||||
compute_pass.dispatch_workgroups(1, 1, 1);
|
compute_pass.dispatch_workgroups(1, 1, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -407,13 +426,19 @@ impl ComputepassState {
|
|||||||
fn run_bench(ctx: &mut Criterion) {
|
fn run_bench(ctx: &mut Criterion) {
|
||||||
let state = Lazy::new(ComputepassState::new);
|
let state = Lazy::new(ComputepassState::new);
|
||||||
|
|
||||||
|
let dispatch_count = dispatch_count();
|
||||||
|
let dispatch_count_bindless = dispatch_count_bindless();
|
||||||
|
let texture_count = dispatch_count * TEXTURES_PER_DISPATCH;
|
||||||
|
let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH;
|
||||||
|
let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH;
|
||||||
|
|
||||||
// Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses
|
// Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses
|
||||||
let mut group = ctx.benchmark_group("Computepass: Single Threaded");
|
let mut group = ctx.benchmark_group("Computepass: Single Threaded");
|
||||||
group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
|
group.throughput(Throughput::Elements(dispatch_count as _));
|
||||||
|
|
||||||
for time_submit in [false, true] {
|
for time_submit in [false, true] {
|
||||||
for cpasses in [1, 2, 4, 8] {
|
for cpasses in [1, 2, 4, 8] {
|
||||||
let dispatch_per_pass = DISPATCH_COUNT / cpasses;
|
let dispatch_per_pass = dispatch_count / cpasses;
|
||||||
|
|
||||||
let label = if time_submit {
|
let label = if time_submit {
|
||||||
"Submit Time"
|
"Submit Time"
|
||||||
@ -466,10 +491,10 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
// Test 10k dispatch calls split up over 2, 4, and 8 threads.
|
// Test 10k dispatch calls split up over 2, 4, and 8 threads.
|
||||||
let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
|
let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
|
||||||
group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
|
group.throughput(Throughput::Elements(dispatch_count as _));
|
||||||
|
|
||||||
for threads in [2, 4, 8] {
|
for threads in [2, 4, 8] {
|
||||||
let dispatch_per_pass = DISPATCH_COUNT / threads;
|
let dispatch_per_pass = dispatch_count / threads;
|
||||||
group.bench_function(
|
group.bench_function(
|
||||||
&format!("{threads} threads x {dispatch_per_pass} dispatch"),
|
&format!("{threads} threads x {dispatch_per_pass} dispatch"),
|
||||||
|b| {
|
|b| {
|
||||||
@ -510,9 +535,9 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
// Test 10k dispatch calls split up over 1, 2, 4, and 8 threads.
|
// Test 10k dispatch calls split up over 1, 2, 4, and 8 threads.
|
||||||
let mut group = ctx.benchmark_group("Computepass: Bindless");
|
let mut group = ctx.benchmark_group("Computepass: Bindless");
|
||||||
group.throughput(Throughput::Elements(DISPATCH_COUNT_BINDLESS as _));
|
group.throughput(Throughput::Elements(dispatch_count_bindless as _));
|
||||||
|
|
||||||
group.bench_function(&format!("{DISPATCH_COUNT_BINDLESS} dispatch"), |b| {
|
group.bench_function(&format!("{dispatch_count_bindless} dispatch"), |b| {
|
||||||
Lazy::force(&state);
|
Lazy::force(&state);
|
||||||
|
|
||||||
b.iter_custom(|iters| {
|
b.iter_custom(|iters| {
|
||||||
@ -535,7 +560,7 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let buffer = state.run_bindless_pass();
|
let buffer = state.run_bindless_pass(dispatch_count_bindless);
|
||||||
|
|
||||||
duration += start.elapsed();
|
duration += start.elapsed();
|
||||||
|
|
||||||
@ -551,7 +576,7 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
ctx.bench_function(
|
ctx.bench_function(
|
||||||
&format!(
|
&format!(
|
||||||
"Computepass: Empty Submit with {} Resources",
|
"Computepass: Empty Submit with {} Resources",
|
||||||
TEXTURE_COUNT + STORAGE_TEXTURE_COUNT + STORAGE_BUFFER_COUNT
|
texture_count + storage_texture_count + storage_buffer_count
|
||||||
),
|
),
|
||||||
|b| {
|
|b| {
|
||||||
Lazy::force(&state);
|
Lazy::force(&state);
|
||||||
|
@ -10,14 +10,19 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
|||||||
|
|
||||||
use crate::DeviceState;
|
use crate::DeviceState;
|
||||||
|
|
||||||
const DRAW_COUNT: usize = 10_000;
|
/// Returns the number of draw calls the benchmark should issue.
///
/// Yields `8` when the `WGPU_TESTING` environment variable is set (the CI
/// workflow exports it), and `10_000` otherwise.
fn draw_count() -> usize {
    // On CI we only want to run a very lightweight version of the benchmark
    // to ensure that it does not break.
    //
    // Only the *presence* of the variable matters, so query it with `var_os`:
    // `var` reports an error for values that are not valid Unicode, which
    // would make a set-but-oddly-encoded variable look unset.
    if std::env::var_os("WGPU_TESTING").is_some() {
        8
    } else {
        10_000
    }
}
|
||||||
|
|
||||||
// Must match the number of textures in the renderpass.wgsl shader
|
// Must match the number of textures in the renderpass.wgsl shader
|
||||||
const TEXTURES_PER_DRAW: usize = 7;
|
const TEXTURES_PER_DRAW: usize = 7;
|
||||||
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
|
const VERTEX_BUFFERS_PER_DRAW: usize = 2;
|
||||||
const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW;
|
|
||||||
|
|
||||||
const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW;
|
|
||||||
|
|
||||||
struct RenderpassState {
|
struct RenderpassState {
|
||||||
device_state: DeviceState,
|
device_state: DeviceState,
|
||||||
@ -37,6 +42,10 @@ impl RenderpassState {
|
|||||||
fn new() -> Self {
|
fn new() -> Self {
|
||||||
let device_state = DeviceState::new();
|
let device_state = DeviceState::new();
|
||||||
|
|
||||||
|
let draw_count = draw_count();
|
||||||
|
let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW;
|
||||||
|
let texture_count = draw_count * TEXTURES_PER_DRAW;
|
||||||
|
|
||||||
let supports_bindless = device_state.device.features().contains(
|
let supports_bindless = device_state.device.features().contains(
|
||||||
wgpu::Features::TEXTURE_BINDING_ARRAY
|
wgpu::Features::TEXTURE_BINDING_ARRAY
|
||||||
| wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
|
| wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
|
||||||
@ -44,7 +53,7 @@ impl RenderpassState {
|
|||||||
.device
|
.device
|
||||||
.limits()
|
.limits()
|
||||||
.max_sampled_textures_per_shader_stage
|
.max_sampled_textures_per_shader_stage
|
||||||
>= TEXTURE_COUNT as _;
|
>= texture_count as _;
|
||||||
|
|
||||||
// Performance gets considerably worse if the resources are shuffled.
|
// Performance gets considerably worse if the resources are shuffled.
|
||||||
//
|
//
|
||||||
@ -74,8 +83,8 @@ impl RenderpassState {
|
|||||||
entries: &bind_group_layout_entries,
|
entries: &bind_group_layout_entries,
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
|
let mut texture_views = Vec::with_capacity(texture_count);
|
||||||
for i in 0..TEXTURE_COUNT {
|
for i in 0..texture_count {
|
||||||
let texture = device_state
|
let texture = device_state
|
||||||
.device
|
.device
|
||||||
.create_texture(&wgpu::TextureDescriptor {
|
.create_texture(&wgpu::TextureDescriptor {
|
||||||
@ -101,8 +110,8 @@ impl RenderpassState {
|
|||||||
|
|
||||||
let texture_view_refs: Vec<_> = texture_views.iter().collect();
|
let texture_view_refs: Vec<_> = texture_views.iter().collect();
|
||||||
|
|
||||||
let mut bind_groups = Vec::with_capacity(DRAW_COUNT);
|
let mut bind_groups = Vec::with_capacity(draw_count);
|
||||||
for draw_idx in 0..DRAW_COUNT {
|
for draw_idx in 0..draw_count {
|
||||||
let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW);
|
let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW);
|
||||||
for tex_idx in 0..TEXTURES_PER_DRAW {
|
for tex_idx in 0..TEXTURES_PER_DRAW {
|
||||||
entries.push(wgpu::BindGroupEntry {
|
entries.push(wgpu::BindGroupEntry {
|
||||||
@ -138,8 +147,8 @@ impl RenderpassState {
|
|||||||
push_constant_ranges: &[],
|
push_constant_ranges: &[],
|
||||||
});
|
});
|
||||||
|
|
||||||
let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT);
|
let mut vertex_buffers = Vec::with_capacity(vertex_buffer_count);
|
||||||
for _ in 0..VERTEX_BUFFER_COUNT {
|
for _ in 0..vertex_buffer_count {
|
||||||
vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
label: None,
|
label: None,
|
||||||
size: 3 * 16,
|
size: 3 * 16,
|
||||||
@ -149,8 +158,8 @@ impl RenderpassState {
|
|||||||
}
|
}
|
||||||
random.shuffle(&mut vertex_buffers);
|
random.shuffle(&mut vertex_buffers);
|
||||||
|
|
||||||
let mut index_buffers = Vec::with_capacity(DRAW_COUNT);
|
let mut index_buffers = Vec::with_capacity(draw_count);
|
||||||
for _ in 0..DRAW_COUNT {
|
for _ in 0..draw_count {
|
||||||
index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
label: None,
|
label: None,
|
||||||
size: 3 * 4,
|
size: 3 * 4,
|
||||||
@ -246,7 +255,7 @@ impl RenderpassState {
|
|||||||
view_dimension: wgpu::TextureViewDimension::D2,
|
view_dimension: wgpu::TextureViewDimension::D2,
|
||||||
multisampled: false,
|
multisampled: false,
|
||||||
},
|
},
|
||||||
count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
|
count: Some(NonZeroU32::new(texture_count as u32).unwrap()),
|
||||||
}],
|
}],
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -324,10 +333,15 @@ impl RenderpassState {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
|
fn run_subpass(
|
||||||
|
&self,
|
||||||
|
pass_number: usize,
|
||||||
|
total_passes: usize,
|
||||||
|
draw_count: usize,
|
||||||
|
) -> wgpu::CommandBuffer {
|
||||||
profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}"));
|
profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}"));
|
||||||
|
|
||||||
let draws_per_pass = DRAW_COUNT / total_passes;
|
let draws_per_pass = draw_count / total_passes;
|
||||||
|
|
||||||
let mut encoder = self
|
let mut encoder = self
|
||||||
.device_state
|
.device_state
|
||||||
@ -372,7 +386,7 @@ impl RenderpassState {
|
|||||||
encoder.finish()
|
encoder.finish()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
|
fn run_bindless_pass(&self, draw_count: usize) -> wgpu::CommandBuffer {
|
||||||
profiling::scope!("Bindless Renderpass");
|
profiling::scope!("Bindless Renderpass");
|
||||||
|
|
||||||
let mut encoder = self
|
let mut encoder = self
|
||||||
@ -402,7 +416,7 @@ impl RenderpassState {
|
|||||||
}
|
}
|
||||||
render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32);
|
render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32);
|
||||||
|
|
||||||
for draw_idx in 0..DRAW_COUNT {
|
for draw_idx in 0..draw_count {
|
||||||
render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1);
|
render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,13 +429,17 @@ impl RenderpassState {
|
|||||||
fn run_bench(ctx: &mut Criterion) {
|
fn run_bench(ctx: &mut Criterion) {
|
||||||
let state = Lazy::new(RenderpassState::new);
|
let state = Lazy::new(RenderpassState::new);
|
||||||
|
|
||||||
|
let draw_count = draw_count();
|
||||||
|
let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW;
|
||||||
|
let texture_count = draw_count * TEXTURES_PER_DRAW;
|
||||||
|
|
||||||
// Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses
|
// Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses
|
||||||
let mut group = ctx.benchmark_group("Renderpass: Single Threaded");
|
let mut group = ctx.benchmark_group("Renderpass: Single Threaded");
|
||||||
group.throughput(Throughput::Elements(DRAW_COUNT as _));
|
group.throughput(Throughput::Elements(draw_count as _));
|
||||||
|
|
||||||
for time_submit in [false, true] {
|
for time_submit in [false, true] {
|
||||||
for rpasses in [1, 2, 4, 8] {
|
for rpasses in [1, 2, 4, 8] {
|
||||||
let draws_per_pass = DRAW_COUNT / rpasses;
|
let draws_per_pass = draw_count / rpasses;
|
||||||
|
|
||||||
let label = if time_submit {
|
let label = if time_submit {
|
||||||
"Submit Time"
|
"Submit Time"
|
||||||
@ -451,7 +469,7 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses);
|
let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses);
|
||||||
for i in 0..rpasses {
|
for i in 0..rpasses {
|
||||||
buffers.push(state.run_subpass(i, rpasses));
|
buffers.push(state.run_subpass(i, rpasses, draw_count));
|
||||||
}
|
}
|
||||||
|
|
||||||
if time_submit {
|
if time_submit {
|
||||||
@ -479,10 +497,10 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
// Test 10k draw calls split up over 2, 4, and 8 threads.
|
// Test 10k draw calls split up over 2, 4, and 8 threads.
|
||||||
let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
|
let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
|
||||||
group.throughput(Throughput::Elements(DRAW_COUNT as _));
|
group.throughput(Throughput::Elements(draw_count as _));
|
||||||
|
|
||||||
for threads in [2, 4, 8] {
|
for threads in [2, 4, 8] {
|
||||||
let draws_per_pass = DRAW_COUNT / threads;
|
let draws_per_pass = draw_count / threads;
|
||||||
group.bench_function(
|
group.bench_function(
|
||||||
&format!("{threads} threads x {draws_per_pass} draws"),
|
&format!("{threads} threads x {draws_per_pass} draws"),
|
||||||
|b| {
|
|b| {
|
||||||
@ -505,7 +523,7 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
let buffers = (0..threads)
|
let buffers = (0..threads)
|
||||||
.into_par_iter()
|
.into_par_iter()
|
||||||
.map(|i| state.run_subpass(i, threads))
|
.map(|i| state.run_subpass(i, threads, draw_count))
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
duration += start.elapsed();
|
duration += start.elapsed();
|
||||||
@ -523,9 +541,9 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
// Test 10k draw calls split up over 1, 2, 4, and 8 threads.
|
// Test 10k draw calls split up over 1, 2, 4, and 8 threads.
|
||||||
let mut group = ctx.benchmark_group("Renderpass: Bindless");
|
let mut group = ctx.benchmark_group("Renderpass: Bindless");
|
||||||
group.throughput(Throughput::Elements(DRAW_COUNT as _));
|
group.throughput(Throughput::Elements(draw_count as _));
|
||||||
|
|
||||||
group.bench_function(&format!("{DRAW_COUNT} draws"), |b| {
|
group.bench_function(&format!("{draw_count} draws"), |b| {
|
||||||
Lazy::force(&state);
|
Lazy::force(&state);
|
||||||
|
|
||||||
b.iter_custom(|iters| {
|
b.iter_custom(|iters| {
|
||||||
@ -543,7 +561,7 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
|
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let buffer = state.run_bindless_pass();
|
let buffer = state.run_bindless_pass(draw_count);
|
||||||
|
|
||||||
duration += start.elapsed();
|
duration += start.elapsed();
|
||||||
|
|
||||||
@ -559,7 +577,7 @@ fn run_bench(ctx: &mut Criterion) {
|
|||||||
ctx.bench_function(
|
ctx.bench_function(
|
||||||
&format!(
|
&format!(
|
||||||
"Renderpass: Empty Submit with {} Resources",
|
"Renderpass: Empty Submit with {} Resources",
|
||||||
TEXTURE_COUNT + VERTEX_BUFFER_COUNT
|
texture_count + vertex_buffer_count
|
||||||
),
|
),
|
||||||
|b| {
|
|b| {
|
||||||
Lazy::force(&state);
|
Lazy::force(&state);
|
||||||
|
Loading…
Reference in New Issue
Block a user