diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index edf8501c5..70a83b51d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -59,6 +59,7 @@ env:
   RUSTDOCFLAGS: -D warnings
   WASM_BINDGEN_TEST_TIMEOUT: 300 # 5 minutes
   CACHE_SUFFIX: c # cache busting
+  WGPU_TESTING: true
 
 # We distinguish the following kinds of builds:
 # - native: build for the same target as we compile on
diff --git a/benches/benches/computepass.rs b/benches/benches/computepass.rs
index 9a69eb46e..2af141360 100644
--- a/benches/benches/computepass.rs
+++ b/benches/benches/computepass.rs
@@ -10,24 +10,36 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
-const DISPATCH_COUNT: usize = 10_000;
+fn dispatch_count() -> usize {
+    // CI exports WGPU_TESTING so this runs as a cheap smoke test. Only the
+    // variable's presence matters, so use var_os to accept any value.
+    if std::env::var_os("WGPU_TESTING").is_some() {
+        8
+    } else {
+        10_000
+    }
+}
 
 // Currently bindless is _much_ slower than with regularly resources,
 // since wgpu needs to issues barriers for all resources between each dispatch for all read/write textures & buffers.
 // This is in fact so slow that it makes the benchmark unusable when we use the same amount of
 // resources as the regular benchmark.
 // For details see https://github.com/gfx-rs/wgpu/issues/5766
-const DISPATCH_COUNT_BINDLESS: usize = 1_000;
+fn dispatch_count_bindless() -> usize {
+    // CI exports WGPU_TESTING so this runs as a cheap smoke test. Only the
+    // variable's presence matters, so use var_os to accept any value.
+    if std::env::var_os("WGPU_TESTING").is_some() {
+        8
+    } else {
+        1_000
+    }
+}
 
 // Must match the number of textures in the computepass.wgsl shader
 const TEXTURES_PER_DISPATCH: usize = 2;
 const STORAGE_TEXTURES_PER_DISPATCH: usize = 2;
 const STORAGE_BUFFERS_PER_DISPATCH: usize = 2;
 
-const TEXTURE_COUNT: usize = DISPATCH_COUNT * TEXTURES_PER_DISPATCH;
-const STORAGE_TEXTURE_COUNT: usize = DISPATCH_COUNT * STORAGE_TEXTURES_PER_DISPATCH;
-const STORAGE_BUFFER_COUNT: usize = DISPATCH_COUNT * STORAGE_BUFFERS_PER_DISPATCH;
-
 const BUFFER_SIZE: u64 = 16;
 
 struct ComputepassState {
@@ -45,6 +57,12 @@ impl ComputepassState {
     fn new() -> Self {
         let device_state = DeviceState::new();
 
+        let dispatch_count = dispatch_count();
+        let dispatch_count_bindless = dispatch_count_bindless();
+        let texture_count = dispatch_count * TEXTURES_PER_DISPATCH;
+        let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH;
+        let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH;
+
         let supports_bindless = device_state.device.features().contains(
             wgpu::Features::BUFFER_BINDING_ARRAY
                 | wgpu::Features::TEXTURE_BINDING_ARRAY
@@ -106,8 +124,8 @@ impl ComputepassState {
                     entries: &bind_group_layout_entries,
                 });
 
-        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
-        for i in 0..TEXTURE_COUNT {
+        let mut texture_views = Vec::with_capacity(texture_count);
+        for i in 0..texture_count {
             let texture = device_state
                 .device
                 .create_texture(&wgpu::TextureDescriptor {
@@ -132,8 +150,8 @@ impl ComputepassState {
         random.shuffle(&mut texture_views);
         let texture_view_refs: Vec<_> = texture_views.iter().collect();
 
-        let mut storage_texture_views = Vec::with_capacity(STORAGE_TEXTURE_COUNT);
-        for i in 0..TEXTURE_COUNT {
+        let mut storage_texture_views = Vec::with_capacity(storage_texture_count);
+        for i in 0..storage_texture_count {
             let texture = device_state
                 .device
                 .create_texture(&wgpu::TextureDescriptor {
@@ -158,8 +176,8 @@ impl ComputepassState {
         random.shuffle(&mut storage_texture_views);
         let storage_texture_view_refs: Vec<_> = storage_texture_views.iter().collect();
 
-        let mut storage_buffers = Vec::with_capacity(STORAGE_BUFFER_COUNT);
-        for i in 0..STORAGE_BUFFER_COUNT {
+        let mut storage_buffers = Vec::with_capacity(storage_buffer_count);
+        for i in 0..storage_buffer_count {
             storage_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                 label: Some(&format!("Buffer {i}")),
                 size: BUFFER_SIZE,
@@ -173,8 +191,8 @@ impl ComputepassState {
             .map(|b| b.as_entire_buffer_binding())
             .collect();
 
-        let mut bind_groups = Vec::with_capacity(DISPATCH_COUNT);
-        for dispatch_idx in 0..DISPATCH_COUNT {
+        let mut bind_groups = Vec::with_capacity(dispatch_count);
+        for dispatch_idx in 0..dispatch_count {
             let mut entries = Vec::with_capacity(TEXTURES_PER_DISPATCH);
             for tex_idx in 0..TEXTURES_PER_DISPATCH {
                 entries.push(wgpu::BindGroupEntry {
@@ -258,7 +276,7 @@ impl ComputepassState {
                                     view_dimension: wgpu::TextureViewDimension::D2,
                                     multisampled: false,
                                 },
-                                count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
+                                count: Some(NonZeroU32::new(texture_count as u32).unwrap()),
                             },
                             wgpu::BindGroupLayoutEntry {
                                 binding: 1,
@@ -268,7 +286,7 @@ impl ComputepassState {
                                     format: wgpu::TextureFormat::R32Float,
                                     view_dimension: wgpu::TextureViewDimension::D2,
                                 },
-                                count: Some(NonZeroU32::new(STORAGE_TEXTURE_COUNT as u32).unwrap()),
+                                count: Some(NonZeroU32::new(storage_texture_count as u32).unwrap()),
                             },
                             wgpu::BindGroupLayoutEntry {
                                 binding: 2,
@@ -278,7 +296,7 @@ impl ComputepassState {
                                     has_dynamic_offset: false,
                                     min_binding_size: std::num::NonZeroU64::new(BUFFER_SIZE),
                                 },
-                                count: Some(NonZeroU32::new(STORAGE_BUFFER_COUNT as u32).unwrap()),
+                                count: Some(NonZeroU32::new(storage_buffer_count as u32).unwrap()),
                             },
                         ],
                     });
@@ -293,19 +311,19 @@ impl ComputepassState {
                             wgpu::BindGroupEntry {
                                 binding: 0,
                                 resource: wgpu::BindingResource::TextureViewArray(
-                                    &texture_view_refs[..DISPATCH_COUNT_BINDLESS],
+                                    &texture_view_refs[..dispatch_count_bindless],
                                 ),
                             },
                             wgpu::BindGroupEntry {
                                 binding: 1,
                                 resource: wgpu::BindingResource::TextureViewArray(
-                                    &storage_texture_view_refs[..DISPATCH_COUNT_BINDLESS],
+                                    &storage_texture_view_refs[..dispatch_count_bindless],
                                 ),
                             },
                             wgpu::BindGroupEntry {
                                 binding: 2,
                                 resource: wgpu::BindingResource::BufferArray(
-                                    &storage_buffer_bindings[..DISPATCH_COUNT_BINDLESS],
+                                    &storage_buffer_bindings[..dispatch_count_bindless],
                                 ),
                             },
                         ],
@@ -354,7 +372,8 @@ impl ComputepassState {
     fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
         profiling::scope!("Computepass", &format!("Pass {pass_number}/{total_passes}"));
 
-        let dispatch_per_pass = DISPATCH_COUNT / total_passes;
+        let dispatch_count = dispatch_count();
+        let dispatch_per_pass = dispatch_count / total_passes;
 
         let mut encoder = self
             .device_state
@@ -379,7 +398,7 @@ impl ComputepassState {
         encoder.finish()
     }
 
-    fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
+    fn run_bindless_pass(&self, dispatch_count_bindless: usize) -> wgpu::CommandBuffer {
         profiling::scope!("Bindless Computepass");
 
         let mut encoder = self
@@ -394,7 +413,7 @@ impl ComputepassState {
 
         compute_pass.set_pipeline(self.bindless_pipeline.as_ref().unwrap());
         compute_pass.set_bind_group(0, self.bindless_bind_group.as_ref().unwrap(), &[]);
-        for _ in 0..DISPATCH_COUNT_BINDLESS {
+        for _ in 0..dispatch_count_bindless {
             compute_pass.dispatch_workgroups(1, 1, 1);
         }
 
@@ -407,13 +426,19 @@ impl ComputepassState {
 fn run_bench(ctx: &mut Criterion) {
     let state = Lazy::new(ComputepassState::new);
 
+    let dispatch_count = dispatch_count();
+    let dispatch_count_bindless = dispatch_count_bindless();
+    let texture_count = dispatch_count * TEXTURES_PER_DISPATCH;
+    let storage_buffer_count = dispatch_count * STORAGE_BUFFERS_PER_DISPATCH;
+    let storage_texture_count = dispatch_count * STORAGE_TEXTURES_PER_DISPATCH;
+
     // Test 10k dispatch calls split up into 1, 2, 4, and 8 computepasses
     let mut group = ctx.benchmark_group("Computepass: Single Threaded");
-    group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
+    group.throughput(Throughput::Elements(dispatch_count as _));
 
     for time_submit in [false, true] {
         for cpasses in [1, 2, 4, 8] {
-            let dispatch_per_pass = DISPATCH_COUNT / cpasses;
+            let dispatch_per_pass = dispatch_count / cpasses;
 
             let label = if time_submit {
                 "Submit Time"
@@ -466,10 +491,10 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k dispatch calls split up over 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Computepass: Multi Threaded");
-    group.throughput(Throughput::Elements(DISPATCH_COUNT as _));
+    group.throughput(Throughput::Elements(dispatch_count as _));
 
     for threads in [2, 4, 8] {
-        let dispatch_per_pass = DISPATCH_COUNT / threads;
+        let dispatch_per_pass = dispatch_count / threads;
         group.bench_function(
             &format!("{threads} threads x {dispatch_per_pass} dispatch"),
             |b| {
@@ -510,9 +535,9 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k dispatch calls split up over 1, 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Computepass: Bindless");
-    group.throughput(Throughput::Elements(DISPATCH_COUNT_BINDLESS as _));
+    group.throughput(Throughput::Elements(dispatch_count_bindless as _));
 
-    group.bench_function(&format!("{DISPATCH_COUNT_BINDLESS} dispatch"), |b| {
+    group.bench_function(&format!("{dispatch_count_bindless} dispatch"), |b| {
         Lazy::force(&state);
 
         b.iter_custom(|iters| {
@@ -535,7 +560,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                 let start = Instant::now();
 
-                let buffer = state.run_bindless_pass();
+                let buffer = state.run_bindless_pass(dispatch_count_bindless);
 
                 duration += start.elapsed();
 
@@ -551,7 +576,7 @@ fn run_bench(ctx: &mut Criterion) {
     ctx.bench_function(
         &format!(
             "Computepass: Empty Submit with {} Resources",
-            TEXTURE_COUNT + STORAGE_TEXTURE_COUNT + STORAGE_BUFFER_COUNT
+            texture_count + storage_texture_count + storage_buffer_count
         ),
         |b| {
             Lazy::force(&state);
diff --git a/benches/benches/renderpass.rs b/benches/benches/renderpass.rs
index f31fc0758..7f2e14116 100644
--- a/benches/benches/renderpass.rs
+++ b/benches/benches/renderpass.rs
@@ -10,14 +10,19 @@ use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::DeviceState;
 
-const DRAW_COUNT: usize = 10_000;
+fn draw_count() -> usize {
+    // CI exports WGPU_TESTING so this runs as a cheap smoke test. Only the
+    // variable's presence matters, so use var_os to accept any value.
+    if std::env::var_os("WGPU_TESTING").is_some() {
+        8
+    } else {
+        10_000
+    }
+}
 
 // Must match the number of textures in the renderpass.wgsl shader
 const TEXTURES_PER_DRAW: usize = 7;
 const VERTEX_BUFFERS_PER_DRAW: usize = 2;
-const VERTEX_BUFFER_COUNT: usize = DRAW_COUNT * VERTEX_BUFFERS_PER_DRAW;
-
-const TEXTURE_COUNT: usize = DRAW_COUNT * TEXTURES_PER_DRAW;
 
 struct RenderpassState {
     device_state: DeviceState,
@@ -37,6 +42,10 @@ impl RenderpassState {
     fn new() -> Self {
         let device_state = DeviceState::new();
 
+        let draw_count = draw_count();
+        let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW;
+        let texture_count = draw_count * TEXTURES_PER_DRAW;
+
         let supports_bindless = device_state.device.features().contains(
             wgpu::Features::TEXTURE_BINDING_ARRAY
                 | wgpu::Features::SAMPLED_TEXTURE_AND_STORAGE_BUFFER_ARRAY_NON_UNIFORM_INDEXING,
@@ -44,7 +53,7 @@ impl RenderpassState {
             .device
             .limits()
             .max_sampled_textures_per_shader_stage
-            >= TEXTURE_COUNT as _;
+            >= texture_count as _;
 
         // Performance gets considerably worse if the resources are shuffled.
         //
@@ -74,8 +83,8 @@ impl RenderpassState {
                     entries: &bind_group_layout_entries,
                 });
 
-        let mut texture_views = Vec::with_capacity(TEXTURE_COUNT);
-        for i in 0..TEXTURE_COUNT {
+        let mut texture_views = Vec::with_capacity(texture_count);
+        for i in 0..texture_count {
             let texture = device_state
                 .device
                 .create_texture(&wgpu::TextureDescriptor {
@@ -101,8 +110,8 @@ impl RenderpassState {
 
         let texture_view_refs: Vec<_> = texture_views.iter().collect();
 
-        let mut bind_groups = Vec::with_capacity(DRAW_COUNT);
-        for draw_idx in 0..DRAW_COUNT {
+        let mut bind_groups = Vec::with_capacity(draw_count);
+        for draw_idx in 0..draw_count {
             let mut entries = Vec::with_capacity(TEXTURES_PER_DRAW);
             for tex_idx in 0..TEXTURES_PER_DRAW {
                 entries.push(wgpu::BindGroupEntry {
@@ -138,8 +147,8 @@ impl RenderpassState {
                     push_constant_ranges: &[],
                 });
 
-        let mut vertex_buffers = Vec::with_capacity(VERTEX_BUFFER_COUNT);
-        for _ in 0..VERTEX_BUFFER_COUNT {
+        let mut vertex_buffers = Vec::with_capacity(vertex_buffer_count);
+        for _ in 0..vertex_buffer_count {
             vertex_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                 label: None,
                 size: 3 * 16,
@@ -149,8 +158,8 @@ impl RenderpassState {
         }
         random.shuffle(&mut vertex_buffers);
 
-        let mut index_buffers = Vec::with_capacity(DRAW_COUNT);
-        for _ in 0..DRAW_COUNT {
+        let mut index_buffers = Vec::with_capacity(draw_count);
+        for _ in 0..draw_count {
             index_buffers.push(device_state.device.create_buffer(&wgpu::BufferDescriptor {
                 label: None,
                 size: 3 * 4,
@@ -246,7 +255,7 @@ impl RenderpassState {
                                 view_dimension: wgpu::TextureViewDimension::D2,
                                 multisampled: false,
                             },
-                            count: Some(NonZeroU32::new(TEXTURE_COUNT as u32).unwrap()),
+                            count: Some(NonZeroU32::new(texture_count as u32).unwrap()),
                         }],
                     });
 
@@ -324,10 +333,15 @@ impl RenderpassState {
         }
     }
 
-    fn run_subpass(&self, pass_number: usize, total_passes: usize) -> wgpu::CommandBuffer {
+    fn run_subpass(
+        &self,
+        pass_number: usize,
+        total_passes: usize,
+        draw_count: usize,
+    ) -> wgpu::CommandBuffer {
         profiling::scope!("Renderpass", &format!("Pass {pass_number}/{total_passes}"));
 
-        let draws_per_pass = DRAW_COUNT / total_passes;
+        let draws_per_pass = draw_count / total_passes;
 
         let mut encoder = self
             .device_state
@@ -372,7 +386,7 @@ impl RenderpassState {
         encoder.finish()
     }
 
-    fn run_bindless_pass(&self) -> wgpu::CommandBuffer {
+    fn run_bindless_pass(&self, draw_count: usize) -> wgpu::CommandBuffer {
         profiling::scope!("Bindless Renderpass");
 
         let mut encoder = self
@@ -402,7 +416,7 @@ impl RenderpassState {
         }
         render_pass.set_index_buffer(self.index_buffers[0].slice(..), wgpu::IndexFormat::Uint32);
 
-        for draw_idx in 0..DRAW_COUNT {
+        for draw_idx in 0..draw_count {
             render_pass.draw_indexed(0..3, 0, draw_idx as u32..draw_idx as u32 + 1);
         }
 
@@ -415,13 +429,17 @@ impl RenderpassState {
 fn run_bench(ctx: &mut Criterion) {
     let state = Lazy::new(RenderpassState::new);
 
+    let draw_count = draw_count();
+    let vertex_buffer_count = draw_count * VERTEX_BUFFERS_PER_DRAW;
+    let texture_count = draw_count * TEXTURES_PER_DRAW;
+
     // Test 10k draw calls split up into 1, 2, 4, and 8 renderpasses
     let mut group = ctx.benchmark_group("Renderpass: Single Threaded");
-    group.throughput(Throughput::Elements(DRAW_COUNT as _));
+    group.throughput(Throughput::Elements(draw_count as _));
 
     for time_submit in [false, true] {
         for rpasses in [1, 2, 4, 8] {
-            let draws_per_pass = DRAW_COUNT / rpasses;
+            let draws_per_pass = draw_count / rpasses;
 
             let label = if time_submit {
                 "Submit Time"
@@ -451,7 +469,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                             let mut buffers: Vec<wgpu::CommandBuffer> = Vec::with_capacity(rpasses);
                             for i in 0..rpasses {
-                                buffers.push(state.run_subpass(i, rpasses));
+                                buffers.push(state.run_subpass(i, rpasses, draw_count));
                             }
 
                             if time_submit {
@@ -479,10 +497,10 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k draw calls split up over 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Renderpass: Multi Threaded");
-    group.throughput(Throughput::Elements(DRAW_COUNT as _));
+    group.throughput(Throughput::Elements(draw_count as _));
 
     for threads in [2, 4, 8] {
-        let draws_per_pass = DRAW_COUNT / threads;
+        let draws_per_pass = draw_count / threads;
         group.bench_function(
             &format!("{threads} threads x {draws_per_pass} draws"),
             |b| {
@@ -505,7 +523,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                         let buffers = (0..threads)
                             .into_par_iter()
-                            .map(|i| state.run_subpass(i, threads))
+                            .map(|i| state.run_subpass(i, threads, draw_count))
                             .collect::<Vec<_>>();
 
                         duration += start.elapsed();
@@ -523,9 +541,9 @@ fn run_bench(ctx: &mut Criterion) {
 
     // Test 10k draw calls split up over 1, 2, 4, and 8 threads.
     let mut group = ctx.benchmark_group("Renderpass: Bindless");
-    group.throughput(Throughput::Elements(DRAW_COUNT as _));
+    group.throughput(Throughput::Elements(draw_count as _));
 
-    group.bench_function(&format!("{DRAW_COUNT} draws"), |b| {
+    group.bench_function(&format!("{draw_count} draws"), |b| {
         Lazy::force(&state);
 
         b.iter_custom(|iters| {
@@ -543,7 +561,7 @@ fn run_bench(ctx: &mut Criterion) {
 
                 let start = Instant::now();
 
-                let buffer = state.run_bindless_pass();
+                let buffer = state.run_bindless_pass(draw_count);
 
                 duration += start.elapsed();
 
@@ -559,7 +577,7 @@ fn run_bench(ctx: &mut Criterion) {
     ctx.bench_function(
         &format!(
             "Renderpass: Empty Submit with {} Resources",
-            TEXTURE_COUNT + VERTEX_BUFFER_COUNT
+            texture_count + vertex_buffer_count
         ),
         |b| {
             Lazy::force(&state);