Optimise GL fence checking by querying less (#6427)

This commit is contained in:
Nathan Adams 2024-10-21 02:02:10 +02:00 committed by GitHub
parent b3f665be7d
commit 759c3262ad
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 29 additions and 11 deletions

View File

@ -142,6 +142,7 @@ By @bradwerth [#6216](https://github.com/gfx-rs/wgpu/pull/6216).
- Fix GL debug message callbacks not being properly cleaned up (causing UB). By @Imberflur in [#6114](https://github.com/gfx-rs/wgpu/pull/6114)
- Fix calling `slice::from_raw_parts` with unaligned pointers in push constant handling. By @Imberflur in [#6341](https://github.com/gfx-rs/wgpu/pull/6341)
- Optimise fence checking when `Queue::submit` is called many times per frame. By @dinnerbone in [#6427](https://github.com/gfx-rs/wgpu/pull/6427)
#### WebGPU

View File

@ -8,6 +8,7 @@ use std::{
sync::{Arc, Mutex},
};
use crate::AtomicFenceValue;
use arrayvec::ArrayVec;
use std::sync::atomic::Ordering;
@ -1534,7 +1535,7 @@ impl crate::Device for super::Device {
/// Creates a new `super::Fence`.
///
/// Increments the `fences` counter, then returns a fence whose completed
/// value starts at zero and whose `pending` list is empty.
unsafe fn create_fence(&self) -> Result<super::Fence, crate::DeviceError> {
self.counters.fences.add(1);
Ok(super::Fence {
// NOTE(review): `last_completed` is initialised twice here (a plain `0` and
// `AtomicFenceValue::new(0)`); this looks like the previous and the current
// form of the same line shown together — confirm which one applies.
last_completed: 0,
last_completed: AtomicFenceValue::new(0),
pending: Vec::new(),
})
}
@ -1560,7 +1561,7 @@ impl crate::Device for super::Device {
wait_value: crate::FenceValue,
timeout_ms: u32,
) -> Result<bool, crate::DeviceError> {
if fence.last_completed < wait_value {
if fence.last_completed.load(Ordering::Relaxed) < wait_value {
let gl = &self.shared.context.lock();
let timeout_ns = if cfg!(any(webgl, Emscripten)) {
0
@ -1572,19 +1573,25 @@ impl crate::Device for super::Device {
.iter()
.find(|&&(value, _)| value >= wait_value)
{
return match unsafe {
let signalled = match unsafe {
gl.client_wait_sync(sync, glow::SYNC_FLUSH_COMMANDS_BIT, timeout_ns as i32)
} {
// for some reason firefox returns WAIT_FAILED, to investigate
#[cfg(any(webgl, Emscripten))]
glow::WAIT_FAILED => {
log::warn!("wait failed!");
Ok(false)
false
}
glow::TIMEOUT_EXPIRED => Ok(false),
glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => Ok(true),
_ => Err(crate::DeviceError::Lost),
glow::TIMEOUT_EXPIRED => false,
glow::CONDITION_SATISFIED | glow::ALREADY_SIGNALED => true,
_ => return Err(crate::DeviceError::Lost),
};
if signalled {
fence
.last_completed
.fetch_max(wait_value, Ordering::Relaxed);
}
return Ok(signalled);
}
}
Ok(true)

View File

@ -120,7 +120,7 @@ use glow::HasContext;
use naga::FastHashMap;
use parking_lot::Mutex;
use std::sync::atomic::{AtomicU32, AtomicU8};
use std::sync::atomic::{AtomicU32, AtomicU8, Ordering};
use std::{fmt, ops::Range, sync::Arc};
#[derive(Clone, Debug)]
@ -718,7 +718,7 @@ impl crate::DynQuerySet for QuerySet {}
#[derive(Debug)]
/// Fence state: a cached "last completed" value plus the list of sync objects
/// that have been submitted but not yet retired.
pub struct Fence {
// Highest fence value recorded as completed. `get_latest` reads it as the
// starting point for its scan and raises it with `fetch_max`.
// NOTE(review): the field is declared twice (as `FenceValue` and as
// `AtomicFenceValue`); this looks like the previous and the current form of
// the same line shown together — confirm which one applies.
last_completed: crate::FenceValue,
last_completed: crate::AtomicFenceValue,
// Pairs of (fence value, GL sync object). `get_latest` walks them in order
// and stops at the first one that is not signalled.
pending: Vec<(crate::FenceValue, glow::Fence)>,
}
@ -743,13 +743,24 @@ unsafe impl Sync for Fence {}
impl Fence {
/// Returns the latest fence value found to be signalled.
///
/// The scan starts from the cached `last_completed` value, so pending entries
/// at or below it are skipped without querying GL. Each remaining entry is
/// queried with `get_sync_status`; the scan stops at the first entry that is
/// not `SIGNALED`. The result is folded back into `last_completed` with
/// `fetch_max` before being returned.
fn get_latest(&self, gl: &glow::Context) -> crate::FenceValue {
// NOTE(review): `max_value` is bound twice in a row (the second binding
// shadows the first); this looks like the previous and the current form of
// the same line shown together — confirm which one applies.
let mut max_value = self.last_completed;
let mut max_value = self.last_completed.load(Ordering::Relaxed);
for &(value, sync) in self.pending.iter() {
if value <= max_value {
// We already know this was good, no need to check again
continue;
}
// Ask GL for the current status of this sync object.
let status = unsafe { gl.get_sync_status(sync) };
if status == glow::SIGNALED {
max_value = value;
} else {
// Anything after the first unsignalled is guaranteed to also be unsignalled
break;
}
}
// Track the latest value, to save ourselves some querying later
// (`fetch_max` keeps whichever of the stored and the new value is larger).
self.last_completed.fetch_max(max_value, Ordering::Relaxed);
max_value
}
@ -763,7 +774,6 @@ impl Fence {
}
}
self.pending.retain(|&(value, _)| value > latest);
self.last_completed = latest;
}
}