Merge branch 'trunk' into fix-perf

This commit is contained in:
rudderbucky 2024-11-14 22:10:40 +05:30 committed by GitHub
commit 481ce70512
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
30 changed files with 519 additions and 561 deletions

View File

@ -128,6 +128,7 @@ By @ErichDonGubler in [#6456](https://github.com/gfx-rs/wgpu/pull/6456), [#6148]
- Check for device mismatches when beginning render and compute passes. By @ErichDonGubler in [#6497](https://github.com/gfx-rs/wgpu/pull/6497).
- Lower `QUERY_SET_MAX_QUERIES` (and enforced limits) from 8192 to 4096 to match WebGPU spec. By @ErichDonGubler in [#6525](https://github.com/gfx-rs/wgpu/pull/6525).
- Allow non-filterable float on texture bindings never used with samplers when using a derived bind group layout. By @ErichDonGubler in [#6531](https://github.com/gfx-rs/wgpu/pull/6531/).
- Replace potentially unsound usage of `PreHashedMap` with `FastHashMap`. By @jamienicol in [#6541](https://github.com/gfx-rs/wgpu/pull/6541).
#### Naga

View File

@ -3497,13 +3497,13 @@ impl<'a, W: Write> Writer<'a, W> {
Mf::Transpose => "transpose",
Mf::Determinant => "determinant",
Mf::QuantizeToF16 => match *ctx.resolve_type(arg, &self.module.types) {
crate::TypeInner::Scalar { .. } => {
TypeInner::Scalar { .. } => {
write!(self.out, "unpackHalf2x16(packHalf2x16(vec2(")?;
self.write_expr(arg, ctx)?;
write!(self.out, "))).x")?;
return Ok(());
}
crate::TypeInner::Vector {
TypeInner::Vector {
size: crate::VectorSize::Bi,
..
} => {
@ -3512,7 +3512,7 @@ impl<'a, W: Write> Writer<'a, W> {
write!(self.out, "))")?;
return Ok(());
}
crate::TypeInner::Vector {
TypeInner::Vector {
size: crate::VectorSize::Tri,
..
} => {
@ -3523,7 +3523,7 @@ impl<'a, W: Write> Writer<'a, W> {
write!(self.out, ".zz)).x)")?;
return Ok(());
}
crate::TypeInner::Vector {
TypeInner::Vector {
size: crate::VectorSize::Quad,
..
} => {

View File

@ -630,7 +630,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().expect_fail(FailureCase::webgl2()))
.run_sync(|ctx| {
// This test checks that when the device is dropped (such as in a GC),
// the provided DeviceLostClosure is called with reason DeviceLostReason::Unknown.
// the provided DeviceLostClosure is called with reason DeviceLostReason::Dropped.
// Fails on webgl because webgl doesn't implement drop.
static WAS_CALLED: std::sync::atomic::AtomicBool = AtomicBool::new(false);
@ -642,8 +642,7 @@ static DEVICE_DROP_THEN_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
});
ctx.device.set_device_lost_callback(callback);
// Drop the device.
drop(ctx.device);
drop(ctx);
assert!(
WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst),
@ -676,35 +675,6 @@ static DEVICE_LOST_REPLACED_CALLBACK: GpuTestConfiguration = GpuTestConfiguratio
);
});
#[gpu_test]
static DROPPED_GLOBAL_THEN_DEVICE_LOST: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default().skip(FailureCase::always()))
.run_sync(|ctx| {
// What we want to do is to drop the Global, forcing a code path that
// eventually calls Device.prepare_to_die, without having first dropped
// the device. This models what might happen in a user agent that kills
// wgpu without providing a more orderly shutdown. In such a case, the
// device lost callback should be invoked with the message "Device is
// dying."
static WAS_CALLED: AtomicBool = AtomicBool::new(false);
// Set a LoseDeviceCallback on the device.
let callback = Box::new(|reason, message| {
WAS_CALLED.store(true, std::sync::atomic::Ordering::SeqCst);
assert_eq!(reason, wgt::DeviceLostReason::Dropped);
assert_eq!(message, "Device is dying.");
});
ctx.device.set_device_lost_callback(callback);
// TODO: Drop the Global, somehow.
// Confirm that the callback was invoked.
assert!(
WAS_CALLED.load(std::sync::atomic::Ordering::SeqCst),
"Device lost callback should have been called."
);
});
#[gpu_test]
static DIFFERENT_BGL_ORDER_BW_SHADER_AND_API: GpuTestConfiguration = GpuTestConfiguration::new()
.parameters(TestParameters::default())

View File

@ -1,5 +1,5 @@
use crate::{
device::queue::TempResource,
device::{queue::TempResource, Device},
global::Global,
hub::Hub,
id::CommandEncoderId,
@ -20,10 +20,7 @@ use crate::{
use wgt::{math::align_to, BufferUsages, Features};
use super::CommandBufferMutable;
use crate::device::queue::PendingWrites;
use hal::BufferUses;
use std::mem::ManuallyDrop;
use std::ops::DerefMut;
use std::{
cmp::max,
num::NonZeroU64,
@ -184,7 +181,7 @@ impl Global {
build_command_index,
&mut buf_storage,
hub,
device.pending_writes.lock().deref_mut(),
device,
)?;
let snatch_guard = device.snatchable_lock.read();
@ -248,7 +245,9 @@ impl Global {
.get()
.map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?;
cmd_buf_data.trackers.tlas_s.set_single(tlas.clone());
device.pending_writes.lock().insert_tlas(&tlas);
if let Some(queue) = device.get_queue() {
queue.pending_writes.lock().insert_tlas(&tlas);
}
cmd_buf_data.tlas_actions.push(TlasAction {
tlas: tlas.clone(),
@ -349,10 +348,12 @@ impl Global {
}
}
device
.pending_writes
.lock()
.consume_temp(TempResource::ScratchBuffer(scratch_buffer));
if let Some(queue) = device.get_queue() {
queue
.pending_writes
.lock()
.consume_temp(TempResource::ScratchBuffer(scratch_buffer));
}
Ok(())
}
@ -495,7 +496,7 @@ impl Global {
build_command_index,
&mut buf_storage,
hub,
device.pending_writes.lock().deref_mut(),
device,
)?;
let snatch_guard = device.snatchable_lock.read();
@ -516,7 +517,9 @@ impl Global {
.get(package.tlas_id)
.get()
.map_err(|_| BuildAccelerationStructureError::InvalidTlasId)?;
device.pending_writes.lock().insert_tlas(&tlas);
if let Some(queue) = device.get_queue() {
queue.pending_writes.lock().insert_tlas(&tlas);
}
cmd_buf_data.trackers.tlas_s.set_single(tlas.clone());
tlas_lock_store.push((Some(package), tlas.clone()))
@ -689,7 +692,7 @@ impl Global {
if let Some(ref staging_buffer) = staging_buffer {
cmd_buf_raw.transition_buffers(&[hal::BufferBarrier::<dyn hal::DynBuffer> {
buffer: staging_buffer.raw(),
usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
usage: BufferUses::MAP_WRITE..BufferUses::COPY_SRC,
}]);
}
}
@ -711,7 +714,7 @@ impl Global {
unsafe {
cmd_buf_raw.transition_buffers(&[hal::BufferBarrier::<dyn hal::DynBuffer> {
buffer: tlas.instance_buffer.as_ref(),
usage: hal::BufferUses::MAP_READ..hal::BufferUses::COPY_DST,
usage: BufferUses::MAP_READ..BufferUses::COPY_DST,
}]);
let temp = hal::BufferCopy {
src_offset: range.start as u64,
@ -742,17 +745,21 @@ impl Global {
}
if let Some(staging_buffer) = staging_buffer {
device
.pending_writes
.lock()
.consume_temp(TempResource::StagingBuffer(staging_buffer));
if let Some(queue) = device.get_queue() {
queue
.pending_writes
.lock()
.consume_temp(TempResource::StagingBuffer(staging_buffer));
}
}
}
device
.pending_writes
.lock()
.consume_temp(TempResource::ScratchBuffer(scratch_buffer));
if let Some(queue) = device.get_queue() {
queue
.pending_writes
.lock()
.consume_temp(TempResource::ScratchBuffer(scratch_buffer));
}
Ok(())
}
@ -839,7 +846,7 @@ fn iter_blas<'a>(
build_command_index: NonZeroU64,
buf_storage: &mut Vec<TriangleBufferStore<'a>>,
hub: &Hub,
pending_writes: &mut ManuallyDrop<PendingWrites>,
device: &Device,
) -> Result<(), BuildAccelerationStructureError> {
let mut temp_buffer = Vec::new();
for entry in blas_iter {
@ -849,7 +856,9 @@ fn iter_blas<'a>(
.get()
.map_err(|_| BuildAccelerationStructureError::InvalidBlasId)?;
cmd_buf_data.trackers.blas_s.set_single(blas.clone());
pending_writes.insert_blas(&blas);
if let Some(queue) = device.get_queue() {
queue.pending_writes.lock().insert_blas(&blas);
}
cmd_buf_data.blas_actions.push(BlasAction {
blas: blas.clone(),
@ -951,7 +960,7 @@ fn iter_blas<'a>(
}
let data = cmd_buf_data.trackers.buffers.set_single(
&index_buffer,
hal::BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT,
BufferUses::BOTTOM_LEVEL_ACCELERATION_STRUCTURE_INPUT,
);
Some((index_buffer.clone(), data))
} else {

View File

@ -219,9 +219,11 @@ impl Global {
device.check_is_valid()?;
buffer.check_usage(wgt::BufferUsages::MAP_WRITE)?;
let last_submission = device
.lock_life()
.get_buffer_latest_submission_index(&buffer);
let last_submission = device.get_queue().and_then(|queue| {
queue
.lock_life()
.get_buffer_latest_submission_index(&buffer)
});
if let Some(last_submission) = last_submission {
device.wait_for_submit(last_submission)?;
@ -2081,20 +2083,7 @@ impl Global {
profiling::scope!("Device::drop");
api_log!("Device::drop {device_id:?}");
let device = self.hub.devices.remove(device_id);
let device_lost_closure = device.lock_life().device_lost_closure.take();
if let Some(closure) = device_lost_closure {
closure.call(DeviceLostReason::Dropped, String::from("Device dropped."));
}
// The things `Device::prepare_to_die` takes care are mostly
// unnecessary here. We know our queue is empty, so we don't
// need to wait for submissions or triage them. We know we were
// just polled, so `life_tracker.free_resources` is empty.
debug_assert!(device.lock_life().queue_empty());
device.pending_writes.lock().deactivate();
drop(device);
self.hub.devices.remove(device_id);
}
// This closure will be called exactly once during "lose the device",
@ -2106,14 +2095,14 @@ impl Global {
) {
let device = self.hub.devices.get(device_id);
let mut life_tracker = device.lock_life();
if let Some(existing_closure) = life_tracker.device_lost_closure.take() {
// It's important to not hold the lock while calling the closure.
drop(life_tracker);
existing_closure.call(DeviceLostReason::ReplacedCallback, "".to_string());
life_tracker = device.lock_life();
let old_device_lost_closure = device
.device_lost_closure
.lock()
.replace(device_lost_closure);
if let Some(old_device_lost_closure) = old_device_lost_closure {
old_device_lost_closure.call(DeviceLostReason::ReplacedCallback, "".to_string());
}
life_tracker.device_lost_closure = Some(device_lost_closure);
}
pub fn device_destroy(&self, device_id: DeviceId) {

View File

@ -1,9 +1,9 @@
use crate::{
device::{
queue::{EncoderInFlight, SubmittedWorkDoneClosure, TempResource},
DeviceError, DeviceLostClosure,
DeviceError,
},
resource::{self, Buffer, Texture, Trackable},
resource::{Buffer, Texture, Trackable},
snatch::SnatchGuard,
SubmissionIndex,
};
@ -196,11 +196,6 @@ pub(crate) struct LifetimeTracker {
/// must happen _after_ all mapped buffer callbacks are mapped, so we defer them
/// here until the next time the device is maintained.
work_done_closures: SmallVec<[SubmittedWorkDoneClosure; 1]>,
/// Closure to be called on "lose the device". This is invoked directly by
/// device.lose or by the UserCallbacks returned from maintain when the device
/// has been destroyed and its queues are empty.
pub device_lost_closure: Option<DeviceLostClosure>,
}
impl LifetimeTracker {
@ -209,7 +204,6 @@ impl LifetimeTracker {
active: Vec::new(),
ready_to_map: Vec::new(),
work_done_closures: SmallVec::new(),
device_lost_closure: None,
}
}
@ -394,7 +388,6 @@ impl LifetimeTracker {
#[must_use]
pub(crate) fn handle_mapping(
&mut self,
raw: &dyn hal::DynDevice,
snatch_guard: &SnatchGuard,
) -> Vec<super::BufferMapPendingClosure> {
if self.ready_to_map.is_empty() {
@ -404,61 +397,10 @@ impl LifetimeTracker {
Vec::with_capacity(self.ready_to_map.len());
for buffer in self.ready_to_map.drain(..) {
// This _cannot_ be inlined into the match. If it is, the lock will be held
// open through the whole match, resulting in a deadlock when we try to re-lock
// the buffer back to active.
let mapping = std::mem::replace(
&mut *buffer.map_state.lock(),
resource::BufferMapState::Idle,
);
let pending_mapping = match mapping {
resource::BufferMapState::Waiting(pending_mapping) => pending_mapping,
// Mapping cancelled
resource::BufferMapState::Idle => continue,
// Mapping queued at least twice by map -> unmap -> map
// and was already successfully mapped below
resource::BufferMapState::Active { .. } => {
*buffer.map_state.lock() = mapping;
continue;
}
_ => panic!("No pending mapping."),
};
let status = if pending_mapping.range.start != pending_mapping.range.end {
let host = pending_mapping.op.host;
let size = pending_mapping.range.end - pending_mapping.range.start;
match super::map_buffer(
raw,
&buffer,
pending_mapping.range.start,
size,
host,
snatch_guard,
) {
Ok(mapping) => {
*buffer.map_state.lock() = resource::BufferMapState::Active {
mapping,
range: pending_mapping.range.clone(),
host,
};
Ok(())
}
Err(e) => {
log::error!("Mapping failed: {e}");
Err(e)
}
}
} else {
*buffer.map_state.lock() = resource::BufferMapState::Active {
mapping: hal::BufferMapping {
ptr: std::ptr::NonNull::dangling(),
is_coherent: true,
},
range: pending_mapping.range,
host: pending_mapping.op.host,
};
Ok(())
};
pending_callbacks.push((pending_mapping.op, status));
match buffer.map(snatch_guard) {
Some(cb) => pending_callbacks.push(cb),
None => continue,
}
}
pending_callbacks
}

View File

@ -298,24 +298,25 @@ impl DeviceLostClosure {
}
}
fn map_buffer(
raw: &dyn hal::DynDevice,
pub(crate) fn map_buffer(
buffer: &Buffer,
offset: BufferAddress,
size: BufferAddress,
kind: HostMap,
snatch_guard: &SnatchGuard,
) -> Result<hal::BufferMapping, BufferAccessError> {
let raw_device = buffer.device.raw();
let raw_buffer = buffer.try_raw(snatch_guard)?;
let mapping = unsafe {
raw.map_buffer(raw_buffer, offset..offset + size)
raw_device
.map_buffer(raw_buffer, offset..offset + size)
.map_err(|e| buffer.device.handle_hal_error(e))?
};
if !mapping.is_coherent && kind == HostMap::Read {
#[allow(clippy::single_range_in_vec_init)]
unsafe {
raw.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
raw_device.invalidate_mapped_ranges(raw_buffer, &[offset..offset + size]);
}
}
@ -370,7 +371,7 @@ fn map_buffer(
&& kind == HostMap::Read
&& buffer.usage.contains(wgt::BufferUsages::MAP_WRITE)
{
unsafe { raw.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
unsafe { raw_device.flush_mapped_ranges(raw_buffer, &[uninitialized]) };
}
}
}

View File

@ -14,13 +14,14 @@ use crate::{
hal_label,
id::{self, QueueId},
init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange},
lock::RwLockWriteGuard,
lock::{rank, Mutex, MutexGuard, RwLockWriteGuard},
resource::{
Buffer, BufferAccessError, BufferMapState, DestroyedBuffer, DestroyedResourceError,
DestroyedTexture, Fallible, FlushedStagingBuffer, InvalidResourceError, Labeled,
ParentDevice, ResourceErrorIdent, StagingBuffer, Texture, TextureInner, Trackable,
},
resource_log,
snatch::SnatchGuard,
track::{self, Tracker, TrackerIndex},
FastHashMap, SubmissionIndex,
};
@ -37,24 +38,95 @@ use std::{
};
use thiserror::Error;
use super::Device;
use super::{life::LifetimeTracker, Device};
pub struct Queue {
raw: ManuallyDrop<Box<dyn hal::DynQueue>>,
raw: Box<dyn hal::DynQueue>,
pub(crate) device: Arc<Device>,
pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
life_tracker: Mutex<LifetimeTracker>,
}
impl Queue {
pub(crate) fn new(device: Arc<Device>, raw: Box<dyn hal::DynQueue>) -> Self {
Queue {
raw: ManuallyDrop::new(raw),
device,
pub(crate) fn new(
device: Arc<Device>,
raw: Box<dyn hal::DynQueue>,
) -> Result<Self, DeviceError> {
let pending_encoder = device
.command_allocator
.acquire_encoder(device.raw(), raw.as_ref())
.map_err(DeviceError::from_hal);
let pending_encoder = match pending_encoder {
Ok(pending_encoder) => pending_encoder,
Err(e) => {
return Err(e);
}
};
let mut pending_writes = PendingWrites::new(pending_encoder);
let zero_buffer = device.zero_buffer.as_ref();
pending_writes.activate();
unsafe {
pending_writes
.command_encoder
.transition_buffers(&[hal::BufferBarrier {
buffer: zero_buffer,
usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
}]);
pending_writes
.command_encoder
.clear_buffer(zero_buffer, 0..super::ZERO_BUFFER_SIZE);
pending_writes
.command_encoder
.transition_buffers(&[hal::BufferBarrier {
buffer: zero_buffer,
usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
}]);
}
let pending_writes = Mutex::new(
rank::QUEUE_PENDING_WRITES,
ManuallyDrop::new(pending_writes),
);
Ok(Queue {
raw,
device,
pending_writes,
life_tracker: Mutex::new(rank::QUEUE_LIFE_TRACKER, LifetimeTracker::new()),
})
}
pub(crate) fn raw(&self) -> &dyn hal::DynQueue {
self.raw.as_ref()
}
#[track_caller]
pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
self.life_tracker.lock()
}
pub(crate) fn maintain(
&self,
submission_index: u64,
snatch_guard: &SnatchGuard,
) -> (
SmallVec<[SubmittedWorkDoneClosure; 1]>,
Vec<super::BufferMapPendingClosure>,
bool,
) {
let mut life_tracker = self.lock_life();
let submission_closures =
life_tracker.triage_submissions(submission_index, &self.device.command_allocator);
let mapping_closures = life_tracker.handle_mapping(snatch_guard);
let queue_empty = life_tracker.queue_empty();
(submission_closures, mapping_closures, queue_empty)
}
}
crate::impl_resource_type!(Queue);
@ -70,9 +142,101 @@ crate::impl_storage_item!(Queue);
impl Drop for Queue {
fn drop(&mut self) {
resource_log!("Drop {}", self.error_ident());
// SAFETY: we never access `self.raw` beyond this point.
let queue = unsafe { ManuallyDrop::take(&mut self.raw) };
self.device.release_queue(queue);
let last_successful_submission_index = self
.device
.last_successful_submission_index
.load(Ordering::Acquire);
let fence = self.device.fence.read();
// Try waiting on the last submission using the following sequence of timeouts
let timeouts_in_ms = [100, 200, 400, 800, 1600, 3200];
for (i, timeout_ms) in timeouts_in_ms.into_iter().enumerate() {
let is_last_iter = i == timeouts_in_ms.len() - 1;
api_log!(
"Waiting on last submission. try: {}/{}. timeout: {}ms",
i + 1,
timeouts_in_ms.len(),
timeout_ms
);
let wait_res = unsafe {
self.device.raw().wait(
fence.as_ref(),
last_successful_submission_index,
#[cfg(not(target_arch = "wasm32"))]
timeout_ms,
#[cfg(target_arch = "wasm32")]
0, // WebKit and Chromium don't support a non-0 timeout
)
};
// Note: If we don't panic below we are in UB land (destroying resources while they are still in use by the GPU).
match wait_res {
Ok(true) => break,
Ok(false) => {
// It's fine that we timed out on WebGL; GL objects can be deleted early as they
// will be kept around by the driver if GPU work hasn't finished.
// Moreover, the way we emulate read mappings on WebGL allows us to execute map_buffer earlier than on other
// backends since getBufferSubData is synchronous with respect to the other previously enqueued GL commands.
// Relying on this behavior breaks the clean abstraction wgpu-hal tries to maintain and
// we should find ways to improve this. See https://github.com/gfx-rs/wgpu/issues/6538.
#[cfg(target_arch = "wasm32")]
{
break;
}
#[cfg(not(target_arch = "wasm32"))]
{
if is_last_iter {
panic!(
"We timed out while waiting on the last successful submission to complete!"
);
}
}
}
Err(e) => match e {
hal::DeviceError::OutOfMemory => {
if is_last_iter {
panic!(
"We ran into an OOM error while waiting on the last successful submission to complete!"
);
}
}
hal::DeviceError::Lost => {
self.device.handle_hal_error(e); // will lose the device
break;
}
hal::DeviceError::ResourceCreationFailed => unreachable!(),
hal::DeviceError::Unexpected => {
panic!(
"We ran into an unexpected error while waiting on the last successful submission to complete!"
);
}
},
}
}
drop(fence);
let snatch_guard = self.device.snatchable_lock.read();
let (submission_closures, mapping_closures, queue_empty) =
self.maintain(last_successful_submission_index, &snatch_guard);
drop(snatch_guard);
assert!(queue_empty);
let closures = crate::device::UserClosures {
mappings: mapping_closures,
submissions: submission_closures,
device_lost_invocations: SmallVec::new(),
};
// SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
pending_writes.dispose(self.device.raw());
closures.fire();
}
}
@ -345,15 +509,6 @@ impl PendingWrites {
}
self.command_encoder.as_mut()
}
pub fn deactivate(&mut self) {
if self.is_recording {
unsafe {
self.command_encoder.discard_encoding();
}
self.is_recording = false;
}
}
}
#[derive(Clone, Debug, Error)]
@ -427,7 +582,7 @@ impl Queue {
// freed, even if an error occurs. All paths from here must call
// `device.pending_writes.consume`.
let mut staging_buffer = StagingBuffer::new(&self.device, data_size)?;
let mut pending_writes = self.device.pending_writes.lock();
let mut pending_writes = self.pending_writes.lock();
let staging_buffer = {
profiling::scope!("copy");
@ -469,7 +624,7 @@ impl Queue {
let buffer = buffer.get()?;
let mut pending_writes = self.device.pending_writes.lock();
let mut pending_writes = self.pending_writes.lock();
// At this point, we have taken ownership of the staging_buffer from the
// user. Platform validation requires that the staging buffer always
@ -645,7 +800,7 @@ impl Queue {
.map_err(TransferError::from)?;
}
let mut pending_writes = self.device.pending_writes.lock();
let mut pending_writes = self.pending_writes.lock();
let encoder = pending_writes.activate();
// If the copy does not fully cover the layers, we need to initialize to
@ -897,7 +1052,7 @@ impl Queue {
let (selector, dst_base) = extract_texture_selector(&destination, &size, &dst)?;
let mut pending_writes = self.device.pending_writes.lock();
let mut pending_writes = self.pending_writes.lock();
let encoder = pending_writes.activate();
// If the copy does not fully cover the layers, we need to initialize to
@ -1054,6 +1209,13 @@ impl Queue {
}
}
if first_error.is_some() {
if let Ok(cmd_buf_data) = cmd_buf_data {
cmd_buf_data.destroy(&command_buffer.device);
}
continue;
}
let mut baked = match cmd_buf_data {
Ok(cmd_buf_data) => {
let res = validate_command_buffer(
@ -1077,10 +1239,6 @@ impl Queue {
}
};
if first_error.is_some() {
continue;
}
// execute resource transitions
if let Err(e) = unsafe {
baked.encoder.begin_encoding(hal_label(
@ -1166,7 +1324,7 @@ impl Queue {
}
}
let mut pending_writes = self.device.pending_writes.lock();
let mut pending_writes = self.pending_writes.lock();
{
used_surface_textures.set_size(self.device.tracker_indices.textures.size());
@ -1253,7 +1411,7 @@ impl Queue {
profiling::scope!("cleanup");
// this will register the new submission to the life time tracker
self.device.lock_life().track_submission(
self.lock_life().track_submission(
submit_index,
pending_writes.temp_resources.drain(..),
active_executions,
@ -1302,7 +1460,7 @@ impl Queue {
) -> Option<SubmissionIndex> {
api_log!("Queue::on_submitted_work_done");
//TODO: flush pending writes
self.device.lock_life().add_work_done_closure(closure)
self.lock_life().add_work_done_closure(closure)
}
}
@ -1459,7 +1617,7 @@ fn validate_command_buffer(
command_buffer: &CommandBuffer,
queue: &Queue,
cmd_buf_data: &crate::command::CommandBufferMutable,
snatch_guard: &crate::snatch::SnatchGuard<'_>,
snatch_guard: &SnatchGuard,
submit_surface_textures_owned: &mut FastHashMap<*const Texture, Arc<Texture>>,
used_surface_textures: &mut track::TextureUsageScope,
) -> Result<(), QueueSubmitError> {

View File

@ -4,12 +4,9 @@ use crate::{
binding_model::{self, BindGroup, BindGroupLayout, BindGroupLayoutEntryError},
command, conv,
device::{
bgl, create_validator,
life::{LifetimeTracker, WaitIdleError},
map_buffer,
queue::PendingWrites,
AttachmentData, DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures,
RenderPassContext, CLEANUP_WAIT_MS,
bgl, create_validator, life::WaitIdleError, map_buffer, AttachmentData,
DeviceLostInvocation, HostMap, MissingDownlevelFlags, MissingFeatures, RenderPassContext,
CLEANUP_WAIT_MS,
},
hal_label,
init_tracker::{
@ -17,7 +14,7 @@ use crate::{
TextureInitTrackerAction,
},
instance::Adapter,
lock::{rank, Mutex, MutexGuard, RwLock},
lock::{rank, Mutex, RwLock},
pipeline,
pool::ResourcePool,
resource::{
@ -32,7 +29,7 @@ use crate::{
},
validation::{self, validate_color_attachment_bytes_per_sample},
weak_vec::WeakVec,
FastHashMap, LabelHelpers, PreHashedKey, PreHashedMap,
FastHashMap, LabelHelpers,
};
use arrayvec::ArrayVec;
@ -53,35 +50,16 @@ use std::{
};
use super::{
queue::Queue, DeviceDescriptor, DeviceError, UserClosures, ENTRYPOINT_FAILURE_ERROR,
ZERO_BUFFER_SIZE,
queue::Queue, DeviceDescriptor, DeviceError, DeviceLostClosure, UserClosures,
ENTRYPOINT_FAILURE_ERROR, ZERO_BUFFER_SIZE,
};
/// Structure describing a logical device. Some members are internally mutable,
/// stored behind mutexes.
///
/// TODO: establish clear order of locking for these:
/// `life_tracker`, `trackers`, `render_passes`, `pending_writes`, `trace`.
///
/// Currently, the rules are:
/// 1. `life_tracker` is locked after `hub.devices`, enforced by the type system
/// 1. `self.trackers` is locked last (unenforced)
/// 1. `self.trace` is locked last (unenforced)
///
/// Right now avoid locking twice same resource or registry in a call execution
/// and minimize the locking to the minimum scope possible
/// Unless otherwise specified, no lock may be acquired while holding another lock.
/// This means that you must inspect function calls made while a lock is held
/// to see what locks the callee may try to acquire.
///
/// Important:
/// When locking pending_writes please check that trackers is not locked
/// trackers should be locked only when needed for the shortest time possible
pub struct Device {
raw: ManuallyDrop<Box<dyn hal::DynDevice>>,
raw: Box<dyn hal::DynDevice>,
pub(crate) adapter: Arc<Adapter>,
pub(crate) queue: OnceLock<Weak<Queue>>,
queue_to_drop: OnceLock<Box<dyn hal::DynQueue>>,
pub(crate) zero_buffer: ManuallyDrop<Box<dyn hal::DynBuffer>>,
/// The `label` from the descriptor used to create the resource.
label: String,
@ -126,14 +104,14 @@ pub struct Device {
/// using ref-counted references for internal access.
pub(crate) valid: AtomicBool,
/// All live resources allocated with this [`Device`].
///
/// Has to be locked temporarily only (locked last)
/// and never before pending_writes
/// Closure to be called on "lose the device". This is invoked directly by
/// device.lose or by the UserCallbacks returned from maintain when the device
/// has been destroyed and its queues are empty.
pub(crate) device_lost_closure: Mutex<Option<DeviceLostClosure>>,
/// Stores the state of buffers and textures.
pub(crate) trackers: Mutex<DeviceTracker>,
pub(crate) tracker_indices: TrackerIndexAllocators,
// Life tracker should be locked right after the device and before anything else.
life_tracker: Mutex<LifetimeTracker>,
/// Pool of bind group layouts, allowing deduplication.
pub(crate) bgl_pool: ResourcePool<bgl::EntryMap, BindGroupLayout>,
pub(crate) alignments: hal::Alignments,
@ -141,15 +119,14 @@ pub struct Device {
pub(crate) features: wgt::Features,
pub(crate) downlevel: wgt::DownlevelCapabilities,
pub(crate) instance_flags: wgt::InstanceFlags,
pub(crate) pending_writes: Mutex<ManuallyDrop<PendingWrites>>,
pub(crate) deferred_destroy: Mutex<Vec<DeferredDestroy>>,
#[cfg(feature = "trace")]
pub(crate) trace: Mutex<Option<trace::Trace>>,
pub(crate) usage_scopes: UsageScopePool,
pub(crate) last_acceleration_structure_build_command_index: AtomicU64,
#[cfg(feature = "indirect-validation")]
pub(crate) indirect_validation: Option<crate::indirect_validation::IndirectValidation>,
// needs to be dropped last
#[cfg(feature = "trace")]
pub(crate) trace: Mutex<Option<trace::Trace>>,
}
pub(crate) enum DeferredDestroy {
@ -171,26 +148,24 @@ impl std::fmt::Debug for Device {
impl Drop for Device {
fn drop(&mut self) {
resource_log!("Drop {}", self.error_ident());
// SAFETY: We are in the Drop impl and we don't use self.raw anymore after this point.
let raw = unsafe { ManuallyDrop::take(&mut self.raw) };
if let Some(closure) = self.device_lost_closure.lock().take() {
closure.call(DeviceLostReason::Dropped, String::from("Device dropped."));
}
// SAFETY: We are in the Drop impl and we don't use self.zero_buffer anymore after this point.
let zero_buffer = unsafe { ManuallyDrop::take(&mut self.zero_buffer) };
// SAFETY: We are in the Drop impl and we don't use self.pending_writes anymore after this point.
let pending_writes = unsafe { ManuallyDrop::take(&mut self.pending_writes.lock()) };
// SAFETY: We are in the Drop impl and we don't use self.fence anymore after this point.
let fence = unsafe { ManuallyDrop::take(&mut self.fence.write()) };
pending_writes.dispose(raw.as_ref());
self.command_allocator.dispose(raw.as_ref());
self.command_allocator.dispose(self.raw.as_ref());
#[cfg(feature = "indirect-validation")]
self.indirect_validation
.take()
.unwrap()
.dispose(raw.as_ref());
.dispose(self.raw.as_ref());
unsafe {
raw.destroy_buffer(zero_buffer);
raw.destroy_fence(fence);
let queue = self.queue_to_drop.take().unwrap();
raw.exit(queue);
self.raw.destroy_buffer(zero_buffer);
self.raw.destroy_fence(fence);
}
}
}
@ -222,7 +197,6 @@ impl Device {
impl Device {
pub(crate) fn new(
raw_device: Box<dyn hal::DynDevice>,
raw_queue: &dyn hal::DynQueue,
adapter: &Arc<Adapter>,
desc: &DeviceDescriptor,
trace_path: Option<&std::path::Path>,
@ -235,10 +209,6 @@ impl Device {
let fence = unsafe { raw_device.create_fence() }.map_err(DeviceError::from_hal)?;
let command_allocator = command::CommandAllocator::new();
let pending_encoder = command_allocator
.acquire_encoder(raw_device.as_ref(), raw_queue)
.map_err(DeviceError::from_hal)?;
let mut pending_writes = PendingWrites::new(pending_encoder);
// Create zeroed buffer used for texture clears.
let zero_buffer = unsafe {
@ -250,24 +220,6 @@ impl Device {
})
}
.map_err(DeviceError::from_hal)?;
pending_writes.activate();
unsafe {
pending_writes
.command_encoder
.transition_buffers(&[hal::BufferBarrier {
buffer: zero_buffer.as_ref(),
usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
}]);
pending_writes
.command_encoder
.clear_buffer(zero_buffer.as_ref(), 0..ZERO_BUFFER_SIZE);
pending_writes
.command_encoder
.transition_buffers(&[hal::BufferBarrier {
buffer: zero_buffer.as_ref(),
usage: hal::BufferUses::COPY_DST..hal::BufferUses::COPY_SRC,
}]);
}
let alignments = adapter.raw.capabilities.alignments.clone();
let downlevel = adapter.raw.capabilities.downlevel.clone();
@ -292,10 +244,9 @@ impl Device {
};
Ok(Self {
raw: ManuallyDrop::new(raw_device),
raw: raw_device,
adapter: adapter.clone(),
queue: OnceLock::new(),
queue_to_drop: OnceLock::new(),
zero_buffer: ManuallyDrop::new(zero_buffer),
label: desc.label.to_string(),
command_allocator,
@ -304,9 +255,9 @@ impl Device {
fence: RwLock::new(rank::DEVICE_FENCE, ManuallyDrop::new(fence)),
snatchable_lock: unsafe { SnatchLock::new(rank::DEVICE_SNATCHABLE_LOCK) },
valid: AtomicBool::new(true),
device_lost_closure: Mutex::new(rank::DEVICE_LOST_CLOSURE, None),
trackers: Mutex::new(rank::DEVICE_TRACKERS, DeviceTracker::new()),
tracker_indices: TrackerIndexAllocators::new(),
life_tracker: Mutex::new(rank::DEVICE_LIFE_TRACKER, LifetimeTracker::new()),
bgl_pool: ResourcePool::new(),
#[cfg(feature = "trace")]
trace: Mutex::new(
@ -330,10 +281,6 @@ impl Device {
features: desc.required_features,
downlevel,
instance_flags,
pending_writes: Mutex::new(
rank::DEVICE_PENDING_WRITES,
ManuallyDrop::new(pending_writes),
),
deferred_destroy: Mutex::new(rank::DEVICE_DEFERRED_DESTROY, Vec::new()),
usage_scopes: Mutex::new(rank::DEVICE_USAGE_SCOPES, Default::default()),
// By starting at one, we can put the result in a NonZeroU64.
@ -372,15 +319,6 @@ impl Device {
DeviceError::from_hal(error)
}
pub(crate) fn release_queue(&self, queue: Box<dyn hal::DynQueue>) {
assert!(self.queue_to_drop.set(queue).is_ok());
}
#[track_caller]
pub(crate) fn lock_life<'a>(&'a self) -> MutexGuard<'a, LifetimeTracker> {
self.life_tracker.lock()
}
/// Run some destroy operations that were deferred.
///
/// Destroying the resources requires taking a write lock on the device's snatch lock,
@ -493,13 +431,12 @@ impl Device {
.map_err(|e| self.handle_hal_error(e))?;
}
let mut life_tracker = self.lock_life();
let submission_closures =
life_tracker.triage_submissions(submission_index, &self.command_allocator);
let mapping_closures = life_tracker.handle_mapping(self.raw(), &snatch_guard);
let queue_empty = life_tracker.queue_empty();
let (submission_closures, mapping_closures, queue_empty) =
if let Some(queue) = self.get_queue() {
queue.maintain(submission_index, &snatch_guard)
} else {
(SmallVec::new(), Vec::new(), true)
};
// Detect if we have been destroyed and now need to lose the device.
// If we are invalid (set at start of destroy) and our queue is empty,
@ -515,9 +452,9 @@ impl Device {
// If we have a DeviceLostClosure, build an invocation with the
// reason DeviceLostReason::Destroyed and no message.
if life_tracker.device_lost_closure.is_some() {
if let Some(device_lost_closure) = self.device_lost_closure.lock().take() {
device_lost_invocations.push(DeviceLostInvocation {
closure: life_tracker.device_lost_closure.take().unwrap(),
closure: device_lost_closure,
reason: DeviceLostReason::Destroyed,
message: String::new(),
});
@ -525,7 +462,6 @@ impl Device {
}
// Don't hold the locks while calling release_gpu_resources.
drop(life_tracker);
drop(fence);
drop(snatch_guard);
@ -666,14 +602,7 @@ impl Device {
}
} else {
let snatch_guard: SnatchGuard = self.snatchable_lock.read();
map_buffer(
self.raw(),
&buffer,
0,
map_size,
HostMap::Write,
&snatch_guard,
)?
map_buffer(&buffer, 0, map_size, HostMap::Write, &snatch_guard)?
};
*buffer.map_state.lock() = resource::BufferMapState::Active {
mapping,
@ -2712,18 +2641,18 @@ impl Device {
derived_group_layouts.pop();
}
let mut unique_bind_group_layouts = PreHashedMap::default();
let mut unique_bind_group_layouts = FastHashMap::default();
let bind_group_layouts = derived_group_layouts
.into_iter()
.map(|mut bgl_entry_map| {
bgl_entry_map.sort();
match unique_bind_group_layouts.entry(PreHashedKey::from_key(&bgl_entry_map)) {
match unique_bind_group_layouts.entry(bgl_entry_map) {
std::collections::hash_map::Entry::Occupied(v) => Ok(Arc::clone(v.get())),
std::collections::hash_map::Entry::Vacant(e) => {
match self.create_bind_group_layout(
&None,
bgl_entry_map,
e.key().clone(),
bgl::Origin::Derived,
) {
Ok(bgl) => {
@ -3609,13 +3538,15 @@ impl Device {
unsafe { self.raw().wait(fence.as_ref(), submission_index, !0) }
.map_err(|e| self.handle_hal_error(e))?;
drop(fence);
let closures = self
.lock_life()
.triage_submissions(submission_index, &self.command_allocator);
assert!(
closures.is_empty(),
"wait_for_submit is not expected to work with closures"
);
if let Some(queue) = self.get_queue() {
let closures = queue
.lock_life()
.triage_submissions(submission_index, &self.command_allocator);
assert!(
closures.is_empty(),
"wait_for_submit is not expected to work with closures"
);
}
}
Ok(())
}
@ -3675,13 +3606,7 @@ impl Device {
self.valid.store(false, Ordering::Release);
// 1) Resolve the GPUDevice device.lost promise.
let mut life_lock = self.lock_life();
let closure = life_lock.device_lost_closure.take();
// It's important to not hold the lock while calling the closure and while calling
// release_gpu_resources which may take the lock again.
drop(life_lock);
if let Some(device_lost_closure) = closure {
if let Some(device_lost_closure) = self.device_lost_closure.lock().take() {
device_lost_closure.call(DeviceLostReason::Unknown, message.to_string());
}
@ -3733,34 +3658,6 @@ impl Device {
}
}
impl Device {
/// Wait for idle and remove resources that we can, before we die.
pub(crate) fn prepare_to_die(&self) {
self.pending_writes.lock().deactivate();
let current_index = self
.last_successful_submission_index
.load(Ordering::Acquire);
if let Err(error) = unsafe {
let fence = self.fence.read();
self.raw()
.wait(fence.as_ref(), current_index, CLEANUP_WAIT_MS)
} {
log::error!("failed to wait for the device: {error}");
}
let mut life_tracker = self.lock_life();
let _ = life_tracker.triage_submissions(current_index, &self.command_allocator);
if let Some(device_lost_closure) = life_tracker.device_lost_closure.take() {
// It's important to not hold the lock while calling the closure.
drop(life_tracker);
device_lost_closure.call(DeviceLostReason::Dropped, "Device is dying.".to_string());
}
#[cfg(feature = "trace")]
{
*self.trace.lock() = None;
}
}
}
crate::impl_resource_type!(Device);
crate::impl_labeled!(Device);
crate::impl_storage_item!(Device);

View File

@ -89,10 +89,6 @@ impl Drop for Global {
fn drop(&mut self) {
profiling::scope!("Global::drop");
resource_log!("Global::drop");
for (_, device) in self.hub.devices.read().iter() {
device.prepare_to_die();
}
}
}

View File

@ -12,75 +12,3 @@ pub type FastHashSet<K> =
/// IndexMap using a fast, non-cryptographic hash algorithm.
pub type FastIndexMap<K, V> =
indexmap::IndexMap<K, V, std::hash::BuildHasherDefault<rustc_hash::FxHasher>>;
/// HashMap that uses pre-hashed keys and an identity hasher.
///
/// This is useful when you only need the key to lookup the value, and don't need to store the key,
/// particularly when the key is large.
pub type PreHashedMap<K, V> =
std::collections::HashMap<PreHashedKey<K>, V, std::hash::BuildHasherDefault<IdentityHasher>>;
/// A pre-hashed key using FxHash which allows the hashing operation to be disconnected
/// from the storage in the map.
pub struct PreHashedKey<K>(u64, std::marker::PhantomData<fn() -> K>);
impl<K> std::fmt::Debug for PreHashedKey<K> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("PreHashedKey").field(&self.0).finish()
}
}
impl<K> Copy for PreHashedKey<K> {}
impl<K> Clone for PreHashedKey<K> {
fn clone(&self) -> Self {
*self
}
}
impl<K> PartialEq for PreHashedKey<K> {
fn eq(&self, other: &Self) -> bool {
self.0 == other.0
}
}
impl<K> Eq for PreHashedKey<K> {}
impl<K> std::hash::Hash for PreHashedKey<K> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.0.hash(state);
}
}
impl<K: std::hash::Hash> PreHashedKey<K> {
pub fn from_key(key: &K) -> Self {
use std::hash::Hasher;
let mut hasher = rustc_hash::FxHasher::default();
key.hash(&mut hasher);
Self(hasher.finish(), std::marker::PhantomData)
}
}
/// A hasher which does nothing. Useful for when you want to use a map with pre-hashed keys.
///
/// When hashing with this hasher, you must provide exactly 8 bytes. Multiple calls to `write`
/// will overwrite the previous value.
#[derive(Default)]
pub struct IdentityHasher {
hash: u64,
}
impl std::hash::Hasher for IdentityHasher {
fn write(&mut self, bytes: &[u8]) {
self.hash = u64::from_ne_bytes(
bytes
.try_into()
.expect("identity hasher must be given exactly 8 bytes"),
);
}
fn finish(&self) -> u64 {
self.hash
}
}

View File

@ -573,18 +573,14 @@ impl Adapter {
) -> Result<(Arc<Device>, Arc<Queue>), RequestDeviceError> {
api_log!("Adapter::create_device");
let device = Device::new(
hal_device.device,
hal_device.queue.as_ref(),
self,
desc,
trace_path,
instance_flags,
)?;
let device = Device::new(hal_device.device, self, desc, trace_path, instance_flags)?;
let device = Arc::new(device);
let queue = Arc::new(Queue::new(device.clone(), hal_device.queue));
let queue = Queue::new(device.clone(), hal_device.queue)?;
let queue = Arc::new(queue);
device.set_queue(&queue);
Ok((device, queue))
}

View File

@ -111,16 +111,16 @@ define_lock_ranks! {
// COMMAND_BUFFER_DATA,
}
rank BUFFER_MAP_STATE "Buffer::map_state" followed by {
DEVICE_PENDING_WRITES,
QUEUE_PENDING_WRITES,
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
DEVICE_TRACE,
}
rank DEVICE_PENDING_WRITES "Device::pending_writes" followed by {
rank QUEUE_PENDING_WRITES "Queue::pending_writes" followed by {
COMMAND_ALLOCATOR_FREE_ENCODERS,
SHARED_TRACKER_INDEX_ALLOCATOR_INNER,
DEVICE_LIFE_TRACKER,
QUEUE_LIFE_TRACKER,
}
rank DEVICE_LIFE_TRACKER "Device::life_tracker" followed by {
rank QUEUE_LIFE_TRACKER "Queue::life_tracker" followed by {
COMMAND_ALLOCATOR_FREE_ENCODERS,
DEVICE_TRACE,
}
@ -135,6 +135,7 @@ define_lock_ranks! {
#[allow(dead_code)]
rank DEVICE_TRACE "Device::trace" followed by { }
rank DEVICE_TRACKERS "Device::trackers" followed by { }
rank DEVICE_LOST_CLOSURE "Device::device_lost_closure" followed by { }
rank DEVICE_USAGE_SCOPES "Device::usage_scopes" followed by { }
rank IDENTITY_MANAGER_VALUES "IdentityManager::values" followed by { }
rank REGISTRY_STORAGE "Registry::storage" followed by { }

View File

@ -7,16 +7,13 @@ use std::{
use once_cell::sync::OnceCell;
use crate::lock::{rank, Mutex};
use crate::{PreHashedKey, PreHashedMap};
use crate::FastHashMap;
type SlotInner<V> = Weak<V>;
type ResourcePoolSlot<V> = Arc<OnceCell<SlotInner<V>>>;
pub struct ResourcePool<K, V> {
// We use a pre-hashed map as we never actually need to read the keys.
//
// This additionally allows us to not need to hash more than once on get_or_init.
inner: Mutex<PreHashedMap<K, ResourcePoolSlot<V>>>,
inner: Mutex<FastHashMap<K, ResourcePoolSlot<V>>>,
}
impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
@ -35,9 +32,6 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
where
F: FnOnce(K) -> Result<Arc<V>, E>,
{
// Hash the key outside of the lock.
let hashed_key = PreHashedKey::from_key(&key);
// We can't prove at compile time that these will only ever be consumed once,
// so we need to do the check at runtime.
let mut key = Some(key);
@ -46,7 +40,7 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
'race: loop {
let mut map_guard = self.inner.lock();
let entry = match map_guard.entry(hashed_key) {
let entry = match map_guard.entry(key.clone().unwrap()) {
// An entry exists for this resource.
//
// We know that either:
@ -86,9 +80,11 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
return Ok(strong);
}
// The resource is in the process of being dropped, because upgrade failed. The entry still exists in the map, but it points to nothing.
// The resource is in the process of being dropped, because upgrade failed.
// The entry still exists in the map, but it points to nothing.
//
// We're in a race with the drop implementation of the resource, so lets just go around again. When we go around again:
// We're in a race with the drop implementation of the resource,
// so lets just go around again. When we go around again:
// - If the entry exists, we might need to go around a few more times.
// - If the entry doesn't exist, we'll create a new one.
continue 'race;
@ -101,13 +97,11 @@ impl<K: Clone + Eq + Hash, V> ResourcePool<K, V> {
///
/// [`BindGroupLayout`]: crate::binding_model::BindGroupLayout
pub fn remove(&self, key: &K) {
let hashed_key = PreHashedKey::from_key(key);
let mut map_guard = self.inner.lock();
// Weak::upgrade will be failing long before this code is called. All threads trying to access the resource will be spinning,
// waiting for the entry to be removed. It is safe to remove the entry from the map.
map_guard.remove(&hashed_key);
map_guard.remove(key);
}
}

View File

@ -13,7 +13,7 @@ use crate::{
id::{BlasId, BufferId, TlasId},
resource::CreateBufferError,
};
use std::sync::Arc;
use std::{mem::size_of, sync::Arc};
use std::{num::NonZeroU64, slice};
use crate::resource::{Blas, ResourceErrorIdent, Tlas};
@ -325,11 +325,8 @@ pub(crate) fn tlas_instance_into_bytes(
};
let temp: *const _ = &temp;
unsafe {
slice::from_raw_parts::<u8>(
temp.cast::<u8>(),
std::mem::size_of::<RawTlasInstance>(),
)
.to_vec()
slice::from_raw_parts::<u8>(temp.cast::<u8>(), size_of::<RawTlasInstance>())
.to_vec()
}
}
_ => unimplemented!(),

View File

@ -634,11 +634,76 @@ impl Buffer {
.buffers
.set_single(self, internal_use);
let submit_index = device.lock_life().map(self).unwrap_or(0); // '0' means no wait is necessary
let submit_index = if let Some(queue) = device.get_queue() {
queue.lock_life().map(self).unwrap_or(0) // '0' means no wait is necessary
} else {
// We can safely unwrap below since we just set the `map_state` to `BufferMapState::Waiting`.
let (mut operation, status) = self.map(&device.snatchable_lock.read()).unwrap();
if let Some(callback) = operation.callback.take() {
callback.call(status);
}
0
};
Ok(submit_index)
}
/// This function returns [`None`] only if [`Self::map_state`] is not [`BufferMapState::Waiting`].
#[must_use]
pub(crate) fn map(&self, snatch_guard: &SnatchGuard) -> Option<BufferMapPendingClosure> {
// This _cannot_ be inlined into the match. If it is, the lock will be held
// open through the whole match, resulting in a deadlock when we try to re-lock
// the buffer back to active.
let mapping = mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle);
let pending_mapping = match mapping {
BufferMapState::Waiting(pending_mapping) => pending_mapping,
// Mapping cancelled
BufferMapState::Idle => return None,
// Mapping queued at least twice by map -> unmap -> map
// and was already successfully mapped below
BufferMapState::Active { .. } => {
*self.map_state.lock() = mapping;
return None;
}
_ => panic!("No pending mapping."),
};
let status = if pending_mapping.range.start != pending_mapping.range.end {
let host = pending_mapping.op.host;
let size = pending_mapping.range.end - pending_mapping.range.start;
match crate::device::map_buffer(
self,
pending_mapping.range.start,
size,
host,
snatch_guard,
) {
Ok(mapping) => {
*self.map_state.lock() = BufferMapState::Active {
mapping,
range: pending_mapping.range.clone(),
host,
};
Ok(())
}
Err(e) => {
log::error!("Mapping failed: {e}");
Err(e)
}
}
} else {
*self.map_state.lock() = BufferMapState::Active {
mapping: hal::BufferMapping {
ptr: NonNull::dangling(),
is_coherent: true,
},
range: pending_mapping.range,
host: pending_mapping.op.host,
};
Ok(())
};
Some((pending_mapping.op, status))
}
// Note: This must not be called while holding a lock.
pub(crate) fn unmap(
self: &Arc<Self>,
@ -676,36 +741,37 @@ impl Buffer {
});
}
let mut pending_writes = device.pending_writes.lock();
let staging_buffer = staging_buffer.flush();
let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
src_offset: 0,
dst_offset: 0,
size,
});
let transition_src = hal::BufferBarrier {
buffer: staging_buffer.raw(),
usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
};
let transition_dst = hal::BufferBarrier::<dyn hal::DynBuffer> {
buffer: raw_buf,
usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
};
let encoder = pending_writes.activate();
unsafe {
encoder.transition_buffers(&[transition_src, transition_dst]);
if self.size > 0 {
encoder.copy_buffer_to_buffer(
staging_buffer.raw(),
raw_buf,
region.as_slice(),
);
if let Some(queue) = device.get_queue() {
let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
src_offset: 0,
dst_offset: 0,
size,
});
let transition_src = hal::BufferBarrier {
buffer: staging_buffer.raw(),
usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC,
};
let transition_dst = hal::BufferBarrier::<dyn hal::DynBuffer> {
buffer: raw_buf,
usage: hal::BufferUses::empty()..hal::BufferUses::COPY_DST,
};
let mut pending_writes = queue.pending_writes.lock();
let encoder = pending_writes.activate();
unsafe {
encoder.transition_buffers(&[transition_src, transition_dst]);
if self.size > 0 {
encoder.copy_buffer_to_buffer(
staging_buffer.raw(),
raw_buf,
region.as_slice(),
);
}
}
pending_writes.consume(staging_buffer);
pending_writes.insert_buffer(self);
}
pending_writes.consume(staging_buffer);
pending_writes.insert_buffer(self);
}
BufferMapState::Idle => {
return Err(BufferAccessError::NotMapped);
@ -778,14 +844,16 @@ impl Buffer {
})
};
let mut pending_writes = device.pending_writes.lock();
if pending_writes.contains_buffer(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = device.lock_life();
let last_submit_index = life_lock.get_buffer_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
if let Some(queue) = device.get_queue() {
let mut pending_writes = queue.pending_writes.lock();
if pending_writes.contains_buffer(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = queue.lock_life();
let last_submit_index = life_lock.get_buffer_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
}
}
}
@ -1244,14 +1312,16 @@ impl Texture {
})
};
let mut pending_writes = device.pending_writes.lock();
if pending_writes.contains_texture(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = device.lock_life();
let last_submit_index = life_lock.get_texture_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
if let Some(queue) = device.get_queue() {
let mut pending_writes = queue.pending_writes.lock();
if pending_writes.contains_texture(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = queue.lock_life();
let last_submit_index = life_lock.get_texture_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
}
}
}
@ -1960,14 +2030,16 @@ impl Blas {
})
};
let mut pending_writes = device.pending_writes.lock();
if pending_writes.contains_blas(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = device.lock_life();
let last_submit_index = life_lock.get_blas_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
if let Some(queue) = device.get_queue() {
let mut pending_writes = queue.pending_writes.lock();
if pending_writes.contains_blas(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = queue.lock_life();
let last_submit_index = life_lock.get_blas_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
}
}
}
@ -2047,14 +2119,16 @@ impl Tlas {
})
};
let mut pending_writes = device.pending_writes.lock();
if pending_writes.contains_tlas(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = device.lock_life();
let last_submit_index = life_lock.get_tlas_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
if let Some(queue) = device.get_queue() {
let mut pending_writes = queue.pending_writes.lock();
if pending_writes.contains_tlas(self) {
pending_writes.consume_temp(temp);
} else {
let mut life_lock = queue.lock_life();
let last_submit_index = life_lock.get_tlas_latest_submission_index(self);
if let Some(last_submit_index) = last_submit_index {
life_lock.schedule_resource_destruction(temp, last_submit_index);
}
}
}

View File

@ -21,7 +21,7 @@ impl ScratchBuffer {
usage: BufferUses::ACCELERATION_STRUCTURE_SCRATCH,
memory_flags: hal::MemoryFlags::empty(),
})
.map_err(crate::device::DeviceError::from_hal)?
.map_err(DeviceError::from_hal)?
};
Ok(Self {
raw: ManuallyDrop::new(raw),

View File

@ -581,7 +581,8 @@ impl<A: hal::Api> Example<A> {
self.device.destroy_pipeline_layout(self.pipeline_layout);
self.surface.unconfigure(&self.device);
self.device.exit(self.queue);
drop(self.queue);
drop(self.device);
drop(self.surface);
drop(self.adapter);
}

View File

@ -1069,7 +1069,8 @@ impl<A: hal::Api> Example<A> {
self.device.destroy_shader_module(self.shader_module);
self.surface.unconfigure(&self.device);
self.device.exit(self.queue);
drop(self.queue);
drop(self.device);
drop(self.surface);
drop(self.adapter);
}

View File

@ -391,10 +391,6 @@ impl super::Device {
impl crate::Device for super::Device {
type A = super::Api;
unsafe fn exit(self, _queue: super::Queue) {
self.rtv_pool.lock().free_handle(self.null_rtv_handle);
}
unsafe fn create_buffer(
&self,
desc: &crate::BufferDescriptor,

View File

@ -602,6 +602,12 @@ pub struct Device {
counters: wgt::HalCounters,
}
impl Drop for Device {
fn drop(&mut self) {
self.rtv_pool.lock().free_handle(self.null_rtv_handle);
}
}
unsafe impl Send for Device {}
unsafe impl Sync for Device {}

View File

@ -16,8 +16,6 @@ use super::{
};
pub trait DynDevice: DynResource {
unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>);
unsafe fn create_buffer(
&self,
desc: &BufferDescriptor,
@ -166,10 +164,6 @@ pub trait DynDevice: DynResource {
}
impl<D: Device + DynResource> DynDevice for D {
unsafe fn exit(self: Box<Self>, queue: Box<dyn DynQueue>) {
unsafe { D::exit(*self, queue.unbox()) }
}
unsafe fn create_buffer(
&self,
desc: &BufferDescriptor,

View File

@ -163,7 +163,6 @@ impl crate::Queue for Context {
impl crate::Device for Context {
type A = Api;
unsafe fn exit(self, queue: Context) {}
unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<Resource> {
Ok(Resource)
}

View File

@ -501,14 +501,6 @@ impl super::Device {
impl crate::Device for super::Device {
type A = super::Api;
unsafe fn exit(self, queue: super::Queue) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_vertex_array(self.main_vao) };
unsafe { gl.delete_framebuffer(queue.draw_fbo) };
unsafe { gl.delete_framebuffer(queue.copy_fbo) };
unsafe { gl.delete_buffer(queue.zero_buffer) };
}
unsafe fn create_buffer(
&self,
desc: &crate::BufferDescriptor,
@ -1563,6 +1555,10 @@ impl crate::Device for super::Device {
) -> Result<bool, crate::DeviceError> {
if fence.last_completed.load(Ordering::Relaxed) < wait_value {
let gl = &self.shared.context.lock();
// MAX_CLIENT_WAIT_TIMEOUT_WEBGL is:
// - 1s in Gecko https://searchfox.org/mozilla-central/rev/754074e05178e017ef6c3d8e30428ffa8f1b794d/dom/canvas/WebGLTypes.h#1386
// - 0 in WebKit https://github.com/WebKit/WebKit/blob/4ef90d4672ca50267c0971b85db403d9684508ea/Source/WebCore/html/canvas/WebGL2RenderingContext.cpp#L110
// - 0 in Chromium https://source.chromium.org/chromium/chromium/src/+/main:third_party/blink/renderer/modules/webgl/webgl2_rendering_context_base.cc;l=112;drc=a3cb0ac4c71ec04abfeaed199e5d63230eca2551
let timeout_ns = if cfg!(any(webgl, Emscripten)) {
0
} else {

View File

@ -295,6 +295,13 @@ pub struct Device {
counters: wgt::HalCounters,
}
impl Drop for Device {
fn drop(&mut self) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_vertex_array(self.main_vao) };
}
}
pub struct ShaderClearProgram {
pub program: glow::Program,
pub color_uniform_location: glow::UniformLocation,
@ -316,6 +323,15 @@ pub struct Queue {
current_index_buffer: Mutex<Option<glow::Buffer>>,
}
impl Drop for Queue {
fn drop(&mut self) {
let gl = &self.shared.context.lock();
unsafe { gl.delete_framebuffer(self.draw_fbo) };
unsafe { gl.delete_framebuffer(self.copy_fbo) };
unsafe { gl.delete_buffer(self.zero_buffer) };
}
}
#[derive(Clone, Debug)]
pub struct Buffer {
raw: Option<glow::Buffer>,

View File

@ -676,7 +676,7 @@ pub trait Adapter: WasmNotSendSync {
/// 1) Free resources with methods like [`Device::destroy_texture`] or
/// [`Device::destroy_shader_module`].
///
/// 1) Shut down the device by calling [`Device::exit`].
/// 1) Drop the device.
///
/// [`vkDevice`]: https://registry.khronos.org/vulkan/specs/1.3-extensions/html/vkspec.html#VkDevice
/// [`ID3D12Device`]: https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nn-d3d12-id3d12device
@ -706,8 +706,6 @@ pub trait Adapter: WasmNotSendSync {
pub trait Device: WasmNotSendSync {
type A: Api;
/// Exit connection to this logical device.
unsafe fn exit(self, queue: <Self::A as Api>::Queue);
/// Creates a new buffer.
///
/// The initial usage is `BufferUses::empty()`.

View File

@ -321,8 +321,6 @@ impl super::Device {
impl crate::Device for super::Device {
type A = super::Api;
unsafe fn exit(self, _queue: super::Queue) {}
unsafe fn create_buffer(&self, desc: &crate::BufferDescriptor) -> DeviceResult<super::Buffer> {
let map_read = desc.usage.contains(crate::BufferUses::MAP_READ);
let map_write = desc.usage.contains(crate::BufferUses::MAP_WRITE);

View File

@ -289,18 +289,6 @@ impl super::DeviceShared {
.size((range.end - range.start + mask) & !mask)
}))
}
unsafe fn free_resources(&self) {
for &raw in self.render_passes.lock().values() {
unsafe { self.raw.destroy_render_pass(raw, None) };
}
for &raw in self.framebuffers.lock().values() {
unsafe { self.raw.destroy_framebuffer(raw, None) };
}
if self.drop_guard.is_none() {
unsafe { self.raw.destroy_device(None) };
}
}
}
impl gpu_alloc::MemoryDevice<vk::DeviceMemory> for super::DeviceShared {
@ -1023,18 +1011,6 @@ impl super::Device {
impl crate::Device for super::Device {
type A = super::Api;
unsafe fn exit(self, queue: super::Queue) {
unsafe { self.mem_allocator.into_inner().cleanup(&*self.shared) };
unsafe { self.desc_allocator.into_inner().cleanup(&*self.shared) };
unsafe {
queue
.relay_semaphores
.into_inner()
.destroy(&self.shared.raw)
};
unsafe { self.shared.free_resources() };
}
unsafe fn create_buffer(
&self,
desc: &crate::BufferDescriptor,

View File

@ -646,6 +646,20 @@ struct DeviceShared {
memory_allocations_counter: InternalCounter,
}
impl Drop for DeviceShared {
fn drop(&mut self) {
for &raw in self.render_passes.lock().values() {
unsafe { self.raw.destroy_render_pass(raw, None) };
}
for &raw in self.framebuffers.lock().values() {
unsafe { self.raw.destroy_framebuffer(raw, None) };
}
if self.drop_guard.is_none() {
unsafe { self.raw.destroy_device(None) };
}
}
}
pub struct Device {
shared: Arc<DeviceShared>,
mem_allocator: Mutex<gpu_alloc::GpuAllocator<vk::DeviceMemory>>,
@ -658,6 +672,13 @@ pub struct Device {
counters: wgt::HalCounters,
}
impl Drop for Device {
fn drop(&mut self) {
unsafe { self.mem_allocator.lock().cleanup(&*self.shared) };
unsafe { self.desc_allocator.lock().cleanup(&*self.shared) };
}
}
/// Semaphores for forcing queue submissions to run in order.
///
/// The [`wgpu_hal::Queue`] trait promises that if two calls to [`submit`] are
@ -741,6 +762,12 @@ pub struct Queue {
relay_semaphores: Mutex<RelaySemaphores>,
}
impl Drop for Queue {
fn drop(&mut self) {
unsafe { self.relay_semaphores.lock().destroy(&self.device.raw) };
}
}
#[derive(Debug)]
pub struct Buffer {
raw: vk::Buffer,

View File

@ -1350,9 +1350,6 @@ impl crate::Context for ContextWgpuCore {
fn device_drop(&self, device_data: &Self::DeviceData) {
#[cfg(any(native, Emscripten))]
{
// Call device_poll, but don't check for errors. We have to use its
// return value, but we just drop it.
let _ = self.0.device_poll(device_data.id, wgt::Maintain::wait());
self.0.device_drop(device_data.id);
}
}