make the StagingBuffer implementation more robust

This commit is contained in:
teoxoy 2024-07-12 14:38:38 +02:00 committed by Teodor Tanasoaia
parent 347d902bcb
commit 6f16ea460a
5 changed files with 89 additions and 62 deletions

View File

@ -2519,7 +2519,7 @@ impl Global {
}
let map_state = &*buffer.map_state.lock();
match *map_state {
resource::BufferMapState::Init { ref ptr, .. } => {
resource::BufferMapState::Init { ref staging_buffer } => {
// offset (u64) can not be < 0, so no need to validate the lower bound
if offset + range_size > buffer.size {
return Err(BufferAccessError::OutOfBoundsOverrun {
@ -2527,12 +2527,9 @@ impl Global {
max: buffer.size,
});
}
unsafe {
Ok((
NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)),
range_size,
))
}
let ptr = unsafe { staging_buffer.ptr() };
let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) };
Ok((ptr, range_size))
}
resource::BufferMapState::Active {
ref ptr, ref range, ..

View File

@ -31,7 +31,7 @@ use smallvec::SmallVec;
use std::{
iter,
mem::{self},
ptr::{self, NonNull},
ptr::NonNull,
sync::{atomic::Ordering, Arc},
};
use thiserror::Error;
@ -405,17 +405,13 @@ impl Global {
// Platform validation requires that the staging buffer always be
// freed, even if an error occurs. All paths from here must call
// `device.pending_writes.consume`.
let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, data_size)?;
let mut staging_buffer = StagingBuffer::new(device, data_size)?;
let mut pending_writes = device.pending_writes.lock();
let pending_writes = pending_writes.as_mut().unwrap();
let staging_buffer = unsafe {
let staging_buffer = {
profiling::scope!("copy");
ptr::copy_nonoverlapping(
data.as_ptr(),
staging_buffer_ptr.as_ptr(),
data_size.get() as usize,
);
staging_buffer.write(data);
staging_buffer.flush()
};
@ -448,13 +444,14 @@ impl Global {
let device = &queue.device;
let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, buffer_size)?;
let staging_buffer = StagingBuffer::new(device, buffer_size)?;
let ptr = unsafe { staging_buffer.ptr() };
let fid = hub.staging_buffers.prepare(id_in);
let id = fid.assign(Arc::new(staging_buffer));
resource_log!("Queue::create_staging_buffer {id:?}");
Ok((id, staging_buffer_ptr))
Ok((id, ptr))
}
pub fn queue_write_staging_buffer<A: HalApi>(
@ -487,7 +484,7 @@ impl Global {
// user. Platform validation requires that the staging buffer always
// be freed, even if an error occurs. All paths from here must call
// `device.pending_writes.consume`.
let staging_buffer = unsafe { staging_buffer.flush() };
let staging_buffer = staging_buffer.flush();
let result = self.queue_write_staging_buffer_impl(
&queue,
@ -779,42 +776,34 @@ impl Global {
// Platform validation requires that the staging buffer always be
// freed, even if an error occurs. All paths from here must call
// `device.pending_writes.consume`.
let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, stage_size)?;
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
if stage_bytes_per_row == bytes_per_row {
profiling::scope!("copy aligned");
// Fast path if the data is already being aligned optimally.
unsafe {
ptr::copy_nonoverlapping(
data.as_ptr().offset(data_layout.offset as isize),
staging_buffer_ptr.as_ptr(),
stage_size.get() as usize,
);
}
staging_buffer.write(&data[data_layout.offset as usize..]);
} else {
profiling::scope!("copy chunked");
// Copy row by row into the optimal alignment.
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
for layer in 0..size.depth_or_array_layers {
let rows_offset = layer * block_rows_per_image;
for row in 0..height_blocks {
for row in rows_offset..rows_offset + height_blocks {
let src_offset = data_layout.offset as u32 + row * bytes_per_row;
let dst_offset = row * stage_bytes_per_row;
unsafe {
ptr::copy_nonoverlapping(
data.as_ptr().offset(
data_layout.offset as isize
+ (rows_offset + row) as isize * bytes_per_row as isize,
),
staging_buffer_ptr.as_ptr().offset(
(rows_offset + row) as isize * stage_bytes_per_row as isize,
),
staging_buffer.write_with_offset(
data,
src_offset as isize,
dst_offset as isize,
copy_bytes_per_row,
);
)
}
}
}
}
let staging_buffer = unsafe { staging_buffer.flush() };
let staging_buffer = staging_buffer.flush();
let regions = (0..array_layer_count).map(|rel_array_layer| {
let mut texture_base = dst_base.clone();

View File

@ -591,18 +591,15 @@ impl<A: HalApi> Device<A> {
};
hal::BufferUses::MAP_WRITE
} else {
let (staging_buffer, staging_buffer_ptr) =
let mut staging_buffer =
StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?;
// Zero initialize memory and then mark the buffer as initialized
// (it's guaranteed that this is the case by the time the buffer is usable)
unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, aligned_size as usize) };
staging_buffer.write_zeros();
buffer.initialization_status.write().drain(0..aligned_size);
*buffer.map_state.lock() = resource::BufferMapState::Init {
staging_buffer,
ptr: staging_buffer_ptr,
};
*buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer };
hal::BufferUses::COPY_DST
};

View File

@ -256,10 +256,7 @@ pub enum BufferMapAsyncStatus {
#[derive(Debug)]
pub(crate) enum BufferMapState<A: HalApi> {
/// Mapped at creation.
Init {
staging_buffer: StagingBuffer<A>,
ptr: NonNull<u8>,
},
Init { staging_buffer: StagingBuffer<A> },
/// Waiting for GPU to be done before mapping
Waiting(BufferPendingMapping<A>),
/// Mapped
@ -651,15 +648,10 @@ impl<A: HalApi> Buffer<A> {
let raw_buf = self.try_raw(&snatch_guard)?;
log::debug!("{} map state -> Idle", self.error_ident());
match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) {
BufferMapState::Init {
staging_buffer,
ptr,
} => {
BufferMapState::Init { staging_buffer } => {
#[cfg(feature = "trace")]
if let Some(ref mut trace) = *device.trace.lock() {
let data = trace.make_binary("bin", unsafe {
std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize)
});
let data = trace.make_binary("bin", staging_buffer.get_data());
trace.add(trace::Action::WriteBuffer {
id: buffer_id,
data,
@ -667,12 +659,11 @@ impl<A: HalApi> Buffer<A> {
queued: true,
});
}
let _ = ptr;
let mut pending_writes = device.pending_writes.lock();
let pending_writes = pending_writes.as_mut().unwrap();
let staging_buffer = unsafe { staging_buffer.flush() };
let staging_buffer = staging_buffer.flush();
self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
@ -832,6 +823,11 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
}
}
#[cfg(send_sync)]
unsafe impl<A: HalApi> Send for StagingBuffer<A> {}
#[cfg(send_sync)]
unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
/// A temporary buffer, consumed by the command that uses it.
///
/// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It
@ -857,13 +853,11 @@ pub struct StagingBuffer<A: HalApi> {
device: Arc<Device<A>>,
pub(crate) size: wgt::BufferSize,
is_coherent: bool,
ptr: NonNull<u8>,
}
impl<A: HalApi> StagingBuffer<A> {
pub(crate) fn new(
device: &Arc<Device<A>>,
size: wgt::BufferSize,
) -> Result<(Self, NonNull<u8>), DeviceError> {
pub(crate) fn new(device: &Arc<Device<A>>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
use hal::Device;
profiling::scope!("StagingBuffer::new");
let stage_desc = hal::BufferDescriptor {
@ -881,9 +875,55 @@ impl<A: HalApi> StagingBuffer<A> {
device: device.clone(),
size,
is_coherent: mapping.is_coherent,
ptr: mapping.ptr,
};
Ok((staging_buffer, mapping.ptr))
Ok(staging_buffer)
}
/// SAFETY: You must not call any functions of `self`
/// until you stopped using the returned pointer.
pub(crate) unsafe fn ptr(&self) -> NonNull<u8> {
self.ptr
}
#[cfg(feature = "trace")]
pub(crate) fn get_data(&self) -> &[u8] {
unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size.get() as usize) }
}
pub(crate) fn write_zeros(&mut self) {
unsafe { core::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size.get() as usize) };
}
pub(crate) fn write(&mut self, data: &[u8]) {
assert!(data.len() >= self.size.get() as usize);
// SAFETY: With the assert above, all of `copy_nonoverlapping`'s
// requirements are satisfied.
unsafe {
core::ptr::copy_nonoverlapping(
data.as_ptr(),
self.ptr.as_ptr(),
self.size.get() as usize,
);
}
}
/// SAFETY: The offsets and size must be in-bounds.
pub(crate) unsafe fn write_with_offset(
&mut self,
data: &[u8],
src_offset: isize,
dst_offset: isize,
size: usize,
) {
unsafe {
core::ptr::copy_nonoverlapping(
data.as_ptr().offset(src_offset),
self.ptr.as_ptr().offset(dst_offset),
size,
);
}
}
pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {

View File

@ -714,9 +714,13 @@ pub trait Device: WasmNotSendSync {
/// be ordered, so it is meaningful to talk about what must occur
/// "between" them.
///
/// - Zero-sized mappings are not allowed.
///
/// - The returned [`BufferMapping::ptr`] must not be used after a call to
/// [`Device::unmap_buffer`].
///
/// [`MAP_READ`]: BufferUses::MAP_READ
/// [`MAP_WRITE`]: BufferUses::MAP_WRITE
//TODO: clarify if zero-sized mapping is allowed
unsafe fn map_buffer(
&self,
buffer: &<Self::A as Api>::Buffer,