mirror of
https://github.com/gfx-rs/wgpu.git
synced 2024-11-25 00:03:29 +00:00
make the StagingBuffer
implementation more robust
This commit is contained in:
parent
347d902bcb
commit
6f16ea460a
@ -2519,7 +2519,7 @@ impl Global {
|
||||
}
|
||||
let map_state = &*buffer.map_state.lock();
|
||||
match *map_state {
|
||||
resource::BufferMapState::Init { ref ptr, .. } => {
|
||||
resource::BufferMapState::Init { ref staging_buffer } => {
|
||||
// offset (u64) can not be < 0, so no need to validate the lower bound
|
||||
if offset + range_size > buffer.size {
|
||||
return Err(BufferAccessError::OutOfBoundsOverrun {
|
||||
@ -2527,12 +2527,9 @@ impl Global {
|
||||
max: buffer.size,
|
||||
});
|
||||
}
|
||||
unsafe {
|
||||
Ok((
|
||||
NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)),
|
||||
range_size,
|
||||
))
|
||||
}
|
||||
let ptr = unsafe { staging_buffer.ptr() };
|
||||
let ptr = unsafe { NonNull::new_unchecked(ptr.as_ptr().offset(offset as isize)) };
|
||||
Ok((ptr, range_size))
|
||||
}
|
||||
resource::BufferMapState::Active {
|
||||
ref ptr, ref range, ..
|
||||
|
@ -31,7 +31,7 @@ use smallvec::SmallVec;
|
||||
use std::{
|
||||
iter,
|
||||
mem::{self},
|
||||
ptr::{self, NonNull},
|
||||
ptr::NonNull,
|
||||
sync::{atomic::Ordering, Arc},
|
||||
};
|
||||
use thiserror::Error;
|
||||
@ -405,17 +405,13 @@ impl Global {
|
||||
// Platform validation requires that the staging buffer always be
|
||||
// freed, even if an error occurs. All paths from here must call
|
||||
// `device.pending_writes.consume`.
|
||||
let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, data_size)?;
|
||||
let mut staging_buffer = StagingBuffer::new(device, data_size)?;
|
||||
let mut pending_writes = device.pending_writes.lock();
|
||||
let pending_writes = pending_writes.as_mut().unwrap();
|
||||
|
||||
let staging_buffer = unsafe {
|
||||
let staging_buffer = {
|
||||
profiling::scope!("copy");
|
||||
ptr::copy_nonoverlapping(
|
||||
data.as_ptr(),
|
||||
staging_buffer_ptr.as_ptr(),
|
||||
data_size.get() as usize,
|
||||
);
|
||||
staging_buffer.write(data);
|
||||
staging_buffer.flush()
|
||||
};
|
||||
|
||||
@ -448,13 +444,14 @@ impl Global {
|
||||
|
||||
let device = &queue.device;
|
||||
|
||||
let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, buffer_size)?;
|
||||
let staging_buffer = StagingBuffer::new(device, buffer_size)?;
|
||||
let ptr = unsafe { staging_buffer.ptr() };
|
||||
|
||||
let fid = hub.staging_buffers.prepare(id_in);
|
||||
let id = fid.assign(Arc::new(staging_buffer));
|
||||
resource_log!("Queue::create_staging_buffer {id:?}");
|
||||
|
||||
Ok((id, staging_buffer_ptr))
|
||||
Ok((id, ptr))
|
||||
}
|
||||
|
||||
pub fn queue_write_staging_buffer<A: HalApi>(
|
||||
@ -487,7 +484,7 @@ impl Global {
|
||||
// user. Platform validation requires that the staging buffer always
|
||||
// be freed, even if an error occurs. All paths from here must call
|
||||
// `device.pending_writes.consume`.
|
||||
let staging_buffer = unsafe { staging_buffer.flush() };
|
||||
let staging_buffer = staging_buffer.flush();
|
||||
|
||||
let result = self.queue_write_staging_buffer_impl(
|
||||
&queue,
|
||||
@ -779,42 +776,34 @@ impl Global {
|
||||
// Platform validation requires that the staging buffer always be
|
||||
// freed, even if an error occurs. All paths from here must call
|
||||
// `device.pending_writes.consume`.
|
||||
let (staging_buffer, staging_buffer_ptr) = StagingBuffer::new(device, stage_size)?;
|
||||
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
|
||||
|
||||
if stage_bytes_per_row == bytes_per_row {
|
||||
profiling::scope!("copy aligned");
|
||||
// Fast path if the data is already being aligned optimally.
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(
|
||||
data.as_ptr().offset(data_layout.offset as isize),
|
||||
staging_buffer_ptr.as_ptr(),
|
||||
stage_size.get() as usize,
|
||||
);
|
||||
}
|
||||
staging_buffer.write(&data[data_layout.offset as usize..]);
|
||||
} else {
|
||||
profiling::scope!("copy chunked");
|
||||
// Copy row by row into the optimal alignment.
|
||||
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
|
||||
for layer in 0..size.depth_or_array_layers {
|
||||
let rows_offset = layer * block_rows_per_image;
|
||||
for row in 0..height_blocks {
|
||||
for row in rows_offset..rows_offset + height_blocks {
|
||||
let src_offset = data_layout.offset as u32 + row * bytes_per_row;
|
||||
let dst_offset = row * stage_bytes_per_row;
|
||||
unsafe {
|
||||
ptr::copy_nonoverlapping(
|
||||
data.as_ptr().offset(
|
||||
data_layout.offset as isize
|
||||
+ (rows_offset + row) as isize * bytes_per_row as isize,
|
||||
),
|
||||
staging_buffer_ptr.as_ptr().offset(
|
||||
(rows_offset + row) as isize * stage_bytes_per_row as isize,
|
||||
),
|
||||
staging_buffer.write_with_offset(
|
||||
data,
|
||||
src_offset as isize,
|
||||
dst_offset as isize,
|
||||
copy_bytes_per_row,
|
||||
);
|
||||
)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let staging_buffer = unsafe { staging_buffer.flush() };
|
||||
let staging_buffer = staging_buffer.flush();
|
||||
|
||||
let regions = (0..array_layer_count).map(|rel_array_layer| {
|
||||
let mut texture_base = dst_base.clone();
|
||||
|
@ -591,18 +591,15 @@ impl<A: HalApi> Device<A> {
|
||||
};
|
||||
hal::BufferUses::MAP_WRITE
|
||||
} else {
|
||||
let (staging_buffer, staging_buffer_ptr) =
|
||||
let mut staging_buffer =
|
||||
StagingBuffer::new(self, wgt::BufferSize::new(aligned_size).unwrap())?;
|
||||
|
||||
// Zero initialize memory and then mark the buffer as initialized
|
||||
// (it's guaranteed that this is the case by the time the buffer is usable)
|
||||
unsafe { std::ptr::write_bytes(staging_buffer_ptr.as_ptr(), 0, aligned_size as usize) };
|
||||
staging_buffer.write_zeros();
|
||||
buffer.initialization_status.write().drain(0..aligned_size);
|
||||
|
||||
*buffer.map_state.lock() = resource::BufferMapState::Init {
|
||||
staging_buffer,
|
||||
ptr: staging_buffer_ptr,
|
||||
};
|
||||
*buffer.map_state.lock() = resource::BufferMapState::Init { staging_buffer };
|
||||
hal::BufferUses::COPY_DST
|
||||
};
|
||||
|
||||
|
@ -256,10 +256,7 @@ pub enum BufferMapAsyncStatus {
|
||||
#[derive(Debug)]
|
||||
pub(crate) enum BufferMapState<A: HalApi> {
|
||||
/// Mapped at creation.
|
||||
Init {
|
||||
staging_buffer: StagingBuffer<A>,
|
||||
ptr: NonNull<u8>,
|
||||
},
|
||||
Init { staging_buffer: StagingBuffer<A> },
|
||||
/// Waiting for GPU to be done before mapping
|
||||
Waiting(BufferPendingMapping<A>),
|
||||
/// Mapped
|
||||
@ -651,15 +648,10 @@ impl<A: HalApi> Buffer<A> {
|
||||
let raw_buf = self.try_raw(&snatch_guard)?;
|
||||
log::debug!("{} map state -> Idle", self.error_ident());
|
||||
match mem::replace(&mut *self.map_state.lock(), BufferMapState::Idle) {
|
||||
BufferMapState::Init {
|
||||
staging_buffer,
|
||||
ptr,
|
||||
} => {
|
||||
BufferMapState::Init { staging_buffer } => {
|
||||
#[cfg(feature = "trace")]
|
||||
if let Some(ref mut trace) = *device.trace.lock() {
|
||||
let data = trace.make_binary("bin", unsafe {
|
||||
std::slice::from_raw_parts(ptr.as_ptr(), self.size as usize)
|
||||
});
|
||||
let data = trace.make_binary("bin", staging_buffer.get_data());
|
||||
trace.add(trace::Action::WriteBuffer {
|
||||
id: buffer_id,
|
||||
data,
|
||||
@ -667,12 +659,11 @@ impl<A: HalApi> Buffer<A> {
|
||||
queued: true,
|
||||
});
|
||||
}
|
||||
let _ = ptr;
|
||||
|
||||
let mut pending_writes = device.pending_writes.lock();
|
||||
let pending_writes = pending_writes.as_mut().unwrap();
|
||||
|
||||
let staging_buffer = unsafe { staging_buffer.flush() };
|
||||
let staging_buffer = staging_buffer.flush();
|
||||
|
||||
self.use_at(device.active_submission_index.load(Ordering::Relaxed) + 1);
|
||||
let region = wgt::BufferSize::new(self.size).map(|size| hal::BufferCopy {
|
||||
@ -832,6 +823,11 @@ impl<A: HalApi> Drop for DestroyedBuffer<A> {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(send_sync)]
|
||||
unsafe impl<A: HalApi> Send for StagingBuffer<A> {}
|
||||
#[cfg(send_sync)]
|
||||
unsafe impl<A: HalApi> Sync for StagingBuffer<A> {}
|
||||
|
||||
/// A temporary buffer, consumed by the command that uses it.
|
||||
///
|
||||
/// A [`StagingBuffer`] is designed for one-shot uploads of data to the GPU. It
|
||||
@ -857,13 +853,11 @@ pub struct StagingBuffer<A: HalApi> {
|
||||
device: Arc<Device<A>>,
|
||||
pub(crate) size: wgt::BufferSize,
|
||||
is_coherent: bool,
|
||||
ptr: NonNull<u8>,
|
||||
}
|
||||
|
||||
impl<A: HalApi> StagingBuffer<A> {
|
||||
pub(crate) fn new(
|
||||
device: &Arc<Device<A>>,
|
||||
size: wgt::BufferSize,
|
||||
) -> Result<(Self, NonNull<u8>), DeviceError> {
|
||||
pub(crate) fn new(device: &Arc<Device<A>>, size: wgt::BufferSize) -> Result<Self, DeviceError> {
|
||||
use hal::Device;
|
||||
profiling::scope!("StagingBuffer::new");
|
||||
let stage_desc = hal::BufferDescriptor {
|
||||
@ -881,9 +875,55 @@ impl<A: HalApi> StagingBuffer<A> {
|
||||
device: device.clone(),
|
||||
size,
|
||||
is_coherent: mapping.is_coherent,
|
||||
ptr: mapping.ptr,
|
||||
};
|
||||
|
||||
Ok((staging_buffer, mapping.ptr))
|
||||
Ok(staging_buffer)
|
||||
}
|
||||
|
||||
/// SAFETY: You must not call any functions of `self`
|
||||
/// until you stopped using the returned pointer.
|
||||
pub(crate) unsafe fn ptr(&self) -> NonNull<u8> {
|
||||
self.ptr
|
||||
}
|
||||
|
||||
#[cfg(feature = "trace")]
|
||||
pub(crate) fn get_data(&self) -> &[u8] {
|
||||
unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size.get() as usize) }
|
||||
}
|
||||
|
||||
pub(crate) fn write_zeros(&mut self) {
|
||||
unsafe { core::ptr::write_bytes(self.ptr.as_ptr(), 0, self.size.get() as usize) };
|
||||
}
|
||||
|
||||
pub(crate) fn write(&mut self, data: &[u8]) {
|
||||
assert!(data.len() >= self.size.get() as usize);
|
||||
// SAFETY: With the assert above, all of `copy_nonoverlapping`'s
|
||||
// requirements are satisfied.
|
||||
unsafe {
|
||||
core::ptr::copy_nonoverlapping(
|
||||
data.as_ptr(),
|
||||
self.ptr.as_ptr(),
|
||||
self.size.get() as usize,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/// SAFETY: The offsets and size must be in-bounds.
|
||||
pub(crate) unsafe fn write_with_offset(
|
||||
&mut self,
|
||||
data: &[u8],
|
||||
src_offset: isize,
|
||||
dst_offset: isize,
|
||||
size: usize,
|
||||
) {
|
||||
unsafe {
|
||||
core::ptr::copy_nonoverlapping(
|
||||
data.as_ptr().offset(src_offset),
|
||||
self.ptr.as_ptr().offset(dst_offset),
|
||||
size,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn flush(self) -> FlushedStagingBuffer<A> {
|
||||
|
@ -714,9 +714,13 @@ pub trait Device: WasmNotSendSync {
|
||||
/// be ordered, so it is meaningful to talk about what must occur
|
||||
/// "between" them.
|
||||
///
|
||||
/// - Zero-sized mappings are not allowed.
|
||||
///
|
||||
/// - The returned [`BufferMapping::ptr`] must not be used after a call to
|
||||
/// [`Device::unmap_buffer`].
|
||||
///
|
||||
/// [`MAP_READ`]: BufferUses::MAP_READ
|
||||
/// [`MAP_WRITE`]: BufferUses::MAP_WRITE
|
||||
//TODO: clarify if zero-sized mapping is allowed
|
||||
unsafe fn map_buffer(
|
||||
&self,
|
||||
buffer: &<Self::A as Api>::Buffer,
|
||||
|
Loading…
Reference in New Issue
Block a user