Fix subbuffer alignment to the non-coherent atom size limitation (#2148)
* Fix subbuffer alignment to the non-coherent atom size limitation
* Fix incorrect range in the buffer guards
* Document the new behavior
* Simplify range aligning
parent 52748c64bb
commit 2fff56624d
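
The heart of the change is rounding a subbuffer's byte range outward to the `non_coherent_atom_size` before locking, flushing, or invalidating it. A minimal standalone sketch of that rounding, using plain `u64` in place of vulkano's `DeviceSize`/`DeviceAlignment` and assuming a power-of-two atom size (which `DeviceAlignment` guarantees):

/// Rounds `val` down to the nearest multiple of `alignment` (a power of two).
fn align_down(val: u64, alignment: u64) -> u64 {
    val & !(alignment - 1)
}

/// Rounds `val` up to the nearest multiple of `alignment` (a power of two).
fn align_up(val: u64, alignment: u64) -> u64 {
    align_down(val + alignment - 1, alignment)
}

fn main() {
    // A subbuffer at offset 32 with size 32, on a device that reports a
    // `non_coherent_atom_size` of 64:
    let (offset, size, atom_size) = (32u64, 32, 64);
    let start = align_down(offset, atom_size);
    let end = align_up(offset + size, atom_size);
    // The range handed to flush/invalidate covers the whole atom,
    // not just the subbuffer's own 32..64.
    assert_eq!(start..end, 0..64);
}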
@@ -874,15 +874,6 @@ pub enum BufferError {
         queue_family_index: u32,
         queue_family_count: u32,
     },
-
-    /// The memory is not host-coherent, and the [`Subbuffer`] bounds are not a multiple of the
-    /// [`non_coherent_atom_size`] device property.
-    ///
-    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
-    SubbufferNotAlignedToAtomSize {
-        range: Range<DeviceSize>,
-        atom_size: DeviceAlignment,
-    },
 }
 
 impl Error for BufferError {
@@ -1006,12 +997,6 @@ impl Display for BufferError {
                 "the sharing mode was set to `Concurrent`, but one of the specified queue family \
                 indices was out of range",
             ),
-            Self::SubbufferNotAlignedToAtomSize { range, atom_size } => write!(
-                f,
-                "the memory is not host-coherent, and the `Subbuffer` bounds ({:?}) are not \
-                a multiple of the `non_coherent_atom_size` device property ({:?})",
-                range, atom_size,
-            ),
         }
     }
 }
@@ -12,7 +12,7 @@ use crate::{
     device::{Device, DeviceOwned},
     memory::{
         self,
-        allocator::{align_up, DeviceAlignment, DeviceLayout},
+        allocator::{align_down, align_up, DeviceAlignment, DeviceLayout},
         is_aligned,
     },
     DeviceSize, NonZeroDeviceSize,
@@ -20,6 +20,7 @@ use crate::{
 use bytemuck::PodCastError;
 use std::{
     alloc::Layout,
+    cmp,
     error::Error,
     fmt::{Display, Error as FmtError, Formatter},
     hash::{Hash, Hasher},
@@ -111,13 +112,6 @@ impl<T: ?Sized> Subbuffer<T> {
         self.offset..self.offset + self.size
     }
 
-    /// Returns the range the subbuffer occupies, in bytes, relative to the [`DeviceMemory`] block.
-    fn memory_range(&self) -> Range<DeviceSize> {
-        let memory_offset = self.memory_offset();
-
-        memory_offset..memory_offset + self.size
-    }
-
     /// Returns the buffer that this subbuffer is a part of.
     pub fn buffer(&self) -> &Arc<Buffer> {
         match &self.parent {
@@ -193,25 +187,47 @@ where
     /// that uses it in exclusive mode will fail. You can still submit this subbuffer for
     /// non-exclusive accesses (ie. reads).
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`invalidate_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then invalidating one subbuffer would also
+    /// invalidate the other subbuffer. This can lead to data races and is therefore not allowed.
+    /// What you should do in that case is ensure that each subbuffer is aligned to the
+    /// non-coherent atom size, so in this case one would be at offset 0 and the other at offset
+    /// 64. [`SubbufferAllocator`] does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`invalidate_range`]: crate::memory::allocator::MemoryAlloc::invalidate_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`write`]: Self::write
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn read(&self) -> Result<BufferReadGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::read` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
 
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_read(range.clone())?;
-        unsafe { state.cpu_read_lock(range.clone()) };
+        state.check_cpu_read(aligned_range.clone())?;
+        unsafe { state.cpu_read_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
             // If there are other read locks being held at this point, they also called
@@ -219,7 +235,7 @@ where
             // lock, so there will be no new data and this call will do nothing.
             // TODO: probably still more efficient to call it only if we're the first to acquire a
             // read lock, but the number of CPU locks isn't currently tracked anywhere.
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.read(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -228,6 +244,7 @@ where
         Ok(BufferReadGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 
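
The doc comment added above can be verified with a few lines of arithmetic: under an atom size of 64, two size-32 subbuffers at offsets 0 and 32 round out to the same range, which is why the lock tracking must treat them as conflicting. A sketch with plain `u64` math standing in for `DeviceSize`/`DeviceAlignment`; the `min` against the allocation size mirrors the clamp in the new `aligned_range` computation:

use std::ops::Range;

/// Rounds a subbuffer's byte range outward to whole atoms, clamped to the
/// end of the allocation, mirroring `aligned_range` in the hunk above.
fn aligned_range(offset: u64, size: u64, atom_size: u64, allocation_size: u64) -> Range<u64> {
    let start = offset & !(atom_size - 1); // align_down
    let end = ((offset + size + atom_size - 1) & !(atom_size - 1)) // align_up
        .min(allocation_size);
    start..end
}

fn main() {
    let atom_size = 64;
    // Two size-32 subbuffers sharing one 64-byte atom:
    assert_eq!(aligned_range(0, 32, atom_size, 128), 0..64);
    assert_eq!(aligned_range(32, 32, atom_size, 128), 0..64); // identical ranges
}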
@@ -240,28 +257,50 @@ where
     /// After this function successfully locks the buffer, any attempt to submit a command buffer
     /// that uses it and any attempt to call `read` will return an error.
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`flush_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then flushing one subbuffer would also flush the
+    /// other subbuffer. This can lead to data races and is therefore not allowed. What you should
+    /// do in that case is ensure that each subbuffer is aligned to the non-coherent atom size, so
+    /// in this case one would be at offset 0 and the other at offset 64. [`SubbufferAllocator`]
+    /// does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`flush_range`]: crate::memory::allocator::MemoryAlloc::flush_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`read`]: Self::read
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn write(&self) -> Result<BufferWriteGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::write` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
 
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_write(range.clone())?;
-        unsafe { state.cpu_write_lock(range.clone()) };
+        state.check_cpu_write(aligned_range.clone())?;
+        unsafe { state.cpu_write_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.write(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -270,6 +309,7 @@ where
         Ok(BufferWriteGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 }
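
The prescription in the docs above, aligning each subbuffer to the atom size, makes the rounded ranges disjoint again, so the guards no longer contend. Reusing the same hypothetical `aligned_range` helper (`u64` stand-ins and a power-of-two atom; not a vulkano API):

use std::ops::Range;

fn aligned_range(offset: u64, size: u64, atom_size: u64, allocation_size: u64) -> Range<u64> {
    let start = offset & !(atom_size - 1);
    let end = ((offset + size + atom_size - 1) & !(atom_size - 1)).min(allocation_size);
    start..end
}

fn main() {
    let atom_size = 64;
    // Subbuffers at offsets 0 and 64 (instead of 0 and 32) each occupy
    // their own atom, so their lock/flush ranges no longer overlap:
    let first = aligned_range(0, 32, atom_size, 128);
    let second = aligned_range(64, 32, atom_size, 128);
    assert!(first.end <= second.start); // 0..64 and 64..128 are disjoint
}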
@@ -523,13 +563,13 @@ impl<T: ?Sized> Hash for Subbuffer<T> {
 pub struct BufferReadGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferReadGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_read_unlock(range) };
+        unsafe { state.cpu_read_unlock(self.range.clone()) };
     }
 }
 
@@ -550,22 +590,22 @@ impl<T: ?Sized> Deref for BufferReadGuard<'_, T> {
 pub struct BufferWriteGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a mut T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferWriteGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let allocation = match self.subbuffer.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => unreachable!(),
         };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.flush_range(range.clone()).unwrap() };
+            unsafe { allocation.flush_range(self.range.clone()).unwrap() };
        }
 
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_write_unlock(range) };
+        unsafe { state.cpu_write_unlock(self.range.clone()) };
     }
 }
 
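The guard changes in the two hunks above implement the "fix incorrect range in the buffer guards" item from the commit message: each guard now stores the aligned range it actually locked and flushes/unlocks exactly that range on drop, instead of recomputing the unaligned subbuffer range. A self-contained sketch of the pattern; `flush_range` and `cpu_write_unlock` here are illustrative stand-ins, not vulkano APIs:

use std::ops::Range;

/// Stand-in write guard that remembers the aligned range acquired at lock time.
struct WriteGuard {
    range: Range<u64>,
}

impl Drop for WriteGuard {
    fn drop(&mut self) {
        // Flush and unlock exactly what was locked, rather than recomputing
        // the (possibly smaller, unaligned) subbuffer range.
        flush_range(self.range.clone());
        cpu_write_unlock(self.range.clone());
    }
}

fn flush_range(range: Range<u64>) {
    println!("flush {range:?}");
}

fn cpu_write_unlock(range: Range<u64>) {
    println!("unlock {range:?}");
}

fn main() {
    // A subbuffer spanning 32..64 on a device with a 64-byte atom locks 0..64:
    let _guard = WriteGuard { range: 0..64 }; // flushed and unlocked on drop
}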