Fix subbuffer alignment to the non-coherent atom size limitation (#2148)

* Fix subbuffer alignment to the non-coherent atom size limitation

* Fix incorrect range in the buffer guards

* Document the new behavior

* Simplify range aligning (see the sketch below)
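The "Simplify range aligning" bullet refers to the new `aligned_range` computation visible in the diff below: the subbuffer's bounds are rounded outward to the non-coherent atom size, and the end is clamped to the allocation size. A minimal standalone sketch of that arithmetic, assuming a power-of-two atom size of 64 and using plain `u64` helpers in place of vulkano's `DeviceAlignment`-based `align_down`/`align_up`:

```rust
// Hypothetical helpers mirroring the rounding in the diff; a power-of-two
// alignment is assumed.
fn align_down(value: u64, alignment: u64) -> u64 {
    value & !(alignment - 1)
}

fn align_up(value: u64, alignment: u64) -> u64 {
    align_down(value + alignment - 1, alignment)
}

fn main() {
    let atom_size = 64u64; // assumed `non_coherent_atom_size`
    let allocation_size = 256u64; // assumed size of the backing allocation

    // A subbuffer at offset 96 with size 40 has bounds that are not atom-aligned,
    // so the range to lock/invalidate/flush is grown outward to atom boundaries
    // and clamped to the allocation.
    let (offset, size) = (96u64, 40u64);
    let start = align_down(offset, atom_size);
    let end = u64::min(align_up(offset + size, atom_size), allocation_size);
    assert_eq!(start..end, 64..192);
}
```

Growing the range outward like this, rather than rejecting unaligned bounds, is what allows the old `SubbufferNotAlignedToAtomSize` error to be removed in the first hunk.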
marc0246 authored on 2023-03-03 10:17:33 +01:00, committed by GitHub
parent 52748c64bb
commit 2fff56624d
2 changed files with 73 additions and 48 deletions


@@ -874,15 +874,6 @@ pub enum BufferError {
         queue_family_index: u32,
         queue_family_count: u32,
     },
-
-    /// The memory is not host-coherent, and the [`Subbuffer`] bounds are not a multiple of the
-    /// [`non_coherent_atom_size`] device property.
-    ///
-    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
-    SubbufferNotAlignedToAtomSize {
-        range: Range<DeviceSize>,
-        atom_size: DeviceAlignment,
-    },
 }
 
 impl Error for BufferError {
@@ -1006,12 +997,6 @@ impl Display for BufferError {
                 "the sharing mode was set to `Concurrent`, but one of the specified queue family \
                 indices was out of range",
             ),
-            Self::SubbufferNotAlignedToAtomSize { range, atom_size } => write!(
-                f,
-                "the memory is not host-coherent, and the `Subbuffer` bounds ({:?}) are not \
-                a multiple of the `non_coherent_atom_size` device property ({:?})",
-                range, atom_size,
-            ),
         }
     }
 }


@@ -12,7 +12,7 @@ use crate::{
     device::{Device, DeviceOwned},
     memory::{
         self,
-        allocator::{align_up, DeviceAlignment, DeviceLayout},
+        allocator::{align_down, align_up, DeviceAlignment, DeviceLayout},
         is_aligned,
     },
     DeviceSize, NonZeroDeviceSize,
@@ -20,6 +20,7 @@ use crate::{
 use bytemuck::PodCastError;
 use std::{
     alloc::Layout,
+    cmp,
     error::Error,
     fmt::{Display, Error as FmtError, Formatter},
     hash::{Hash, Hasher},
@@ -111,13 +112,6 @@ impl<T: ?Sized> Subbuffer<T> {
         self.offset..self.offset + self.size
     }
 
-    /// Returns the range the subbuffer occupies, in bytes, relative to the [`DeviceMemory`] block.
-    fn memory_range(&self) -> Range<DeviceSize> {
-        let memory_offset = self.memory_offset();
-
-        memory_offset..memory_offset + self.size
-    }
-
     /// Returns the buffer that this subbuffer is a part of.
     pub fn buffer(&self) -> &Arc<Buffer> {
         match &self.parent {
@@ -193,25 +187,47 @@ where
     /// that uses it in exclusive mode will fail. You can still submit this subbuffer for
     /// non-exclusive accesses (ie. reads).
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`invalidate_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then invalidating one subbuffer would also
+    /// invalidate the other subbuffer. This can lead to data races and is therefore not allowed.
+    /// What you should do in that case is ensure that each subbuffer is aligned to the
+    /// non-coherent atom size, so in this case one would be at offset 0 and the other at offset
+    /// 64. [`SubbufferAllocator`] does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`invalidate_range`]: crate::memory::allocator::MemoryAlloc::invalidate_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`write`]: Self::write
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn read(&self) -> Result<BufferReadGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::read` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
+
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_read(range.clone())?;
-        unsafe { state.cpu_read_lock(range.clone()) };
+        state.check_cpu_read(aligned_range.clone())?;
+        unsafe { state.cpu_read_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
             // If there are other read locks being held at this point, they also called
@@ -219,7 +235,7 @@ where
             // lock, so there will be no new data and this call will do nothing.
             // TODO: probably still more efficient to call it only if we're the first to acquire a
             // read lock, but the number of CPU locks isn't currently tracked anywhere.
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.read(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -228,6 +244,7 @@ where
         Ok(BufferReadGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 
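The doc comment added above warns that, on non-coherent memory, the locked range is rounded out to the atom size, so two subbuffers packed inside the same atom would lock overlapping ranges. A small sketch of the layout it recommends instead, again with plain integers and an assumed atom size of 64 (per the doc comment, `SubbufferAllocator` performs this alignment automatically):

```rust
// Hypothetical helper mirroring the rounding described above; a power-of-two
// alignment is assumed.
fn align_up(value: u64, alignment: u64) -> u64 {
    (value + alignment - 1) & !(alignment - 1)
}

fn main() {
    let atom_size = 64u64; // assumed `non_coherent_atom_size`

    // Packing two 32-byte subbuffers at offsets 0 and 32 would make both of them
    // lock the same aligned range 0..64, so accessing one could race with the
    // other. Placing the second subbuffer at the next atom boundary keeps the
    // locked ranges disjoint: 0..64 and 64..128.
    let first_offset = 0u64;
    let second_offset = align_up(first_offset + 32, atom_size);
    assert_eq!(second_offset, 64);
}
```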
@@ -240,28 +257,50 @@ where
     /// After this function successfully locks the buffer, any attempt to submit a command buffer
     /// that uses it and any attempt to call `read` will return an error.
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`flush_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then flushing one subbuffer would also flush the
+    /// other subbuffer. This can lead to data races and is therefore not allowed. What you should
+    /// do in that case is ensure that each subbuffer is aligned to the non-coherent atom size, so
+    /// in this case one would be at offset 0 and the other at offset 64. [`SubbufferAllocator`]
+    /// does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`flush_range`]: crate::memory::allocator::MemoryAlloc::flush_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`read`]: Self::read
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn write(&self) -> Result<BufferWriteGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::write` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
+
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_write(range.clone())?;
-        unsafe { state.cpu_write_lock(range.clone()) };
+        state.check_cpu_write(aligned_range.clone())?;
+        unsafe { state.cpu_write_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.write(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -270,6 +309,7 @@ where
         Ok(BufferWriteGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 }
@@ -523,13 +563,13 @@ impl<T: ?Sized> Hash for Subbuffer<T> {
 pub struct BufferReadGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferReadGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_read_unlock(range) };
+        unsafe { state.cpu_read_unlock(self.range.clone()) };
     }
 }
 
@@ -550,22 +590,22 @@ impl<T: ?Sized> Deref for BufferReadGuard<'_, T> {
 pub struct BufferWriteGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a mut T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferWriteGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let allocation = match self.subbuffer.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => unreachable!(),
         };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.flush_range(range.clone()).unwrap() };
+            unsafe { allocation.flush_range(self.range.clone()).unwrap() };
        }
 
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_write_unlock(range) };
+        unsafe { state.cpu_write_unlock(self.range.clone()) };
     }
 }
 