Mirror of https://github.com/vulkano-rs/vulkano.git (synced 2024-11-22 14:56:42 +00:00)
Fix subbuffer alignment to the non-coherent atom size limitation (#2148)
* Fix subbuffer alignment to the non-coherent atom size limitation
* Fix incorrect range in the buffer guards
* Document the new behavior
* Simplify range aligning
This commit is contained in:
parent 52748c64bb
commit 2fff56624d
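The change replaces the old behaviour of rejecting a subbuffer whose bounds are not multiples of `non_coherent_atom_size` (the `SubbufferNotAlignedToAtomSize` error removed below) with widening the locked range to atom boundaries. A minimal sketch of that widening in plain Rust, assuming a power-of-two atom size; the helper names are illustrative, not vulkano's actual API:

// Illustrative sketch of how a subbuffer's byte range is widened to
// `non_coherent_atom_size` boundaries, mirroring the patched `read`/`write`.
fn align_down(value: u64, atom_size: u64) -> u64 {
    value & !(atom_size - 1)
}

fn align_up(value: u64, atom_size: u64) -> u64 {
    (value + atom_size - 1) & !(atom_size - 1)
}

/// Widens `offset..offset + size` to atom boundaries, clamped to the allocation size.
fn aligned_range(offset: u64, size: u64, atom_size: u64, allocation_size: u64) -> std::ops::Range<u64> {
    let start = align_down(offset, atom_size);
    let end = std::cmp::min(align_up(offset + size, atom_size), allocation_size);
    start..end
}

fn main() {
    // A 32-byte subbuffer at offset 32 inside a 256-byte allocation, atom size 64:
    assert_eq!(aligned_range(32, 32, 64, 256), 0..64);
}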
@@ -874,15 +874,6 @@ pub enum BufferError {
         queue_family_index: u32,
         queue_family_count: u32,
     },
-
-    /// The memory is not host-coherent, and the [`Subbuffer`] bounds are not a multiple of the
-    /// [`non_coherent_atom_size`] device property.
-    ///
-    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
-    SubbufferNotAlignedToAtomSize {
-        range: Range<DeviceSize>,
-        atom_size: DeviceAlignment,
-    },
 }
 
 impl Error for BufferError {
@@ -1006,12 +997,6 @@ impl Display for BufferError {
                 "the sharing mode was set to `Concurrent`, but one of the specified queue family \
                 indices was out of range",
             ),
-            Self::SubbufferNotAlignedToAtomSize { range, atom_size } => write!(
-                f,
-                "the memory is not host-coherent, and the `Subbuffer` bounds ({:?}) are not \
-                a multiple of the `non_coherent_atom_size` device property ({:?})",
-                range, atom_size,
-            ),
         }
     }
 }
@@ -12,7 +12,7 @@ use crate::{
     device::{Device, DeviceOwned},
     memory::{
         self,
-        allocator::{align_up, DeviceAlignment, DeviceLayout},
+        allocator::{align_down, align_up, DeviceAlignment, DeviceLayout},
         is_aligned,
     },
     DeviceSize, NonZeroDeviceSize,
@@ -20,6 +20,7 @@ use crate::{
 use bytemuck::PodCastError;
 use std::{
     alloc::Layout,
+    cmp,
     error::Error,
     fmt::{Display, Error as FmtError, Formatter},
     hash::{Hash, Hasher},
@@ -111,13 +112,6 @@ impl<T: ?Sized> Subbuffer<T> {
         self.offset..self.offset + self.size
     }
 
-    /// Returns the range the subbuffer occupies, in bytes, relative to the [`DeviceMemory`] block.
-    fn memory_range(&self) -> Range<DeviceSize> {
-        let memory_offset = self.memory_offset();
-
-        memory_offset..memory_offset + self.size
-    }
-
     /// Returns the buffer that this subbuffer is a part of.
     pub fn buffer(&self) -> &Arc<Buffer> {
         match &self.parent {
@@ -193,25 +187,47 @@ where
     /// that uses it in exclusive mode will fail. You can still submit this subbuffer for
     /// non-exclusive accesses (ie. reads).
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`invalidate_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then invalidating one subbuffer would also
+    /// invalidate the other subbuffer. This can lead to data races and is therefore not allowed.
+    /// What you should do in that case is ensure that each subbuffer is aligned to the
+    /// non-coherent atom size, so in this case one would be at offset 0 and the other at offset
+    /// 64. [`SubbufferAllocator`] does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`invalidate_range`]: crate::memory::allocator::MemoryAlloc::invalidate_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`write`]: Self::write
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn read(&self) -> Result<BufferReadGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::read` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
 
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_read(range.clone())?;
-        unsafe { state.cpu_read_lock(range.clone()) };
+        state.check_cpu_read(aligned_range.clone())?;
+        unsafe { state.cpu_read_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
             // If there are other read locks being held at this point, they also called
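As a worked version of the scenario in the doc comment added above (atom size 64, two 32-byte subbuffers at offsets 0 and 32), the following self-contained sketch uses the same power-of-two alignment arithmetic, not vulkano's API, to show that both subbuffers widen to the same atom range:

// Worked example: two unaligned neighbours end up sharing one non-coherent atom.
fn widen(offset: u64, size: u64, atom: u64) -> std::ops::Range<u64> {
    (offset & !(atom - 1))..((offset + size + atom - 1) & !(atom - 1))
}

fn main() {
    let a = widen(0, 32, 64);  // first subbuffer
    let b = widen(32, 32, 64); // second subbuffer
    assert_eq!(a, 0..64);
    assert_eq!(b, 0..64);
    // Both widen to the same atom range, so locking or invalidating one necessarily
    // covers the other; that is why unaligned subbuffers must not share an atom.
}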
@@ -219,7 +235,7 @@ where
             // lock, so there will be no new data and this call will do nothing.
             // TODO: probably still more efficient to call it only if we're the first to acquire a
             // read lock, but the number of CPU locks isn't currently tracked anywhere.
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.read(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -228,6 +244,7 @@ where
         Ok(BufferReadGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 
@@ -240,28 +257,50 @@ where
     /// After this function successfully locks the buffer, any attempt to submit a command buffer
     /// that uses it and any attempt to call `read` will return an error.
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`flush_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then flushing one subbuffer would also flush the
+    /// other subbuffer. This can lead to data races and is therefore not allowed. What you should
+    /// do in that case is ensure that each subbuffer is aligned to the non-coherent atom size, so
+    /// in this case one would be at offset 0 and the other at offset 64. [`SubbufferAllocator`]
+    /// does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`flush_range`]: crate::memory::allocator::MemoryAlloc::flush_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`read`]: Self::read
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn write(&self) -> Result<BufferWriteGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::write` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
 
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_write(range.clone())?;
-        unsafe { state.cpu_write_lock(range.clone()) };
+        state.check_cpu_write(aligned_range.clone())?;
+        unsafe { state.cpu_write_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.write(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -270,6 +309,7 @@ where
         Ok(BufferWriteGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 }
@@ -523,13 +563,13 @@ impl<T: ?Sized> Hash for Subbuffer<T> {
 pub struct BufferReadGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferReadGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_read_unlock(range) };
+        unsafe { state.cpu_read_unlock(self.range.clone()) };
     }
 }
 
@@ -550,22 +590,22 @@ impl<T: ?Sized> Deref for BufferReadGuard<'_, T> {
 pub struct BufferWriteGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a mut T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferWriteGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let allocation = match self.subbuffer.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => unreachable!(),
         };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.flush_range(range.clone()).unwrap() };
+            unsafe { allocation.flush_range(self.range.clone()).unwrap() };
         }
 
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_write_unlock(range) };
+        unsafe { state.cpu_write_unlock(self.range.clone()) };
     }
 }
 
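For context, a hypothetical usage sketch of the guard API this patch touches; the function name and the `Subbuffer<[u32]>` element type are made up for illustration, and it assumes the subbuffer was created elsewhere and is backed by host-visible memory:

use vulkano::buffer::{BufferError, Subbuffer};

fn roundtrip(subbuffer: &Subbuffer<[u32]>) -> Result<(), BufferError> {
    {
        // `write` locks the subbuffer's range (widened to the atom size when the
        // memory is non-coherent) for exclusive host access.
        let mut guard = subbuffer.write()?;
        guard[0] = 42;
    } // Dropping the guard flushes the stored aligned range and releases the lock.

    // `read` locks the same aligned range non-exclusively and invalidates it first.
    let guard = subbuffer.read()?;
    assert_eq!(guard[0], 42);

    Ok(())
}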