diff --git a/vulkano/src/buffer/mod.rs b/vulkano/src/buffer/mod.rs
index f017aecc..54d3a0e5 100644
--- a/vulkano/src/buffer/mod.rs
+++ b/vulkano/src/buffer/mod.rs
@@ -874,15 +874,6 @@ pub enum BufferError {
         queue_family_index: u32,
         queue_family_count: u32,
     },
-
-    /// The memory is not host-coherent, and the [`Subbuffer`] bounds are not a multiple of the
-    /// [`non_coherent_atom_size`] device property.
-    ///
-    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
-    SubbufferNotAlignedToAtomSize {
-        range: Range<DeviceSize>,
-        atom_size: DeviceAlignment,
-    },
 }
 
 impl Error for BufferError {
@@ -1006,12 +997,6 @@ impl Display for BufferError {
                 "the sharing mode was set to `Concurrent`, but one of the specified queue family \
                 indices was out of range",
             ),
-            Self::SubbufferNotAlignedToAtomSize { range, atom_size } => write!(
-                f,
-                "the memory is not host-coherent, and the `Subbuffer` bounds ({:?}) are not \
-                a multiple of the `non_coherent_atom_size` device property ({:?})",
-                range, atom_size,
-            ),
         }
     }
 }
diff --git a/vulkano/src/buffer/subbuffer.rs b/vulkano/src/buffer/subbuffer.rs
index 9ea642dc..4996076b 100644
--- a/vulkano/src/buffer/subbuffer.rs
+++ b/vulkano/src/buffer/subbuffer.rs
@@ -12,7 +12,7 @@ use crate::{
     device::{Device, DeviceOwned},
     memory::{
         self,
-        allocator::{align_up, DeviceAlignment, DeviceLayout},
+        allocator::{align_down, align_up, DeviceAlignment, DeviceLayout},
         is_aligned,
     },
     DeviceSize, NonZeroDeviceSize,
@@ -20,6 +20,7 @@
 use bytemuck::PodCastError;
 use std::{
     alloc::Layout,
+    cmp,
     error::Error,
     fmt::{Display, Error as FmtError, Formatter},
     hash::{Hash, Hasher},
@@ -111,13 +112,6 @@ impl<T: ?Sized> Subbuffer<T> {
         self.offset..self.offset + self.size
     }
 
-    /// Returns the range the subbuffer occupies, in bytes, relative to the [`DeviceMemory`] block.
-    fn memory_range(&self) -> Range<DeviceSize> {
-        let memory_offset = self.memory_offset();
-
-        memory_offset..memory_offset + self.size
-    }
-
     /// Returns the buffer that this subbuffer is a part of.
     pub fn buffer(&self) -> &Arc<Buffer> {
         match &self.parent {
@@ -193,25 +187,47 @@ where
     /// that uses it in exclusive mode will fail. You can still submit this subbuffer for
     /// non-exclusive accesses (ie. reads).
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will
+    /// lock a range that is potentially larger than the subbuffer, because the range
+    /// given to [`invalidate_range`] must be aligned to the [`non_coherent_atom_size`].
+    /// For example, if your Vulkan implementation reports an atom size of 64 and you try
+    /// to put two subbuffers of size 32 in the same buffer, one at offset 0 and one at
+    /// offset 32, while the buffer is backed by non-coherent memory, then invalidating
+    /// one subbuffer would also invalidate the other subbuffer. This can lead to data
+    /// races and is therefore not allowed. What you should do instead is ensure that
+    /// each subbuffer is aligned to the non-coherent atom size; in this example, one
+    /// subbuffer would be at offset 0 and the other at offset 64. [`SubbufferAllocator`]
+    /// does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`invalidate_range`]: crate::memory::allocator::MemoryAlloc::invalidate_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`write`]: Self::write
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn read(&self) -> Result<BufferReadGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::read` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom
+            // size when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
 
         let mut state = self.buffer().state();
-        state.check_cpu_read(range.clone())?;
-        unsafe { state.cpu_read_lock(range.clone()) };
+        state.check_cpu_read(aligned_range.clone())?;
+        unsafe { state.cpu_read_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
             // If there are other read locks being held at this point, they also called
@@ -219,7 +235,7 @@ where
             // lock, so there will be no new data and this call will do nothing.
             // TODO: probably still more efficient to call it only if we're the first to acquire a
             // read lock, but the number of CPU locks isn't currently tracked anywhere.
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.read(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -228,6 +244,7 @@ where
         Ok(BufferReadGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 
@@ -240,28 +257,50 @@ where
     /// After this function successfully locks the buffer, any attempt to submit a command buffer
     /// that uses it and any attempt to call `read` will return an error.
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will
+    /// lock a range that is potentially larger than the subbuffer, because the range
+    /// given to [`flush_range`] must be aligned to the [`non_coherent_atom_size`]. For
+    /// example, if your Vulkan implementation reports an atom size of 64 and you try to
+    /// put two subbuffers of size 32 in the same buffer, one at offset 0 and one at
+    /// offset 32, while the buffer is backed by non-coherent memory, then flushing one
+    /// subbuffer would also flush the other subbuffer. This can lead to data races and
+    /// is therefore not allowed. What you should do instead is ensure that each
+    /// subbuffer is aligned to the non-coherent atom size; in this example, one
+    /// subbuffer would be at offset 0 and the other at offset 64. [`SubbufferAllocator`]
+    /// does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`flush_range`]: crate::memory::allocator::MemoryAlloc::flush_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`read`]: Self::read
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn write(&self) -> Result<BufferWriteGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::write` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom
+            // size when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
 
         let mut state = self.buffer().state();
-        state.check_cpu_write(range.clone())?;
-        unsafe { state.cpu_write_lock(range.clone()) };
+        state.check_cpu_write(aligned_range.clone())?;
+        unsafe { state.cpu_write_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.write(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -270,6 +309,7 @@ where
         Ok(BufferWriteGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 }
@@ -523,13 +563,13 @@ impl<T: ?Sized> Hash for Subbuffer<T> {
 pub struct BufferReadGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferReadGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_read_unlock(range) };
+        unsafe { state.cpu_read_unlock(self.range.clone()) };
     }
 }
 
@@ -550,22 +590,22 @@ impl<T: ?Sized> Deref for BufferReadGuard<'_, T> {
 pub struct BufferWriteGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a mut T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferWriteGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let allocation = match self.subbuffer.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => unreachable!(),
         };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.flush_range(range.clone()).unwrap() };
+            unsafe { allocation.flush_range(self.range.clone()).unwrap() };
        }
 
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_write_unlock(range) };
+        unsafe { state.cpu_write_unlock(self.range.clone()) };
     }
 }
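
To make the rounding concrete, here is a small, self-contained sketch of the arithmetic behind the new `aligned_range` computation. The `align_down`/`align_up` helpers below are simplified stand-ins for vulkano's `DeviceAlignment`-based versions, and the atom size, offsets, and allocation size are made-up example values rather than anything queried from a real device:

```rust
use std::cmp;

/// Rounds `val` down to the nearest multiple of `alignment` (a power of two).
fn align_down(val: u64, alignment: u64) -> u64 {
    debug_assert!(alignment.is_power_of_two());
    val & !(alignment - 1)
}

/// Rounds `val` up to the nearest multiple of `alignment` (a power of two).
fn align_up(val: u64, alignment: u64) -> u64 {
    align_down(val + alignment - 1, alignment)
}

/// Mirrors the `aligned_range` computation in `Subbuffer::{read, write}`: grow the
/// subbuffer's byte range outward to atom-size boundaries, then clamp the end to the
/// allocation, which may itself end in the middle of an atom.
fn aligned_range(offset: u64, size: u64, atom_size: u64, allocation_size: u64) -> (u64, u64) {
    let start = align_down(offset, atom_size);
    let end = cmp::min(align_up(offset + size, atom_size), allocation_size);
    (start, end)
}

fn main() {
    // The scenario from the doc comments: an atom size of 64 and two subbuffers of
    // size 32 at offsets 0 and 32 within the same 128-byte allocation.
    let (atom_size, allocation_size) = (64, 128);

    let a = aligned_range(0, 32, atom_size, allocation_size);
    let b = aligned_range(32, 32, atom_size, allocation_size);

    // Both subbuffers round out to the same atom, so locking or flushing one of them
    // necessarily covers the other; this is the data race the docs forbid.
    assert_eq!(a, (0, 64));
    assert_eq!(b, (0, 64));

    // Placing the second subbuffer at offset 64 instead yields disjoint atoms.
    let c = aligned_range(64, 32, atom_size, allocation_size);
    assert_eq!(c, (64, 128));
    println!("a = {a:?}, b = {b:?}, c = {c:?}");
}
```

The `cmp::min` clamp in the patch exists for the same reason as in the sketch: the last atom of a buffer may extend past the end of its allocation, and `flush_range`/`invalidate_range` must not be handed a range that runs beyond it.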