Fix subbuffer alignment to the non-coherent atom size limitation (#2148)
* Fix subbuffer alignment to the non-coherent atom size limitation
* Fix incorrect range in the buffer guards
* Document the new behavior
* Simplify range aligning
parent 52748c64bb
commit 2fff56624d
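
The heart of the change is rounding a subbuffer's byte range outward to the `non_coherent_atom_size` before locking, flushing, or invalidating it. A minimal standalone sketch of that rounding, using plain `u64` in place of vulkano's `DeviceSize`/`DeviceAlignment` and assuming a power-of-two atom size (which `DeviceAlignment` guarantees):

/// Rounds `val` down to the nearest multiple of `alignment` (a power of two).
fn align_down(val: u64, alignment: u64) -> u64 {
    val & !(alignment - 1)
}

/// Rounds `val` up to the nearest multiple of `alignment` (a power of two).
fn align_up(val: u64, alignment: u64) -> u64 {
    align_down(val + alignment - 1, alignment)
}

fn main() {
    // A subbuffer at offset 32 with size 32, on a device that reports a
    // `non_coherent_atom_size` of 64:
    let (offset, size, atom_size) = (32u64, 32, 64);
    let start = align_down(offset, atom_size);
    let end = align_up(offset + size, atom_size);
    // The range handed to flush/invalidate covers the whole atom,
    // not just the subbuffer's own 32..64.
    assert_eq!(start..end, 0..64);
}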
@@ -874,15 +874,6 @@ pub enum BufferError {
         queue_family_index: u32,
         queue_family_count: u32,
     },
-
-    /// The memory is not host-coherent, and the [`Subbuffer`] bounds are not a multiple of the
-    /// [`non_coherent_atom_size`] device property.
-    ///
-    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
-    SubbufferNotAlignedToAtomSize {
-        range: Range<DeviceSize>,
-        atom_size: DeviceAlignment,
-    },
 }
 
 impl Error for BufferError {
@@ -1006,12 +997,6 @@ impl Display for BufferError {
                 "the sharing mode was set to `Concurrent`, but one of the specified queue family \
                 indices was out of range",
             ),
-            Self::SubbufferNotAlignedToAtomSize { range, atom_size } => write!(
-                f,
-                "the memory is not host-coherent, and the `Subbuffer` bounds ({:?}) are not \
-                a multiple of the `non_coherent_atom_size` device property ({:?})",
-                range, atom_size,
-            ),
         }
     }
 }
@@ -12,7 +12,7 @@ use crate::{
     device::{Device, DeviceOwned},
     memory::{
         self,
-        allocator::{align_up, DeviceAlignment, DeviceLayout},
+        allocator::{align_down, align_up, DeviceAlignment, DeviceLayout},
         is_aligned,
     },
     DeviceSize, NonZeroDeviceSize,
@@ -20,6 +20,7 @@ use crate::{
 use bytemuck::PodCastError;
 use std::{
     alloc::Layout,
+    cmp,
     error::Error,
     fmt::{Display, Error as FmtError, Formatter},
     hash::{Hash, Hasher},
@@ -111,13 +112,6 @@ impl<T: ?Sized> Subbuffer<T> {
         self.offset..self.offset + self.size
     }
 
-    /// Returns the range the subbuffer occupies, in bytes, relative to the [`DeviceMemory`] block.
-    fn memory_range(&self) -> Range<DeviceSize> {
-        let memory_offset = self.memory_offset();
-
-        memory_offset..memory_offset + self.size
-    }
-
     /// Returns the buffer that this subbuffer is a part of.
     pub fn buffer(&self) -> &Arc<Buffer> {
         match &self.parent {
@@ -193,25 +187,47 @@ where
     /// that uses it in exclusive mode will fail. You can still submit this subbuffer for
     /// non-exclusive accesses (ie. reads).
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`invalidate_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then invalidating one subbuffer would also
+    /// invalidate the other subbuffer. This can lead to data races and is therefore not allowed.
+    /// What you should do in that case is ensure that each subbuffer is aligned to the
+    /// non-coherent atom size, so in this case one would be at offset 0 and the other at offset
+    /// 64. [`SubbufferAllocator`] does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`invalidate_range`]: crate::memory::allocator::MemoryAlloc::invalidate_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`write`]: Self::write
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn read(&self) -> Result<BufferReadGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::read` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
 
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_read(range.clone())?;
-        unsafe { state.cpu_read_lock(range.clone()) };
+        state.check_cpu_read(aligned_range.clone())?;
+        unsafe { state.cpu_read_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
             // If there are other read locks being held at this point, they also called
@@ -219,7 +235,7 @@ where
             // lock, so there will be no new data and this call will do nothing.
             // TODO: probably still more efficient to call it only if we're the first to acquire a
             // read lock, but the number of CPU locks isn't currently tracked anywhere.
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.read(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -228,6 +244,7 @@ where
         Ok(BufferReadGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 
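
The doc comment added above can be verified with a few lines of arithmetic: under an atom size of 64, two size-32 subbuffers at offsets 0 and 32 round out to the same range, which is why the lock tracking must treat them as conflicting. A sketch with plain `u64` math standing in for `DeviceSize`/`DeviceAlignment`; the `min` against the allocation size mirrors the clamp in the new `aligned_range` computation:

use std::ops::Range;

/// Rounds a subbuffer's byte range outward to whole atoms, clamped to the
/// end of the allocation, mirroring `aligned_range` in the hunk above.
fn aligned_range(offset: u64, size: u64, atom_size: u64, allocation_size: u64) -> Range<u64> {
    let start = offset & !(atom_size - 1); // align_down
    let end = ((offset + size + atom_size - 1) & !(atom_size - 1)) // align_up
        .min(allocation_size);
    start..end
}

fn main() {
    let atom_size = 64;
    // Two size-32 subbuffers sharing one 64-byte atom:
    assert_eq!(aligned_range(0, 32, atom_size, 128), 0..64);
    assert_eq!(aligned_range(32, 32, atom_size, 128), 0..64); // identical ranges
}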
@@ -240,28 +257,50 @@ where
     /// After this function successfully locks the buffer, any attempt to submit a command buffer
     /// that uses it and any attempt to call `read` will return an error.
     ///
+    /// If the memory backing the buffer is not [host-coherent], then this function will lock a
+    /// range that is potentially larger than the subbuffer, because the range given to
+    /// [`flush_range`] must be aligned to the [`non_coherent_atom_size`]. This means that for
+    /// example if your Vulkan implementation reports an atom size of 64, and you tried to put 2
+    /// subbuffers of size 32 in the same buffer, one at offset 0 and one at offset 32, while the
+    /// buffer is backed by non-coherent memory, then flushing one subbuffer would also flush the
+    /// other subbuffer. This can lead to data races and is therefore not allowed. What you should
+    /// do in that case is ensure that each subbuffer is aligned to the non-coherent atom size, so
+    /// in this case one would be at offset 0 and the other at offset 64. [`SubbufferAllocator`]
+    /// does this automatically.
+    ///
+    /// [host-coherent]: crate::memory::MemoryPropertyFlags::HOST_COHERENT
+    /// [`flush_range`]: crate::memory::allocator::MemoryAlloc::flush_range
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
     /// [`read`]: Self::read
+    /// [`SubbufferAllocator`]: super::allocator::SubbufferAllocator
     pub fn write(&self) -> Result<BufferWriteGuard<'_, T>, BufferError> {
         let allocation = match self.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => todo!("`Subbuffer::write` doesn't support sparse binding yet"),
         };
 
-        if let Some(atom_size) = allocation.atom_size() {
-            let range = self.memory_range();
-            if !is_aligned(range.start, atom_size) || !is_aligned(range.end, atom_size) {
-                return Err(BufferError::SubbufferNotAlignedToAtomSize { range, atom_size });
-            }
-        }
-
         let range = self.range();
 
+        let aligned_range = if let Some(atom_size) = allocation.atom_size() {
+            // This works because the suballocators align allocations to the non-coherent atom size
+            // when the memory is host-visible but not host-coherent.
+            let start = align_down(self.offset, atom_size);
+            let end = cmp::min(
+                align_up(self.offset + self.size, atom_size),
+                allocation.size(),
+            );
+
+            Range { start, end }
+        } else {
+            range.clone()
+        };
+
         let mut state = self.buffer().state();
-        state.check_cpu_write(range.clone())?;
-        unsafe { state.cpu_write_lock(range.clone()) };
+        state.check_cpu_write(aligned_range.clone())?;
+        unsafe { state.cpu_write_lock(aligned_range.clone()) };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.invalidate_range(range.clone()) }?;
+            unsafe { allocation.invalidate_range(aligned_range.clone()) }?;
         }
 
         let bytes = unsafe { allocation.write(range) }.ok_or(BufferError::MemoryNotHostVisible)?;
@@ -270,6 +309,7 @@ where
         Ok(BufferWriteGuard {
             subbuffer: self,
             data,
+            range: aligned_range,
         })
     }
 }
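
The prescription in the docs above, aligning each subbuffer to the atom size, makes the rounded ranges disjoint again, so the guards no longer contend. Reusing the same hypothetical `aligned_range` helper (`u64` stand-ins and a power-of-two atom; not a vulkano API):

use std::ops::Range;

fn aligned_range(offset: u64, size: u64, atom_size: u64, allocation_size: u64) -> Range<u64> {
    let start = offset & !(atom_size - 1);
    let end = ((offset + size + atom_size - 1) & !(atom_size - 1)).min(allocation_size);
    start..end
}

fn main() {
    let atom_size = 64;
    // Subbuffers at offsets 0 and 64 (instead of 0 and 32) each occupy
    // their own atom, so their lock/flush ranges no longer overlap:
    let first = aligned_range(0, 32, atom_size, 128);
    let second = aligned_range(64, 32, atom_size, 128);
    assert!(first.end <= second.start); // 0..64 and 64..128 are disjoint
}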
@@ -523,13 +563,13 @@ impl<T: ?Sized> Hash for Subbuffer<T> {
 pub struct BufferReadGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferReadGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_read_unlock(range) };
+        unsafe { state.cpu_read_unlock(self.range.clone()) };
     }
 }
 
@@ -550,22 +590,22 @@ impl<T: ?Sized> Deref for BufferReadGuard<'_, T> {
 pub struct BufferWriteGuard<'a, T: ?Sized> {
     subbuffer: &'a Subbuffer<T>,
     data: &'a mut T,
+    range: Range<DeviceSize>,
 }
 
 impl<T: ?Sized> Drop for BufferWriteGuard<'_, T> {
     fn drop(&mut self) {
-        let range = self.subbuffer.range();
         let allocation = match self.subbuffer.buffer().memory() {
             BufferMemory::Normal(a) => a,
             BufferMemory::Sparse => unreachable!(),
         };
 
         if allocation.atom_size().is_some() {
-            unsafe { allocation.flush_range(range.clone()).unwrap() };
+            unsafe { allocation.flush_range(self.range.clone()).unwrap() };
        }
 
         let mut state = self.subbuffer.buffer().state();
-        unsafe { state.cpu_write_unlock(range) };
+        unsafe { state.cpu_write_unlock(self.range.clone()) };
     }
 }
 
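The guard changes in the two hunks above implement the "fix incorrect range in the buffer guards" item from the commit message: each guard now stores the aligned range it actually locked and flushes/unlocks exactly that range on drop, instead of recomputing the unaligned subbuffer range. A self-contained sketch of the pattern; `flush_range` and `cpu_write_unlock` here are illustrative stand-ins, not vulkano APIs:

use std::ops::Range;

/// Stand-in write guard that remembers the aligned range acquired at lock time.
struct WriteGuard {
    range: Range<u64>,
}

impl Drop for WriteGuard {
    fn drop(&mut self) {
        // Flush and unlock exactly what was locked, rather than recomputing
        // the (possibly smaller, unaligned) subbuffer range.
        flush_range(self.range.clone());
        cpu_write_unlock(self.range.clone());
    }
}

fn flush_range(range: Range<u64>) {
    println!("flush {range:?}");
}

fn cpu_write_unlock(range: Range<u64>) {
    println!("unlock {range:?}");
}

fn main() {
    // A subbuffer spanning 32..64 on a device with a 64-byte atom locks 0..64:
    let _guard = WriteGuard { range: 0..64 }; // flushed and unlocked on drop
}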