Implement new locking for UnsafeBuffer and UnsafeImage (#1860)

Rua 2022-03-18 17:41:46 +01:00 committed by GitHub
parent 706f44bde8
commit a6e853e34d
19 changed files with 1342 additions and 1052 deletions
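
At a glance, the diffs below drop the ad-hoc per-resource lock fields (`RwLock<CurrentGpuAccess>`, `Mutex<GpuAccess>`, `AtomicUsize`/`AtomicBool` counters) and move all CPU/GPU access tracking into a `Mutex`-protected range map stored inside `UnsafeBuffer` and `UnsafeImage`. The `CurrentAccess` type referenced throughout comes from `crate::sync` and is not shown in this diff; judging only from the variants matched below, it presumably looks roughly like the following sketch (an assumption, not the committed definition):

    // Assumed shape of `crate::sync::CurrentAccess` (not part of this excerpt).
    // Every byte range of a buffer, and every subresource range of an image,
    // is tracked as exactly one of these states.
    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
    pub(crate) enum CurrentAccess {
        // The CPU holds a write lock; no other access is allowed.
        CpuExclusive,
        // The GPU is writing; further GPU reads/writes piggyback on the same lock.
        GpuExclusive { gpu_reads: usize, gpu_writes: usize },
        // Any number of concurrent CPU and GPU reads.
        Shared { cpu_reads: usize, gpu_reads: usize },
    }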

View File

@ -23,6 +23,7 @@ half = "1.8"
lazy_static = "1.4"
nalgebra = { version = "0.30.0", optional = true }
parking_lot = { version = "0.12", features = ["send_guard"] }
rangemap = { git = "https://github.com/vulkano-rs/rangemap", branch = "range-split" }
shared_library = "0.1"
smallvec = "1.8"

View File

@ -21,18 +21,17 @@ use super::{
};
use crate::{
buffer::{sys::UnsafeBufferCreateInfo, BufferCreationError, TypedBufferAccess},
device::{physical::QueueFamily, Device, DeviceOwned, Queue},
device::{physical::QueueFamily, Device, DeviceOwned},
memory::{
pool::{
AllocFromRequirementsFilter, AllocLayout, MappingRequirement, MemoryPoolAlloc,
PotentialDedicatedAllocation, StdMemoryPoolAlloc,
},
DedicatedAllocation, DeviceMemoryAllocationError, MappedDeviceMemory, MemoryPool,
DedicatedAllocation, DeviceMemoryAllocationError, MemoryPool,
},
sync::{AccessError, Sharing},
sync::Sharing,
DeviceSize,
};
use parking_lot::{RwLock, RwLockReadGuard, RwLockWriteGuard};
use smallvec::SmallVec;
use std::{
error, fmt,
@ -41,10 +40,7 @@ use std::{
mem::size_of,
ops::{Deref, DerefMut, Range},
ptr,
sync::{
atomic::{AtomicUsize, Ordering},
Arc,
},
sync::{atomic::AtomicUsize, Arc},
};
/// Buffer whose content is accessible by the CPU.
@ -64,12 +60,6 @@ where
// The memory held by the buffer.
memory: A,
// Access pattern of the buffer.
// Every time the user tries to read or write the buffer from the CPU, this `RwLock` is kept
// locked and its content is checked to verify that we are allowed access. Every time the user
// tries to submit this buffer for the GPU, this `RwLock` is briefly locked and modified.
access: RwLock<CurrentGpuAccess>,
// Queue families allowed to access this buffer.
queue_families: SmallVec<[u32; 4]>,
@ -261,7 +251,7 @@ where
};
let mem_reqs = buffer.memory_requirements();
let mem = MemoryPool::alloc_from_requirements(
let memory = MemoryPool::alloc_from_requirements(
&Device::standard_pool(&device),
&mem_reqs,
AllocLayout::Linear,
@ -283,17 +273,14 @@ where
}
},
)?;
debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
debug_assert!(mem.mapped_memory().is_some());
buffer.bind_memory(mem.memory(), mem.offset())?;
debug_assert!((memory.offset() % mem_reqs.alignment) == 0);
debug_assert!(memory.mapped_memory().is_some());
buffer.bind_memory(memory.memory(), memory.offset())?;
Ok(Arc::new(CpuAccessibleBuffer {
inner: buffer,
memory: mem,
access: RwLock::new(CurrentGpuAccess::NonExclusive {
num: AtomicUsize::new(0),
}),
queue_families: queue_families,
memory,
queue_families,
marker: PhantomData,
}))
}
@ -334,22 +321,14 @@ where
/// that uses it in exclusive mode will fail. You can still submit this buffer for non-exclusive
/// accesses (ie. reads).
#[inline]
pub fn read(&self) -> Result<ReadLock<T>, ReadLockError> {
let lock = match self.access.try_read() {
Some(l) => l,
// TODO: if a user simultaneously calls .write(), and write() is currently finding out
// that the buffer is in fact GPU locked, then we will return a CpuWriteLocked
// error instead of a GpuWriteLocked ; is this a problem? how do we fix this?
None => return Err(ReadLockError::CpuWriteLocked),
};
if let CurrentGpuAccess::Exclusive { .. } = *lock {
return Err(ReadLockError::GpuWriteLocked);
}
pub fn read(&self) -> Result<ReadLock<T, A>, ReadLockError> {
let mut state = self.inner.state();
let buffer_range = self.inner().offset..self.inner().offset + self.size();
state.try_cpu_read(buffer_range.clone())?;
let mapped_memory = self.memory.mapped_memory().unwrap();
let offset = self.memory.offset();
let range = offset..offset + self.inner.size();
let memory_range = offset..offset + self.inner.size();
let bytes = unsafe {
// If there are other read locks being held at this point, they also called
@ -357,14 +336,17 @@ where
// lock, so there will be no new data and this call will do nothing.
// TODO: probably still more efficient to call it only if we're the first to acquire a
// read lock, but the number of CPU locks isn't currently tracked anywhere.
mapped_memory.invalidate_range(range.clone()).unwrap();
mapped_memory.read(range.clone()).unwrap()
mapped_memory
.invalidate_range(memory_range.clone())
.unwrap();
mapped_memory.read(memory_range.clone()).unwrap()
};
Ok(ReadLock {
inner: self,
buffer_range,
memory_range,
data: T::from_bytes(bytes).unwrap(),
range,
lock,
})
}
@ -377,35 +359,27 @@ where
/// After this function successfully locks the buffer, any attempt to submit a command buffer
/// that uses it and any attempt to call `read()` will return an error.
#[inline]
pub fn write(&self) -> Result<WriteLock<T>, WriteLockError> {
let lock = match self.access.try_write() {
Some(l) => l,
// TODO: if a user simultaneously calls .read() or .write(), and the function is
// currently finding out that the buffer is in fact GPU locked, then we will
// return a CpuLocked error instead of a GpuLocked ; is this a problem?
// how do we fix this?
None => return Err(WriteLockError::CpuLocked),
};
match *lock {
CurrentGpuAccess::NonExclusive { ref num } if num.load(Ordering::SeqCst) == 0 => (),
_ => return Err(WriteLockError::GpuLocked),
}
pub fn write(&self) -> Result<WriteLock<T, A>, WriteLockError> {
let mut state = self.inner.state();
let buffer_range = self.inner().offset..self.inner().offset + self.size();
state.try_cpu_write(buffer_range.clone())?;
let mapped_memory = self.memory.mapped_memory().unwrap();
let offset = self.memory.offset();
let range = offset..offset + self.inner.size();
let memory_range = offset..offset + self.size();
let bytes = unsafe {
mapped_memory.invalidate_range(range.clone()).unwrap();
mapped_memory.write(range.clone()).unwrap()
mapped_memory
.invalidate_range(memory_range.clone())
.unwrap();
mapped_memory.write(memory_range.clone()).unwrap()
};
Ok(WriteLock {
inner: self,
buffer_range,
memory_range,
data: T::from_bytes_mut(bytes).unwrap(),
mapped_memory,
range,
lock,
})
}
}
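
For context, the user-facing calling pattern of `read()`/`write()` is unchanged by the hunks above; only the bookkeeping moved from a field of `CpuAccessibleBuffer` onto the inner `UnsafeBuffer`'s state map. A minimal usage sketch, assuming a `device` obtained through the usual vulkano setup (`from_data`, `BufferUsage::all()` and the guard types are the existing API):

    use std::sync::Arc;
    use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
    use vulkano::device::Device;

    // Sketch only: `device` is assumed to come from the usual vulkano setup.
    fn cpu_lock_example(device: Arc<Device>) {
        let buffer =
            CpuAccessibleBuffer::from_data(device, BufferUsage::all(), false, 42u32).unwrap();

        {
            // `write()` now takes a CPU write lock on the inner buffer's byte range
            // via `BufferState::try_cpu_write`.
            let mut content = buffer.write().unwrap();
            *content = 7;
            // Dropping the guard flushes the mapped range and calls `cpu_unlock(.., true)`.
        }

        // With the write guard gone, a CPU read lock (`try_cpu_read`) succeeds again.
        let content = buffer.read().unwrap();
        assert_eq!(*content, 7);
    }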
@ -432,99 +406,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
(self.inner.key(), 0)
}
#[inline]
fn try_gpu_lock(&self, exclusive_access: bool, _: &Queue) -> Result<(), AccessError> {
if exclusive_access {
let mut lock = match self.access.try_write() {
Some(lock) => lock,
None => return Err(AccessError::AlreadyInUse),
};
match *lock {
CurrentGpuAccess::NonExclusive { ref num } if num.load(Ordering::SeqCst) == 0 => (),
_ => return Err(AccessError::AlreadyInUse),
};
*lock = CurrentGpuAccess::Exclusive { num: 1 };
Ok(())
} else {
let lock = match self.access.try_read() {
Some(lock) => lock,
None => return Err(AccessError::AlreadyInUse),
};
match *lock {
CurrentGpuAccess::Exclusive { .. } => return Err(AccessError::AlreadyInUse),
CurrentGpuAccess::NonExclusive { ref num } => num.fetch_add(1, Ordering::SeqCst),
};
Ok(())
}
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
// First, handle if we have a non-exclusive access.
{
// Since the buffer is in use by the GPU, it is invalid to hold a write-lock to
// the buffer. The buffer can still be briefly in a write-locked state for the duration
// of the check though.
let read_lock = self.access.read();
if let CurrentGpuAccess::NonExclusive { ref num } = *read_lock {
let prev = num.fetch_add(1, Ordering::SeqCst);
debug_assert!(prev >= 1);
return;
}
}
// If we reach here, this means that `access` contains `CurrentGpuAccess::Exclusive`.
{
// Same remark as above, but for writing.
let mut write_lock = self.access.write();
if let CurrentGpuAccess::Exclusive { ref mut num } = *write_lock {
*num += 1;
} else {
unreachable!()
}
}
}
#[inline]
unsafe fn unlock(&self) {
// First, handle if we had a non-exclusive access.
{
// Since the buffer is in use by the GPU, it is invalid to hold a write-lock to
// the buffer. The buffer can still be briefly in a write-locked state for the duration
// of the check though.
let read_lock = self.access.read();
if let CurrentGpuAccess::NonExclusive { ref num } = *read_lock {
let prev = num.fetch_sub(1, Ordering::SeqCst);
debug_assert!(prev >= 1);
return;
}
}
// If we reach here, this means that `access` contains `CurrentGpuAccess::Exclusive`.
{
// Same remark as above, but for writing.
let mut write_lock = self.access.write();
if let CurrentGpuAccess::Exclusive { ref mut num } = *write_lock {
if *num != 1 {
*num -= 1;
return;
}
} else {
// Can happen if we lock in exclusive mode N times, and unlock N+1 times with the
// last two unlocks happen simultaneously.
panic!()
}
*write_lock = CurrentGpuAccess::NonExclusive {
num: AtomicUsize::new(0),
};
}
}
}
impl<T, A> BufferAccessObject for Arc<CpuAccessibleBuffer<T, A>>
@ -591,18 +472,35 @@ where
/// Note that this object holds a rwlock read guard on the chunk. If another thread tries to access
/// this buffer's content or tries to submit a GPU command that uses this buffer, it will block.
#[derive(Debug)]
pub struct ReadLock<'a, T>
pub struct ReadLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
inner: &'a CpuAccessibleBuffer<T, A>,
buffer_range: Range<DeviceSize>,
memory_range: Range<DeviceSize>,
data: &'a T,
range: Range<DeviceSize>,
lock: RwLockReadGuard<'a, CurrentGpuAccess>,
}
impl<'a, T> Deref for ReadLock<'a, T>
impl<'a, T, A> Drop for ReadLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
#[inline]
fn drop(&mut self) {
unsafe {
let mut state = self.inner.inner.state();
state.cpu_unlock(self.buffer_range.clone(), false);
}
}
}
impl<'a, T, A> Deref for ReadLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
type Target = T;
@ -617,31 +515,42 @@ where
/// Note that this object holds a rwlock write guard on the chunk. If another thread tries to access
/// this buffer's content or tries to submit a GPU command that uses this buffer, it will block.
#[derive(Debug)]
pub struct WriteLock<'a, T>
pub struct WriteLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
inner: &'a CpuAccessibleBuffer<T, A>,
buffer_range: Range<DeviceSize>,
memory_range: Range<DeviceSize>,
data: &'a mut T,
mapped_memory: &'a MappedDeviceMemory,
range: Range<DeviceSize>,
lock: RwLockWriteGuard<'a, CurrentGpuAccess>,
}
impl<'a, T> Drop for WriteLock<'a, T>
impl<'a, T, A> Drop for WriteLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
#[inline]
fn drop(&mut self) {
unsafe {
self.mapped_memory.flush_range(self.range.clone()).unwrap();
self.inner
.memory
.mapped_memory()
.unwrap()
.flush_range(self.memory_range.clone())
.unwrap();
let mut state = self.inner.inner.state();
state.cpu_unlock(self.buffer_range.clone(), true);
}
}
}
impl<'a, T> Deref for WriteLock<'a, T>
impl<'a, T, A> Deref for WriteLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
type Target = T;
@ -651,9 +560,10 @@ where
}
}
impl<'a, T> DerefMut for WriteLock<'a, T>
impl<'a, T, A> DerefMut for WriteLock<'a, T, A>
where
T: BufferContents + ?Sized + 'a,
A: MemoryPoolAlloc,
{
#[inline]
fn deref_mut(&mut self) -> &mut T {

View File

@ -13,7 +13,7 @@ use super::{
BufferUsage, TypedBufferAccess,
};
use crate::{
device::{Device, DeviceOwned, Queue},
device::{Device, DeviceOwned},
memory::{
pool::{
AllocFromRequirementsFilter, AllocLayout, MappingRequirement, MemoryPoolAlloc,
@ -21,7 +21,6 @@ use crate::{
},
DedicatedAllocation, DeviceMemoryAllocationError, MemoryPool,
},
sync::AccessError,
DeviceSize, OomError,
};
use std::{
@ -146,10 +145,6 @@ struct ActualBufferChunk {
// Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer.
num_cpu_accesses: usize,
// Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer and that have been
// GPU-locked.
num_gpu_accesses: usize,
}
/// A subbuffer allocated from a `CpuBufferPool`.
@ -591,13 +586,12 @@ where
index,
len: occupied_len,
num_cpu_accesses: 1,
num_gpu_accesses: 0,
});
Ok(CpuBufferPoolChunk {
// TODO: remove .clone() once non-lexical borrows land
buffer: current_buffer.clone(),
index: index,
index,
align_offset,
requested_len,
marker: PhantomData,
@ -694,61 +688,6 @@ where
},
)
}
#[inline]
fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> {
if self.requested_len == 0 {
return Ok(());
}
let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
let chunk = chunks_in_use_lock
.iter_mut()
.find(|c| c.index == self.index)
.unwrap();
if chunk.num_gpu_accesses != 0 {
return Err(AccessError::AlreadyInUse);
}
chunk.num_gpu_accesses = 1;
Ok(())
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
if self.requested_len == 0 {
return;
}
let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
let chunk = chunks_in_use_lock
.iter_mut()
.find(|c| c.index == self.index)
.unwrap();
debug_assert!(chunk.num_gpu_accesses >= 1);
chunk.num_gpu_accesses = chunk
.num_gpu_accesses
.checked_add(1)
.expect("Overflow in GPU usages");
}
#[inline]
unsafe fn unlock(&self) {
if self.requested_len == 0 {
return;
}
let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
let chunk = chunks_in_use_lock
.iter_mut()
.find(|c| c.index == self.index)
.unwrap();
debug_assert!(chunk.num_gpu_accesses >= 1);
chunk.num_gpu_accesses -= 1;
}
}
impl<T, A> BufferAccessObject for Arc<CpuBufferPoolChunk<T, A>>
@ -783,7 +722,6 @@ where
if chunks_in_use_lock[chunk_num].num_cpu_accesses >= 2 {
chunks_in_use_lock[chunk_num].num_cpu_accesses -= 1;
} else {
debug_assert_eq!(chunks_in_use_lock[chunk_num].num_gpu_accesses, 0);
chunks_in_use_lock.remove(chunk_num);
}
}
@ -874,21 +812,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
self.chunk.conflict_key()
}
#[inline]
fn try_gpu_lock(&self, e: bool, q: &Queue) -> Result<(), AccessError> {
self.chunk.try_gpu_lock(e, q)
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
self.chunk.increase_gpu_lock()
}
#[inline]
unsafe fn unlock(&self) {
self.chunk.unlock()
}
}
impl<T, A> BufferAccessObject for Arc<CpuBufferPoolSubbuffer<T, A>>

View File

@ -19,7 +19,7 @@ use super::{
BufferUsage, TypedBufferAccess,
};
use crate::{
device::{physical::QueueFamily, Device, DeviceOwned, Queue},
device::{physical::QueueFamily, Device, DeviceOwned},
memory::{
pool::{
alloc_dedicated_with_exportable_fd, AllocFromRequirementsFilter, AllocLayout,
@ -28,7 +28,7 @@ use crate::{
DedicatedAllocation, DeviceMemoryAllocationError, DeviceMemoryExportError,
ExternalMemoryHandleType, MemoryPool, MemoryRequirements,
},
sync::{AccessError, Sharing},
sync::Sharing,
DeviceSize,
};
use smallvec::SmallVec;
@ -37,7 +37,7 @@ use std::{
hash::{Hash, Hasher},
marker::PhantomData,
mem::size_of,
sync::{Arc, Mutex},
sync::Arc,
};
/// Buffer whose content is in device-local memory.
@ -62,9 +62,6 @@ where
// Queue families allowed to access this buffer.
queue_families: SmallVec<[u32; 4]>,
// Number of times this buffer is locked on the GPU side.
gpu_lock: Mutex<GpuAccess>,
// Necessary to make it compile.
marker: PhantomData<Box<T>>,
}
@ -160,7 +157,7 @@ where
let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_families)?;
let mem = MemoryPool::alloc_from_requirements(
let memory = MemoryPool::alloc_from_requirements(
&Device::standard_pool(&device),
&mem_reqs,
AllocLayout::Linear,
@ -174,14 +171,13 @@ where
}
},
)?;
debug_assert!((mem.offset() % mem_reqs.alignment) == 0);
buffer.bind_memory(mem.memory(), mem.offset())?;
debug_assert!((memory.offset() % mem_reqs.alignment) == 0);
buffer.bind_memory(memory.memory(), memory.offset())?;
Ok(Arc::new(DeviceLocalBuffer {
inner: buffer,
memory: mem,
queue_families: queue_families,
gpu_lock: Mutex::new(GpuAccess::None),
memory,
queue_families,
marker: PhantomData,
}))
}
@ -210,7 +206,7 @@ where
let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_families)?;
let mem = alloc_dedicated_with_exportable_fd(
let memory = alloc_dedicated_with_exportable_fd(
device.clone(),
&mem_reqs,
AllocLayout::Linear,
@ -224,15 +220,14 @@ where
}
},
)?;
let mem_offset = mem.offset();
let mem_offset = memory.offset();
debug_assert!((mem_offset % mem_reqs.alignment) == 0);
buffer.bind_memory(mem.memory(), mem_offset)?;
buffer.bind_memory(memory.memory(), mem_offset)?;
Ok(Arc::new(DeviceLocalBuffer {
inner: buffer,
memory: mem,
queue_families: queue_families,
gpu_lock: Mutex::new(GpuAccess::None),
memory,
queue_families,
marker: PhantomData,
}))
}
@ -329,72 +324,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
(self.inner.key(), 0)
}
#[inline]
fn try_gpu_lock(&self, exclusive: bool, _: &Queue) -> Result<(), AccessError> {
let mut lock = self.gpu_lock.lock().unwrap();
match &mut *lock {
a @ &mut GpuAccess::None => {
if exclusive {
*a = GpuAccess::Exclusive { num: 1 };
} else {
*a = GpuAccess::NonExclusive { num: 1 };
}
Ok(())
}
&mut GpuAccess::NonExclusive { ref mut num } => {
if exclusive {
Err(AccessError::AlreadyInUse)
} else {
*num += 1;
Ok(())
}
}
&mut GpuAccess::Exclusive { .. } => Err(AccessError::AlreadyInUse),
}
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
let mut lock = self.gpu_lock.lock().unwrap();
match *lock {
GpuAccess::None => panic!(),
GpuAccess::NonExclusive { ref mut num } => {
debug_assert!(*num >= 1);
*num += 1;
}
GpuAccess::Exclusive { ref mut num } => {
debug_assert!(*num >= 1);
*num += 1;
}
}
}
#[inline]
unsafe fn unlock(&self) {
let mut lock = self.gpu_lock.lock().unwrap();
match *lock {
GpuAccess::None => panic!("Tried to unlock a buffer that isn't locked"),
GpuAccess::NonExclusive { ref mut num } => {
assert!(*num >= 1);
*num -= 1;
if *num >= 1 {
return;
}
}
GpuAccess::Exclusive { ref mut num } => {
assert!(*num >= 1);
*num -= 1;
if *num >= 1 {
return;
}
}
};
*lock = GpuAccess::None;
}
}
impl<T, A> BufferAccessObject for Arc<DeviceLocalBuffer<T, A>>

View File

@ -36,7 +36,7 @@ use crate::{
},
DedicatedAllocation, DeviceMemoryAllocationError, MemoryPool,
},
sync::{AccessError, NowFuture, Sharing},
sync::{NowFuture, Sharing},
DeviceSize,
};
use smallvec::SmallVec;
@ -44,10 +44,7 @@ use std::{
hash::{Hash, Hasher},
marker::PhantomData,
mem::size_of,
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
sync::Arc,
};
/// Buffer that is written once then read for as long as it is alive.
@ -62,10 +59,6 @@ where
// Memory allocated for the buffer.
memory: A,
// True if the `ImmutableBufferInitialization` object was used by the GPU then dropped.
// This means that the `ImmutableBuffer` can be used as much as we want without any restriction.
initialized: AtomicBool,
// Queue families allowed to access this buffer.
queue_families: SmallVec<[u32; 4]>,
@ -370,13 +363,11 @@ where
inner: buffer,
memory: mem,
queue_families: queue_families,
initialized: AtomicBool::new(false),
marker: PhantomData,
});
let initialization = Arc::new(ImmutableBufferInitialization {
buffer: final_buf.clone(),
used: Arc::new(AtomicBool::new(false)),
});
Ok((final_buf, initialization))
@ -431,25 +422,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
(self.inner.key(), 0)
}
#[inline]
fn try_gpu_lock(&self, exclusive_access: bool, _: &Queue) -> Result<(), AccessError> {
if exclusive_access {
return Err(AccessError::ExclusiveDenied);
}
if !self.initialized.load(Ordering::Relaxed) {
return Err(AccessError::BufferNotInitialized);
}
Ok(())
}
#[inline]
unsafe fn increase_gpu_lock(&self) {}
#[inline]
unsafe fn unlock(&self) {}
}
impl<T, A> BufferAccessObject for Arc<ImmutableBuffer<T, A>>
@ -518,7 +490,6 @@ where
T: BufferContents + ?Sized,
{
buffer: Arc<ImmutableBuffer<T, A>>,
used: Arc<AtomicBool>,
}
unsafe impl<T, A> BufferAccess for ImmutableBufferInitialization<T, A>
@ -540,33 +511,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
(self.buffer.inner.key(), 0)
}
#[inline]
fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> {
if self.buffer.initialized.load(Ordering::Relaxed) {
return Err(AccessError::AlreadyInUse);
}
if !self
.used
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.unwrap_or_else(|e| e)
{
Ok(())
} else {
Err(AccessError::AlreadyInUse)
}
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
debug_assert!(self.used.load(Ordering::Relaxed));
}
#[inline]
unsafe fn unlock(&self) {
self.buffer.initialized.store(true, Ordering::Relaxed);
}
}
impl<T, A> BufferAccessObject for Arc<ImmutableBufferInitialization<T, A>>
@ -606,7 +550,6 @@ where
fn clone(&self) -> ImmutableBufferInitialization<T, A> {
ImmutableBufferInitialization {
buffer: self.buffer.clone(),
used: self.used.clone(),
}
}
}
@ -720,62 +663,6 @@ mod tests {
}
}
#[test]
fn writing_forbidden() {
let (device, queue) = gfx_dev_and_queue!();
let (buffer, _) =
ImmutableBuffer::from_data(12u32, BufferUsage::all(), queue.clone()).unwrap();
assert_should_panic!({
// TODO: check Result error instead of panicking
let mut cbb = AutoCommandBufferBuilder::primary(
device.clone(),
queue.family(),
CommandBufferUsage::MultipleSubmit,
)
.unwrap();
cbb.fill_buffer(buffer, 50).unwrap();
let _ = cbb
.build()
.unwrap()
.execute(queue.clone())
.unwrap()
.then_signal_fence_and_flush()
.unwrap();
});
}
#[test]
fn read_uninitialized_forbidden() {
let (device, queue) = gfx_dev_and_queue!();
let (buffer, _) = unsafe {
ImmutableBuffer::<u32>::uninitialized(device.clone(), BufferUsage::all()).unwrap()
};
let source =
CpuAccessibleBuffer::from_data(device.clone(), BufferUsage::all(), false, 0).unwrap();
assert_should_panic!({
// TODO: check Result error instead of panicking
let mut cbb = AutoCommandBufferBuilder::primary(
device.clone(),
queue.family(),
CommandBufferUsage::MultipleSubmit,
)
.unwrap();
cbb.copy_buffer(source, buffer).unwrap();
let _ = cbb
.build()
.unwrap()
.execute(queue.clone())
.unwrap()
.then_signal_fence_and_flush()
.unwrap();
});
}
#[test]
fn init_then_read_same_cb() {
let (device, queue) = gfx_dev_and_queue!();

View File

@ -7,23 +7,18 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use super::BufferContents;
use crate::buffer::traits::BufferAccess;
use crate::buffer::traits::BufferAccessObject;
use crate::buffer::traits::BufferInner;
use crate::buffer::traits::TypedBufferAccess;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::sync::AccessError;
use crate::DeviceSize;
use std::hash::Hash;
use std::hash::Hasher;
use std::marker::PhantomData;
use std::mem;
use std::mem::MaybeUninit;
use std::ops::Range;
use std::sync::Arc;
use super::{BufferAccess, BufferAccessObject, BufferContents, BufferInner, TypedBufferAccess};
use crate::{
device::{Device, DeviceOwned},
DeviceSize,
};
use std::{
hash::{Hash, Hasher},
marker::PhantomData,
mem::{size_of, size_of_val, MaybeUninit},
ops::Range,
sync::Arc,
};
/// A subpart of a buffer.
///
@ -123,7 +118,7 @@ impl<T: ?Sized, B> BufferSlice<T, B> {
{
let data: MaybeUninit<&T> = MaybeUninit::zeroed();
let result = f(data.assume_init());
let size = mem::size_of_val(result) as DeviceSize;
let size = size_of_val(result) as DeviceSize;
let result = result as *const R as *const () as DeviceSize;
assert!(result <= self.size());
@ -173,8 +168,8 @@ impl<T, B> BufferSlice<[T], B> {
/// Returns the number of elements in this slice.
#[inline]
pub fn len(&self) -> DeviceSize {
debug_assert_eq!(self.size() % mem::size_of::<T>() as DeviceSize, 0);
self.size() / mem::size_of::<T>() as DeviceSize
debug_assert_eq!(self.size() % size_of::<T>() as DeviceSize, 0);
self.size() / size_of::<T>() as DeviceSize
}
/// Reduces the slice to just one element of the array.
@ -189,8 +184,8 @@ impl<T, B> BufferSlice<[T], B> {
Some(Arc::new(BufferSlice {
marker: PhantomData,
resource: self.resource.clone(),
offset: self.offset + index * mem::size_of::<T>() as DeviceSize,
size: mem::size_of::<T>() as DeviceSize,
offset: self.offset + index * size_of::<T>() as DeviceSize,
size: size_of::<T>() as DeviceSize,
}))
}
@ -206,8 +201,8 @@ impl<T, B> BufferSlice<[T], B> {
Some(Arc::new(BufferSlice {
marker: PhantomData,
resource: self.resource.clone(),
offset: self.offset + range.start * mem::size_of::<T>() as DeviceSize,
size: (range.end - range.start) * mem::size_of::<T>() as DeviceSize,
offset: self.offset + range.start * size_of::<T>() as DeviceSize,
size: (range.end - range.start) * size_of::<T>() as DeviceSize,
}))
}
}
@ -235,21 +230,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
self.resource.conflict_key()
}
#[inline]
fn try_gpu_lock(&self, exclusive_access: bool, queue: &Queue) -> Result<(), AccessError> {
self.resource.try_gpu_lock(exclusive_access, queue)
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
self.resource.increase_gpu_lock()
}
#[inline]
unsafe fn unlock(&self) {
self.resource.unlock()
}
}
impl<T, B> BufferAccessObject for Arc<BufferSlice<T, B>>

View File

@ -24,35 +24,40 @@
//! sparse binding.
//! - Type safety.
use crate::check_errors;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::memory::DeviceMemory;
use crate::memory::DeviceMemoryAllocationError;
use crate::memory::MemoryRequirements;
use crate::sync::Sharing;
use crate::DeviceSize;
use crate::Error;
use crate::OomError;
use crate::VulkanObject;
use crate::{buffer::BufferUsage, Version};
use super::{
cpu_access::{ReadLockError, WriteLockError},
BufferUsage,
};
use crate::{
check_errors,
device::{Device, DeviceOwned},
memory::{DeviceMemory, DeviceMemoryAllocationError, MemoryRequirements},
sync::{AccessError, CurrentAccess, Sharing},
DeviceSize, Error, OomError, Version, VulkanObject,
};
use ash::vk::Handle;
use parking_lot::{Mutex, MutexGuard};
use rangemap::RangeMap;
use smallvec::SmallVec;
use std::error;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
use std::mem::MaybeUninit;
use std::ptr;
use std::sync::Arc;
use std::{
error, fmt,
hash::{Hash, Hasher},
mem::MaybeUninit,
ops::Range,
ptr,
sync::Arc,
};
/// Data storage in a GPU-accessible location.
#[derive(Debug)]
pub struct UnsafeBuffer {
handle: ash::vk::Buffer,
device: Arc<Device>,
size: DeviceSize,
usage: BufferUsage,
state: Mutex<BufferState>,
}
impl UnsafeBuffer {
@ -167,8 +172,11 @@ impl UnsafeBuffer {
let buffer = UnsafeBuffer {
handle,
device,
size,
usage,
state: Mutex::new(BufferState::new(size)),
};
Ok(buffer)
@ -313,6 +321,10 @@ impl UnsafeBuffer {
Ok(())
}
pub(crate) fn state(&self) -> MutexGuard<BufferState> {
self.state.lock()
}
/// Returns the size of the buffer in bytes.
#[inline]
pub fn size(&self) -> DeviceSize {
@ -527,6 +539,273 @@ impl From<SparseLevel> for ash::vk::BufferCreateFlags {
}
}
/// The current state of a buffer.
#[derive(Debug)]
pub(crate) struct BufferState {
ranges: RangeMap<DeviceSize, BufferRangeState>,
}
impl BufferState {
fn new(size: DeviceSize) -> Self {
BufferState {
ranges: [(
0..size,
BufferRangeState {
current_access: CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
},
},
)]
.into_iter()
.collect(),
}
}
pub(crate) fn try_cpu_read(&mut self, range: Range<DeviceSize>) -> Result<(), ReadLockError> {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::CpuExclusive { .. } => return Err(ReadLockError::CpuWriteLocked),
CurrentAccess::GpuExclusive { .. } => return Err(ReadLockError::GpuWriteLocked),
CurrentAccess::Shared { .. } => (),
}
}
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::Shared { cpu_reads, .. } => {
*cpu_reads += 1;
}
_ => unreachable!(),
}
}
Ok(())
}
pub(crate) fn try_cpu_write(&mut self, range: Range<DeviceSize>) -> Result<(), WriteLockError> {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::CpuExclusive => return Err(WriteLockError::CpuLocked),
CurrentAccess::GpuExclusive { .. } => return Err(WriteLockError::GpuLocked),
CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
} => (),
CurrentAccess::Shared { cpu_reads, .. } if *cpu_reads > 0 => {
return Err(WriteLockError::CpuLocked)
}
CurrentAccess::Shared { .. } => return Err(WriteLockError::GpuLocked),
}
}
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
state.current_access = CurrentAccess::CpuExclusive;
}
Ok(())
}
pub(crate) unsafe fn cpu_unlock(&mut self, range: Range<DeviceSize>, write: bool) {
if write {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
state.current_access = CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
}
}
CurrentAccess::GpuExclusive { .. } => {
unreachable!("Buffer is being written by the GPU")
}
CurrentAccess::Shared { .. } => {
unreachable!("Buffer is not being written by the CPU")
}
}
}
} else {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Buffer is being written by the CPU")
}
CurrentAccess::GpuExclusive { .. } => {
unreachable!("Buffer is being written by the GPU")
}
CurrentAccess::Shared { cpu_reads, .. } => *cpu_reads -= 1,
}
}
}
}
/// Locks the resource for usage on the GPU. Returns an error if the lock can't be acquired.
///
/// This function exists to prevent the user from causing a data race by reading and writing
/// to the same resource at the same time.
///
/// If you call this function, you should call `gpu_unlock` once the resource is no longer in
/// use by the GPU. The implementation is not expected to automatically perform any unlocking
/// and can rely on the fact that `gpu_unlock` is going to be called.
pub(crate) fn try_gpu_lock(
&mut self,
range: Range<DeviceSize>,
write: bool,
) -> Result<(), AccessError> {
if write {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
} => (),
_ => return Err(AccessError::AlreadyInUse),
}
}
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
state.current_access = CurrentAccess::GpuExclusive {
gpu_reads: 0,
gpu_writes: 1,
};
}
} else {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::Shared { .. } => (),
_ => return Err(AccessError::AlreadyInUse),
}
}
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::Shared { gpu_reads, .. } => *gpu_reads += 1,
_ => unreachable!(),
}
}
}
Ok(())
}
/// Locks the resource for usage on the GPU without checking for errors. Supposes that a
/// future has already granted access to the resource.
///
/// If you call this function, you should call `gpu_unlock` once the resource is no longer in
/// use by the GPU. The implementation is not expected to automatically perform any unlocking
/// and can rely on the fact that `gpu_unlock` is going to be called.
pub(crate) unsafe fn increase_gpu_lock(&mut self, range: Range<DeviceSize>, write: bool) {
if write {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Buffer is being written by the CPU")
}
CurrentAccess::GpuExclusive { gpu_writes, .. } => *gpu_writes += 1,
&mut CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads,
} => {
state.current_access = CurrentAccess::GpuExclusive {
gpu_reads,
gpu_writes: 1,
}
}
CurrentAccess::Shared { .. } => {
unreachable!("Buffer is being read by the CPU")
}
}
}
} else {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Buffer is being written by the CPU")
}
CurrentAccess::GpuExclusive { gpu_reads, .. }
| CurrentAccess::Shared { gpu_reads, .. } => *gpu_reads += 1,
}
}
}
}
/// Unlocks the resource previously acquired with `try_gpu_lock` or `increase_gpu_lock`.
///
/// # Safety
///
/// Must only be called once per previous lock.
pub(crate) unsafe fn gpu_unlock(&mut self, range: Range<DeviceSize>, write: bool) {
if write {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Buffer is being written by the CPU")
}
&mut CurrentAccess::GpuExclusive {
gpu_reads,
gpu_writes: 1,
} => {
state.current_access = CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads,
}
}
CurrentAccess::GpuExclusive { gpu_writes, .. } => *gpu_writes -= 1,
CurrentAccess::Shared { .. } => {
unreachable!("Buffer is not being written by the GPU")
}
}
}
} else {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Buffer is being written by the CPU")
}
CurrentAccess::GpuExclusive { gpu_reads, .. } => *gpu_reads -= 1,
CurrentAccess::Shared { gpu_reads, .. } => *gpu_reads -= 1,
}
}
}
}
}
/// The current state of a specific range of bytes in a buffer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct BufferRangeState {
current_access: CurrentAccess,
}
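
Taken together, the methods above define the locking protocol the rest of this commit relies on: CPU accessors pair `try_cpu_read`/`try_cpu_write` with `cpu_unlock`, while `SyncCommandBuffer` pairs `try_gpu_lock`/`increase_gpu_lock` with `gpu_unlock` over `inner.offset..inner.offset + buffer.size()`. A condensed, crate-internal sketch of that pairing (the `buffer` and its size are assumed):

    // Crate-internal sketch (`state()` and `BufferState` are `pub(crate)`),
    // assuming `buffer: &UnsafeBuffer` of size 1024 bytes.
    fn lock_protocol(buffer: &UnsafeBuffer) {
        let range = 0..1024;

        // CPU side, mirroring `CpuAccessibleBuffer::write` and `WriteLock::drop`:
        buffer.state().try_cpu_write(range.clone()).unwrap(); // WriteLockError if in use
        // ... write through the mapped memory here ...
        unsafe { buffer.state().cpu_unlock(range.clone(), true) };

        // GPU side, mirroring `SyncCommandBuffer::lock` and `unlock`:
        buffer.state().try_gpu_lock(range.clone(), true).unwrap(); // AccessError if in use
        // ... submit the work and wait for the GPU here ...
        unsafe { buffer.state().gpu_unlock(range, true) };
    }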
#[cfg(test)]
mod tests {
use super::BufferCreationError;

View File

@ -7,22 +7,15 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use super::BufferContents;
use crate::buffer::sys::UnsafeBuffer;
use crate::buffer::BufferSlice;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::sync::AccessError;
use crate::DeviceSize;
use crate::SafeDeref;
use crate::VulkanObject;
use std::error;
use std::fmt;
use std::hash::Hash;
use std::hash::Hasher;
use std::num::NonZeroU64;
use std::ops::Range;
use std::sync::Arc;
use super::{sys::UnsafeBuffer, BufferContents, BufferSlice};
use crate::{device::DeviceOwned, DeviceSize, SafeDeref, VulkanObject};
use std::{
error, fmt,
hash::{Hash, Hasher},
num::NonZeroU64,
ops::Range,
sync::Arc,
};
/// Trait for objects that represent a way for the GPU to have access to a buffer or a slice of a
/// buffer.
@ -81,33 +74,6 @@ pub unsafe trait BufferAccess: DeviceOwned + Send + Sync {
/// verify whether they actually overlap.
fn conflict_key(&self) -> (u64, u64);
/// Locks the resource for usage on the GPU. Returns an error if the lock can't be acquired.
///
/// This function exists to prevent the user from causing a data race by reading and writing
/// to the same resource at the same time.
///
/// If you call this function, you should call `unlock()` once the resource is no longer in use
/// by the GPU. The implementation is not expected to automatically perform any unlocking and
/// can rely on the fact that `unlock()` is going to be called.
fn try_gpu_lock(&self, exclusive_access: bool, queue: &Queue) -> Result<(), AccessError>;
/// Locks the resource for usage on the GPU. Supposes that the resource is already locked, and
/// simply increases the lock by one.
///
/// Must only be called after `try_gpu_lock()` succeeded.
///
/// If you call this function, you should call `unlock()` once the resource is no longer in use
/// by the GPU. The implementation is not expected to automatically perform any unlocking and
/// can rely on the fact that `unlock()` is going to be called.
unsafe fn increase_gpu_lock(&self);
/// Unlocks the resource previously acquired with `try_gpu_lock` or `increase_gpu_lock`.
///
/// # Safety
///
/// Must only be called once per previous lock.
unsafe fn unlock(&self);
/// Gets the device address for this buffer.
///
/// # Safety
@ -187,21 +153,6 @@ where
fn conflict_key(&self) -> (u64, u64) {
(**self).conflict_key()
}
#[inline]
fn try_gpu_lock(&self, exclusive_access: bool, queue: &Queue) -> Result<(), AccessError> {
(**self).try_gpu_lock(exclusive_access, queue)
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
(**self).increase_gpu_lock()
}
#[inline]
unsafe fn unlock(&self) {
(**self).unlock()
}
}
/// Extension trait for `BufferAccess`. Indicates the type of the content of the buffer.

View File

@ -414,6 +414,10 @@ impl SyncCommandBufferBuilder {
if initial_layout_requirement != start_layout
|| !is_layout_initialized
{
// A layout transition is a write, so if we perform one, we need
// exclusive access.
actually_exclusive = true;
// Note that we transition from `bottom_of_pipe`, which means that we
// wait for all the previous commands to be entirely finished. This is
// suboptimal, but:
@ -426,14 +430,15 @@ impl SyncCommandBufferBuilder {
//
unsafe {
let from_layout = if is_layout_initialized {
actually_exclusive = true;
if initial_layout_requirement != start_layout {
actual_start_layout = initial_layout_requirement;
}
initial_layout_requirement
} else {
actual_start_layout = image.initial_layout();
image.initial_layout()
};
if initial_layout_requirement != start_layout {
actual_start_layout = initial_layout_requirement;
}
let b = &mut self.pending_barrier;
b.add_image_memory_barrier(
image.as_ref(),

View File

@ -135,21 +135,36 @@ impl SyncCommandBuffer {
match &resource_use.resource {
KeyTy::Buffer(buffer) => {
// Can happen with `CpuBufferPool`
if buffer.size() == 0 {
continue;
}
let inner = buffer.inner();
let mut buffer_state = inner.buffer.state();
// Because try_gpu_lock needs to be called first,
// this should never return Ok without first returning Err
let prev_err =
match future.check_buffer_access(buffer.as_ref(), state.exclusive, queue) {
Ok(_) => {
unsafe {
buffer.increase_gpu_lock();
}
Ok(_) => unsafe {
buffer_state.increase_gpu_lock(
inner.offset..inner.offset + buffer.size(),
state.exclusive,
);
locked_resources += 1;
continue;
}
},
Err(err) => err,
};
match (buffer.try_gpu_lock(state.exclusive, queue), prev_err) {
match (
buffer_state.try_gpu_lock(
inner.offset..inner.offset + buffer.size(),
state.exclusive,
),
prev_err,
) {
(Ok(_), _) => (),
(Err(err), AccessCheckError::Unknown)
| (_, AccessCheckError::Denied(err)) => {
@ -167,27 +182,37 @@ impl SyncCommandBuffer {
}
KeyTy::Image(image) => {
let inner = image.inner();
let mut image_state = inner.image.state();
let prev_err = match future.check_image_access(
image.as_ref(),
state.initial_layout,
state.exclusive,
queue,
) {
Ok(_) => {
unsafe {
image.increase_gpu_lock();
}
Ok(_) => unsafe {
image_state.increase_gpu_lock(
inner.image.format().unwrap().aspects(),
image.current_mip_levels_access(),
image.current_array_layers_access(),
state.exclusive,
state.final_layout,
);
locked_resources += 1;
continue;
}
},
Err(err) => err,
};
match (
image.try_gpu_lock(
image_state.try_gpu_lock(
inner.image.format().unwrap().aspects(),
image.current_mip_levels_access(),
image.current_array_layers_access(),
state.exclusive,
state.image_uninitialized_safe.is_safe(),
state.initial_layout,
state.final_layout,
),
prev_err,
) {
@ -218,18 +243,23 @@ impl SyncCommandBuffer {
match &resource_use.resource {
KeyTy::Buffer(buffer) => unsafe {
buffer.unlock();
let inner = buffer.inner();
let mut buffer_state = inner.buffer.state();
buffer_state.gpu_unlock(
inner.offset..inner.offset + buffer.size(),
state.exclusive,
);
},
KeyTy::Image(image) => unsafe {
let inner = image.inner();
let mut image_state = inner.image.state();
image_state.gpu_unlock(
inner.image.format().unwrap().aspects(),
image.current_mip_levels_access(),
image.current_array_layers_access(),
state.exclusive,
)
},
KeyTy::Image(image) => {
let trans = if state.final_layout != state.initial_layout {
Some(state.final_layout)
} else {
None
};
unsafe {
image.unlock(trans);
}
}
}
}
}
@ -253,15 +283,20 @@ impl SyncCommandBuffer {
match &resource_use.resource {
KeyTy::Buffer(buffer) => {
buffer.unlock();
let inner = buffer.inner();
let mut buffer_state = inner.buffer.state();
buffer_state
.gpu_unlock(inner.offset..inner.offset + buffer.size(), state.exclusive);
}
KeyTy::Image(image) => {
let trans = if state.final_layout != state.initial_layout {
Some(state.final_layout)
} else {
None
};
image.unlock(trans);
let inner = image.inner();
let mut image_state = inner.image.state();
image_state.gpu_unlock(
inner.image.format().unwrap().aspects(),
image.current_mip_levels_access(),
image.current_array_layers_access(),
state.exclusive,
)
}
}
}

View File

@ -12,7 +12,7 @@ use std::ops::BitOr;
/// An individual data type within an image.
///
/// Most images have only the `Color` aspect, but some may have several.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
#[repr(u32)]
pub enum ImageAspect {
Color = ash::vk::ImageAspectFlags::COLOR.as_raw(),
@ -92,6 +92,36 @@ impl ImageAspects {
&& (memory_plane1 || !other.memory_plane1)
&& (memory_plane2 || !other.memory_plane2)
}
pub fn iter(&self) -> impl Iterator<Item = ImageAspect> {
let Self {
color,
depth,
stencil,
metadata,
plane0,
plane1,
plane2,
memory_plane0,
memory_plane1,
memory_plane2,
} = *self;
[
color.then(|| ImageAspect::Color),
depth.then(|| ImageAspect::Depth),
stencil.then(|| ImageAspect::Stencil),
metadata.then(|| ImageAspect::Metadata),
plane0.then(|| ImageAspect::Plane0),
plane1.then(|| ImageAspect::Plane1),
plane2.then(|| ImageAspect::Plane2),
memory_plane0.then(|| ImageAspect::MemoryPlane0),
memory_plane1.then(|| ImageAspect::MemoryPlane1),
memory_plane2.then(|| ImageAspect::MemoryPlane2),
]
.into_iter()
.flatten()
}
}
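
The new `iter()` above is what later lets the image state key its range map per aspect. A small usage sketch; `ImageAspects::none()` is assumed here by analogy with the other flag structs in vulkano:

    use vulkano::image::{ImageAspect, ImageAspects};

    // Sketch: iterating the aspects of a combined depth/stencil image.
    fn aspect_iter_example() {
        let aspects = ImageAspects {
            depth: true,
            stencil: true,
            ..ImageAspects::none()
        };
        // Aspects come out in declaration order: depth first, then stencil.
        assert_eq!(
            aspects.iter().collect::<Vec<_>>(),
            [ImageAspect::Depth, ImageAspect::Stencil]
        );
    }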
impl BitOr for ImageAspects {

View File

@ -7,41 +7,34 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use crate::device::Device;
use crate::format::ClearValue;
use crate::format::Format;
use crate::image::sys::ImageCreationError;
use crate::image::sys::UnsafeImage;
use crate::image::sys::UnsafeImageCreateInfo;
use crate::image::traits::ImageAccess;
use crate::image::traits::ImageClearValue;
use crate::image::traits::ImageContent;
use crate::image::ImageDescriptorLayouts;
use crate::image::ImageDimensions;
use crate::image::ImageInner;
use crate::image::ImageLayout;
use crate::image::ImageUsage;
use crate::image::SampleCount;
use crate::memory::pool::alloc_dedicated_with_exportable_fd;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPoolAlloc;
use crate::memory::DedicatedAllocation;
use crate::memory::ExternalMemoryHandleType;
use crate::memory::{DeviceMemoryExportError, ExternalMemoryHandleTypes};
use crate::sync::AccessError;
use crate::DeviceSize;
use std::fs::File;
use std::hash::Hash;
use std::hash::Hasher;
use std::sync::atomic::AtomicBool;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use super::{
sys::UnsafeImage,
traits::{ImageClearValue, ImageContent},
ImageAccess, ImageCreationError, ImageDescriptorLayouts, ImageInner, ImageLayout, ImageUsage,
SampleCount,
};
use crate::{
device::Device,
format::{ClearValue, Format},
image::{sys::UnsafeImageCreateInfo, ImageDimensions},
memory::{
pool::{
alloc_dedicated_with_exportable_fd, AllocFromRequirementsFilter, AllocLayout,
MappingRequirement, MemoryPoolAlloc, PotentialDedicatedAllocation, StdMemoryPoolAlloc,
},
DedicatedAllocation, DeviceMemoryExportError, ExternalMemoryHandleType,
ExternalMemoryHandleTypes, MemoryPool,
},
DeviceSize,
};
use std::{
fs::File,
hash::{Hash, Hasher},
sync::{
atomic::{AtomicBool, AtomicUsize, Ordering},
Arc,
},
};
/// ImageAccess whose purpose is to be used as a framebuffer attachment.
///
@ -598,64 +591,6 @@ where
self.image.key()
}
#[inline]
fn try_gpu_lock(
&self,
_: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
if expected_layout != self.attachment_layout && expected_layout != ImageLayout::Undefined {
if self.initialized.load(Ordering::SeqCst) {
return Err(AccessError::UnexpectedImageLayout {
requested: expected_layout,
allowed: self.attachment_layout,
});
} else {
return Err(AccessError::UnexpectedImageLayout {
requested: expected_layout,
allowed: ImageLayout::Undefined,
});
}
}
if !uninitialized_safe && expected_layout != ImageLayout::Undefined {
if !self.initialized.load(Ordering::SeqCst) {
return Err(AccessError::ImageNotInitialized {
requested: expected_layout,
});
}
}
if self
.gpu_lock
.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst)
.unwrap_or_else(|e| e)
== 0
{
Ok(())
} else {
Err(AccessError::AlreadyInUse)
}
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
let val = self.gpu_lock.fetch_add(1, Ordering::SeqCst);
debug_assert!(val >= 1);
}
#[inline]
unsafe fn unlock(&self, new_layout: Option<ImageLayout>) {
if let Some(new_layout) = new_layout {
debug_assert_eq!(new_layout, self.attachment_layout);
self.initialized.store(true, Ordering::SeqCst);
}
let prev_val = self.gpu_lock.fetch_sub(1, Ordering::SeqCst);
debug_assert!(prev_val >= 1);
}
#[inline]
unsafe fn layout_initialized(&self) {
self.initialized.store(true, Ordering::SeqCst);

View File

@ -28,16 +28,13 @@ use crate::{
DedicatedAllocation, MemoryPool,
},
sampler::Filter,
sync::{AccessError, NowFuture, Sharing},
sync::{NowFuture, Sharing},
};
use smallvec::SmallVec;
use std::{
hash::{Hash, Hasher},
ops::Range,
sync::{
atomic::{AtomicBool, Ordering},
Arc,
},
sync::Arc,
};
/// Image whose purpose is to be used for read-only purposes. You can write to the image once,
@ -49,7 +46,6 @@ pub struct ImmutableImage<A = PotentialDedicatedAllocation<StdMemoryPoolAlloc>>
dimensions: ImageDimensions,
memory: A,
format: Format,
initialized: AtomicBool,
layout: ImageLayout,
}
@ -96,7 +92,6 @@ impl SubImage {
// Must not implement Clone, as that would lead to multiple `used` values.
pub struct ImmutableImageInitialization<A = PotentialDedicatedAllocation<StdMemoryPoolAlloc>> {
image: Arc<ImmutableImage<A>>,
used: AtomicBool,
mip_levels_access: std::ops::Range<u32>,
array_layers_access: std::ops::Range<u32>,
}
@ -203,7 +198,6 @@ impl ImmutableImage {
ImageLayout::ShaderReadOnlyOptimal,
queue_families,
)?;
image.initialized.store(true, Ordering::Relaxed); // Allow uninitialized access for backwards compatibility
Ok(image)
}
@ -278,13 +272,11 @@ impl ImmutableImage {
memory,
dimensions,
format,
initialized: AtomicBool::new(false),
layout,
});
let init = Arc::new(ImmutableImageInitialization {
image: image.clone(),
used: AtomicBool::new(false),
mip_levels_access: 0..image.mip_levels(),
array_layers_access: 0..image.dimensions().array_layers(),
});
@ -390,8 +382,6 @@ impl ImmutableImage {
Err(e) => unreachable!("{:?}", e),
};
image.initialized.store(true, Ordering::Relaxed);
Ok((image, future))
}
}
@ -441,39 +431,6 @@ where
self.image.key()
}
#[inline]
fn try_gpu_lock(
&self,
exclusive_access: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
if expected_layout != self.layout && expected_layout != ImageLayout::Undefined {
return Err(AccessError::UnexpectedImageLayout {
requested: expected_layout,
allowed: self.layout,
});
}
if exclusive_access {
return Err(AccessError::ExclusiveDenied);
}
if !self.initialized.load(Ordering::Relaxed) {
return Err(AccessError::BufferNotInitialized);
}
Ok(())
}
#[inline]
unsafe fn increase_gpu_lock(&self) {}
#[inline]
unsafe fn unlock(&self, new_layout: Option<ImageLayout>) {
debug_assert!(new_layout.is_none());
}
#[inline]
fn current_mip_levels_access(&self) -> std::ops::Range<u32> {
0..self.mip_levels()
@ -528,33 +485,6 @@ unsafe impl ImageAccess for SubImage {
fn conflict_key(&self) -> u64 {
self.image.conflict_key()
}
#[inline]
fn try_gpu_lock(
&self,
exclusive_access: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
if expected_layout != self.layout && expected_layout != ImageLayout::Undefined {
return Err(AccessError::UnexpectedImageLayout {
requested: expected_layout,
allowed: self.layout,
});
}
Ok(())
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
self.image.increase_gpu_lock()
}
#[inline]
unsafe fn unlock(&self, new_layout: Option<ImageLayout>) {
self.image.unlock(new_layout)
}
}
impl<A> PartialEq for ImmutableImage<A>
@ -608,47 +538,6 @@ where
self.image.image.key()
}
#[inline]
fn try_gpu_lock(
&self,
_: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
if expected_layout != ImageLayout::Undefined {
return Err(AccessError::UnexpectedImageLayout {
requested: expected_layout,
allowed: ImageLayout::Undefined,
});
}
if self.image.initialized.load(Ordering::Relaxed) {
return Err(AccessError::AlreadyInUse);
}
// FIXME: Mipmapped textures require multiple writes to initialize
if !self
.used
.compare_exchange(false, true, Ordering::Relaxed, Ordering::Relaxed)
.unwrap_or_else(|e| e)
{
Ok(())
} else {
Err(AccessError::AlreadyInUse)
}
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
debug_assert!(self.used.load(Ordering::Relaxed));
}
#[inline]
unsafe fn unlock(&self, new_layout: Option<ImageLayout>) {
assert_eq!(new_layout, Some(self.image.layout));
self.image.initialized.store(true, Ordering::Relaxed);
}
#[inline]
fn current_mip_levels_access(&self) -> std::ops::Range<u32> {
self.mip_levels_access.clone()

View File

@ -7,43 +7,33 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use crate::device::physical::QueueFamily;
use crate::device::Device;
use crate::format::ClearValue;
use crate::format::Format;
use crate::image::sys::ImageCreationError;
use crate::image::sys::UnsafeImage;
use crate::image::sys::UnsafeImageCreateInfo;
use crate::image::traits::ImageAccess;
use crate::image::traits::ImageClearValue;
use crate::image::traits::ImageContent;
use crate::image::ImageCreateFlags;
use crate::image::ImageDescriptorLayouts;
use crate::image::ImageDimensions;
use crate::image::ImageInner;
use crate::image::ImageLayout;
use crate::image::ImageUsage;
use crate::memory::pool::alloc_dedicated_with_exportable_fd;
use crate::memory::pool::AllocFromRequirementsFilter;
use crate::memory::pool::AllocLayout;
use crate::memory::pool::MappingRequirement;
use crate::memory::pool::MemoryPool;
use crate::memory::pool::MemoryPoolAlloc;
use crate::memory::pool::PotentialDedicatedAllocation;
use crate::memory::pool::StdMemoryPool;
use crate::memory::DedicatedAllocation;
use crate::memory::ExternalMemoryHandleType;
use crate::memory::{DeviceMemoryExportError, ExternalMemoryHandleTypes};
use crate::sync::AccessError;
use crate::sync::Sharing;
use crate::DeviceSize;
use super::{
sys::UnsafeImage,
traits::{ImageClearValue, ImageContent},
ImageAccess, ImageCreateFlags, ImageCreationError, ImageDescriptorLayouts, ImageDimensions,
ImageInner, ImageLayout, ImageUsage,
};
use crate::{
device::{physical::QueueFamily, Device},
format::{ClearValue, Format},
image::sys::UnsafeImageCreateInfo,
memory::{
pool::{
alloc_dedicated_with_exportable_fd, AllocFromRequirementsFilter, AllocLayout,
MappingRequirement, MemoryPoolAlloc, PotentialDedicatedAllocation, StdMemoryPool,
},
DedicatedAllocation, DeviceMemoryExportError, ExternalMemoryHandleType,
ExternalMemoryHandleTypes, MemoryPool,
},
sync::Sharing,
DeviceSize,
};
use smallvec::SmallVec;
use std::fs::File;
use std::hash::Hash;
use std::hash::Hasher;
use std::sync::atomic::AtomicUsize;
use std::sync::atomic::Ordering;
use std::sync::Arc;
use std::{
fs::File,
hash::{Hash, Hasher},
sync::Arc,
};
/// General-purpose image in device memory. Can be used for any usage, but will be slower than a
/// specialized image.
@ -66,9 +56,6 @@ where
// Queue families allowed to access this image.
queue_families: SmallVec<[u32; 4]>,
// Number of times this image is locked on the GPU side.
gpu_lock: AtomicUsize,
}
impl StorageImage {
@ -167,7 +154,6 @@ impl StorageImage {
dimensions,
format,
queue_families,
gpu_lock: AtomicUsize::new(0),
}))
}
@ -236,7 +222,6 @@ impl StorageImage {
dimensions,
format,
queue_families,
gpu_lock: AtomicUsize::new(0),
}))
}
@ -294,44 +279,6 @@ where
self.image.key()
}
#[inline]
fn try_gpu_lock(
&self,
_: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
// TODO: handle initial layout transition
if expected_layout != ImageLayout::General && expected_layout != ImageLayout::Undefined {
return Err(AccessError::UnexpectedImageLayout {
requested: expected_layout,
allowed: ImageLayout::General,
});
}
let val = self
.gpu_lock
.compare_exchange(0, 1, Ordering::SeqCst, Ordering::SeqCst)
.unwrap_or_else(|e| e);
if val == 0 {
Ok(())
} else {
Err(AccessError::AlreadyInUse)
}
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
let val = self.gpu_lock.fetch_add(1, Ordering::SeqCst);
debug_assert!(val >= 1);
}
#[inline]
unsafe fn unlock(&self, new_layout: Option<ImageLayout>) {
assert!(new_layout.is_none() || new_layout == Some(ImageLayout::General));
self.gpu_lock.fetch_sub(1, Ordering::SeqCst);
}
#[inline]
fn current_mip_levels_access(&self) -> std::ops::Range<u32> {
0..self.mip_levels()

View File

@ -7,19 +7,15 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use crate::format::ClearValue;
use crate::image::traits::ImageAccess;
use crate::image::traits::ImageClearValue;
use crate::image::traits::ImageContent;
use crate::image::ImageDescriptorLayouts;
use crate::image::ImageInner;
use crate::image::ImageLayout;
use crate::swapchain::Swapchain;
use crate::sync::AccessError;
use crate::OomError;
use std::hash::Hash;
use std::hash::Hasher;
use std::sync::Arc;
use super::{
traits::{ImageClearValue, ImageContent},
ImageAccess, ImageDescriptorLayouts, ImageInner, ImageLayout,
};
use crate::{format::ClearValue, swapchain::Swapchain, OomError};
use std::{
hash::{Hash, Hasher},
sync::Arc,
};
/// An image that is part of a swapchain.
///
@ -113,16 +109,6 @@ where
self.my_image().image.key()
}
#[inline]
fn try_gpu_lock(&self, _: bool, _: bool, _: ImageLayout) -> Result<(), AccessError> {
if self.swapchain.is_full_screen_exclusive() {
Ok(())
} else {
// Swapchain images are only accessible after being acquired.
Err(AccessError::SwapchainImageAcquireOnly)
}
}
#[inline]
unsafe fn layout_initialized(&self) {
self.layout_initialized();
@ -133,14 +119,6 @@ where
self.is_layout_initialized()
}
#[inline]
unsafe fn increase_gpu_lock(&self) {}
#[inline]
unsafe fn unlock(&self, _: Option<ImageLayout>) {
// TODO: store that the image was initialized
}
#[inline]
fn current_mip_levels_access(&self) -> std::ops::Range<u32> {
0..self.mip_levels()

View File

@ -14,10 +14,11 @@
//! that you create must wrap around the types in this module.
use super::{
ImageAspect, ImageCreateFlags, ImageDimensions, ImageLayout, ImageTiling, ImageUsage,
SampleCount, SampleCounts,
ImageAspect, ImageAspects, ImageCreateFlags, ImageDimensions, ImageLayout, ImageTiling,
ImageUsage, SampleCount, SampleCounts,
};
use crate::{
buffer::cpu_access::{ReadLockError, WriteLockError},
check_errors,
device::{Device, DeviceOwned},
format::{ChromaSampling, Format, FormatFeatures, NumericType},
@ -26,15 +27,19 @@ use crate::{
DeviceMemory, DeviceMemoryAllocationError, ExternalMemoryHandleType,
ExternalMemoryHandleTypes, MemoryRequirements,
},
sync::Sharing,
sync::{AccessError, CurrentAccess, Sharing},
DeviceSize, Error, OomError, Version, VulkanObject,
};
use ash::vk::Handle;
use parking_lot::{Mutex, MutexGuard};
use rangemap::RangeMap;
use smallvec::{smallvec, SmallVec};
use std::{
error, fmt,
hash::{Hash, Hasher},
iter::{FusedIterator, Peekable},
mem::MaybeUninit,
ops::Range,
ptr,
sync::Arc,
};
@ -71,6 +76,7 @@ pub struct UnsafeImage {
// `vkDestroyImage` is called only if `needs_destruction` is true.
needs_destruction: bool,
state: Mutex<ImageState>,
}
impl UnsafeImage {
@ -114,6 +120,8 @@ impl UnsafeImage {
_ne: _,
} = create_info;
let aspects = format.unwrap().aspects();
let image = UnsafeImage {
device,
handle,
@ -132,6 +140,12 @@ impl UnsafeImage {
block_texel_view_compatible,
needs_destruction: true,
state: Mutex::new(ImageState::new(
aspects,
mip_levels,
dimensions.array_layers(),
initial_layout,
)),
};
Ok(image)
@ -843,6 +857,9 @@ impl UnsafeImage {
// TODO: check that usage is correct in regard to `output`?
let aspects = format.aspects();
let initial_layout = ImageLayout::Undefined; // TODO: Maybe this should be passed in?
UnsafeImage {
handle,
device: device.clone(),
@ -850,7 +867,7 @@ impl UnsafeImage {
dimensions,
format: Some(format),
format_features,
initial_layout: ImageLayout::Undefined, // TODO: Maybe this should be passed in?
initial_layout,
mip_levels,
samples,
tiling,
@ -861,6 +878,12 @@ impl UnsafeImage {
block_texel_view_compatible: flags.block_texel_view_compatible,
needs_destruction: false, // TODO: pass as parameter
state: Mutex::new(ImageState::new(
aspects,
mip_levels,
dimensions.array_layers(),
initial_layout,
)),
}
}
@ -955,6 +978,10 @@ impl UnsafeImage {
Ok(())
}
pub(crate) fn state(&self) -> MutexGuard<ImageState> {
self.state.lock()
}
/// Returns the dimensions of the image.
#[inline]
pub fn dimensions(&self) -> ImageDimensions {
@ -1595,14 +1622,591 @@ pub struct LinearLayout {
pub depth_pitch: DeviceSize,
}
/// The current state of an image.
#[derive(Debug)]
pub(crate) struct ImageState {
ranges: RangeMap<DeviceSize, ImageRangeState>,
subresources: Subresources,
}
impl ImageState {
fn new(
aspects: ImageAspects,
mip_levels: u32,
array_layers: u32,
initial_layout: ImageLayout,
) -> Self {
let subresources = Subresources::new(aspects, mip_levels, array_layers);
ImageState {
ranges: [(
subresources.range(),
ImageRangeState {
current_access: CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
},
layout: initial_layout,
},
)]
.into_iter()
.collect(),
subresources,
}
}
pub(crate) fn try_cpu_read(
&mut self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
) -> Result<(), ReadLockError> {
let iter = self
.subresources
.iter_ranges(aspects, mip_levels, array_layers);
for range in iter.clone() {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::CpuExclusive { .. } => {
return Err(ReadLockError::CpuWriteLocked)
}
CurrentAccess::GpuExclusive { .. } => {
return Err(ReadLockError::GpuWriteLocked)
}
CurrentAccess::Shared { .. } => (),
}
}
}
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::Shared { cpu_reads, .. } => {
*cpu_reads += 1;
}
_ => unreachable!(),
}
}
}
Ok(())
}
pub(crate) fn try_cpu_write(
&mut self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
) -> Result<(), WriteLockError> {
let iter = self
.subresources
.iter_ranges(aspects, mip_levels, array_layers);
for range in iter.clone() {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::CpuExclusive => return Err(WriteLockError::CpuLocked),
CurrentAccess::GpuExclusive { .. } => return Err(WriteLockError::GpuLocked),
CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
} => (),
CurrentAccess::Shared { cpu_reads, .. } if *cpu_reads > 0 => {
return Err(WriteLockError::CpuLocked)
}
CurrentAccess::Shared { .. } => return Err(WriteLockError::GpuLocked),
}
}
}
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
state.current_access = CurrentAccess::CpuExclusive;
}
}
Ok(())
}
pub(crate) unsafe fn cpu_unlock(
&mut self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
write: bool,
) {
let iter = self
.subresources
.iter_ranges(aspects, mip_levels, array_layers);
if write {
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
state.current_access = CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
}
}
CurrentAccess::GpuExclusive { .. } => {
unreachable!("Image is being written by the GPU")
}
CurrentAccess::Shared { .. } => {
unreachable!("Image is not being written by the CPU")
}
}
}
}
} else {
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Image is being written by the CPU")
}
CurrentAccess::GpuExclusive { .. } => {
unreachable!("Image is being written by the GPU")
}
CurrentAccess::Shared { cpu_reads, .. } => *cpu_reads -= 1,
}
}
}
}
}
/// Locks the resource for usage on the GPU. Returns an error if the lock can't be acquired.
///
/// After this function returns `Ok`, you are authorized to use the image on the GPU. If the
/// GPU operation requires write access to the image (which includes image layout transitions)
/// then `write` should be true.
///
/// The `expected_layout` is the layout we expect the image to be in when we lock it. If the
/// actual layout doesn't match this expected layout, then an error should be returned. If
/// `Undefined` is passed, that means that the caller doesn't care about the actual layout,
/// and that a layout mismatch shouldn't return an error.
///
/// This function exists to prevent the user from causing a data race by reading and writing
/// to the same resource at the same time.
///
/// If you call this function, you should call `gpu_unlock` once the resource is no longer in
/// use by the GPU. The implementation is not expected to automatically perform any unlocking
/// and can rely on the fact that `gpu_unlock` is going to be called.
pub(crate) fn try_gpu_lock(
&mut self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
write: bool,
expected_layout: ImageLayout,
destination_layout: ImageLayout,
) -> Result<(), AccessError> {
debug_assert!(!matches!(
destination_layout,
ImageLayout::Undefined | ImageLayout::Preinitialized
));
let iter = self
.subresources
.iter_ranges(aspects, mip_levels, array_layers);
if write {
for range in iter.clone() {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads: 0,
} => (),
_ => return Err(AccessError::AlreadyInUse),
}
if expected_layout != ImageLayout::Undefined && state.layout != expected_layout
{
return Err(AccessError::UnexpectedImageLayout {
allowed: state.layout,
requested: expected_layout,
});
}
}
}
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
state.current_access = CurrentAccess::GpuExclusive {
gpu_reads: 0,
gpu_writes: 1,
};
state.layout = destination_layout;
}
}
} else {
debug_assert_eq!(expected_layout, destination_layout);
for range in iter.clone() {
for (_range, state) in self.ranges.range(&range) {
match &state.current_access {
CurrentAccess::Shared { .. } => (),
_ => return Err(AccessError::AlreadyInUse),
}
if expected_layout != ImageLayout::Undefined && state.layout != expected_layout
{
return Err(AccessError::UnexpectedImageLayout {
allowed: state.layout,
requested: expected_layout,
});
}
}
}
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::Shared { gpu_reads, .. } => *gpu_reads += 1,
_ => unreachable!(),
}
state.layout = destination_layout;
}
}
}
Ok(())
}
/// Locks the resource for usage on the GPU without checking for errors. Supposes that a
/// future has already granted access to the resource.
///
/// If you call this function, you should call `gpu_unlock` once the resource is no longer in
/// use by the GPU. The implementation is not expected to automatically perform any unlocking
/// and can rely on the fact that `gpu_unlock` is going to be called.
pub(crate) unsafe fn increase_gpu_lock(
&mut self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
write: bool,
destination_layout: ImageLayout,
) {
debug_assert!(!matches!(
destination_layout,
ImageLayout::Undefined | ImageLayout::Preinitialized
));
let iter = self
.subresources
.iter_ranges(aspects, mip_levels, array_layers);
if write {
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Image is being written by the CPU")
}
CurrentAccess::GpuExclusive { gpu_writes, .. } => *gpu_writes += 1,
&mut CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads,
} => {
state.current_access = CurrentAccess::GpuExclusive {
gpu_reads,
gpu_writes: 1,
}
}
CurrentAccess::Shared { .. } => {
unreachable!("Image is being read by the CPU")
}
}
state.layout = destination_layout;
}
}
} else {
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Image is being written by the CPU")
}
CurrentAccess::GpuExclusive { gpu_reads, .. }
| CurrentAccess::Shared { gpu_reads, .. } => *gpu_reads += 1,
}
state.layout = destination_layout;
}
}
}
}
/// Unlocks the resource previously acquired with `try_gpu_lock` or `increase_gpu_lock`.
///
/// # Safety
///
/// - Must only be called once per previous lock.
pub(crate) unsafe fn gpu_unlock(
&mut self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
write: bool,
) {
let iter = self
.subresources
.iter_ranges(aspects, mip_levels, array_layers);
if write {
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Image is being written by the CPU")
}
&mut CurrentAccess::GpuExclusive {
gpu_reads,
gpu_writes: 1,
} => {
state.current_access = CurrentAccess::Shared {
cpu_reads: 0,
gpu_reads,
}
}
CurrentAccess::GpuExclusive { gpu_writes, .. } => *gpu_writes -= 1,
CurrentAccess::Shared { .. } => {
unreachable!("Image is not being written by the GPU")
}
}
}
}
} else {
for range in iter {
self.ranges.split_at(&range.start);
self.ranges.split_at(&range.end);
for (_range, state) in self.ranges.range_mut(&range) {
match &mut state.current_access {
CurrentAccess::CpuExclusive => {
unreachable!("Buffer is being written by the CPU")
}
CurrentAccess::GpuExclusive { gpu_reads, .. } => *gpu_reads -= 1,
CurrentAccess::Shared { gpu_reads, .. } => *gpu_reads -= 1,
}
}
}
}
}
}
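// Illustrative sketch (not part of this commit): the lock/unlock lifecycle that the
// methods above are meant to be used with, going through `UnsafeImage::state()`. The
// function name, the aspect/level/layer ranges and the layouts are examples only, and
// the existing `mip_levels()` / `dimensions()` getters on `UnsafeImage` are assumed.
fn gpu_lock_lifecycle_sketch(image: &UnsafeImage) -> Result<(), AccessError> {
    let aspects = ImageAspects {
        color: true,
        ..ImageAspects::none()
    };
    let mip_levels = 0..image.mip_levels();
    let array_layers = 0..image.dimensions().array_layers();

    // Acquire exclusive GPU access to the whole color aspect, transitioning the
    // locked subresources to `TransferDstOptimal`.
    image.state().try_gpu_lock(
        aspects,
        mip_levels.clone(),
        array_layers.clone(),
        true,                            // write access
        ImageLayout::Undefined,          // don't care about the current layout
        ImageLayout::TransferDstOptimal, // layout after the GPU operation
    )?;

    // ... submit GPU work that uses the image, and wait for it to complete ...

    // Release the lock once the GPU is no longer using the image.
    unsafe {
        image
            .state()
            .gpu_unlock(aspects, mip_levels, array_layers, true);
    }

    Ok(())
}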
/// The current state of a specific subresource range in an image.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ImageRangeState {
current_access: CurrentAccess,
layout: ImageLayout,
}
/// Helper type for the subresources of an image.
///
/// In ranges, the subresources are "flattened" to `DeviceSize` indices, where each index
/// corresponds to a single array layer. The indices are arranged hierarchically: aspects at
/// the top level, mip levels within each aspect, and array layers within each mip level.
#[derive(Debug)]
struct Subresources {
aspects: ImageAspects,
aspect_list: SmallVec<[ImageAspect; 4]>,
aspect_size: DeviceSize,
mip_levels: u32,
mip_level_size: DeviceSize,
array_layers: u32,
}
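// Illustrative sketch (not part of this commit): the index arithmetic behind the
// flattening described above. `aspect_num` is the position of the aspect within
// `aspect_list`; the helper name is hypothetical.
fn flattened_subresource_index(
    subresources: &Subresources,
    aspect_num: usize,
    mip_level: u32,
    array_layer: u32,
) -> DeviceSize {
    aspect_num as DeviceSize * subresources.aspect_size
        + mip_level as DeviceSize * subresources.mip_level_size
        + array_layer as DeviceSize
}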
impl Subresources {
#[inline]
fn new(aspects: ImageAspects, mip_levels: u32, array_layers: u32) -> Self {
let mip_level_size = array_layers as DeviceSize;
let aspect_size = mip_level_size * mip_levels as DeviceSize;
let aspect_list: SmallVec<[ImageAspect; 4]> = aspects.iter().collect();
Self {
aspects,
aspect_list,
aspect_size,
mip_levels,
mip_level_size,
array_layers,
}
}
/// Returns a range representing all subresources of the image.
#[inline]
fn range(&self) -> Range<DeviceSize> {
0..self.aspect_list.len() as DeviceSize * self.aspect_size
}
fn iter_ranges(
&self,
aspects: ImageAspects,
mip_levels: Range<u32>,
array_layers: Range<u32>,
) -> SubresourceRangeIterator {
assert!(self.aspects.contains(&aspects));
assert!(!mip_levels.is_empty());
assert!(mip_levels.end <= self.mip_levels);
assert!(!array_layers.is_empty());
assert!(array_layers.end <= self.array_layers);
let next_fn = if array_layers.start != 0 || array_layers.end != self.array_layers {
SubresourceRangeIterator::next_some_layers
} else if mip_levels.start != 0 || mip_levels.end != self.mip_levels {
SubresourceRangeIterator::next_some_levels_all_layers
} else {
SubresourceRangeIterator::next_all_levels_all_layers
};
let mut aspect_nums = aspects
.iter()
.map(|aspect| self.aspect_list.iter().position(|&a| a == aspect).unwrap())
.collect::<SmallVec<[usize; 4]>>()
.into_iter()
.peekable();
assert!(aspect_nums.len() != 0);
let current_aspect_num = aspect_nums.next();
let current_mip_level = mip_levels.start;
SubresourceRangeIterator {
subresources: self,
next_fn,
aspect_nums,
current_aspect_num,
mip_levels,
current_mip_level,
array_layers,
}
}
}
#[derive(Clone)]
struct SubresourceRangeIterator<'a> {
subresources: &'a Subresources,
next_fn: fn(&mut Self) -> Option<Range<DeviceSize>>,
aspect_nums: Peekable<smallvec::IntoIter<[usize; 4]>>,
current_aspect_num: Option<usize>,
mip_levels: Range<u32>,
current_mip_level: u32,
array_layers: Range<u32>,
}
impl<'a> SubresourceRangeIterator<'a> {
/// Used when the requested range contains only a subset of the array layers in the image.
/// The iterator returns one range for each mip level and aspect, each covering the range of
/// array layers of that mip level and aspect.
fn next_some_layers(&mut self) -> Option<Range<DeviceSize>> {
self.current_aspect_num.map(|aspect_num| {
let mip_level_offset = aspect_num as DeviceSize * self.subresources.aspect_size
+ self.current_mip_level as DeviceSize * self.subresources.mip_level_size;
self.current_mip_level += 1;
if self.current_mip_level >= self.mip_levels.end {
self.current_mip_level = self.mip_levels.start;
self.current_aspect_num = self.aspect_nums.next();
}
let start = mip_level_offset + self.array_layers.start as DeviceSize;
let end = mip_level_offset + self.array_layers.end as DeviceSize;
start..end
})
}
/// Used when the requested range contains all array layers in the image, but not all mip
/// levels. The iterator returns one range for each aspect, each covering all layers of the
/// range of mip levels of that aspect.
fn next_some_levels_all_layers(&mut self) -> Option<Range<DeviceSize>> {
self.current_aspect_num.map(|aspect_num| {
let aspect_offset = aspect_num as DeviceSize * self.subresources.aspect_size;
self.current_aspect_num = self.aspect_nums.next();
let start = aspect_offset
+ self.mip_levels.start as DeviceSize * self.subresources.mip_level_size;
let end = aspect_offset
+ self.mip_levels.end as DeviceSize * self.subresources.mip_level_size;
start..end
})
}
/// Used when the requested range contains all array layers and mip levels in the image.
/// The iterator returns one range for each series of adjacent aspect numbers, each covering
/// all mip levels and all layers of those aspects. If the range contains the whole image, then
/// exactly one range is returned since all aspect numbers will be adjacent.
fn next_all_levels_all_layers(&mut self) -> Option<Range<DeviceSize>> {
self.current_aspect_num.map(|aspect_num_start| {
self.current_aspect_num = self.aspect_nums.next();
let mut aspect_num_end = aspect_num_start + 1;
while self.current_aspect_num == Some(aspect_num_end) {
self.current_aspect_num = self.aspect_nums.next();
aspect_num_end += 1;
}
let start = aspect_num_start as DeviceSize * self.subresources.aspect_size;
let end = aspect_num_end as DeviceSize * self.subresources.aspect_size;
start..end
})
}
}
impl<'a> Iterator for SubresourceRangeIterator<'a> {
type Item = Range<DeviceSize>;
fn next(&mut self) -> Option<Self::Item> {
(self.next_fn)(self)
}
}
impl<'a> FusedIterator for SubresourceRangeIterator<'a> {}
#[cfg(test)]
mod tests {
use super::ImageCreationError;
use super::ImageState;
use super::ImageUsage;
use super::UnsafeImage;
use super::UnsafeImageCreateInfo;
use crate::format::Format;
use crate::image::ImageAspects;
use crate::image::ImageDimensions;
use crate::image::ImageLayout;
use crate::image::SampleCount;
#[test]
@ -1820,4 +2424,122 @@ mod tests {
_ => panic!(),
};
}
#[test]
fn subresource_range_iterator() {
// A fictitious set of aspects that no real image would actually ever have.
let image_state = ImageState::new(
ImageAspects {
color: true,
depth: true,
stencil: true,
plane0: true,
..ImageAspects::none()
},
6,
8,
ImageLayout::Undefined,
);
let mip = image_state.subresources.mip_level_size;
let asp = image_state.subresources.aspect_size;
assert_eq!(mip, 8);
assert_eq!(asp, 8 * 6);
// Whole image
let mut iter = image_state.subresources.iter_ranges(
ImageAspects {
color: true,
depth: true,
stencil: true,
plane0: true,
..ImageAspects::none()
},
0..6,
0..8,
);
assert_eq!(iter.next(), Some(0 * asp..4 * asp));
assert_eq!(iter.next(), None);
// Only some aspects
let mut iter = image_state.subresources.iter_ranges(
ImageAspects {
color: true,
depth: true,
stencil: false,
plane0: true,
..ImageAspects::none()
},
0..6,
0..8,
);
assert_eq!(iter.next(), Some(0 * asp..2 * asp));
assert_eq!(iter.next(), Some(3 * asp..4 * asp));
assert_eq!(iter.next(), None);
// Two aspects, and only some of the mip levels
let mut iter = image_state.subresources.iter_ranges(
ImageAspects {
color: false,
depth: true,
stencil: true,
plane0: false,
..ImageAspects::none()
},
2..4,
0..8,
);
assert_eq!(iter.next(), Some(1 * asp + 2 * mip..1 * asp + 4 * mip));
assert_eq!(iter.next(), Some(2 * asp + 2 * mip..2 * asp + 4 * mip));
assert_eq!(iter.next(), None);
// One aspect, one mip level, only some of the array layers
let mut iter = image_state.subresources.iter_ranges(
ImageAspects {
color: true,
depth: false,
stencil: false,
plane0: false,
..ImageAspects::none()
},
0..1,
2..4,
);
assert_eq!(
iter.next(),
Some(0 * asp + 0 * mip + 2..0 * asp + 0 * mip + 4)
);
assert_eq!(iter.next(), None);
// Two aspects, two mip levels, only some of the array layers
let mut iter = image_state.subresources.iter_ranges(
ImageAspects {
color: false,
depth: true,
stencil: true,
plane0: false,
..ImageAspects::none()
},
2..4,
6..8,
);
assert_eq!(
iter.next(),
Some(1 * asp + 2 * mip + 6..1 * asp + 2 * mip + 8)
);
assert_eq!(
iter.next(),
Some(1 * asp + 3 * mip + 6..1 * asp + 3 * mip + 8)
);
assert_eq!(
iter.next(),
Some(2 * asp + 2 * mip + 6..2 * asp + 2 * mip + 8)
);
assert_eq!(
iter.next(),
Some(2 * asp + 3 * mip + 6..2 * asp + 3 * mip + 8)
);
assert_eq!(iter.next(), None);
}
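    // Illustrative sketch (not part of this commit): a test along these lines would
    // exercise the CPU read/write exclusion tracked by `ImageState` for a
    // single-subresource image.
    #[test]
    fn cpu_write_excluded_while_read_locked() {
        let aspects = ImageAspects {
            color: true,
            ..ImageAspects::none()
        };
        let mut state = ImageState::new(aspects, 1, 1, ImageLayout::Undefined);

        // A CPU read lock blocks CPU write locks on the same subresources.
        state.try_cpu_read(aspects, 0..1, 0..1).unwrap();
        assert!(state.try_cpu_write(aspects, 0..1, 0..1).is_err());

        // Once the read lock is released, a write lock can be taken.
        unsafe { state.cpu_unlock(aspects, 0..1, 0..1, false) };
        state.try_cpu_write(aspects, 0..1, 0..1).unwrap();
        unsafe { state.cpu_unlock(aspects, 0..1, 0..1, true) };
    }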
}

@ -7,19 +7,15 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use crate::format::ClearValue;
use crate::format::Format;
use crate::format::FormatFeatures;
use crate::image::sys::UnsafeImage;
use crate::image::ImageDescriptorLayouts;
use crate::image::ImageDimensions;
use crate::image::ImageLayout;
use crate::image::SampleCount;
use crate::sync::AccessError;
use crate::SafeDeref;
use std::hash::Hash;
use std::hash::Hasher;
use std::sync::Arc;
use super::{sys::UnsafeImage, ImageDescriptorLayouts, ImageDimensions, ImageLayout, SampleCount};
use crate::{
format::{ClearValue, Format, FormatFeatures},
SafeDeref,
};
use std::{
hash::{Hash, Hasher},
sync::Arc,
};
/// Trait for types that represent the way a GPU can access an image.
pub unsafe trait ImageAccess: Send + Sync {
@ -140,61 +136,6 @@ pub unsafe trait ImageAccess: Send + Sync {
/// Returns the current array layer that is accessed by the gpu
fn current_array_layers_access(&self) -> std::ops::Range<u32>;
/// Locks the resource for usage on the GPU. Returns an error if the lock can't be acquired.
///
/// After this function returns `Ok`, you are authorized to use the image on the GPU. If the
    /// GPU operation requires exclusive access to the image (which includes image layout
/// transitions) then `exclusive_access` should be true.
///
/// The `expected_layout` is the layout we expect the image to be in when we lock it. If the
/// actual layout doesn't match this expected layout, then an error should be returned. If
/// `Undefined` is passed, that means that the caller doesn't care about the actual layout,
/// and that a layout mismatch shouldn't return an error.
///
/// This function exists to prevent the user from causing a data race by reading and writing
/// to the same resource at the same time.
///
/// If you call this function, you should call `unlock()` once the resource is no longer in use
/// by the GPU. The implementation is not expected to automatically perform any unlocking and
/// can rely on the fact that `unlock()` is going to be called.
fn try_gpu_lock(
&self,
exclusive_access: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError>;
/// Locks the resource for usage on the GPU. Supposes that the resource is already locked, and
/// simply increases the lock by one.
///
/// Must only be called after `try_gpu_lock()` succeeded.
///
/// If you call this function, you should call `unlock()` once the resource is no longer in use
/// by the GPU. The implementation is not expected to automatically perform any unlocking and
/// can rely on the fact that `unlock()` is going to be called.
unsafe fn increase_gpu_lock(&self);
/// Unlocks the resource previously acquired with `try_gpu_lock` or `increase_gpu_lock`.
///
/// If the GPU operation that we unlock from transitioned the image to another layout, then
/// it should be passed as parameter.
///
/// A layout transition requires exclusive access to the image, which means two things:
///
/// - The implementation can panic if it finds out that the layout is not the same as it
/// currently is and that it is not locked in exclusive mode.
/// - There shouldn't be any possible race between `unlock` and `try_gpu_lock`, since
/// `try_gpu_lock` should fail if the image is already locked in exclusive mode.
///
/// # Safety
///
/// - Must only be called once per previous lock.
/// - The transitioned layout must be supported by the image (eg. the layout shouldn't be
/// `ColorAttachmentOptimal` if the image wasn't created with the `color_attachment` usage).
/// - The transitioned layout must not be `Undefined`.
///
unsafe fn unlock(&self, transitioned_layout: Option<ImageLayout>);
}
/// Inner information about an image.
@ -273,27 +214,6 @@ where
self.image.conflict_key()
}
#[inline]
fn try_gpu_lock(
&self,
exclusive_access: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
self.image
.try_gpu_lock(exclusive_access, uninitialized_safe, expected_layout)
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
self.image.increase_gpu_lock()
}
#[inline]
unsafe fn unlock(&self, new_layout: Option<ImageLayout>) {
self.image.unlock(new_layout)
}
fn current_mip_levels_access(&self) -> std::ops::Range<u32> {
self.image.current_mip_levels_access()
}
@ -367,26 +287,6 @@ where
(**self).conflict_key()
}
#[inline]
fn try_gpu_lock(
&self,
exclusive_access: bool,
uninitialized_safe: bool,
expected_layout: ImageLayout,
) -> Result<(), AccessError> {
(**self).try_gpu_lock(exclusive_access, uninitialized_safe, expected_layout)
}
#[inline]
unsafe fn increase_gpu_lock(&self) {
(**self).increase_gpu_lock()
}
#[inline]
unsafe fn unlock(&self, transitioned_layout: Option<ImageLayout>) {
(**self).unlock(transitioned_layout)
}
#[inline]
unsafe fn layout_initialized(&self) {
(**self).layout_initialized();

@ -7,8 +7,7 @@
// notice may not be copied, modified, or distributed except
// according to those terms.
use crate::buffer::BufferAccess;
use crate::buffer::BufferAccessObject;
use crate::buffer::{BufferAccess, BufferAccessObject};
use std::sync::Arc;
/// A collection of vertex buffers.
@ -88,8 +87,6 @@ mod tests {
use crate::buffer::BufferInner;
use crate::device::Device;
use crate::device::DeviceOwned;
use crate::device::Queue;
use crate::sync::AccessError;
use crate::DeviceSize;
use std::sync::Arc;
@ -108,18 +105,6 @@ mod tests {
fn conflict_key(&self) -> (u64, u64) {
unimplemented!()
}
fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> {
unimplemented!()
}
unsafe fn increase_gpu_lock(&self) {
unimplemented!()
}
unsafe fn unlock(&self) {
unimplemented!()
}
}
unsafe impl DeviceOwned for DummyBufferA {
@ -146,18 +131,6 @@ mod tests {
fn conflict_key(&self) -> (u64, u64) {
unimplemented!()
}
fn try_gpu_lock(&self, _: bool, _: &Queue) -> Result<(), AccessError> {
unimplemented!()
}
unsafe fn increase_gpu_lock(&self) {
unimplemented!()
}
unsafe fn unlock(&self) {
unimplemented!()
}
}
unsafe impl DeviceOwned for DummyBufferB {

@ -165,3 +165,19 @@ where
/// The resource is used in multiple queue families. Can be slower than `Exclusive`.
Concurrent(I),
}
/// How the memory of a resource is currently being accessed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum CurrentAccess {
/// The resource is currently being accessed exclusively by the CPU.
CpuExclusive,
/// The resource is currently being accessed exclusively by the GPU.
/// The GPU can have multiple exclusive accesses, if they are separated by synchronization.
///
/// `gpu_writes` must not be 0. If it's decremented to 0, switch to `Shared`.
GpuExclusive { gpu_reads: usize, gpu_writes: usize },
/// The resource is not currently being accessed, or is being accessed for reading only.
Shared { cpu_reads: usize, gpu_reads: usize },
}