Mirror of https://github.com/vulkano-rs/vulkano.git, synced 2024-11-25 16:25:31 +00:00

CpuBufferPool revamp (#2076)

* `CpuBufferPool` revamp
* Fix oopsie
* Fix docs

This commit is contained in:
parent c5c6bf0f09
commit fe01ddd5e3
@ -7,17 +7,7 @@
// notice may not be copied, modified, or distributed except
// according to those terms.

// BufferPool Example
//
// Modified triangle example to show BufferPool
// Using a pool allows multiple buffers to be "in-flight" simultaneously
// and is suited to highly dynamic, similar sized chunks of data
//
// NOTE:(jdnewman85) ATM (5/4/2020) CpuBufferPool.next() and .chunk() have identical documentation
// I was unable to get next() to work. The compiler complained that the resulting buffer
// didn't implement VertexSource. Similar issues have been reported.
// See: https://github.com/vulkano-rs/vulkano/issues/1221
// Finally, I have not profiled CpuBufferPool against CpuAccessibleBuffer
// Modified triangle example to show `CpuBufferAllocator`.

use bytemuck::{Pod, Zeroable};
|
||||
use std::{
|
||||
@ -25,7 +15,10 @@ use std::{
|
||||
time::{SystemTime, UNIX_EPOCH},
|
||||
};
|
||||
use vulkano::{
|
||||
buffer::CpuBufferPool,
|
||||
buffer::{
|
||||
allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo},
|
||||
BufferUsage,
|
||||
},
|
||||
command_buffer::{
|
||||
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
|
||||
RenderPassBeginInfo, SubpassContents,
|
||||
@ -171,8 +164,16 @@ fn main() {
|
||||
|
||||
let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
|
||||
|
||||
// Vertex Buffer Pool
|
||||
let buffer_pool: CpuBufferPool<Vertex> = CpuBufferPool::vertex_buffer(memory_allocator);
|
||||
// Using a buffer allocator allows multiple buffers to be "in-flight" simultaneously and is
|
||||
// suited to highly dynamic data like vertex, index and uniform buffers.
|
||||
let buffer_allocator = CpuBufferAllocator::new(
|
||||
memory_allocator,
|
||||
CpuBufferAllocatorCreateInfo {
|
||||
// We want to use the allocated subbuffers as vertex buffers.
|
||||
buffer_usage: BufferUsage::VERTEX_BUFFER,
|
||||
..Default::default()
|
||||
},
|
||||
);
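// Illustrative addition, not part of this example as committed: if the size of
// a frame's upload is known up front, the arenas can be pre-sized so the first
// allocation does not have to grow them. The 1 KiB figure is purely
// illustrative; `reserve` is the method added by this commit in allocator.rs.
buffer_allocator.reserve(1024).unwrap();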
|
||||
|
||||
mod vs {
|
||||
vulkano_shaders::shader! {
|
||||
@ -335,8 +336,8 @@ fn main() {
|
||||
];
|
||||
let num_vertices = data.len() as u32;
|
||||
|
||||
// Allocate a new chunk from buffer_pool
|
||||
let buffer = buffer_pool.from_iter(data.to_vec()).unwrap();
|
||||
// Allocate a new subbuffer using the buffer allocator.
|
||||
let buffer = buffer_allocator.from_iter(data.iter().copied()).unwrap();
|
||||
let mut builder = AutoCommandBufferBuilder::primary(
|
||||
&command_buffer_allocator,
|
||||
queue.queue_family_index(),
|
@ -27,7 +27,10 @@
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
use std::sync::Arc;
|
||||
use vulkano::{
|
||||
buffer::{BufferUsage, CpuBufferPool},
|
||||
buffer::{
|
||||
allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo},
|
||||
BufferUsage,
|
||||
},
|
||||
command_buffer::{
|
||||
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
|
||||
DrawIndirectCommand, RenderPassBeginInfo, SubpassContents,
|
||||
@ -42,7 +45,7 @@ use vulkano::{
|
||||
image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage},
|
||||
impl_vertex,
|
||||
instance::{Instance, InstanceCreateInfo},
|
||||
memory::allocator::{MemoryUsage, StandardMemoryAllocator},
|
||||
memory::allocator::StandardMemoryAllocator,
|
||||
pipeline::{
|
||||
graphics::{
|
||||
input_assembly::InputAssemblyState,
|
||||
@ -256,17 +259,21 @@ fn main() {
|
||||
|
||||
let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
|
||||
|
||||
// Each frame we generate a new set of vertices and each frame we need a new DrawIndirectCommand struct to
|
||||
// set the number of vertices to draw
|
||||
let indirect_args_pool: CpuBufferPool<DrawIndirectCommand> = CpuBufferPool::new(
|
||||
// Each frame we generate a new set of vertices and each frame we need a new
|
||||
// DrawIndirectCommand struct to set the number of vertices to draw.
|
||||
let indirect_args_pool = CpuBufferAllocator::new(
|
||||
memory_allocator.clone(),
|
||||
BufferUsage::INDIRECT_BUFFER | BufferUsage::STORAGE_BUFFER,
|
||||
MemoryUsage::Upload,
|
||||
CpuBufferAllocatorCreateInfo {
|
||||
buffer_usage: BufferUsage::INDIRECT_BUFFER | BufferUsage::STORAGE_BUFFER,
|
||||
..Default::default()
|
||||
},
|
||||
);
|
||||
let vertex_pool: CpuBufferPool<Vertex> = CpuBufferPool::new(
|
||||
let vertex_pool = CpuBufferAllocator::new(
|
||||
memory_allocator,
|
||||
BufferUsage::STORAGE_BUFFER | BufferUsage::VERTEX_BUFFER,
|
||||
MemoryUsage::Upload,
|
||||
CpuBufferAllocatorCreateInfo {
|
||||
buffer_usage: BufferUsage::STORAGE_BUFFER | BufferUsage::VERTEX_BUFFER,
|
||||
..Default::default()
|
||||
},
|
||||
);
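// Hedged sketch of how these two allocators are used each frame further on in
// this example; the names are assumptions, and the zero vertex count is a
// placeholder (the real count is written by the compute pass).
let indirect_commands = [DrawIndirectCommand {
    vertex_count: 0,
    instance_count: 1,
    first_vertex: 0,
    first_instance: 0,
}];
let indirect_args = indirect_args_pool.from_iter(indirect_commands).unwrap();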
|
||||
|
||||
let compute_pipeline = ComputePipeline::new(
|
||||
|
@ -11,7 +11,10 @@ use cgmath::{Matrix3, Matrix4, Point3, Rad, Vector3};
|
||||
use examples::{Normal, Vertex, INDICES, NORMALS, VERTICES};
|
||||
use std::{sync::Arc, time::Instant};
|
||||
use vulkano::{
|
||||
buffer::{BufferUsage, CpuAccessibleBuffer, CpuBufferPool, TypedBufferAccess},
|
||||
buffer::{
|
||||
allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo},
|
||||
BufferUsage, CpuAccessibleBuffer, TypedBufferAccess,
|
||||
},
|
||||
command_buffer::{
|
||||
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
|
||||
RenderPassBeginInfo, SubpassContents,
|
||||
@ -26,7 +29,7 @@ use vulkano::{
|
||||
format::Format,
|
||||
image::{view::ImageView, AttachmentImage, ImageAccess, ImageUsage, SwapchainImage},
|
||||
instance::{Instance, InstanceCreateInfo},
|
||||
memory::allocator::{MemoryUsage, StandardMemoryAllocator},
|
||||
memory::allocator::StandardMemoryAllocator,
|
||||
pipeline::{
|
||||
graphics::{
|
||||
depth_stencil::DepthStencilState,
|
||||
@ -180,10 +183,12 @@ fn main() {
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let uniform_buffer = CpuBufferPool::<vs::ty::Data>::new(
|
||||
let uniform_buffer = CpuBufferAllocator::new(
|
||||
memory_allocator.clone(),
|
||||
BufferUsage::UNIFORM_BUFFER,
|
||||
MemoryUsage::Upload,
|
||||
CpuBufferAllocatorCreateInfo {
|
||||
buffer_usage: BufferUsage::UNIFORM_BUFFER,
|
||||
..Default::default()
|
||||
},
|
||||
);
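// Hedged sketch of the per-frame use of this allocator inside the render loop
// below; `uniform_data` stands for the `vs::ty::Data` value that the example
// builds from its world/view/projection matrices each frame.
let uniform_buffer_subbuffer = uniform_buffer.from_data(uniform_data).unwrap();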
|
||||
|
||||
let vs = vs::load(device.clone()).unwrap();
vulkano/src/buffer/allocator.rs (new file, 566 lines)
@ -0,0 +1,566 @@
|
||||
// Copyright (c) 2017 The vulkano developers
|
||||
// Licensed under the Apache License, Version 2.0
|
||||
// <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
// at your option. All files in the project carrying such
|
||||
// notice may not be copied, modified, or distributed except
|
||||
// according to those terms.
|
||||
|
||||
//! Efficiently suballocates buffers into smaller subbuffers.
|
||||
|
||||
use super::{
|
||||
sys::{Buffer, BufferCreateInfo, RawBuffer},
|
||||
BufferAccess, BufferAccessObject, BufferContents, BufferError, BufferInner, BufferUsage,
|
||||
TypedBufferAccess,
|
||||
};
|
||||
use crate::{
|
||||
buffer::sys::BufferMemory,
|
||||
device::{Device, DeviceOwned},
|
||||
memory::{
|
||||
allocator::{
|
||||
align_up, AllocationCreateInfo, AllocationCreationError, AllocationType,
|
||||
MemoryAllocatePreference, MemoryAllocator, MemoryUsage, StandardMemoryAllocator,
|
||||
},
|
||||
DedicatedAllocation,
|
||||
},
|
||||
DeviceSize,
|
||||
};
|
||||
use crossbeam_queue::ArrayQueue;
|
||||
use std::{
|
||||
cell::UnsafeCell,
|
||||
marker::PhantomData,
|
||||
mem::{align_of, size_of, ManuallyDrop},
|
||||
num::NonZeroU64,
|
||||
ptr,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
const MAX_ARENAS: usize = 32;
|
||||
|
||||
// TODO: Add `CpuSubbuffer::read` to read the content of a subbuffer.
|
||||
// But that's hard to do because we must prevent `increase_gpu_lock` from working while a
|
||||
// buffer is locked.
|
||||
|
||||
/// Efficiently suballocates buffers into smaller subbuffers.
|
||||
///
|
||||
/// This allocator is especially suitable when you want to upload or download some data regularly
|
||||
/// (for example, at each frame for a video game).
|
||||
///
|
||||
/// # Algorithm
|
||||
///
|
||||
/// The allocator keeps a pool of *arenas*. An arena is simply a buffer in which *arena allocation*
|
||||
/// takes place, also known as *bump allocation* or *linear allocation*. Every time you allocate,
|
||||
/// one of these arenas is suballocated. If there is no arena that is currently available, one will
|
||||
/// be allocated. After all subbuffers allocated from an arena are dropped, the arena is
|
||||
/// automatically returned to the arena pool. If you try to allocate a subbuffer larger than the
|
||||
/// current size of an arena, the arenas are automatically resized.
|
||||
///
|
||||
/// No memory is allocated when the allocator is created, be it on the Vulkan or Rust side. That
|
||||
/// only happens once you allocate a subbuffer.
|
||||
///
|
||||
/// # Usage
|
||||
///
|
||||
/// Ideally, one arena should be able to fit all data you need to update per frame, so that each
|
||||
/// arena is submitted and freed once per frame. This way, the arena pool would also contain as
|
||||
/// many arenas as there are frames in flight on the thread. Otherwise, if your arenas are not able
|
||||
/// to fit everything each frame, what will likely happen is that each subbuffer will be
|
||||
/// allocated from an individual arena. This can impact efficiency both in terms of memory usage
|
||||
/// (because each arena has the same size, even if some of the subbuffers are way smaller) as well
|
||||
/// as performance, because the data could end up more physically separated in memory, which means
|
||||
/// the GPU would need to hop from place to place a lot more during a frame.
|
||||
///
|
||||
/// Ideally the result is something roughly like this:
|
||||
///
|
||||
/// ```plain
|
||||
/// +---------------------------------------------------------------------------------------------+
|
||||
/// | Memory Block |
|
||||
/// |-----+------+-----------------------+---------+-----------------------+------+---------+-----|
|
||||
/// | | | Frame 1 Arena | | Frame 2 Arena | | | |
|
||||
/// | ••• | Tex. |-------+-------+-------| Attach. |-------+-------+-------| Tex. | Attach. | ••• |
|
||||
/// | | | Vert. | Indx. | Unif. | | Vert. | Indx. | Unif. | | | |
|
||||
/// +-----+------+-------+-------+-------+---------+-------+-------+-------+------+---------+-----+
|
||||
/// ```
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use vulkano::buffer::allocator::CpuBufferAllocator;
|
||||
/// use vulkano::command_buffer::{
|
||||
/// AutoCommandBufferBuilder, CommandBufferUsage, PrimaryCommandBufferAbstract,
|
||||
/// };
|
||||
/// use vulkano::sync::GpuFuture;
|
||||
/// # let queue: std::sync::Arc<vulkano::device::Queue> = return;
|
||||
/// # let memory_allocator: std::sync::Arc<vulkano::memory::allocator::StandardMemoryAllocator> = return;
|
||||
/// # let command_buffer_allocator: vulkano::command_buffer::allocator::StandardCommandBufferAllocator = return;
|
||||
///
|
||||
/// // Create the buffer allocator.
|
||||
/// let buffer_allocator = CpuBufferAllocator::new(memory_allocator.clone(), Default::default());
|
||||
///
|
||||
/// for n in 0..25u32 {
|
||||
/// // Each loop allocates a new subbuffer and stores `data` in it.
|
||||
/// let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0];
|
||||
/// let subbuffer = buffer_allocator.from_data(data).unwrap();
|
||||
///
|
||||
/// // You can then use `subbuffer` as if it was an entirely separate buffer.
|
||||
/// AutoCommandBufferBuilder::primary(
|
||||
/// &command_buffer_allocator,
|
||||
/// queue.queue_family_index(),
|
||||
/// CommandBufferUsage::OneTimeSubmit,
|
||||
/// )
|
||||
/// .unwrap()
|
||||
/// // For the sake of the example we just call `update_buffer` on the buffer, even though
|
||||
/// // it is pointless to do that.
|
||||
/// .update_buffer(&[0.2, 0.3, 0.4, 0.5], subbuffer.clone(), 0)
|
||||
/// .unwrap()
|
||||
/// .build().unwrap()
|
||||
/// .execute(queue.clone())
|
||||
/// .unwrap()
|
||||
/// .then_signal_fence_and_flush()
|
||||
/// .unwrap();
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct CpuBufferAllocator<A = Arc<StandardMemoryAllocator>> {
|
||||
state: UnsafeCell<CpuBufferAllocatorState<A>>,
|
||||
}
|
||||
|
||||
impl<A> CpuBufferAllocator<A>
|
||||
where
|
||||
A: MemoryAllocator,
|
||||
{
|
||||
/// Creates a new `CpuBufferAllocator`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `create_info.memory_usage` is [`MemoryUsage::GpuOnly`].
|
||||
pub fn new(memory_allocator: A, create_info: CpuBufferAllocatorCreateInfo) -> Self {
|
||||
let CpuBufferAllocatorCreateInfo {
|
||||
arena_size,
|
||||
buffer_usage,
|
||||
memory_usage,
|
||||
_ne: _,
|
||||
} = create_info;
|
||||
|
||||
assert!(memory_usage != MemoryUsage::GpuOnly);
|
||||
|
||||
let properties = memory_allocator.device().physical_device().properties();
|
||||
let buffer_alignment = [
|
||||
buffer_usage
|
||||
.contains(BufferUsage::UNIFORM_BUFFER)
|
||||
.then_some(properties.min_uniform_buffer_offset_alignment),
|
||||
buffer_usage
|
||||
.contains(BufferUsage::STORAGE_BUFFER)
|
||||
.then_some(properties.min_storage_buffer_offset_alignment),
|
||||
]
|
||||
.into_iter()
|
||||
.flatten()
|
||||
.max()
|
||||
.unwrap_or(1);
|
||||
|
||||
CpuBufferAllocator {
|
||||
state: UnsafeCell::new(CpuBufferAllocatorState {
|
||||
memory_allocator,
|
||||
buffer_usage,
|
||||
memory_usage,
|
||||
buffer_alignment,
|
||||
arena_size,
|
||||
arena: None,
|
||||
free_start: 0,
|
||||
reserve: None,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the current size of the arenas.
|
||||
pub fn arena_size(&self) -> DeviceSize {
|
||||
unsafe { &*self.state.get() }.arena_size
|
||||
}
|
||||
|
||||
/// Sets the arena size to the provided `size`.
|
||||
///
|
||||
/// The next time you allocate a subbuffer, a new arena will be allocated with the new size,
|
||||
/// and all subsequently allocated arenas will also share the new size.
|
||||
pub fn set_arena_size(&self, size: DeviceSize) {
|
||||
let state = unsafe { &mut *self.state.get() };
|
||||
state.arena_size = size;
|
||||
state.arena = None;
|
||||
state.reserve = None;
|
||||
}
|
||||
|
||||
/// Ensures that the size of the current arena is at least `size`.
|
||||
///
|
||||
/// If `size` is greater than the current arena size, then a new arena will be allocated with
|
||||
/// the new size, and all subsequently allocated arenas will also share the new size. Otherwise
|
||||
/// this has no effect.
|
||||
pub fn reserve(&self, size: DeviceSize) -> Result<(), AllocationCreationError> {
|
||||
if size > self.arena_size() {
|
||||
let state = unsafe { &mut *self.state.get() };
|
||||
state.arena_size = size;
|
||||
state.reserve = None;
|
||||
state.arena = Some(state.next_arena()?);
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Allocates a subbuffer and writes `data` in it.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
/// - Panics if `T` has an alignment greater than `64`.
|
||||
pub fn from_data<T>(&self, data: T) -> Result<Arc<CpuSubbuffer<T>>, AllocationCreationError>
|
||||
where
|
||||
T: BufferContents,
|
||||
{
|
||||
assert!(size_of::<T>() > 0);
|
||||
assert!(align_of::<T>() <= 64);
|
||||
|
||||
let state = unsafe { &mut *self.state.get() };
|
||||
|
||||
let size = size_of::<T>() as DeviceSize;
|
||||
let offset = state.allocate(size, align_of::<T>() as DeviceSize)?;
|
||||
let arena = state.arena.as_ref().unwrap().clone();
|
||||
let allocation = match arena.inner.memory() {
|
||||
BufferMemory::Normal(a) => a,
|
||||
BufferMemory::Sparse => unreachable!(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
let bytes = allocation.write(offset..offset + size).unwrap();
|
||||
let mapping = T::from_bytes_mut(bytes).unwrap();
|
||||
|
||||
ptr::write(mapping, data);
|
||||
|
||||
if let Some(atom_size) = allocation.atom_size() {
|
||||
let size = align_up(size, atom_size.get());
|
||||
let end = DeviceSize::min(offset + size, allocation.size());
|
||||
allocation.flush_range(offset..end).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Arc::new(CpuSubbuffer {
|
||||
id: CpuSubbuffer::<T>::next_id(),
|
||||
offset,
|
||||
size,
|
||||
arena,
|
||||
_marker: PhantomData,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Allocates a subbuffer and writes all elements of `iter` in it.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
/// - Panics if `T` has an alignment greater than `64`.
|
||||
pub fn from_iter<T, I>(
|
||||
&self,
|
||||
iter: I,
|
||||
) -> Result<Arc<CpuSubbuffer<[T]>>, AllocationCreationError>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
I: IntoIterator<Item = T>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
assert!(size_of::<T>() > 0);
|
||||
assert!(align_of::<T>() <= 64);
|
||||
|
||||
let iter = iter.into_iter();
|
||||
let state = unsafe { &mut *self.state.get() };
|
||||
|
||||
let size = (size_of::<T>() * iter.len()) as DeviceSize;
|
||||
let offset = state.allocate(size, align_of::<T>() as DeviceSize)?;
|
||||
let arena = state.arena.as_ref().unwrap().clone();
|
||||
let allocation = match arena.inner.memory() {
|
||||
BufferMemory::Normal(a) => a,
|
||||
BufferMemory::Sparse => unreachable!(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
let bytes = allocation.write(offset..offset + size).unwrap();
|
||||
let mapping = <[T]>::from_bytes_mut(bytes).unwrap();
|
||||
|
||||
for (o, i) in mapping.iter_mut().zip(iter) {
|
||||
ptr::write(o, i);
|
||||
}
|
||||
|
||||
if let Some(atom_size) = allocation.atom_size() {
|
||||
let size = align_up(size, atom_size.get());
|
||||
let end = DeviceSize::min(offset + size, allocation.size());
|
||||
allocation.flush_range(offset..end).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
Ok(Arc::new(CpuSubbuffer {
|
||||
id: CpuSubbuffer::<T>::next_id(),
|
||||
offset,
|
||||
size,
|
||||
arena,
|
||||
_marker: PhantomData,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct CpuBufferAllocatorState<A> {
|
||||
memory_allocator: A,
|
||||
buffer_usage: BufferUsage,
|
||||
memory_usage: MemoryUsage,
|
||||
// The alignment required for the subbuffers.
|
||||
buffer_alignment: DeviceSize,
|
||||
// The current size of the arenas.
|
||||
arena_size: DeviceSize,
|
||||
// Contains the buffer that is currently being suballocated.
|
||||
arena: Option<Arc<Arena>>,
|
||||
// Offset pointing to the start of free memory within the arena.
|
||||
free_start: DeviceSize,
|
||||
// When an `Arena` is dropped, it returns itself here for reuse.
|
||||
reserve: Option<Arc<ArrayQueue<Arc<Buffer>>>>,
|
||||
}
|
||||
|
||||
impl<A> CpuBufferAllocatorState<A>
|
||||
where
|
||||
A: MemoryAllocator,
|
||||
{
|
||||
fn allocate(
|
||||
&mut self,
|
||||
size: DeviceSize,
|
||||
alignment: DeviceSize,
|
||||
) -> Result<DeviceSize, AllocationCreationError> {
|
||||
let alignment = DeviceSize::max(alignment, self.buffer_alignment);
|
||||
|
||||
loop {
|
||||
if self.arena.is_none() {
|
||||
// If the requested size is larger than the arenas, we need to resize them.
|
||||
if self.arena_size < size {
|
||||
self.arena_size = size * 2;
|
||||
// We need to drop our reference to the old pool to make sure the arenas are
|
||||
// dropped once no longer in use, and replace it with a new pool that will not
|
||||
// be polluted with the outdated arenas.
|
||||
self.reserve = None;
|
||||
}
|
||||
self.arena = Some(self.next_arena()?);
|
||||
self.free_start = 0;
|
||||
}
|
||||
|
||||
let arena = self.arena.as_ref().unwrap();
|
||||
let allocation = match arena.inner.memory() {
|
||||
BufferMemory::Normal(a) => a,
|
||||
BufferMemory::Sparse => unreachable!(),
|
||||
};
|
||||
let arena_offset = allocation.offset();
|
||||
let atom_size = allocation.atom_size().map(NonZeroU64::get).unwrap_or(1);
|
||||
|
||||
let alignment = DeviceSize::max(alignment, atom_size);
|
||||
let offset = align_up(arena_offset + self.free_start, alignment);
|
||||
|
||||
if offset + size <= arena_offset + self.arena_size {
|
||||
let offset = offset - arena_offset;
|
||||
self.free_start = offset + size;
|
||||
|
||||
return Ok(offset);
|
||||
}
|
||||
|
||||
// We reached the end of the arena, grab the next one.
|
||||
self.arena = None;
|
||||
}
|
||||
}
|
||||
|
||||
fn next_arena(&mut self) -> Result<Arc<Arena>, AllocationCreationError> {
|
||||
if self.reserve.is_none() {
|
||||
self.reserve = Some(Arc::new(ArrayQueue::new(MAX_ARENAS)));
|
||||
}
|
||||
let reserve = self.reserve.as_ref().unwrap();
|
||||
|
||||
reserve
|
||||
.pop()
|
||||
.map(Ok)
|
||||
.unwrap_or_else(|| self.create_arena())
|
||||
.map(|inner| {
|
||||
Arc::new(Arena {
|
||||
inner: ManuallyDrop::new(inner),
|
||||
reserve: reserve.clone(),
|
||||
})
|
||||
})
|
||||
}
|
||||
|
||||
fn create_arena(&self) -> Result<Arc<Buffer>, AllocationCreationError> {
|
||||
let raw_buffer = RawBuffer::new(
|
||||
self.memory_allocator.device().clone(),
|
||||
BufferCreateInfo {
|
||||
size: self.arena_size,
|
||||
usage: self.buffer_usage,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.map_err(|err| match err {
|
||||
BufferError::AllocError(err) => err,
|
||||
// We don't use sparse-binding, therefore the other errors can't happen.
|
||||
_ => unreachable!(),
|
||||
})?;
|
||||
let mut requirements = *raw_buffer.memory_requirements();
|
||||
requirements.alignment = DeviceSize::max(requirements.alignment, self.buffer_alignment);
|
||||
let create_info = AllocationCreateInfo {
|
||||
requirements,
|
||||
allocation_type: AllocationType::Linear,
|
||||
usage: self.memory_usage,
|
||||
allocate_preference: MemoryAllocatePreference::Unknown,
|
||||
dedicated_allocation: Some(DedicatedAllocation::Buffer(&raw_buffer)),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
match unsafe { self.memory_allocator.allocate_unchecked(create_info) } {
|
||||
Ok(mut alloc) => {
|
||||
debug_assert!(alloc.offset() % requirements.alignment == 0);
|
||||
debug_assert!(alloc.size() == requirements.size);
|
||||
alloc.shrink(self.arena_size);
|
||||
let inner = Arc::new(
|
||||
unsafe { raw_buffer.bind_memory_unchecked(alloc) }
|
||||
.map_err(|(err, _, _)| err)?,
|
||||
);
|
||||
|
||||
Ok(inner)
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct Arena {
|
||||
inner: ManuallyDrop<Arc<Buffer>>,
|
||||
// Where we return the arena in our `Drop` impl.
|
||||
reserve: Arc<ArrayQueue<Arc<Buffer>>>,
|
||||
}
|
||||
|
||||
impl Drop for Arena {
|
||||
fn drop(&mut self) {
|
||||
let inner = unsafe { ManuallyDrop::take(&mut self.inner) };
|
||||
let _ = self.reserve.push(inner);
|
||||
}
|
||||
}
|
||||
|
||||
/// Parameters to create a new [`CpuBufferAllocator`].
|
||||
pub struct CpuBufferAllocatorCreateInfo {
|
||||
/// Initial size of an arena in bytes.
|
||||
///
|
||||
/// Ideally this should fit all the data you need to update per frame. So for example, if you
|
||||
/// need to allocate buffers of size 1K, 2K and 5K each frame, then this should be 8K. If your
|
||||
/// data is dynamically-sized then try to make an educated guess or simply leave the default.
|
||||
///
|
||||
/// The default value is `0`.
|
||||
pub arena_size: DeviceSize,
|
||||
|
||||
/// The buffer usage that all allocated buffers should have.
|
||||
///
|
||||
/// The default value is [`BufferUsage::TRANSFER_SRC`].
|
||||
pub buffer_usage: BufferUsage,
|
||||
|
||||
/// The memory usage that all buffers should be allocated with.
|
||||
///
|
||||
/// Must not be [`MemoryUsage::GpuOnly`].
|
||||
///
|
||||
/// The default value is [`MemoryUsage::Upload`].
|
||||
pub memory_usage: MemoryUsage,
|
||||
|
||||
pub _ne: crate::NonExhaustive,
|
||||
}
|
||||
|
||||
impl Default for CpuBufferAllocatorCreateInfo {
|
||||
#[inline]
|
||||
fn default() -> Self {
|
||||
CpuBufferAllocatorCreateInfo {
|
||||
arena_size: 0,
|
||||
buffer_usage: BufferUsage::TRANSFER_SRC,
|
||||
memory_usage: MemoryUsage::Upload,
|
||||
_ne: crate::NonExhaustive(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A subbuffer allocated using a [`CpuBufferAllocator`].
|
||||
#[derive(Debug)]
|
||||
pub struct CpuSubbuffer<T: ?Sized> {
|
||||
id: NonZeroU64,
|
||||
// Offset within the arena.
|
||||
offset: DeviceSize,
|
||||
// Size of the subbuffer.
|
||||
size: DeviceSize,
|
||||
// We need to keep a reference to the arena so it won't be reset.
|
||||
arena: Arc<Arena>,
|
||||
_marker: PhantomData<Box<T>>,
|
||||
}
|
||||
|
||||
unsafe impl<T> BufferAccess for CpuSubbuffer<T>
|
||||
where
|
||||
T: BufferContents + ?Sized,
|
||||
{
|
||||
fn inner(&self) -> BufferInner<'_> {
|
||||
BufferInner {
|
||||
buffer: &self.arena.inner,
|
||||
offset: self.offset,
|
||||
}
|
||||
}
|
||||
|
||||
fn size(&self) -> DeviceSize {
|
||||
self.size
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> BufferAccessObject for Arc<CpuSubbuffer<T>>
|
||||
where
|
||||
T: BufferContents + ?Sized,
|
||||
{
|
||||
fn as_buffer_access_object(&self) -> Arc<dyn BufferAccess> {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T> TypedBufferAccess for CpuSubbuffer<T>
|
||||
where
|
||||
T: BufferContents + ?Sized,
|
||||
{
|
||||
type Content = T;
|
||||
}
|
||||
|
||||
unsafe impl<T> DeviceOwned for CpuSubbuffer<T>
|
||||
where
|
||||
T: ?Sized,
|
||||
{
|
||||
fn device(&self) -> &Arc<Device> {
|
||||
self.arena.inner.device()
|
||||
}
|
||||
}
|
||||
|
||||
crate::impl_id_counter!(CpuSubbuffer<T>);
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn reserve() {
|
||||
let (device, _) = gfx_dev_and_queue!();
|
||||
let memory_allocator = StandardMemoryAllocator::new_default(device);
|
||||
|
||||
let buffer_allocator = CpuBufferAllocator::new(memory_allocator, Default::default());
|
||||
assert_eq!(buffer_allocator.arena_size(), 0);
|
||||
|
||||
buffer_allocator.reserve(83).unwrap();
|
||||
assert_eq!(buffer_allocator.arena_size(), 83);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn capacity_increase() {
|
||||
let (device, _) = gfx_dev_and_queue!();
|
||||
let memory_allocator = StandardMemoryAllocator::new_default(device);
|
||||
|
||||
let buffer_allocator = CpuBufferAllocator::new(memory_allocator, Default::default());
|
||||
assert_eq!(buffer_allocator.arena_size(), 0);
|
||||
|
||||
buffer_allocator.from_data(12u32).unwrap();
|
||||
assert_eq!(buffer_allocator.arena_size(), 8);
|
||||
}
|
||||
}
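Putting the new API together: creation is a matter of filling in a `CpuBufferAllocatorCreateInfo`, after which subbuffers are handed out per frame. The following is a minimal sketch, not part of the commit; the 8 KiB arena size (echoing the 1K + 2K + 5K guidance in the `arena_size` docs above) and the `memory_allocator` handle are assumptions.

use vulkano::buffer::{
    allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo},
    BufferUsage,
};
use vulkano::memory::allocator::MemoryUsage;

// `memory_allocator` is assumed to be an `Arc<StandardMemoryAllocator>`, as in
// the examples above.
let allocator = CpuBufferAllocator::new(
    memory_allocator,
    CpuBufferAllocatorCreateInfo {
        // Pre-size the arenas to roughly one frame's worth of uploads.
        arena_size: 8 * 1024,
        buffer_usage: BufferUsage::UNIFORM_BUFFER,
        memory_usage: MemoryUsage::Upload,
        ..Default::default()
    },
);

// Per frame: each call returns an independent subbuffer from the current arena.
let subbuffer = allocator.from_data([1.0f32, 0.0, 0.0, 1.0]).unwrap();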
vulkano/src/buffer/cpu_pool.rs (deleted file, 931 lines)
@ -1,931 +0,0 @@
|
||||
// Copyright (c) 2017 The vulkano developers
|
||||
// Licensed under the Apache License, Version 2.0
|
||||
// <LICENSE-APACHE or
|
||||
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT
|
||||
// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
|
||||
// at your option. All files in the project carrying such
|
||||
// notice may not be copied, modified, or distributed except
|
||||
// according to those terms.
|
||||
|
||||
use super::{
|
||||
sys::{Buffer, BufferCreateInfo, RawBuffer},
|
||||
BufferAccess, BufferAccessObject, BufferContents, BufferError, BufferInner, BufferUsage,
|
||||
TypedBufferAccess,
|
||||
};
|
||||
use crate::{
|
||||
buffer::sys::BufferMemory,
|
||||
device::{Device, DeviceOwned},
|
||||
memory::{
|
||||
allocator::{
|
||||
AllocationCreateInfo, AllocationCreationError, AllocationType,
|
||||
MemoryAllocatePreference, MemoryAllocator, MemoryUsage, StandardMemoryAllocator,
|
||||
},
|
||||
DedicatedAllocation,
|
||||
},
|
||||
DeviceSize, VulkanError,
|
||||
};
|
||||
use std::{
|
||||
hash::{Hash, Hasher},
|
||||
marker::PhantomData,
|
||||
mem::size_of,
|
||||
ptr,
|
||||
sync::{
|
||||
atomic::{AtomicU64, Ordering},
|
||||
Arc, Mutex, MutexGuard,
|
||||
},
|
||||
};
|
||||
|
||||
// TODO: Add `CpuBufferPoolSubbuffer::read` to read the content of a subbuffer.
|
||||
// But that's hard to do because we must prevent `increase_gpu_lock` from working while a
|
||||
// buffer is locked.
|
||||
|
||||
/// Ring buffer from which "sub-buffers" can be individually allocated.
|
||||
///
|
||||
/// This buffer is especially suitable when you want to upload or download some data regularly
|
||||
/// (for example, at each frame for a video game).
|
||||
///
|
||||
/// # Usage
|
||||
///
|
||||
/// A `CpuBufferPool` is similar to a ring buffer. You start by creating an empty pool, then you
|
||||
/// grab elements from the pool and use them, and if the pool is full it will automatically grow
|
||||
/// in size.
|
||||
///
|
||||
/// Contrary to a `Vec`, elements automatically free themselves when they are dropped (ie. usually
|
||||
/// when you call `cleanup_finished()` on a future, or when you drop that future).
|
||||
///
|
||||
/// # Arc-like
|
||||
///
|
||||
/// The `CpuBufferPool` struct internally contains an `Arc`. You can clone the `CpuBufferPool` for
|
||||
/// a cheap cost, and all the clones will share the same underlying buffer.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// ```
|
||||
/// use vulkano::buffer::CpuBufferPool;
|
||||
/// use vulkano::command_buffer::AutoCommandBufferBuilder;
|
||||
/// use vulkano::command_buffer::CommandBufferUsage;
|
||||
/// use vulkano::command_buffer::PrimaryCommandBufferAbstract;
|
||||
/// use vulkano::sync::GpuFuture;
|
||||
/// # let queue: std::sync::Arc<vulkano::device::Queue> = return;
|
||||
/// # let memory_allocator: std::sync::Arc<vulkano::memory::allocator::StandardMemoryAllocator> = return;
|
||||
/// # let command_buffer_allocator: vulkano::command_buffer::allocator::StandardCommandBufferAllocator = return;
|
||||
///
|
||||
/// // Create the ring buffer.
|
||||
/// let buffer = CpuBufferPool::upload(memory_allocator);
|
||||
///
|
||||
/// for n in 0 .. 25u32 {
|
||||
/// // Each loop grabs a new entry from that ring buffer and stores `data` in it.
|
||||
/// let data: [f32; 4] = [1.0, 0.5, n as f32 / 24.0, 0.0];
|
||||
/// let sub_buffer = buffer.from_data(data).unwrap();
|
||||
///
|
||||
/// // You can then use `sub_buffer` as if it was an entirely separate buffer.
|
||||
/// AutoCommandBufferBuilder::primary(
|
||||
/// &command_buffer_allocator,
|
||||
/// queue.queue_family_index(),
|
||||
/// CommandBufferUsage::OneTimeSubmit,
|
||||
/// )
|
||||
/// .unwrap()
|
||||
/// // For the sake of the example we just call `update_buffer` on the buffer, even though
|
||||
/// // it is pointless to do that.
|
||||
/// .update_buffer(&[0.2, 0.3, 0.4, 0.5], sub_buffer.clone(), 0)
|
||||
/// .unwrap()
|
||||
/// .build().unwrap()
|
||||
/// .execute(queue.clone())
|
||||
/// .unwrap()
|
||||
/// .then_signal_fence_and_flush()
|
||||
/// .unwrap();
|
||||
/// }
|
||||
/// ```
|
||||
pub struct CpuBufferPool<T, A = StandardMemoryAllocator>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
A: MemoryAllocator + ?Sized,
|
||||
{
|
||||
// The memory pool to use for allocations.
|
||||
allocator: Arc<A>,
|
||||
|
||||
// Current buffer from which elements are grabbed.
|
||||
current_buffer: Mutex<Option<Arc<ActualBuffer>>>,
|
||||
|
||||
// Buffer usage.
|
||||
buffer_usage: BufferUsage,
|
||||
|
||||
memory_usage: MemoryUsage,
|
||||
|
||||
// Necessary to make it compile.
|
||||
marker: PhantomData<Box<T>>,
|
||||
}
|
||||
|
||||
// One buffer of the pool.
|
||||
#[derive(Debug)]
|
||||
struct ActualBuffer {
|
||||
inner: Arc<Buffer>,
|
||||
|
||||
// List of the chunks that are reserved.
|
||||
chunks_in_use: Mutex<Vec<ActualBufferChunk>>,
|
||||
|
||||
// The index of the chunk that should be available next for the ring buffer.
|
||||
next_index: AtomicU64,
|
||||
|
||||
// Number of elements in the buffer.
|
||||
capacity: DeviceSize,
|
||||
}
|
||||
|
||||
// Access pattern of one subbuffer.
|
||||
#[derive(Debug)]
|
||||
struct ActualBufferChunk {
|
||||
// First element number within the actual buffer.
|
||||
index: DeviceSize,
|
||||
|
||||
// Number of occupied elements within the actual buffer.
|
||||
len: DeviceSize,
|
||||
|
||||
// Number of `CpuBufferPoolSubbuffer` objects that point to this subbuffer.
|
||||
num_cpu_accesses: usize,
|
||||
}
|
||||
|
||||
/// A subbuffer allocated from a `CpuBufferPool`.
|
||||
///
|
||||
/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool.
|
||||
pub struct CpuBufferPoolChunk<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
buffer: Arc<ActualBuffer>,
|
||||
|
||||
// Index of the subbuffer within `buffer`. In number of elements.
|
||||
index: DeviceSize,
|
||||
|
||||
// Number of bytes to add to `index * mem::size_of::<T>()` to obtain the start of the data in
|
||||
// the buffer. Necessary for alignment purposes.
|
||||
align_offset: DeviceSize,
|
||||
|
||||
// Size of the subbuffer in number of elements, as requested by the user.
|
||||
// If this is 0, then no entry was added to `chunks_in_use`.
|
||||
requested_len: DeviceSize,
|
||||
|
||||
// Necessary to make it compile.
|
||||
marker: PhantomData<Box<T>>,
|
||||
}
|
||||
|
||||
/// A subbuffer allocated from a `CpuBufferPool`.
|
||||
///
|
||||
/// When this object is destroyed, the subbuffer is automatically reclaimed by the pool.
|
||||
pub struct CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
// This struct is just a wrapper around `CpuBufferPoolChunk`.
|
||||
chunk: CpuBufferPoolChunk<T>,
|
||||
}
|
||||
|
||||
impl<T, A> CpuBufferPool<T, A>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
A: MemoryAllocator + ?Sized,
|
||||
{
|
||||
/// Builds a `CpuBufferPool`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
/// - Panics if `memory_usage` is [`MemoryUsage::GpuOnly`].
|
||||
pub fn new(
|
||||
allocator: Arc<A>,
|
||||
buffer_usage: BufferUsage,
|
||||
memory_usage: MemoryUsage,
|
||||
) -> CpuBufferPool<T, A> {
|
||||
assert!(size_of::<T>() > 0);
|
||||
assert!(memory_usage != MemoryUsage::GpuOnly);
|
||||
|
||||
CpuBufferPool {
|
||||
allocator,
|
||||
current_buffer: Mutex::new(None),
|
||||
buffer_usage,
|
||||
memory_usage,
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a `CpuBufferPool` meant for simple uploads.
|
||||
///
|
||||
/// Shortcut for a pool that can only be used as transfer source and with exclusive queue
|
||||
/// family accesses.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
pub fn upload(allocator: Arc<A>) -> CpuBufferPool<T, A> {
|
||||
CpuBufferPool::new(allocator, BufferUsage::TRANSFER_SRC, MemoryUsage::Upload)
|
||||
}
|
||||
|
||||
/// Builds a `CpuBufferPool` meant for simple downloads.
|
||||
///
|
||||
/// Shortcut for a pool that can only be used as transfer destination and with exclusive queue
|
||||
/// family accesses.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
pub fn download(allocator: Arc<A>) -> CpuBufferPool<T, A> {
|
||||
CpuBufferPool::new(allocator, BufferUsage::TRANSFER_DST, MemoryUsage::Download)
|
||||
}
|
||||
|
||||
/// Builds a `CpuBufferPool` meant for usage as a uniform buffer.
|
||||
///
|
||||
/// Shortcut for a pool that can only be used as uniform buffer and with exclusive queue
|
||||
/// family accesses.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
pub fn uniform_buffer(allocator: Arc<A>) -> CpuBufferPool<T, A> {
|
||||
CpuBufferPool::new(allocator, BufferUsage::UNIFORM_BUFFER, MemoryUsage::Upload)
|
||||
}
|
||||
|
||||
/// Builds a `CpuBufferPool` meant for usage as a vertex buffer.
|
||||
///
|
||||
/// Shortcut for a pool that can only be used as vertex buffer and with exclusive queue
|
||||
/// family accesses.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
pub fn vertex_buffer(allocator: Arc<A>) -> CpuBufferPool<T, A> {
|
||||
CpuBufferPool::new(allocator, BufferUsage::VERTEX_BUFFER, MemoryUsage::Upload)
|
||||
}
|
||||
|
||||
/// Builds a `CpuBufferPool` meant for usage as an indirect buffer.
|
||||
///
|
||||
/// Shortcut for a pool that can only be used as indirect buffer and with exclusive queue
|
||||
/// family accesses.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - Panics if `T` has zero size.
|
||||
pub fn indirect_buffer(allocator: Arc<A>) -> CpuBufferPool<T, A> {
|
||||
CpuBufferPool::new(allocator, BufferUsage::INDIRECT_BUFFER, MemoryUsage::Upload)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T, A> CpuBufferPool<T, A>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
A: MemoryAllocator + ?Sized,
|
||||
{
|
||||
/// Returns the current capacity of the pool, in number of elements.
|
||||
pub fn capacity(&self) -> DeviceSize {
|
||||
match *self.current_buffer.lock().unwrap() {
|
||||
None => 0,
|
||||
Some(ref buf) => buf.capacity,
|
||||
}
|
||||
}
|
||||
|
||||
/// Makes sure that the capacity is at least `capacity`. Allocates memory if it is not the
|
||||
/// case.
|
||||
///
|
||||
/// Since this can involve a memory allocation, an `OomError` can happen.
|
||||
pub fn reserve(&self, capacity: DeviceSize) -> Result<(), AllocationCreationError> {
|
||||
if capacity == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let mut cur_buf = self.current_buffer.lock().unwrap();
|
||||
|
||||
// Check current capacity.
|
||||
match *cur_buf {
|
||||
Some(ref buf) if buf.capacity >= capacity => {
|
||||
return Ok(());
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
|
||||
self.reset_buf(&mut cur_buf, capacity)
|
||||
}
|
||||
|
||||
/// Grants access to a new subbuffer and puts `data` in it.
|
||||
///
|
||||
/// If no subbuffer is available (because they are still in use by the GPU), a new buffer will
|
||||
/// automatically be allocated.
|
||||
///
|
||||
/// > **Note**: You can think of it like a `Vec`. If you insert an element and the `Vec` is not
|
||||
/// > large enough, a new chunk of memory is automatically allocated.
|
||||
pub fn from_data(
|
||||
&self,
|
||||
data: T,
|
||||
) -> Result<Arc<CpuBufferPoolSubbuffer<T>>, AllocationCreationError> {
|
||||
Ok(Arc::new(CpuBufferPoolSubbuffer {
|
||||
chunk: self.chunk_impl([data].into_iter())?,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Grants access to a new subbuffer and puts all elements of `iter` in it.
|
||||
///
|
||||
/// If no subbuffer is available (because they are still in use by the GPU), a new buffer will
|
||||
/// automatically be allocated.
|
||||
///
|
||||
/// > **Note**: You can think of it like a `Vec`. If you insert elements and the `Vec` is not
|
||||
/// > large enough, a new chunk of memory is automatically allocated.
|
||||
///
|
||||
/// # Panic
|
||||
///
|
||||
/// Panics if the length of the iterator didn't match the actual number of elements.
|
||||
pub fn from_iter<I>(
|
||||
&self,
|
||||
iter: I,
|
||||
) -> Result<Arc<CpuBufferPoolChunk<T>>, AllocationCreationError>
|
||||
where
|
||||
I: IntoIterator<Item = T>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
self.chunk_impl(iter.into_iter()).map(Arc::new)
|
||||
}
|
||||
|
||||
fn chunk_impl(
|
||||
&self,
|
||||
data: impl ExactSizeIterator<Item = T>,
|
||||
) -> Result<CpuBufferPoolChunk<T>, AllocationCreationError> {
|
||||
let mut mutex = self.current_buffer.lock().unwrap();
|
||||
|
||||
let data = match self.try_next_impl(&mut mutex, data) {
|
||||
Ok(n) => return Ok(n),
|
||||
Err(d) => d,
|
||||
};
|
||||
|
||||
let next_capacity = match *mutex {
|
||||
Some(ref b) if (data.len() as DeviceSize) < b.capacity => 2 * b.capacity,
|
||||
_ => 2 * data.len().max(1) as DeviceSize,
|
||||
};
|
||||
|
||||
self.reset_buf(&mut mutex, next_capacity)?;
|
||||
|
||||
match self.try_next_impl(&mut mutex, data) {
|
||||
Ok(n) => Ok(n),
|
||||
Err(_) => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Grants access to a new subbuffer and puts `data` in it.
|
||||
///
|
||||
/// Returns `None` if no subbuffer is available.
|
||||
///
|
||||
/// A `CpuBufferPool` is always empty the first time you use it, so you shouldn't use
|
||||
/// `try_next` the first time you use it.
|
||||
pub fn try_next(&self, data: T) -> Option<Arc<CpuBufferPoolSubbuffer<T>>> {
|
||||
let mut mutex = self.current_buffer.lock().unwrap();
|
||||
self.try_next_impl(&mut mutex, [data])
|
||||
.map(|c| Arc::new(CpuBufferPoolSubbuffer { chunk: c }))
|
||||
.ok()
|
||||
}
|
||||
|
||||
// Creates a new buffer and sets it as current. The capacity is in number of elements.
|
||||
//
|
||||
// `cur_buf_mutex` must be an active lock of `self.current_buffer`.
|
||||
fn reset_buf(
|
||||
&self,
|
||||
cur_buf_mutex: &mut MutexGuard<'_, Option<Arc<ActualBuffer>>>,
|
||||
capacity: DeviceSize,
|
||||
) -> Result<(), AllocationCreationError> {
|
||||
let size = match (size_of::<T>() as DeviceSize).checked_mul(capacity) {
|
||||
Some(s) => s,
|
||||
None => {
|
||||
return Err(AllocationCreationError::VulkanError(
|
||||
VulkanError::OutOfDeviceMemory,
|
||||
))
|
||||
}
|
||||
};
|
||||
|
||||
let raw_buffer = RawBuffer::new(
|
||||
self.device().clone(),
|
||||
BufferCreateInfo {
|
||||
size,
|
||||
usage: self.buffer_usage,
|
||||
..Default::default()
|
||||
},
|
||||
)
|
||||
.map_err(|err| match err {
|
||||
BufferError::AllocError(err) => err,
|
||||
// We don't use sparse-binding, therefore the other errors can't happen.
|
||||
_ => unreachable!(),
|
||||
})?;
|
||||
let requirements = *raw_buffer.memory_requirements();
|
||||
let create_info = AllocationCreateInfo {
|
||||
requirements,
|
||||
allocation_type: AllocationType::Linear,
|
||||
usage: self.memory_usage,
|
||||
allocate_preference: MemoryAllocatePreference::Unknown,
|
||||
dedicated_allocation: Some(DedicatedAllocation::Buffer(&raw_buffer)),
|
||||
..Default::default()
|
||||
};
|
||||
|
||||
match unsafe { self.allocator.allocate_unchecked(create_info) } {
|
||||
Ok(mut alloc) => {
|
||||
debug_assert!(alloc.offset() % requirements.alignment == 0);
|
||||
debug_assert!(alloc.size() == requirements.size);
|
||||
alloc.shrink(size);
|
||||
let inner = unsafe {
|
||||
Arc::new(
|
||||
raw_buffer
|
||||
.bind_memory_unchecked(alloc)
|
||||
.map_err(|(err, _, _)| err)?,
|
||||
)
|
||||
};
|
||||
|
||||
**cur_buf_mutex = Some(Arc::new(ActualBuffer {
|
||||
inner,
|
||||
chunks_in_use: Mutex::new(vec![]),
|
||||
next_index: AtomicU64::new(0),
|
||||
capacity,
|
||||
}));
|
||||
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
// Tries to lock a subbuffer from the current buffer.
|
||||
//
|
||||
// `cur_buf_mutex` must be an active lock of `self.current_buffer`.
|
||||
//
|
||||
// Returns `data` wrapped inside an `Err` if there is no slot available in the current buffer.
|
||||
//
|
||||
// # Panic
|
||||
//
|
||||
// Panics if the length of the iterator didn't match the actual number of elements.
|
||||
fn try_next_impl<I>(
|
||||
&self,
|
||||
cur_buf_mutex: &mut MutexGuard<'_, Option<Arc<ActualBuffer>>>,
|
||||
data: I,
|
||||
) -> Result<CpuBufferPoolChunk<T>, I::IntoIter>
|
||||
where
|
||||
I: IntoIterator<Item = T>,
|
||||
I::IntoIter: ExactSizeIterator,
|
||||
{
|
||||
let mut data = data.into_iter();
|
||||
|
||||
// Grab the current buffer. Return `Err` if the pool wasn't "initialized" yet.
|
||||
let current_buffer = match cur_buf_mutex.clone() {
|
||||
Some(b) => b,
|
||||
None => return Err(data),
|
||||
};
|
||||
|
||||
let mut chunks_in_use = current_buffer.chunks_in_use.lock().unwrap();
|
||||
debug_assert!(!chunks_in_use.iter().any(|c| c.len == 0));
|
||||
|
||||
// Number of elements requested by the user.
|
||||
let requested_len = data.len() as DeviceSize;
|
||||
|
||||
// We special case when 0 elements are requested. Polluting the list of allocated chunks
|
||||
// with chunks of length 0 means that we will have troubles deallocating.
|
||||
if requested_len == 0 {
|
||||
assert!(
|
||||
data.next().is_none(),
|
||||
"Expected iterator passed to CpuBufferPool::chunk to be empty"
|
||||
);
|
||||
return Ok(CpuBufferPoolChunk {
|
||||
// TODO: remove .clone() once non-lexical borrows land
|
||||
buffer: current_buffer.clone(),
|
||||
index: 0,
|
||||
align_offset: 0,
|
||||
requested_len: 0,
|
||||
marker: PhantomData,
|
||||
});
|
||||
}
|
||||
|
||||
// Find a suitable offset and len, or returns if none available.
|
||||
let (index, occupied_len, align_offset) = {
|
||||
let (tentative_index, tentative_len, tentative_align_offset) = {
|
||||
// Since the only place that touches `next_index` is this code, and since we
|
||||
// own a mutex lock to the buffer, it means that `next_index` can't be accessed
|
||||
// concurrently.
|
||||
// TODO: ^ eventually should be put inside the mutex
|
||||
let idx = current_buffer.next_index.load(Ordering::SeqCst);
|
||||
|
||||
// Find the required alignment in bytes.
|
||||
let align_uniform = if self.buffer_usage.intersects(BufferUsage::UNIFORM_BUFFER) {
|
||||
self.device()
|
||||
.physical_device()
|
||||
.properties()
|
||||
.min_uniform_buffer_offset_alignment
|
||||
} else {
|
||||
1
|
||||
};
|
||||
let align_storage = if self.buffer_usage.intersects(BufferUsage::STORAGE_BUFFER) {
|
||||
self.device()
|
||||
.physical_device()
|
||||
.properties()
|
||||
.min_storage_buffer_offset_alignment
|
||||
} else {
|
||||
1
|
||||
};
|
||||
let align_bytes = align_uniform.max(align_storage);
|
||||
|
||||
let tentative_align_offset = (align_bytes
|
||||
- ((idx * size_of::<T>() as DeviceSize) % align_bytes))
|
||||
% align_bytes;
|
||||
let additional_len = if tentative_align_offset == 0 {
|
||||
0
|
||||
} else {
|
||||
1 + (tentative_align_offset - 1) / size_of::<T>() as DeviceSize
|
||||
};
|
||||
|
||||
(idx, requested_len + additional_len, tentative_align_offset)
|
||||
};
|
||||
|
||||
// Find out whether any chunk in use overlaps this range.
|
||||
if tentative_index + tentative_len <= current_buffer.capacity
|
||||
&& !chunks_in_use.iter().any(|c| {
|
||||
(c.index >= tentative_index && c.index < tentative_index + tentative_len)
|
||||
|| (c.index <= tentative_index && c.index + c.len > tentative_index)
|
||||
})
|
||||
{
|
||||
(tentative_index, tentative_len, tentative_align_offset)
|
||||
} else {
|
||||
// Impossible to allocate at `tentative_index`. Let's try 0 instead.
|
||||
if requested_len <= current_buffer.capacity
|
||||
&& !chunks_in_use.iter().any(|c| c.index < requested_len)
|
||||
{
|
||||
(0, requested_len, 0)
|
||||
} else {
|
||||
// Buffer is full. Return.
|
||||
return Err(data);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// Write `data` in the memory.
|
||||
unsafe {
|
||||
let range = (index * size_of::<T>() as DeviceSize + align_offset)
|
||||
..((index + requested_len) * size_of::<T>() as DeviceSize + align_offset);
|
||||
|
||||
let allocation = match current_buffer.inner.memory() {
|
||||
BufferMemory::Normal(a) => a,
|
||||
BufferMemory::Sparse => unreachable!(),
|
||||
};
|
||||
|
||||
let bytes = allocation.write(range.clone()).unwrap();
|
||||
let mapping = <[T]>::from_bytes_mut(bytes).unwrap();
|
||||
|
||||
let mut written = 0;
|
||||
for (o, i) in mapping.iter_mut().zip(data) {
|
||||
ptr::write(o, i);
|
||||
written += 1;
|
||||
}
|
||||
|
||||
allocation.flush_range(range).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
written, requested_len,
|
||||
"Iterator passed to CpuBufferPool::chunk has a mismatch between reported \
|
||||
length and actual number of elements"
|
||||
);
|
||||
}
|
||||
|
||||
// Mark the chunk as in use.
|
||||
current_buffer
|
||||
.next_index
|
||||
.store(index + occupied_len, Ordering::SeqCst);
|
||||
chunks_in_use.push(ActualBufferChunk {
|
||||
index,
|
||||
len: occupied_len,
|
||||
num_cpu_accesses: 1,
|
||||
});
|
||||
|
||||
Ok(CpuBufferPoolChunk {
|
||||
// TODO: remove .clone() once non-lexical borrows land
|
||||
buffer: current_buffer.clone(),
|
||||
index,
|
||||
align_offset,
|
||||
requested_len,
|
||||
marker: PhantomData,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// Can't automatically derive `Clone`, otherwise the compiler adds a `T: Clone` requirement.
|
||||
impl<T, A> Clone for CpuBufferPool<T, A>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
A: MemoryAllocator + ?Sized,
|
||||
{
|
||||
fn clone(&self) -> Self {
|
||||
let buf = self.current_buffer.lock().unwrap();
|
||||
|
||||
CpuBufferPool {
|
||||
allocator: self.allocator.clone(),
|
||||
current_buffer: Mutex::new(buf.clone()),
|
||||
buffer_usage: self.buffer_usage,
|
||||
memory_usage: self.memory_usage,
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T, A> DeviceOwned for CpuBufferPool<T, A>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
A: MemoryAllocator + ?Sized,
|
||||
{
|
||||
fn device(&self) -> &Arc<Device> {
|
||||
self.allocator.device()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Clone for CpuBufferPoolChunk<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn clone(&self) -> CpuBufferPoolChunk<T> {
|
||||
let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
|
||||
let chunk = chunks_in_use_lock
|
||||
.iter_mut()
|
||||
.find(|c| c.index == self.index)
|
||||
.unwrap();
|
||||
|
||||
debug_assert!(chunk.num_cpu_accesses >= 1);
|
||||
chunk.num_cpu_accesses = chunk
|
||||
.num_cpu_accesses
|
||||
.checked_add(1)
|
||||
.expect("Overflow in CPU accesses");
|
||||
|
||||
CpuBufferPoolChunk {
|
||||
buffer: self.buffer.clone(),
|
||||
index: self.index,
|
||||
align_offset: self.align_offset,
|
||||
requested_len: self.requested_len,
|
||||
marker: PhantomData,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T> BufferAccess for CpuBufferPoolChunk<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn inner(&self) -> BufferInner<'_> {
|
||||
BufferInner {
|
||||
buffer: &self.buffer.inner,
|
||||
offset: self.index * size_of::<T>() as DeviceSize + self.align_offset,
|
||||
}
|
||||
}
|
||||
|
||||
fn size(&self) -> DeviceSize {
|
||||
self.requested_len * size_of::<T>() as DeviceSize
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> BufferAccessObject for Arc<CpuBufferPoolChunk<T>>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn as_buffer_access_object(&self) -> Arc<dyn BufferAccess> {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for CpuBufferPoolChunk<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn drop(&mut self) {
|
||||
// If `requested_len` is 0, then no entry was added in the chunks.
|
||||
if self.requested_len == 0 {
|
||||
return;
|
||||
}
|
||||
|
||||
let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap();
|
||||
let chunk_num = chunks_in_use_lock
|
||||
.iter_mut()
|
||||
.position(|c| c.index == self.index)
|
||||
.unwrap();
|
||||
|
||||
if chunks_in_use_lock[chunk_num].num_cpu_accesses >= 2 {
|
||||
chunks_in_use_lock[chunk_num].num_cpu_accesses -= 1;
|
||||
} else {
|
||||
chunks_in_use_lock.remove(chunk_num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T> TypedBufferAccess for CpuBufferPoolChunk<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
type Content = [T];
|
||||
}
|
||||
|
||||
unsafe impl<T> DeviceOwned for CpuBufferPoolChunk<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn device(&self) -> &Arc<Device> {
|
||||
self.buffer.inner.device()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> PartialEq for CpuBufferPoolChunk<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.inner() == other.inner() && self.size() == other.size()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Eq for CpuBufferPoolChunk<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
}
|
||||
|
||||
impl<T> Hash for CpuBufferPoolChunk<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.inner().hash(state);
|
||||
self.size().hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Clone for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn clone(&self) -> CpuBufferPoolSubbuffer<T> {
|
||||
CpuBufferPoolSubbuffer {
|
||||
chunk: self.chunk.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T> BufferAccess for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn inner(&self) -> BufferInner<'_> {
|
||||
self.chunk.inner()
|
||||
}
|
||||
|
||||
fn size(&self) -> DeviceSize {
|
||||
self.chunk.size()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> BufferAccessObject for Arc<CpuBufferPoolSubbuffer<T>>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn as_buffer_access_object(&self) -> Arc<dyn BufferAccess> {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl<T> TypedBufferAccess for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
T: BufferContents,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
type Content = T;
|
||||
}
|
||||
|
||||
unsafe impl<T> DeviceOwned for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn device(&self) -> &Arc<Device> {
|
||||
self.chunk.buffer.inner.device()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> PartialEq for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.inner() == other.inner() && self.size() == other.size()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Eq for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
}
|
||||
|
||||
impl<T> Hash for CpuBufferPoolSubbuffer<T>
|
||||
where
|
||||
T: Send + Sync,
|
||||
[T]: BufferContents,
|
||||
{
|
||||
fn hash<H: Hasher>(&self, state: &mut H) {
|
||||
self.inner().hash(state);
|
||||
self.size().hash(state);
|
||||
}
|
||||
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::mem;

    #[test]
    fn basic_create() {
        let (device, _) = gfx_dev_and_queue!();
        let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device));
        let _ = CpuBufferPool::<u8>::upload(memory_allocator);
    }

    #[test]
    fn reserve() {
        let (device, _) = gfx_dev_and_queue!();
        let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device));

        let pool = CpuBufferPool::<u8>::upload(memory_allocator);
        assert_eq!(pool.capacity(), 0);

        pool.reserve(83).unwrap();
        assert_eq!(pool.capacity(), 83);
    }

    #[test]
    fn capacity_increase() {
        let (device, _) = gfx_dev_and_queue!();
        let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device));

        let pool = CpuBufferPool::upload(memory_allocator);
        assert_eq!(pool.capacity(), 0);

        pool.from_data(12).unwrap();
        let first_cap = pool.capacity();
        assert!(first_cap >= 1);

        for _ in 0..first_cap + 5 {
            mem::forget(pool.from_data(12).unwrap());
        }

        assert!(pool.capacity() > first_cap);
    }

    #[test]
    fn reuse_subbuffers() {
        let (device, _) = gfx_dev_and_queue!();
        let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device));

        let pool = CpuBufferPool::upload(memory_allocator);
        assert_eq!(pool.capacity(), 0);

        let mut capacity = None;
        for _ in 0..64 {
            pool.from_data(12).unwrap();

            let new_cap = pool.capacity();
            assert!(new_cap >= 1);
            match capacity {
                None => capacity = Some(new_cap),
                Some(c) => assert_eq!(c, new_cap),
            }
        }
    }

    #[test]
    fn chunk_loopback() {
        let (device, _) = gfx_dev_and_queue!();
        let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device));

        let pool = CpuBufferPool::<u8>::upload(memory_allocator);
        pool.reserve(5).unwrap();

        let a = pool.from_iter(vec![0, 0]).unwrap();
        let b = pool.from_iter(vec![0, 0]).unwrap();
        assert_eq!(b.index, 2);
        drop(a);

        let c = pool.from_iter(vec![0, 0]).unwrap();
        assert_eq!(c.index, 0);

        assert_eq!(pool.capacity(), 5);
    }

    #[test]
    fn chunk_0_elems_doesnt_pollute() {
        let (device, _) = gfx_dev_and_queue!();
        let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device));

        let pool = CpuBufferPool::<u8>::upload(memory_allocator);

        let _ = pool.from_iter(vec![]).unwrap();
        let _ = pool.from_iter(vec![0, 0]).unwrap();
    }
}
@@ -14,11 +14,12 @@
//! between a Vulkan buffer and a regular buffer is that the content of a Vulkan buffer is
//! accessible from the GPU.
//!
//! Vulkano does not perform any specific marshalling of buffer data. The representation of the buffer in
//! memory is identical between the CPU and GPU. Because the Rust compiler is allowed to reorder struct
//! fields at will by default when using `#[repr(Rust)]`, it is advised to mark each struct requiring
//! input assembly as `#[repr(C)]`. This forces Rust to follow the standard C procedure. Each element is
//! laid out in memory in the order of declaration and aligned to a multiple of its alignment.
//! Vulkano does not perform any specific marshalling of buffer data. The representation of the
//! buffer in memory is identical between the CPU and GPU. Because the Rust compiler is allowed to
//! reorder struct fields at will by default when using `#[repr(Rust)]`, it is advised to mark each
//! struct requiring input assembly as `#[repr(C)]`. This forces Rust to follow the standard C
//! procedure. Each element is laid out in memory in the order of declaration and aligned to a
//! multiple of its alignment.
//!
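As a concrete illustration of the layout rule described above, a vertex type for such a buffer might look like the following sketch. The struct name and fields are placeholders chosen for this example; the `Pod`/`Zeroable` derives come from `bytemuck` and the `impl_vertex!` macro from vulkano, both of which the examples touched by this commit already use.

```
use bytemuck::{Pod, Zeroable};
use vulkano::impl_vertex;

// `#[repr(C)]` fixes the field order and C-style layout, so the GPU sees exactly
// the bytes the CPU wrote; `Pod` and `Zeroable` let the data be copied into a
// buffer without any marshalling.
#[repr(C)]
#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)]
struct Vertex {
    position: [f32; 2],
    color: [f32; 3],
}

// Only needed when the struct is used as vertex input for a graphics pipeline.
impl_vertex!(Vertex, position, color);
```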
//! # Various kinds of buffers
//!
@@ -28,26 +29,24 @@
//! Instead you are encouraged to use one of the high-level wrappers that vulkano provides. Which
//! wrapper to use depends on the way you are going to use the buffer:
//!
//! - A [`DeviceLocalBuffer`](crate::buffer::device_local::DeviceLocalBuffer) designates a buffer
//!   usually located in video memory and whose content can't be directly accessed by your
//!   application. Accessing this buffer from the GPU is generally faster compared to accessing a
//!   CPU-accessible buffer.
//! - A [`CpuBufferPool`](crate::buffer::cpu_pool::CpuBufferPool) is a ring buffer that can be used to
//!   transfer data between the CPU and the GPU at a high rate.
//! - A [`CpuAccessibleBuffer`](crate::buffer::cpu_access::CpuAccessibleBuffer) is a simple buffer that
//!   can be used to prototype.
//! - A [`DeviceLocalBuffer`] designates a buffer usually located in video memory and whose content
//!   can't be directly accessed by your application. Accessing this buffer from the GPU is
//!   generally faster compared to accessing a CPU-accessible buffer.
//! - A [`CpuBufferAllocator`] can be used to transfer data between the CPU and the GPU at a high
//!   rate.
//! - A [`CpuAccessibleBuffer`] is a simple buffer that can be used to prototype.
//!
//! Here is a quick way to choose which buffer to use. Do you often need to read or write
//! the content of the buffer? If so, use a `CpuBufferPool`. Otherwise, do you need to have access
//! Here is a quick way to choose which buffer to use. Do you often need to read or write the
//! content of the buffer? If so, use a `CpuBufferAllocator`. Otherwise, do you need to have access
//! to the buffer on the CPU? Then use `CpuAccessibleBuffer`. Otherwise, use a `DeviceLocalBuffer`.
//!
//! Another example: if a buffer is under constant access by the GPU but you need to
//! read its content on the CPU from time to time, it may be a good idea to use a
//! `DeviceLocalBuffer` as the main buffer and a `CpuBufferPool` for when you need to read it.
//! Then whenever you need to read the main buffer, ask the GPU to copy from the device-local
//! buffer to the CPU buffer pool, and read the CPU buffer pool instead.
//! Another example: if a buffer is under constant access by the GPU but you need to read its
//! content on the CPU from time to time, it may be a good idea to use a `DeviceLocalBuffer` as the
//! main buffer and a `CpuAccessibleBuffer` for when you need to read it. Then whenever you need to
//! read the main buffer, ask the GPU to copy from the device-local buffer to the CPU-accessible
//! buffer, and read the CPU-accessible buffer instead.
//!
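To make that choice concrete, here is a minimal sketch of the high-rate path, mirroring the vertex-buffer example updated in this commit. It assumes a `memory_allocator` (a `StandardMemoryAllocator` wrapped in an `Arc`) and a `vertices` slice already exist; they are not created here.

```
use vulkano::buffer::{
    allocator::{CpuBufferAllocator, CpuBufferAllocatorCreateInfo},
    BufferUsage,
};

// One allocator hands out many short-lived subbuffers, so several frames of
// data can be "in flight" at the same time.
let buffer_allocator = CpuBufferAllocator::new(
    memory_allocator.clone(),
    CpuBufferAllocatorCreateInfo {
        // The allocated subbuffers will be bound as vertex buffers.
        buffer_usage: BufferUsage::VERTEX_BUFFER,
        ..Default::default()
    },
);

// Allocate a fresh subbuffer holding this frame's data.
let subbuffer = buffer_allocator.from_iter(vertices.iter().copied()).unwrap();
```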
//! # Buffers usage
//! # Buffer usage
//!
//! When you create a buffer object, you have to specify its *usage*. In other words, you have to
//! specify the way it is going to be used. Trying to use a buffer in a way that wasn't specified
@@ -64,18 +63,18 @@
//!
//! - As a uniform buffer. Uniform buffers are read-only.
//! - As a storage buffer. Storage buffers can be read and written.
//! - As a uniform texel buffer. Contrary to a uniform buffer, the data is interpreted by the
//!   GPU and can be for example normalized.
//! - As a uniform texel buffer. Contrary to a uniform buffer, the data is interpreted by the GPU
//!   and can be for example normalized.
//! - As a storage texel buffer. Additionally, some data formats can be modified with atomic
//!   operations.
//!
//! Using uniform/storage texel buffers requires creating a *buffer view*. See the `view` module
//! for how to create a buffer view.
//!
//! [`CpuBufferAllocator`]: allocator::CpuBufferAllocator

pub use self::{
    cpu_access::CpuAccessibleBuffer,
    cpu_pool::CpuBufferPool,
    device_local::DeviceLocalBuffer,
    slice::BufferSlice,
    sys::BufferError,
@@ -95,8 +94,8 @@ use bytemuck::{
};
use std::mem::size_of;

pub mod allocator;
pub mod cpu_access;
pub mod cpu_pool;
pub mod device_local;
pub mod sys;
pub mod view;
@@ -164,7 +163,7 @@ pub unsafe trait BufferContents: Send + Sync + 'static {
    /// Converts an immutable reference to `Self` to an immutable byte slice.
    fn as_bytes(&self) -> &[u8];

    /// Converts a mutable reference to `Self` to an mutable byte slice.
    /// Converts a mutable reference to `Self` to a mutable byte slice.
    fn as_bytes_mut(&mut self) -> &mut [u8];

    /// Converts an immutable byte slice into an immutable reference to `Self`.
@@ -1624,6 +1624,16 @@ impl From<RequirementNotMet> for GenericMemoryAllocatorCreationError {
    }
}

pub(crate) fn align_up(val: DeviceSize, alignment: DeviceSize) -> DeviceSize {
    align_down(val + alignment - 1, alignment)
}

pub(crate) fn align_down(val: DeviceSize, alignment: DeviceSize) -> DeviceSize {
    debug_assert!(alignment.is_power_of_two());

    val & !(alignment - 1)
}
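A few spot checks of what these helpers compute, assuming power-of-two alignments as the `debug_assert!` requires (the values below are arbitrary examples, not taken from the source):

```
// `align_up` rounds to the next multiple of the alignment,
// `align_down` to the previous one.
assert_eq!(align_up(5, 4), 8);
assert_eq!(align_up(64, 64), 64);
assert_eq!(align_down(5, 4), 4);
assert_eq!(align_down(0, 8), 0);
```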

mod array_vec {
    use std::ops::{Deref, DerefMut};

@@ -14,7 +14,9 @@
//! [the parent module]: super

use self::host::SlotId;
use super::{array_vec::ArrayVec, AllocationCreateInfo, AllocationCreationError};
use super::{
    align_down, align_up, array_vec::ArrayVec, AllocationCreateInfo, AllocationCreationError,
};
use crate::{
    device::{Device, DeviceOwned},
    image::ImageTiling,
@@ -205,6 +207,10 @@ impl MemoryAlloc {
        })
    }

    pub(crate) fn atom_size(&self) -> Option<NonZeroU64> {
        self.atom_size
    }

    /// Invalidates the host (CPU) cache for a range of the allocation.
    ///
    /// You must call this method before the memory is read by the host, if the device previously
@@ -239,8 +245,7 @@ impl MemoryAlloc {
                .result()
                .map_err(VulkanError::from)?;
        } else {
            // FIXME:
            // self.debug_validate_memory_range(&range);
            self.debug_validate_memory_range(&range);
        }

        Ok(())
@@ -280,8 +285,7 @@ impl MemoryAlloc {
                .result()
                .map_err(VulkanError::from)?;
        } else {
            // FIXME:
            // self.debug_validate_memory_range(&range);
            self.debug_validate_memory_range(&range);
        }

        Ok(())
@@ -330,18 +334,22 @@ impl MemoryAlloc {
    /// This exists because even if no cache control is required, the parameters should still be
    /// valid, otherwise you might have bugs in your code forever just because your memory happens
    /// to be host-coherent.
    #[allow(dead_code)]
    fn debug_validate_memory_range(&self, range: &Range<DeviceSize>) {
        debug_assert!(!range.is_empty() && range.end <= self.size);
        debug_assert!({
            let atom_size = self
                .device()
                .physical_device()
                .properties()
                .non_coherent_atom_size;
        debug_assert!(
            {
                let atom_size = self
                    .device()
                    .physical_device()
                    .properties()
                    .non_coherent_atom_size;

            range.start % atom_size == 0 && (range.end % atom_size == 0 || range.end == self.size)
        });
                range.start % atom_size == 0
                    && (range.end % atom_size == 0 || range.end == self.size)
            },
            "attempted to invalidate or flush a memory range that is not aligned to the \
            non-coherent atom size",
        );
    }
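If a caller's range is not already aligned, one way to satisfy this check is to widen the range to atom-size boundaries before flushing or invalidating. The helper below is only a hypothetical sketch under that assumption, not vulkano API; `u64` stands in for `DeviceSize`.

```
use std::ops::Range;

// Hypothetical: widen `range` so both ends land on `atom_size` boundaries,
// clamping the end to the allocation size (the check above also accepts a
// range that ends exactly at the size of the allocation).
fn widen_to_atom_size(range: Range<u64>, atom_size: u64, allocation_size: u64) -> Range<u64> {
    let start = range.start / atom_size * atom_size;
    let end = ((range.end + atom_size - 1) / atom_size * atom_size).min(allocation_size);

    start..end
}
```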

    /// Returns the underlying block of [`DeviceMemory`].
@@ -925,17 +933,17 @@ impl Display for SuballocationCreationError {
/// });
/// ```
///
/// For use in allocating buffers for [`CpuBufferPool`]:
/// For use in allocating arenas for [`CpuBufferAllocator`]:
///
/// ```
/// use std::sync::Arc;
/// use vulkano::buffer::CpuBufferPool;
/// use vulkano::buffer::allocator::CpuBufferAllocator;
/// use vulkano::memory::allocator::StandardMemoryAllocator;
/// # let device: std::sync::Arc<vulkano::device::Device> = return;
///
/// // We need to wrap the allocator in an `Arc` so that we can share ownership of it.
/// let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
/// let buffer_pool = CpuBufferPool::<u32>::upload(memory_allocator.clone());
/// let buffer_allocator = CpuBufferAllocator::new(memory_allocator.clone(), Default::default());
///
/// // You can continue using `memory_allocator` for other things.
/// ```
@@ -978,7 +986,7 @@ impl Display for SuballocationCreationError {
/// [alignment requirements]: super#alignment
/// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
/// [`StandardMemoryAllocator`]: super::StandardMemoryAllocator
/// [`CpuBufferPool`]: crate::buffer::CpuBufferPool
/// [`CpuBufferAllocator`]: crate::buffer::allocator::CpuBufferAllocator
#[derive(Debug)]
pub struct FreeListAllocator {
    region: MemoryAlloc,
@@ -2430,16 +2438,6 @@ impl Display for BumpAllocatorResetError {
    }
}

fn align_up(val: DeviceSize, alignment: DeviceSize) -> DeviceSize {
    align_down(val + alignment - 1, alignment)
}

fn align_down(val: DeviceSize, alignment: DeviceSize) -> DeviceSize {
    debug_assert!(alignment.is_power_of_two());

    val & !(alignment - 1)
}

/// Checks if resources A and B share a page.
///
/// > **Note**: Assumes `a_offset + a_size > 0` and `a_offset + a_size <= b_offset`.
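The body of this function is not shown in this excerpt. Under the documented assumption, a page-sharing check could look roughly like the following hypothetical sketch; the name, signature, and `u64` (standing in for `DeviceSize`) are illustrative only.

```
// Hypothetical sketch: resources A and B interfere if the last page touched by A
// is the same page as the first page touched by B.
fn share_a_page(a_offset: u64, a_size: u64, b_offset: u64, page_size: u64) -> bool {
    debug_assert!(a_offset + a_size > 0 && a_offset + a_size <= b_offset);

    let a_last_page = (a_offset + a_size - 1) / page_size;
    let b_first_page = b_offset / page_size;

    a_last_page == b_first_page
}
```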