Make command buffer/descriptor set allocators Sync again (#2046)

* Make cmd buffer/desc set allocators `Sync` again

* English
marc0246 2022-10-27 20:59:47 +02:00 committed by GitHub
parent 99aea525c0
commit 8a1c91f556
18 changed files with 250 additions and 93 deletions
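
With the allocators `Send + Sync` again, the example code below switches its allocator handles from `Rc` back to `Arc`; the same applies to `StandardDescriptorSetAllocator`. A minimal sketch of the pattern this restores, assuming a `device` and an active `queue_family_index` from ordinary vulkano setup (the helper function and variable names are illustrative, not part of this commit):

use std::{sync::Arc, thread};
use vulkano::{
    command_buffer::{
        allocator::{CommandBufferAllocator, StandardCommandBufferAllocator},
        CommandBufferLevel,
    },
    device::Device,
};

// Hypothetical helper: one allocator for the whole program, shared via `Arc`.
fn spawn_recording_threads(device: Arc<Device>, queue_family_index: u32) {
    let allocator = Arc::new(StandardCommandBufferAllocator::new(device));

    let handles: Vec<_> = (0..4)
        .map(|_| {
            let allocator = allocator.clone();
            thread::spawn(move || {
                // Each thread is transparently served from its own thread-local
                // `CommandPool`, so no external synchronization is needed.
                let _builder_alloc = allocator
                    .allocate(queue_family_index, CommandBufferLevel::Primary, 1)
                    .unwrap()
                    .next()
                    .unwrap();
            })
        })
        .collect();

    for handle in handles {
        handle.join().unwrap();
    }
}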

View File

@ -8,7 +8,7 @@
// according to those terms.
use bytemuck::{Pod, Zeroable};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess},
command_buffer::{
@ -40,8 +40,8 @@ pub struct AmbientLightingSystem {
vertex_buffer: Arc<CpuAccessibleBuffer<[Vertex]>>,
subpass: Subpass,
pipeline: Arc<GraphicsPipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
}
impl AmbientLightingSystem {
@ -50,8 +50,8 @@ impl AmbientLightingSystem {
gfx_queue: Arc<Queue>,
subpass: Subpass,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
) -> AmbientLightingSystem {
// TODO: vulkano doesn't allow us to draw without a vertex buffer, otherwise we could
// hard-code these values in the shader

View File

@ -9,7 +9,7 @@
use bytemuck::{Pod, Zeroable};
use cgmath::Vector3;
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess},
command_buffer::{
@ -41,8 +41,8 @@ pub struct DirectionalLightingSystem {
vertex_buffer: Arc<CpuAccessibleBuffer<[Vertex]>>,
subpass: Subpass,
pipeline: Arc<GraphicsPipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
}
impl DirectionalLightingSystem {
@ -51,8 +51,8 @@ impl DirectionalLightingSystem {
gfx_queue: Arc<Queue>,
subpass: Subpass,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
) -> DirectionalLightingSystem {
// TODO: vulkano doesn't allow us to draw without a vertex buffer, otherwise we could
// hard-code these values in the shader

View File

@ -9,7 +9,7 @@
use bytemuck::{Pod, Zeroable};
use cgmath::{Matrix4, Vector3};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess},
command_buffer::{
@ -40,8 +40,8 @@ pub struct PointLightingSystem {
vertex_buffer: Arc<CpuAccessibleBuffer<[Vertex]>>,
subpass: Subpass,
pipeline: Arc<GraphicsPipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
}
impl PointLightingSystem {
@ -50,8 +50,8 @@ impl PointLightingSystem {
gfx_queue: Arc<Queue>,
subpass: Subpass,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
) -> PointLightingSystem {
// TODO: vulkano doesn't allow us to draw without a vertex buffer, otherwise we could
// hard-code these values in the shader

View File

@ -13,7 +13,7 @@ use super::{
point_lighting_system::PointLightingSystem,
};
use cgmath::{Matrix4, SquareMatrix, Vector3};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
command_buffer::{
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
@ -40,7 +40,7 @@ pub struct FrameSystem {
render_pass: Arc<RenderPass>,
memory_allocator: Arc<StandardMemoryAllocator>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
// Intermediate render target that will contain the albedo of each pixel of the scene.
diffuse_buffer: Arc<ImageView<AttachmentImage>>,
@ -74,7 +74,7 @@ impl FrameSystem {
gfx_queue: Arc<Queue>,
final_output_format: Format,
memory_allocator: Arc<StandardMemoryAllocator>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
) -> FrameSystem {
// Creating the render pass.
//
@ -196,7 +196,7 @@ impl FrameSystem {
)
.unwrap();
let descriptor_set_allocator = Rc::new(StandardDescriptorSetAllocator::new(
let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new(
gfx_queue.device().clone(),
));

View File

@ -30,7 +30,7 @@ use crate::{
triangle_draw_system::TriangleDrawSystem,
};
use cgmath::{Matrix4, SquareMatrix, Vector3};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
command_buffer::allocator::StandardCommandBufferAllocator,
device::{
@ -166,7 +166,7 @@ fn main() {
};
let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new(device.clone()));
let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new(device.clone()));
// Here is the basic initialization for the deferred system.
let mut frame_system = FrameSystem::new(

View File

@ -8,7 +8,7 @@
// according to those terms.
use bytemuck::{Pod, Zeroable};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess},
command_buffer::{
@ -35,7 +35,7 @@ pub struct TriangleDrawSystem {
vertex_buffer: Arc<CpuAccessibleBuffer<[Vertex]>>,
subpass: Subpass,
pipeline: Arc<GraphicsPipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
}
impl TriangleDrawSystem {
@ -44,7 +44,7 @@ impl TriangleDrawSystem {
gfx_queue: Arc<Queue>,
subpass: Subpass,
memory_allocator: &StandardMemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
) -> TriangleDrawSystem {
let vertices = [
Vertex {

View File

@ -10,8 +10,8 @@
use crate::fractal_compute_pipeline::FractalComputePipeline;
use crate::place_over_frame::RenderPassPlaceOverFrame;
use cgmath::Vector2;
use std::sync::Arc;
use std::time::Instant;
use std::{rc::Rc, sync::Arc};
use vulkano::command_buffer::allocator::StandardCommandBufferAllocator;
use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator;
use vulkano::device::Queue;
@ -64,10 +64,10 @@ impl FractalApp {
let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(
gfx_queue.device().clone(),
));
let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new(
let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new(
gfx_queue.device().clone(),
));
let descriptor_set_allocator = Rc::new(StandardDescriptorSetAllocator::new(
let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new(
gfx_queue.device().clone(),
));

View File

@ -9,7 +9,7 @@
use cgmath::Vector2;
use rand::Rng;
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer},
command_buffer::{
@ -31,8 +31,8 @@ pub struct FractalComputePipeline {
queue: Arc<Queue>,
pipeline: Arc<ComputePipeline>,
memory_allocator: Arc<StandardMemoryAllocator>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
palette: Arc<CpuAccessibleBuffer<[[f32; 4]]>>,
palette_size: i32,
end_color: [f32; 4],
@ -42,8 +42,8 @@ impl FractalComputePipeline {
pub fn new(
queue: Arc<Queue>,
memory_allocator: Arc<StandardMemoryAllocator>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
) -> FractalComputePipeline {
// Initial colors
let colors = vec![

View File

@ -8,7 +8,7 @@
// according to those terms.
use bytemuck::{Pod, Zeroable};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess},
command_buffer::{
@ -72,8 +72,8 @@ pub struct PixelsDrawPipeline {
gfx_queue: Arc<Queue>,
subpass: Subpass,
pipeline: Arc<GraphicsPipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
vertices: Arc<CpuAccessibleBuffer<[TexturedVertex]>>,
indices: Arc<CpuAccessibleBuffer<[u32]>>,
}
@ -83,8 +83,8 @@ impl PixelsDrawPipeline {
gfx_queue: Arc<Queue>,
subpass: Subpass,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
) -> PixelsDrawPipeline {
let (vertices, indices) = textured_quad(2.0, 2.0);
let vertex_buffer = CpuAccessibleBuffer::<[TexturedVertex]>::from_iter(

View File

@ -8,7 +8,7 @@
// according to those terms.
use crate::pixels_draw_pipeline::PixelsDrawPipeline;
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
command_buffer::{
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
@ -29,15 +29,15 @@ pub struct RenderPassPlaceOverFrame {
gfx_queue: Arc<Queue>,
render_pass: Arc<RenderPass>,
pixels_draw_pipeline: PixelsDrawPipeline,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
}
impl RenderPassPlaceOverFrame {
pub fn new(
gfx_queue: Arc<Queue>,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
output_format: Format,
) -> RenderPassPlaceOverFrame {
let render_pass = vulkano::single_pass_renderpass!(gfx_queue.device().clone(),

View File

@ -11,7 +11,7 @@ use crate::{
game_of_life::GameOfLifeComputePipeline, render_pass::RenderPassPlaceOverFrame, SCALING,
WINDOW2_HEIGHT, WINDOW2_WIDTH, WINDOW_HEIGHT, WINDOW_WIDTH,
};
use std::{collections::HashMap, rc::Rc, sync::Arc};
use std::{collections::HashMap, sync::Arc};
use vulkano::command_buffer::allocator::StandardCommandBufferAllocator;
use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator;
use vulkano::memory::allocator::StandardMemoryAllocator;
@ -33,10 +33,10 @@ impl RenderPipeline {
swapchain_format: Format,
) -> RenderPipeline {
let memory_allocator = StandardMemoryAllocator::new_default(gfx_queue.device().clone());
let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new(
let command_buffer_allocator = Arc::new(StandardCommandBufferAllocator::new(
gfx_queue.device().clone(),
));
let descriptor_set_allocator = Rc::new(StandardDescriptorSetAllocator::new(
let descriptor_set_allocator = Arc::new(StandardDescriptorSetAllocator::new(
gfx_queue.device().clone(),
));

View File

@ -9,7 +9,7 @@
use cgmath::Vector2;
use rand::Rng;
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::command_buffer::allocator::StandardCommandBufferAllocator;
use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator;
use vulkano::image::{ImageUsage, StorageImage};
@ -34,8 +34,8 @@ use vulkano_util::renderer::DeviceImageView;
pub struct GameOfLifeComputePipeline {
compute_queue: Arc<Queue>,
compute_life_pipeline: Arc<ComputePipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
life_in: Arc<CpuAccessibleBuffer<[u32]>>,
life_out: Arc<CpuAccessibleBuffer<[u32]>>,
image: DeviceImageView,
@ -63,8 +63,8 @@ impl GameOfLifeComputePipeline {
pub fn new(
compute_queue: Arc<Queue>,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
size: [u32; 2],
) -> GameOfLifeComputePipeline {
let life_in = rand_grid(memory_allocator, size);

View File

@ -8,7 +8,7 @@
// according to those terms.
use bytemuck::{Pod, Zeroable};
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
buffer::{BufferUsage, CpuAccessibleBuffer, TypedBufferAccess},
command_buffer::{
@ -72,8 +72,8 @@ pub struct PixelsDrawPipeline {
gfx_queue: Arc<Queue>,
subpass: Subpass,
pipeline: Arc<GraphicsPipeline>,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
vertices: Arc<CpuAccessibleBuffer<[TexturedVertex]>>,
indices: Arc<CpuAccessibleBuffer<[u32]>>,
}
@ -83,8 +83,8 @@ impl PixelsDrawPipeline {
gfx_queue: Arc<Queue>,
subpass: Subpass,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
) -> PixelsDrawPipeline {
let (vertices, indices) = textured_quad(2.0, 2.0);
let vertex_buffer = CpuAccessibleBuffer::<[TexturedVertex]>::from_iter(

View File

@ -8,7 +8,7 @@
// according to those terms.
use crate::pixels_draw::PixelsDrawPipeline;
use std::{rc::Rc, sync::Arc};
use std::sync::Arc;
use vulkano::{
command_buffer::{
allocator::StandardCommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferUsage,
@ -29,15 +29,15 @@ pub struct RenderPassPlaceOverFrame {
gfx_queue: Arc<Queue>,
render_pass: Arc<RenderPass>,
pixels_draw_pipeline: PixelsDrawPipeline,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
}
impl RenderPassPlaceOverFrame {
pub fn new(
gfx_queue: Arc<Queue>,
memory_allocator: &impl MemoryAllocator,
command_buffer_allocator: Rc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Rc<StandardDescriptorSetAllocator>,
command_buffer_allocator: Arc<StandardCommandBufferAllocator>,
descriptor_set_allocator: Arc<StandardDescriptorSetAllocator>,
output_format: Format,
) -> RenderPassPlaceOverFrame {
let render_pass = vulkano::single_pass_renderpass!(gfx_queue.device().clone(),

View File

@ -25,6 +25,7 @@ libloading = "0.7"
nalgebra = { version = "0.31.0", optional = true }
parking_lot = { version = "0.12", features = ["send_guard"] }
smallvec = "1.8"
thread_local = "1.1"
[target.'cfg(target_os = "ios")'.dependencies]
objc = "0.2.5"

View File

@ -30,6 +30,7 @@ use crate::{
use crossbeam_queue::SegQueue;
use smallvec::SmallVec;
use std::{cell::UnsafeCell, marker::PhantomData, mem::ManuallyDrop, sync::Arc, vec::IntoIter};
use thread_local::ThreadLocal;
/// Types that manage the memory of command buffers.
///
@ -100,9 +101,20 @@ pub unsafe trait CommandBufferAlloc: DeviceOwned + Send + Sync + 'static {
/// Standard implementation of a command buffer allocator.
///
/// A thread can have as many `StandardCommandBufferAllocator`s as needed, but they can't be shared
/// between threads. This is done so that there are no locks involved when creating command
/// buffers. You are encouraged to create one allocator per frame in flight per thread.
/// The intended way to use this allocator is to have one that is used globally for the duration of
/// the program, in order to avoid creating and destroying [`CommandPool`]s, as that is expensive.
///
/// Internally, this allocator keeps one `CommandPool` per queue family index per thread, using
/// Thread-Local Storage. When a thread first allocates, an entry is reserved for it in the TLS.
/// If a thread exits while the allocator is still alive, its entry is freed, but the pools it
/// used are not dropped. The next time a new thread allocates for the first time, that entry and
/// its pools are reused. If all threads drop their reference to the allocator, all entries are
/// dropped along with the allocator itself, even if the threads haven't exited yet. This is why
/// you should keep the allocator alive for as long as you need to allocate, so that the pools
/// can keep being reused.
///
/// This allocator only needs to lock when a thread first allocates or when a thread that
/// previously allocated exits. In all other cases, allocation is lock-free.
///
/// Command buffers can't be moved between threads during the building process, but finished command
/// buffers can. When a command buffer is dropped, it is returned to the pool for reuse.
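
The `allocate` implementation further down in this file fetches this per-thread state with `thread_local::ThreadLocal::get_or`. A minimal, self-contained sketch of that mechanism, not vulkano code and with illustrative names, showing lazily initialized per-thread slots that outlive the threads which created them:

use std::cell::Cell;
use thread_local::ThreadLocal;

fn main() {
    let mut slots: ThreadLocal<Cell<u32>> = ThreadLocal::new();

    std::thread::scope(|s| {
        for _ in 0..4 {
            s.spawn(|| {
                // The first call on a given thread may briefly lock and runs
                // the closure; every later call from that thread is lock-free.
                let slot = slots.get_or(|| Cell::new(0));
                slot.set(slot.get() + 1);
            });
        }
    });

    // The worker threads have exited, but their values are still owned by
    // `slots`; a new thread that reuses a freed thread ID would be handed an
    // existing slot, the same way the allocator hands reclaimed pools to new
    // threads.
    let total: u32 = slots.iter_mut().map(|slot| slot.get()).sum();
    assert_eq!(total, 4);
}

The sum is 4 whether or not any thread IDs (and therefore slots) were recycled while the loop ran, which mirrors how the pools survive their threads until the allocator itself is dropped.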
@ -110,21 +122,28 @@ pub unsafe trait CommandBufferAlloc: DeviceOwned + Send + Sync + 'static {
pub struct StandardCommandBufferAllocator {
device: Arc<Device>,
/// Each queue family index points directly to its pool.
pools: SmallVec<[UnsafeCell<Option<Arc<Pool>>>; 8]>,
pools: ThreadLocal<SmallVec<[UnsafeCell<Option<Pool>>; 8]>>,
}
#[derive(Debug)]
struct Pool {
inner: Arc<PoolInner>,
}
// This is needed because of the blanket impl of `Send` on `Arc<T>`, which requires that `T` is
// `Send + Sync`. `PoolInner` is `Send + !Sync` because `CommandPool` is `!Sync`. That's fine
// however because we never access the `CommandPool` concurrently, only drop it once the `Arc`
// containing it is dropped.
unsafe impl Send for Pool {}
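
The comment above relies on the rule that `Arc<T>` is `Send` only when `T: Send + Sync`. A tiny stand-alone illustration of that rule and of the manual `unsafe impl Send` workaround (illustrative types, not vulkano code):

use std::{cell::Cell, sync::Arc, thread};

// Stand-in for `CommandPool`: `Send` but `!Sync`.
struct NotSync {
    value: Cell<u32>,
}

// Stand-in for `Pool`: holding `Arc<NotSync>` makes it `!Send` by default,
// because `Arc<T>: Send` requires `T: Send + Sync`.
struct Wrapper {
    inner: Arc<NotSync>,
}

// SAFETY (sketch): sound only because no two threads ever touch `inner.value`
// at the same time; the last owner simply drops it.
unsafe impl Send for Wrapper {}

fn main() {
    let wrapper = Wrapper {
        inner: Arc::new(NotSync { value: Cell::new(0) }),
    };
    wrapper.inner.value.set(1);

    // Without the `unsafe impl Send` above, moving `wrapper` into the spawned
    // thread would not compile.
    thread::spawn(move || drop(wrapper)).join().unwrap();
}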
impl StandardCommandBufferAllocator {
/// Creates a new `StandardCommandBufferAllocator`.
#[inline]
pub fn new(device: Arc<Device>) -> Self {
let pools = device
.physical_device()
.queue_family_properties()
.iter()
.map(|_| UnsafeCell::new(None))
.collect();
StandardCommandBufferAllocator { device, pools }
StandardCommandBufferAllocator {
device,
pools: ThreadLocal::new(),
}
}
}
@ -135,6 +154,10 @@ unsafe impl CommandBufferAllocator for StandardCommandBufferAllocator {
type Alloc = StandardCommandBufferAlloc;
/// Allocates command buffers.
///
/// Returns an iterator that contains the requested number of allocated command buffers.
///
/// # Panics
///
/// - Panics if the queue family index is not active on the device.
@ -151,12 +174,26 @@ unsafe impl CommandBufferAllocator for StandardCommandBufferAllocator {
.active_queue_family_indices()
.contains(&queue_family_index));
let pool = unsafe { &mut *self.pools[queue_family_index as usize].get() };
let pools = self.pools.get_or(|| {
self.device
.physical_device()
.queue_family_properties()
.iter()
.map(|_| UnsafeCell::new(None))
.collect()
});
let pool = unsafe { &mut *pools[queue_family_index as usize].get() };
if pool.is_none() {
*pool = Some(Pool::new(self.device.clone(), queue_family_index)?);
*pool = Some(Pool {
inner: PoolInner::new(self.device.clone(), queue_family_index)?,
});
}
pool.as_ref().unwrap().allocate(level, command_buffer_count)
pool.as_ref()
.unwrap()
.inner
.allocate(level, command_buffer_count)
}
}
@ -168,7 +205,7 @@ unsafe impl DeviceOwned for StandardCommandBufferAllocator {
}
#[derive(Debug)]
struct Pool {
struct PoolInner {
// The Vulkan pool specific to a device's queue family.
inner: CommandPool,
// List of existing primary command buffers that are available for reuse.
@ -177,7 +214,7 @@ struct Pool {
secondary_pool: SegQueue<CommandPoolAlloc>,
}
impl Pool {
impl PoolInner {
fn new(device: Arc<Device>, queue_family_index: u32) -> Result<Arc<Self>, OomError> {
CommandPool::new(
device,
@ -188,7 +225,7 @@ impl Pool {
},
)
.map(|inner| {
Arc::new(Pool {
Arc::new(PoolInner {
inner,
primary_pool: Default::default(),
secondary_pool: Default::default(),
@ -224,7 +261,7 @@ impl Pool {
cmd: ManuallyDrop::new(cmd),
pool: self.clone(),
},
dummy_avoid_send_sync: PhantomData,
_marker: PhantomData,
});
} else {
break;
@ -249,7 +286,7 @@ impl Pool {
cmd: ManuallyDrop::new(cmd),
pool: self.clone(),
},
dummy_avoid_send_sync: PhantomData,
_marker: PhantomData,
});
}
}
@ -267,7 +304,7 @@ pub struct StandardCommandBufferBuilderAlloc {
// Therefore we just share the structs.
inner: StandardCommandBufferAlloc,
// Unimplemented `Send` and `Sync` from the builder.
dummy_avoid_send_sync: PhantomData<*const u8>,
_marker: PhantomData<*const ()>,
}
unsafe impl CommandBufferBuilderAlloc for StandardCommandBufferBuilderAlloc {
@ -301,7 +338,7 @@ pub struct StandardCommandBufferAlloc {
// The actual command buffer. Extracted in the `Drop` implementation.
cmd: ManuallyDrop<CommandPoolAlloc>,
// We hold a reference to the command pool for our destructor.
pool: Arc<Pool>,
pool: Arc<PoolInner>,
}
unsafe impl Send for StandardCommandBufferAlloc {}
@ -340,10 +377,9 @@ impl Drop for StandardCommandBufferAlloc {
#[cfg(test)]
mod tests {
use super::{
CommandBufferAllocator, CommandBufferBuilderAlloc, StandardCommandBufferAllocator,
};
use crate::{command_buffer::CommandBufferLevel, VulkanObject};
use super::*;
use crate::VulkanObject;
use std::thread;
#[test]
fn reuse_command_buffers() {
@ -366,4 +402,36 @@ mod tests {
.unwrap();
assert_eq!(raw, cb2.inner().handle());
}
#[test]
fn threads_use_different_pools() {
let (device, queue) = gfx_dev_and_queue!();
let allocator = StandardCommandBufferAllocator::new(device);
let pool1 = allocator
.allocate(queue.queue_family_index(), CommandBufferLevel::Primary, 1)
.unwrap()
.next()
.unwrap()
.into_alloc()
.pool
.inner
.handle();
thread::spawn(move || {
let pool2 = allocator
.allocate(queue.queue_family_index(), CommandBufferLevel::Primary, 1)
.unwrap()
.next()
.unwrap()
.into_alloc()
.pool
.inner
.handle();
assert_ne!(pool1, pool2);
})
.join()
.unwrap();
}
}

View File

@ -30,6 +30,7 @@ use crate::{
};
use ahash::HashMap;
use std::{cell::UnsafeCell, sync::Arc};
use thread_local::ThreadLocal;
/// Types that manage the memory of descriptor sets.
///
@ -69,12 +70,26 @@ pub trait DescriptorSetAlloc: Send + Sync {
/// Standard implementation of a descriptor set allocator.
///
/// Internally, this implementation uses one [`SingleLayoutDescriptorSetPool`] /
/// [`SingleLayoutVariableDescriptorSetPool`] per descriptor set layout.
/// The intended way to use this allocator is to have one that is used globally for the duration of
/// the program, in order to avoid creating and destroying [`DescriptorPool`]s, as that is
/// expensive.
///
/// Internally, this allocator uses one [`SingleLayoutDescriptorSetPool`] /
/// [`SingleLayoutVariableDescriptorSetPool`] per descriptor set layout per thread, using
/// Thread-Local Storage. When a thread first allocates, an entry is reserved for it in the TLS.
/// If a thread exits while the allocator is still alive, its entry is freed, but the pools it
/// used are not dropped. The next time a new thread allocates for the first time, that entry and
/// its pools are reused. If all threads drop their reference to the allocator, all entries are
/// dropped along with the allocator itself, even if the threads haven't exited yet. This is why
/// you should keep the allocator alive for as long as you need to allocate, so that the pools
/// can keep being reused.
///
/// This allocator only needs to lock when a thread first allocates or when a thread that
/// previously allocated exits. In all other cases, allocation is lock-free.
#[derive(Debug)]
pub struct StandardDescriptorSetAllocator {
device: Arc<Device>,
pools: UnsafeCell<HashMap<Arc<DescriptorSetLayout>, Pool>>,
pools: ThreadLocal<UnsafeCell<HashMap<Arc<DescriptorSetLayout>, Pool>>>,
}
#[derive(Debug)]
@ -89,7 +104,7 @@ impl StandardDescriptorSetAllocator {
pub fn new(device: Arc<Device>) -> StandardDescriptorSetAllocator {
StandardDescriptorSetAllocator {
device,
pools: UnsafeCell::new(HashMap::default()),
pools: ThreadLocal::new(),
}
}
}
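
Putting the constructor above together with the globally shared usage the doc comment recommends, a minimal sketch; `device` and `layout` are assumed to come from ordinary vulkano setup, and the function name is illustrative:

use std::{sync::Arc, thread};
use vulkano::{
    descriptor_set::{
        allocator::{DescriptorSetAllocator, StandardDescriptorSetAllocator},
        layout::DescriptorSetLayout,
    },
    device::Device,
};

fn allocate_from_workers(device: Arc<Device>, layout: Arc<DescriptorSetLayout>) {
    // Keep a single allocator alive for the lifetime of the program so the
    // per-thread `DescriptorPool`s can be reused.
    let allocator = Arc::new(StandardDescriptorSetAllocator::new(device));

    let handles: Vec<_> = (0..4)
        .map(|_| {
            let allocator = allocator.clone();
            let layout = layout.clone();
            thread::spawn(move || {
                // Served from this thread's own pool for `layout`; only the
                // very first allocation on the thread needs to lock.
                let _alloc = allocator.allocate(&layout, 0).unwrap();
            })
        })
        .collect();

    for handle in handles {
        handle.join().unwrap();
    }
}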
@ -97,6 +112,14 @@ impl StandardDescriptorSetAllocator {
unsafe impl DescriptorSetAllocator for StandardDescriptorSetAllocator {
type Alloc = StandardDescriptorSetAlloc;
/// Allocates a descriptor set.
///
/// # Panics
///
/// - Panics if the provided `layout` is for push descriptors rather than regular descriptor
/// sets.
/// - Panics if the provided `variable_descriptor_count` is greater than the maximum number of
/// variable count descriptors in the set.
#[inline]
fn allocate(
&self,
@ -119,7 +142,8 @@ unsafe impl DescriptorSetAllocator for StandardDescriptorSetAllocator {
max_count,
);
let pools = unsafe { &mut *self.pools.get() };
let pools = self.pools.get_or(|| UnsafeCell::new(HashMap::default()));
let pools = unsafe { &mut *pools.get() };
// We do this instead of using `HashMap::entry` directly because that would involve cloning
// an `Arc` every time. `hash_raw_entry` is still not stabilized >:(
@ -181,3 +205,57 @@ impl DescriptorSetAlloc for StandardDescriptorSetAlloc {
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
descriptor_set::layout::{
DescriptorSetLayoutBinding, DescriptorSetLayoutCreateInfo, DescriptorType,
},
shader::ShaderStages,
VulkanObject,
};
use std::thread;
#[test]
fn threads_use_different_pools() {
let (device, _) = gfx_dev_and_queue!();
let layout = DescriptorSetLayout::new(
device.clone(),
DescriptorSetLayoutCreateInfo {
bindings: [(
0,
DescriptorSetLayoutBinding {
stages: ShaderStages::all_graphics(),
..DescriptorSetLayoutBinding::descriptor_type(DescriptorType::UniformBuffer)
},
)]
.into(),
..Default::default()
},
)
.unwrap();
let allocator = StandardDescriptorSetAllocator::new(device);
let pool1 = if let PoolAlloc::Fixed(alloc) = allocator.allocate(&layout, 0).unwrap().inner {
alloc.pool().handle()
} else {
unreachable!()
};
thread::spawn(move || {
let pool2 =
if let PoolAlloc::Fixed(alloc) = allocator.allocate(&layout, 0).unwrap().inner {
alloc.pool().handle()
} else {
unreachable!()
};
assert_ne!(pool1, pool2);
})
.join()
.unwrap();
}
}

View File

@ -52,8 +52,9 @@ pub struct SingleLayoutDescriptorSetPool {
layout: Arc<DescriptorSetLayout>,
}
// This is needed because of the blanket impl on `Arc<T>`, which requires that `T` is `Send + Sync`.
// `SingleLayoutPool` is `Send + !Sync`.
// This is needed because of the blanket impl of `Send` on `Arc<T>`, which requires that `T` is
// `Send + Sync`. `SingleLayoutPool` is `Send + !Sync` because `DescriptorPool` is `!Sync`. That's
// fine however because we never access the `DescriptorPool`.
unsafe impl Send for SingleLayoutDescriptorSetPool {}
impl SingleLayoutDescriptorSetPool {
@ -191,6 +192,13 @@ pub(crate) struct SingleLayoutPoolAlloc {
pool: Arc<SingleLayoutPool>,
}
impl SingleLayoutPoolAlloc {
#[cfg(test)]
pub(crate) fn pool(&self) -> &DescriptorPool {
&self.pool._inner
}
}
// This is required for the same reason as for `SingleLayoutDescriptorSetPool`.
unsafe impl Send for SingleLayoutPoolAlloc {}
// `DescriptorPool` is `!Sync`, but we never access it, only keep it alive.
@ -278,8 +286,10 @@ pub struct SingleLayoutVariableDescriptorSetPool {
allocated_sets: Cell<usize>,
}
// This is needed because of the blanket impl on `Arc<T>`, which requires that `T` is `Send + Sync`.
// `SingleLayoutVariablePool` is `Send + !Sync`.
// This is needed because of the blanket impl of `Send` on `Arc<T>`, which requires that `T` is
// `Send + Sync`. `SingleLayoutVariablePool` is `Send + !Sync` because `DescriptorPool` is `!Sync`.
// That's fine however because we never access the `DescriptorPool` concurrently, only drop it once
// the `Arc` containing it is dropped.
unsafe impl Send for SingleLayoutVariableDescriptorSetPool {}
impl SingleLayoutVariableDescriptorSetPool {