From 34b709547fe38c218a4f43b950baaebf4167734b Mon Sep 17 00:00:00 2001
From: marc0246 <40955683+marc0246@users.noreply.github.com>
Date: Wed, 26 Oct 2022 16:25:01 +0200
Subject: [PATCH] Memory allocation revamp (#1997)

* Add suballocators
* Add tests
* Retroactively abort `PoolAllocatorCreateInfo`
* Use const generic for `PoolAllocator`'s block size
* Move `new` and `try_into_region` to `Suballocator`
* Move `allocate_unchecked` to `Suballocator`
* Fix constructor visibility
* Move `free_size` to `Suballocator`
* Small fixes
* Merge `BumpAllocator` and `SyncBumpAllocator`
* Restrict `AllocParent::None` to tests
* Rewording
* Add dedicated allocations
* Add `Suballocator::cleanup`
* Make `free_size`s lock-free
* Add `Suballocator::largest_free_chunk`
* Add `ArrayVec`
* Remove useless `unsafe`
* Add `MemoryAllocator`
* Add `GenericMemoryAllocator`
* Small fixes
* Retroactively abort `largest_free_chunk`
* Small docs adjustments
* Rearrange
* Add `MemoryAlloc::mapped_ptr`
* Fix oopsie
* Add support for non-coherent mapped memory
* Add `DeviceOwned` subtrait to `Suballocator`
* Move granularities to suballocators, fix tests
* Add cache control
* Fix oopsie where alignment of 0 is possible
* Store `Arc` in suballocators
* Add `MemoryAllocator::create_{buffer, image}`
* Remove `MemoryPool`
* Fix examples
* Remove `MemoryAlloc::{memory, memory_type_index}`
* Minor improvement to `AllocationCreationError`
* Add some example docs
* Add support for external memory
* Swicheroo
* Small fix
* Shorten sm names, cache atom size in suballocators
* Add config for allocation type to generic allocator
* Engrish
* Fix a big oopsie
* Spliteroo
* Inglisch
---
 examples/src/bin/basic-compute-shader.rs | 4 +-
 examples/src/bin/buffer-pool.rs | 5 +-
 examples/src/bin/debug.rs | 5 +-
 .../deferred/frame/ambient_lighting_system.rs | 4 +-
 .../frame/directional_lighting_system.rs | 4 +-
 .../deferred/frame/point_lighting_system.rs | 4 +-
 examples/src/bin/deferred/frame/system.rs | 19 +-
 examples/src/bin/deferred/main.rs | 6 +-
 .../src/bin/deferred/triangle_draw_system.rs | 4 +-
 examples/src/bin/dynamic-buffers.rs | 6 +-
 examples/src/bin/dynamic-local-size.rs | 6 +-
 examples/src/bin/gl-interop.rs | 10 +-
 examples/src/bin/image-self-copy-blit/main.rs | 9 +-
 examples/src/bin/image/main.rs | 6 +-
 examples/src/bin/immutable-sampler/main.rs | 6 +-
 examples/src/bin/indirect.rs | 9 +-
 examples/src/bin/instancing.rs | 7 +-
 examples/src/bin/interactive_fractal/app.rs | 6 +
 .../fractal_compute_pipeline.rs | 8 +-
 .../pixels_draw_pipeline.rs | 6 +-
 .../interactive_fractal/place_over_frame.rs | 7 +-
 examples/src/bin/msaa-renderpass.rs | 13 +-
 examples/src/bin/multi-window.rs | 5 +-
 .../src/bin/multi_window_game_of_life/app.rs | 4 +
 .../multi_window_game_of_life/game_of_life.rs | 14 +-
 .../multi_window_game_of_life/pixels_draw.rs | 6 +-
 .../multi_window_game_of_life/render_pass.rs | 7 +-
 examples/src/bin/multiview.rs | 11 +-
 examples/src/bin/occlusion-query.rs | 26 +-
 examples/src/bin/push-constants.rs | 4 +-
 examples/src/bin/push-descriptors/main.rs | 6 +-
 examples/src/bin/runtime-shader/main.rs | 5 +-
 examples/src/bin/runtime_array/main.rs | 7 +-
 examples/src/bin/self-copy-buffer.rs | 4 +-
 examples/src/bin/shader-include/main.rs | 4 +-
 examples/src/bin/shader-types-sharing.rs | 4 +-
 examples/src/bin/simple-particles.rs | 6 +-
 examples/src/bin/specialization-constants.rs | 4 +-
 examples/src/bin/teapot/main.rs | 25 +-
 examples/src/bin/tessellation.rs | 5 +-
 examples/src/bin/texture_array/main.rs | 6 +-
 examples/src/bin/triangle-v1_3.rs |
5 +- examples/src/bin/triangle.rs | 5 +- vulkano-util/src/context.rs | 11 + vulkano-util/src/renderer.rs | 5 + vulkano-util/src/window.rs | 1 + vulkano/src/buffer/cpu_access.rs | 253 +- vulkano/src/buffer/cpu_pool.rs | 307 +- vulkano/src/buffer/device_local.rs | 347 +- vulkano/src/buffer/sys.rs | 15 +- vulkano/src/buffer/view.rs | 56 +- vulkano/src/command_buffer/auto.rs | 46 +- vulkano/src/command_buffer/synced/mod.rs | 27 +- vulkano/src/device/mod.rs | 24 +- vulkano/src/image/attachment.rs | 304 +- vulkano/src/image/immutable.rs | 162 +- vulkano/src/image/mod.rs | 14 +- vulkano/src/image/storage.rs | 216 +- vulkano/src/image/sys.rs | 22 +- vulkano/src/memory/allocator/mod.rs | 1593 +++++++++ vulkano/src/memory/allocator/suballocator.rs | 3090 +++++++++++++++++ vulkano/src/memory/mod.rs | 12 +- vulkano/src/memory/pool/host_visible.rs | 181 - vulkano/src/memory/pool/mod.rs | 322 -- vulkano/src/memory/pool/non_host_visible.rs | 169 - vulkano/src/memory/pool/pool.rs | 206 -- vulkano/src/pipeline/compute.rs | 4 +- vulkano/src/render_pass/framebuffer.rs | 26 +- vulkano/src/sampler/ycbcr.rs | 2 + 69 files changed, 5837 insertions(+), 1895 deletions(-) create mode 100644 vulkano/src/memory/allocator/mod.rs create mode 100644 vulkano/src/memory/allocator/suballocator.rs delete mode 100644 vulkano/src/memory/pool/host_visible.rs delete mode 100644 vulkano/src/memory/pool/mod.rs delete mode 100644 vulkano/src/memory/pool/non_host_visible.rs delete mode 100644 vulkano/src/memory/pool/pool.rs diff --git a/examples/src/bin/basic-compute-shader.rs b/examples/src/bin/basic-compute-shader.rs index 39ed94d4..17cc7425 100644 --- a/examples/src/bin/basic-compute-shader.rs +++ b/examples/src/bin/basic-compute-shader.rs @@ -25,6 +25,7 @@ use vulkano::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -144,6 +145,7 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); @@ -153,7 +155,7 @@ fn main() { let data_iter = 0..65536u32; // Builds the buffer and fills it with this iterator. CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/buffer-pool.rs b/examples/src/bin/buffer-pool.rs index 64ed5829..153af157 100644 --- a/examples/src/bin/buffer-pool.rs +++ b/examples/src/bin/buffer-pool.rs @@ -36,6 +36,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -169,8 +170,10 @@ fn main() { .unwrap() }; + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); + // Vertex Buffer Pool - let buffer_pool: CpuBufferPool = CpuBufferPool::vertex_buffer(device.clone()); + let buffer_pool: CpuBufferPool = CpuBufferPool::vertex_buffer(memory_allocator); mod vs { vulkano_shaders::shader! 
{ diff --git a/examples/src/bin/debug.rs b/examples/src/bin/debug.rs index 7e798365..459eb56e 100644 --- a/examples/src/bin/debug.rs +++ b/examples/src/bin/debug.rs @@ -24,6 +24,7 @@ use vulkano::{ }, Instance, InstanceCreateInfo, InstanceExtensions, }, + memory::allocator::StandardMemoryAllocator, VulkanLibrary, }; @@ -175,7 +176,7 @@ fn main() { .expect("failed to create device"); let queue = queues.next().unwrap(); - let command_buffer_allocator = StandardCommandBufferAllocator::new(device); + let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let mut command_buffer_builder = AutoCommandBufferBuilder::primary( &command_buffer_allocator, queue.queue_family_index(), @@ -191,7 +192,9 @@ fn main() { array_layers: 1, }; static DATA: [[u8; 4]; 4096 * 4096] = [[0; 4]; 4096 * 4096]; + let memory_allocator = StandardMemoryAllocator::new_default(device); let _ = ImmutableImage::from_iter( + &memory_allocator, DATA.iter().copied(), dimensions, MipmapsCount::One, diff --git a/examples/src/bin/deferred/frame/ambient_lighting_system.rs b/examples/src/bin/deferred/frame/ambient_lighting_system.rs index 1c86a470..1dca0b55 100644 --- a/examples/src/bin/deferred/frame/ambient_lighting_system.rs +++ b/examples/src/bin/deferred/frame/ambient_lighting_system.rs @@ -21,6 +21,7 @@ use vulkano::{ device::Queue, image::ImageViewAbstract, impl_vertex, + memory::allocator::MemoryAllocator, pipeline::{ graphics::{ color_blend::{AttachmentBlend, BlendFactor, BlendOp, ColorBlendState}, @@ -48,6 +49,7 @@ impl AmbientLightingSystem { pub fn new( gfx_queue: Arc, subpass: Subpass, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, ) -> AmbientLightingSystem { @@ -66,7 +68,7 @@ impl AmbientLightingSystem { ]; let vertex_buffer = { CpuAccessibleBuffer::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/deferred/frame/directional_lighting_system.rs b/examples/src/bin/deferred/frame/directional_lighting_system.rs index eba9c046..efd2fd90 100644 --- a/examples/src/bin/deferred/frame/directional_lighting_system.rs +++ b/examples/src/bin/deferred/frame/directional_lighting_system.rs @@ -22,6 +22,7 @@ use vulkano::{ device::Queue, image::ImageViewAbstract, impl_vertex, + memory::allocator::MemoryAllocator, pipeline::{ graphics::{ color_blend::{AttachmentBlend, BlendFactor, BlendOp, ColorBlendState}, @@ -49,6 +50,7 @@ impl DirectionalLightingSystem { pub fn new( gfx_queue: Arc, subpass: Subpass, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, ) -> DirectionalLightingSystem { @@ -67,7 +69,7 @@ impl DirectionalLightingSystem { ]; let vertex_buffer = { CpuAccessibleBuffer::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/deferred/frame/point_lighting_system.rs b/examples/src/bin/deferred/frame/point_lighting_system.rs index b2f5062f..5337f19c 100644 --- a/examples/src/bin/deferred/frame/point_lighting_system.rs +++ b/examples/src/bin/deferred/frame/point_lighting_system.rs @@ -22,6 +22,7 @@ use vulkano::{ device::Queue, image::ImageViewAbstract, impl_vertex, + memory::allocator::MemoryAllocator, pipeline::{ graphics::{ color_blend::{AttachmentBlend, BlendFactor, BlendOp, ColorBlendState}, @@ -48,6 +49,7 @@ impl PointLightingSystem { pub fn new( gfx_queue: Arc, subpass: Subpass, + memory_allocator: 
&impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, ) -> PointLightingSystem { @@ -66,7 +68,7 @@ impl PointLightingSystem { ]; let vertex_buffer = { CpuAccessibleBuffer::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/deferred/frame/system.rs b/examples/src/bin/deferred/frame/system.rs index bdf8cc1b..d09811c1 100644 --- a/examples/src/bin/deferred/frame/system.rs +++ b/examples/src/bin/deferred/frame/system.rs @@ -24,6 +24,7 @@ use vulkano::{ device::Queue, format::Format, image::{view::ImageView, AttachmentImage, ImageAccess, ImageUsage, ImageViewAbstract}, + memory::allocator::StandardMemoryAllocator, render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, sync::GpuFuture, }; @@ -38,6 +39,7 @@ pub struct FrameSystem { // in of a change in the dimensions. render_pass: Arc, + memory_allocator: Arc, command_buffer_allocator: Rc, // Intermediate render target that will contain the albedo of each pixel of the scene. @@ -71,6 +73,7 @@ impl FrameSystem { pub fn new( gfx_queue: Arc, final_output_format: Format, + memory_allocator: Arc, command_buffer_allocator: Rc, ) -> FrameSystem { // Creating the render pass. @@ -152,7 +155,7 @@ impl FrameSystem { // These images will be replaced the first time we call `frame()`. let diffuse_buffer = ImageView::new_default( AttachmentImage::with_usage( - gfx_queue.device().clone(), + &*memory_allocator, [1, 1], Format::A2B10G10R10_UNORM_PACK32, ImageUsage { @@ -166,7 +169,7 @@ impl FrameSystem { .unwrap(); let normals_buffer = ImageView::new_default( AttachmentImage::with_usage( - gfx_queue.device().clone(), + &*memory_allocator, [1, 1], Format::R16G16B16A16_SFLOAT, ImageUsage { @@ -180,7 +183,7 @@ impl FrameSystem { .unwrap(); let depth_buffer = ImageView::new_default( AttachmentImage::with_usage( - gfx_queue.device().clone(), + &*memory_allocator, [1, 1], Format::D16_UNORM, ImageUsage { @@ -203,18 +206,21 @@ impl FrameSystem { let ambient_lighting_system = AmbientLightingSystem::new( gfx_queue.clone(), lighting_subpass.clone(), + &*memory_allocator, command_buffer_allocator.clone(), descriptor_set_allocator.clone(), ); let directional_lighting_system = DirectionalLightingSystem::new( gfx_queue.clone(), lighting_subpass.clone(), + &*memory_allocator, command_buffer_allocator.clone(), descriptor_set_allocator.clone(), ); let point_lighting_system = PointLightingSystem::new( gfx_queue.clone(), lighting_subpass, + &*memory_allocator, command_buffer_allocator.clone(), descriptor_set_allocator, ); @@ -222,6 +228,7 @@ impl FrameSystem { FrameSystem { gfx_queue, render_pass, + memory_allocator, command_buffer_allocator, diffuse_buffer, normals_buffer, @@ -270,7 +277,7 @@ impl FrameSystem { // render pass their content becomes undefined. 
self.diffuse_buffer = ImageView::new_default( AttachmentImage::with_usage( - self.gfx_queue.device().clone(), + &*self.memory_allocator, img_dims, Format::A2B10G10R10_UNORM_PACK32, ImageUsage { @@ -284,7 +291,7 @@ impl FrameSystem { .unwrap(); self.normals_buffer = ImageView::new_default( AttachmentImage::with_usage( - self.gfx_queue.device().clone(), + &*self.memory_allocator, img_dims, Format::R16G16B16A16_SFLOAT, ImageUsage { @@ -298,7 +305,7 @@ impl FrameSystem { .unwrap(); self.depth_buffer = ImageView::new_default( AttachmentImage::with_usage( - self.gfx_queue.device().clone(), + &*self.memory_allocator, img_dims, Format::D16_UNORM, ImageUsage { diff --git a/examples/src/bin/deferred/main.rs b/examples/src/bin/deferred/main.rs index 6002b494..78231085 100644 --- a/examples/src/bin/deferred/main.rs +++ b/examples/src/bin/deferred/main.rs @@ -30,7 +30,7 @@ use crate::{ triangle_draw_system::TriangleDrawSystem, }; use cgmath::{Matrix4, SquareMatrix, Vector3}; -use std::rc::Rc; +use std::{rc::Rc, sync::Arc}; use vulkano::{ command_buffer::allocator::StandardCommandBufferAllocator, device::{ @@ -38,6 +38,7 @@ use vulkano::{ }, image::{view::ImageView, ImageUsage}, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, swapchain::{ acquire_next_image, AcquireError, Swapchain, SwapchainCreateInfo, SwapchainCreationError, SwapchainPresentInfo, @@ -164,17 +165,20 @@ fn main() { (swapchain, images) }; + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new(device.clone())); // Here is the basic initialization for the deferred system. let mut frame_system = FrameSystem::new( queue.clone(), swapchain.image_format(), + memory_allocator.clone(), command_buffer_allocator.clone(), ); let triangle_draw_system = TriangleDrawSystem::new( queue.clone(), frame_system.deferred_subpass(), + &memory_allocator, command_buffer_allocator, ); diff --git a/examples/src/bin/deferred/triangle_draw_system.rs b/examples/src/bin/deferred/triangle_draw_system.rs index 37467576..25e53892 100644 --- a/examples/src/bin/deferred/triangle_draw_system.rs +++ b/examples/src/bin/deferred/triangle_draw_system.rs @@ -17,6 +17,7 @@ use vulkano::{ }, device::Queue, impl_vertex, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ depth_stencil::DepthStencilState, @@ -42,6 +43,7 @@ impl TriangleDrawSystem { pub fn new( gfx_queue: Arc, subpass: Subpass, + memory_allocator: &StandardMemoryAllocator, command_buffer_allocator: Rc, ) -> TriangleDrawSystem { let vertices = [ @@ -57,7 +59,7 @@ impl TriangleDrawSystem { ]; let vertex_buffer = { CpuAccessibleBuffer::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/dynamic-buffers.rs b/examples/src/bin/dynamic-buffers.rs index a2b25525..179ce495 100644 --- a/examples/src/bin/dynamic-buffers.rs +++ b/examples/src/bin/dynamic-buffers.rs @@ -28,6 +28,7 @@ use vulkano::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -131,6 +132,7 @@ fn main() { ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = 
StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); @@ -164,7 +166,7 @@ fn main() { }; let input_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { uniform_buffer: true, ..BufferUsage::empty() @@ -175,7 +177,7 @@ fn main() { .unwrap(); let output_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/dynamic-local-size.rs b/examples/src/bin/dynamic-local-size.rs index b1bf2a37..43ce2806 100644 --- a/examples/src/bin/dynamic-local-size.rs +++ b/examples/src/bin/dynamic-local-size.rs @@ -30,6 +30,7 @@ use vulkano::{ format::Format, image::{view::ImageView, ImageDimensions, StorageImage}, instance::{Instance, InstanceCreateInfo, InstanceExtensions}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -198,11 +199,12 @@ fn main() { ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let image = StorageImage::new( - device.clone(), + &memory_allocator, ImageDimensions::Dim2d { width: 1024, height: 1024, @@ -223,7 +225,7 @@ fn main() { .unwrap(); let buf = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() diff --git a/examples/src/bin/gl-interop.rs b/examples/src/bin/gl-interop.rs index f1991c66..89a5d160 100644 --- a/examples/src/bin/gl-interop.rs +++ b/examples/src/bin/gl-interop.rs @@ -35,6 +35,7 @@ mod linux { debug::{DebugUtilsMessenger, DebugUtilsMessengerCreateInfo}, Instance, InstanceCreateInfo, InstanceExtensions, }, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -94,11 +95,12 @@ mod linux { mut framebuffers, sampler, pipeline, + memory_allocator, vertex_buffer, ) = vk_setup(display, &event_loop); let image = StorageImage::new_with_exportable_fd( - device.clone(), + &memory_allocator, vulkano::image::ImageDimensions::Dim2d { width: 200, height: 200, @@ -416,6 +418,7 @@ mod linux { Vec>, Arc, Arc, + StandardMemoryAllocator, Arc>, ) { let library = VulkanLibrary::new().unwrap(); @@ -561,6 +564,8 @@ mod linux { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + let vertices = [ Vertex { position: [-0.5, -0.5], @@ -576,7 +581,7 @@ mod linux { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -652,6 +657,7 @@ mod linux { framebuffers, sampler, pipeline, + memory_allocator, vertex_buffer, ) } diff --git a/examples/src/bin/image-self-copy-blit/main.rs b/examples/src/bin/image-self-copy-blit/main.rs index a6c92980..beea17af 100644 --- a/examples/src/bin/image-self-copy-blit/main.rs +++ b/examples/src/bin/image-self-copy-blit/main.rs @@ -30,6 +30,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -160,6 +161,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] 
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -182,7 +185,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -237,7 +240,7 @@ fn main() { reader.next_frame(&mut image_data).unwrap(); let image = StorageImage::new( - device.clone(), + &memory_allocator, dimensions, Format::R8G8B8A8_UNORM, [queue.queue_family_index()], @@ -245,7 +248,7 @@ fn main() { .unwrap(); let buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() diff --git a/examples/src/bin/image/main.rs b/examples/src/bin/image/main.rs index f7e76d0c..0b63e3ce 100644 --- a/examples/src/bin/image/main.rs +++ b/examples/src/bin/image/main.rs @@ -28,6 +28,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -158,6 +159,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -180,7 +183,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -234,6 +237,7 @@ fn main() { reader.next_frame(&mut image_data).unwrap(); let image = ImmutableImage::from_iter( + &memory_allocator, image_data, dimensions, MipmapsCount::One, diff --git a/examples/src/bin/immutable-sampler/main.rs b/examples/src/bin/immutable-sampler/main.rs index decbd57c..3d33b8e7 100644 --- a/examples/src/bin/immutable-sampler/main.rs +++ b/examples/src/bin/immutable-sampler/main.rs @@ -37,6 +37,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -164,6 +165,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -186,7 +189,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -240,6 +243,7 @@ fn main() { reader.next_frame(&mut image_data).unwrap(); let image = ImmutableImage::from_iter( + &memory_allocator, image_data, dimensions, MipmapsCount::One, diff --git a/examples/src/bin/indirect.rs b/examples/src/bin/indirect.rs index 78e12369..52f2fc36 100644 --- a/examples/src/bin/indirect.rs +++ b/examples/src/bin/indirect.rs @@ -41,6 +41,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::{MemoryUsage, StandardMemoryAllocator}, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -254,23 +255,27 @@ fn main() { let fs = fs::load(device.clone()).unwrap(); let cs = cs::load(device.clone()).unwrap(); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); + // Each frame we generate a new set of vertices and each frame we need a new DrawIndirectCommand struct to // set the number of vertices to draw let indirect_args_pool: CpuBufferPool = CpuBufferPool::new( - device.clone(), + memory_allocator.clone(), 
BufferUsage { indirect_buffer: true, storage_buffer: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ); let vertex_pool: CpuBufferPool = CpuBufferPool::new( - device.clone(), + memory_allocator, BufferUsage { storage_buffer: true, vertex_buffer: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ); let compute_pipeline = ComputePipeline::new( diff --git a/examples/src/bin/instancing.rs b/examples/src/bin/instancing.rs index 69f00171..7bc17ca4 100644 --- a/examples/src/bin/instancing.rs +++ b/examples/src/bin/instancing.rs @@ -26,6 +26,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -175,6 +176,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + // We now create a buffer that will store the shape of our triangle. // This triangle is identical to the one in the `triangle.rs` example. let vertices = [ @@ -190,7 +193,7 @@ fn main() { ]; let vertex_buffer = { CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -225,7 +228,7 @@ fn main() { data }; let instance_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/interactive_fractal/app.rs b/examples/src/bin/interactive_fractal/app.rs index ed05ff62..c30f230f 100644 --- a/examples/src/bin/interactive_fractal/app.rs +++ b/examples/src/bin/interactive_fractal/app.rs @@ -15,6 +15,7 @@ use std::{rc::Rc, sync::Arc}; use vulkano::command_buffer::allocator::StandardCommandBufferAllocator; use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator; use vulkano::device::Queue; +use vulkano::memory::allocator::StandardMemoryAllocator; use vulkano::sync::GpuFuture; use vulkano_util::renderer::{DeviceImageView, VulkanoWindowRenderer}; use vulkano_util::window::WindowDescriptor; @@ -60,6 +61,9 @@ pub struct FractalApp { impl FractalApp { pub fn new(gfx_queue: Arc, image_format: vulkano::format::Format) -> FractalApp { + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default( + gfx_queue.device().clone(), + )); let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new( gfx_queue.device().clone(), )); @@ -70,11 +74,13 @@ impl FractalApp { FractalApp { fractal_pipeline: FractalComputePipeline::new( gfx_queue.clone(), + memory_allocator.clone(), command_buffer_allocator.clone(), descriptor_set_allocator.clone(), ), place_over_frame: RenderPassPlaceOverFrame::new( gfx_queue, + &*memory_allocator, command_buffer_allocator, descriptor_set_allocator, image_format, diff --git a/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs b/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs index 7df0e59b..1d4c9519 100644 --- a/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs +++ b/examples/src/bin/interactive_fractal/fractal_compute_pipeline.rs @@ -21,6 +21,7 @@ use vulkano::{ }, device::Queue, image::ImageAccess, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::GpuFuture, }; @@ -29,6 +30,7 @@ use vulkano_util::renderer::DeviceImageView; pub struct FractalComputePipeline { queue: Arc, pipeline: Arc, + memory_allocator: Arc, command_buffer_allocator: Rc, 
descriptor_set_allocator: Rc, palette: Arc>, @@ -39,6 +41,7 @@ pub struct FractalComputePipeline { impl FractalComputePipeline { pub fn new( queue: Arc, + memory_allocator: Arc, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, ) -> FractalComputePipeline { @@ -53,7 +56,7 @@ impl FractalComputePipeline { ]; let palette_size = colors.len() as i32; let palette = CpuAccessibleBuffer::from_iter( - queue.device().clone(), + &*memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() @@ -79,6 +82,7 @@ impl FractalComputePipeline { FractalComputePipeline { queue, pipeline, + memory_allocator, command_buffer_allocator, descriptor_set_allocator, palette, @@ -98,7 +102,7 @@ impl FractalComputePipeline { colors.push([r, g, b, a]); } self.palette = CpuAccessibleBuffer::from_iter( - self.queue.device().clone(), + &*self.memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs b/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs index 6e4e3e9c..d3f3701c 100644 --- a/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs +++ b/examples/src/bin/interactive_fractal/pixels_draw_pipeline.rs @@ -21,6 +21,7 @@ use vulkano::{ device::Queue, image::ImageViewAbstract, impl_vertex, + memory::allocator::MemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -81,12 +82,13 @@ impl PixelsDrawPipeline { pub fn new( gfx_queue: Arc, subpass: Subpass, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, ) -> PixelsDrawPipeline { let (vertices, indices) = textured_quad(2.0, 2.0); let vertex_buffer = CpuAccessibleBuffer::<[TexturedVertex]>::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -96,7 +98,7 @@ impl PixelsDrawPipeline { ) .unwrap(); let index_buffer = CpuAccessibleBuffer::<[u32]>::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { index_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/interactive_fractal/place_over_frame.rs b/examples/src/bin/interactive_fractal/place_over_frame.rs index a068e18b..7432a911 100644 --- a/examples/src/bin/interactive_fractal/place_over_frame.rs +++ b/examples/src/bin/interactive_fractal/place_over_frame.rs @@ -18,6 +18,7 @@ use vulkano::{ device::Queue, format::Format, image::ImageAccess, + memory::allocator::MemoryAllocator, render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, sync::GpuFuture, }; @@ -34,6 +35,7 @@ pub struct RenderPassPlaceOverFrame { impl RenderPassPlaceOverFrame { pub fn new( gfx_queue: Arc, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, output_format: Format, @@ -48,8 +50,8 @@ impl RenderPassPlaceOverFrame { } }, pass: { - color: [color], - depth_stencil: {} + color: [color], + depth_stencil: {} } ) .unwrap(); @@ -57,6 +59,7 @@ impl RenderPassPlaceOverFrame { let pixels_draw_pipeline = PixelsDrawPipeline::new( gfx_queue.clone(), subpass, + memory_allocator, command_buffer_allocator.clone(), descriptor_set_allocator, ); diff --git a/examples/src/bin/msaa-renderpass.rs b/examples/src/bin/msaa-renderpass.rs index 392de8f4..8373b938 100644 --- a/examples/src/bin/msaa-renderpass.rs +++ b/examples/src/bin/msaa-renderpass.rs @@ -79,6 +79,7 @@ use vulkano::{ image::{view::ImageView, AttachmentImage, ImageDimensions, SampleCount, StorageImage}, impl_vertex, 
instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ multisample::MultisampleState, @@ -151,13 +152,15 @@ fn main() { .unwrap(); let queue = queues.next().unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + // Creating our intermediate multisampled image. // // As explained in the introduction, we pass the same dimensions and format as for the final // image. But we also pass the number of samples-per-pixel, which is 4 here. let intermediary = ImageView::new_default( AttachmentImage::transient_multisampled( - device.clone(), + &memory_allocator, [1024, 1024], SampleCount::Sample4, Format::R8G8B8A8_UNORM, @@ -168,7 +171,7 @@ fn main() { // This is the final image that will receive the anti-aliased triangle. let image = StorageImage::new( - device.clone(), + &memory_allocator, ImageDimensions::Dim2d { width: 1024, height: 1024, @@ -284,7 +287,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -314,10 +317,10 @@ fn main() { depth_range: 0.0..1.0, }; - let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); + let command_buffer_allocator = StandardCommandBufferAllocator::new(device); let buf = CpuAccessibleBuffer::from_iter( - device, + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() diff --git a/examples/src/bin/multi-window.rs b/examples/src/bin/multi-window.rs index 4547581b..5a48ff87 100644 --- a/examples/src/bin/multi-window.rs +++ b/examples/src/bin/multi-window.rs @@ -30,6 +30,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -180,6 +181,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -199,7 +202,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/multi_window_game_of_life/app.rs b/examples/src/bin/multi_window_game_of_life/app.rs index 5fd3a6d9..73c9d7e4 100644 --- a/examples/src/bin/multi_window_game_of_life/app.rs +++ b/examples/src/bin/multi_window_game_of_life/app.rs @@ -14,6 +14,7 @@ use crate::{ use std::{collections::HashMap, rc::Rc, sync::Arc}; use vulkano::command_buffer::allocator::StandardCommandBufferAllocator; use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator; +use vulkano::memory::allocator::StandardMemoryAllocator; use vulkano::{device::Queue, format::Format}; use vulkano_util::context::{VulkanoConfig, VulkanoContext}; use vulkano_util::window::{VulkanoWindows, WindowDescriptor}; @@ -31,6 +32,7 @@ impl RenderPipeline { size: [u32; 2], swapchain_format: Format, ) -> RenderPipeline { + let memory_allocator = StandardMemoryAllocator::new_default(gfx_queue.device().clone()); let command_buffer_allocator = Rc::new(StandardCommandBufferAllocator::new( gfx_queue.device().clone(), )); @@ -41,12 +43,14 @@ impl RenderPipeline { RenderPipeline { compute: GameOfLifeComputePipeline::new( compute_queue, + &memory_allocator, command_buffer_allocator.clone(), descriptor_set_allocator.clone(), size, ), 
place_over_frame: RenderPassPlaceOverFrame::new( gfx_queue, + &memory_allocator, command_buffer_allocator, descriptor_set_allocator, swapchain_format, diff --git a/examples/src/bin/multi_window_game_of_life/game_of_life.rs b/examples/src/bin/multi_window_game_of_life/game_of_life.rs index dd0b2346..afcc7cfa 100644 --- a/examples/src/bin/multi_window_game_of_life/game_of_life.rs +++ b/examples/src/bin/multi_window_game_of_life/game_of_life.rs @@ -13,6 +13,7 @@ use std::{rc::Rc, sync::Arc}; use vulkano::command_buffer::allocator::StandardCommandBufferAllocator; use vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator; use vulkano::image::{ImageUsage, StorageImage}; +use vulkano::memory::allocator::MemoryAllocator; use vulkano::{ buffer::{BufferUsage, CpuAccessibleBuffer}, command_buffer::{AutoCommandBufferBuilder, CommandBufferUsage, PrimaryAutoCommandBuffer}, @@ -40,9 +41,12 @@ pub struct GameOfLifeComputePipeline { image: DeviceImageView, } -fn rand_grid(compute_queue: &Arc, size: [u32; 2]) -> Arc> { +fn rand_grid( + memory_allocator: &impl MemoryAllocator, + size: [u32; 2], +) -> Arc> { CpuAccessibleBuffer::from_iter( - compute_queue.device().clone(), + memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() @@ -58,12 +62,13 @@ fn rand_grid(compute_queue: &Arc, size: [u32; 2]) -> Arc, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, size: [u32; 2], ) -> GameOfLifeComputePipeline { - let life_in = rand_grid(&compute_queue, size); - let life_out = rand_grid(&compute_queue, size); + let life_in = rand_grid(memory_allocator, size); + let life_out = rand_grid(memory_allocator, size); let compute_life_pipeline = { let shader = compute_life_cs::load(compute_queue.device().clone()).unwrap(); @@ -78,6 +83,7 @@ impl GameOfLifeComputePipeline { }; let image = StorageImage::general_purpose_image_view( + memory_allocator, compute_queue.clone(), size, Format::R8G8B8A8_UNORM, diff --git a/examples/src/bin/multi_window_game_of_life/pixels_draw.rs b/examples/src/bin/multi_window_game_of_life/pixels_draw.rs index 6540f8ea..2a18bfe2 100644 --- a/examples/src/bin/multi_window_game_of_life/pixels_draw.rs +++ b/examples/src/bin/multi_window_game_of_life/pixels_draw.rs @@ -21,6 +21,7 @@ use vulkano::{ device::Queue, image::ImageViewAbstract, impl_vertex, + memory::allocator::MemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -81,12 +82,13 @@ impl PixelsDrawPipeline { pub fn new( gfx_queue: Arc, subpass: Subpass, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, ) -> PixelsDrawPipeline { let (vertices, indices) = textured_quad(2.0, 2.0); let vertex_buffer = CpuAccessibleBuffer::<[TexturedVertex]>::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -96,7 +98,7 @@ impl PixelsDrawPipeline { ) .unwrap(); let index_buffer = CpuAccessibleBuffer::<[u32]>::from_iter( - gfx_queue.device().clone(), + memory_allocator, BufferUsage { index_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/multi_window_game_of_life/render_pass.rs b/examples/src/bin/multi_window_game_of_life/render_pass.rs index 31666c97..6e634e6c 100644 --- a/examples/src/bin/multi_window_game_of_life/render_pass.rs +++ b/examples/src/bin/multi_window_game_of_life/render_pass.rs @@ -18,6 +18,7 @@ use vulkano::{ device::Queue, format::Format, image::ImageAccess, + memory::allocator::MemoryAllocator, 
render_pass::{Framebuffer, FramebufferCreateInfo, RenderPass, Subpass}, sync::GpuFuture, }; @@ -34,6 +35,7 @@ pub struct RenderPassPlaceOverFrame { impl RenderPassPlaceOverFrame { pub fn new( gfx_queue: Arc, + memory_allocator: &impl MemoryAllocator, command_buffer_allocator: Rc, descriptor_set_allocator: Rc, output_format: Format, @@ -48,8 +50,8 @@ impl RenderPassPlaceOverFrame { } }, pass: { - color: [color], - depth_stencil: {} + color: [color], + depth_stencil: {} } ) .unwrap(); @@ -57,6 +59,7 @@ impl RenderPassPlaceOverFrame { let pixels_draw_pipeline = PixelsDrawPipeline::new( gfx_queue.clone(), subpass, + memory_allocator, command_buffer_allocator.clone(), descriptor_set_allocator, ); diff --git a/examples/src/bin/multiview.rs b/examples/src/bin/multiview.rs index 7d971e0b..6c2cef4e 100644 --- a/examples/src/bin/multiview.rs +++ b/examples/src/bin/multiview.rs @@ -32,6 +32,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo, InstanceExtensions}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -129,8 +130,10 @@ fn main() { let queue = queues.next().unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + let image = StorageImage::with_usage( - device.clone(), + &memory_allocator, ImageDimensions::Dim2d { width: 512, height: 512, @@ -168,7 +171,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -281,7 +284,7 @@ fn main() { let create_buffer = || { CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() @@ -351,7 +354,7 @@ fn main() { let command_buffer = builder.build().unwrap(); - let future = sync::now(device.clone()) + let future = sync::now(device) .then_execute(queue, command_buffer) .unwrap() .then_signal_fence_and_flush() diff --git a/examples/src/bin/occlusion-query.rs b/examples/src/bin/occlusion-query.rs index 39eaab59..fd47b359 100644 --- a/examples/src/bin/occlusion-query.rs +++ b/examples/src/bin/occlusion-query.rs @@ -20,13 +20,13 @@ use vulkano::{ RenderPassBeginInfo, SubpassContents, }, device::{ - physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, DeviceOwned, - QueueCreateInfo, + physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, format::Format, image::{view::ImageView, AttachmentImage, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ depth_stencil::DepthStencilState, @@ -154,6 +154,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -209,7 +211,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -325,7 +327,12 @@ fn main() { let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); - let mut framebuffers = window_size_dependent_setup(&images, render_pass.clone(), &mut viewport); + let mut framebuffers = window_size_dependent_setup( + &images, + render_pass.clone(), + &mut viewport, + &memory_allocator, + ); let mut recreate_swapchain = false; let mut previous_frame_end = 
Some(sync::now(device.clone()).boxed()); @@ -363,8 +370,12 @@ fn main() { }; swapchain = new_swapchain; - framebuffers = - window_size_dependent_setup(&new_images, render_pass.clone(), &mut viewport); + framebuffers = window_size_dependent_setup( + &new_images, + render_pass.clone(), + &mut viewport, + &memory_allocator, + ); recreate_swapchain = false; } @@ -542,13 +553,14 @@ fn window_size_dependent_setup( images: &[Arc], render_pass: Arc, viewport: &mut Viewport, + memory_allocator: &StandardMemoryAllocator, ) -> Vec> { let dimensions = images[0].dimensions().width_height(); viewport.dimensions = [dimensions[0] as f32, dimensions[1] as f32]; let depth_attachment = ImageView::new_default( AttachmentImage::with_usage( - render_pass.device().clone(), + memory_allocator, dimensions, Format::D16_UNORM, ImageUsage { diff --git a/examples/src/bin/push-constants.rs b/examples/src/bin/push-constants.rs index 9d2582da..3855a020 100644 --- a/examples/src/bin/push-constants.rs +++ b/examples/src/bin/push-constants.rs @@ -24,6 +24,7 @@ use vulkano::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -129,13 +130,14 @@ fn main() { ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let data_buffer = { let data_iter = 0..65536u32; CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/push-descriptors/main.rs b/examples/src/bin/push-descriptors/main.rs index 57a06c99..cff97dc4 100644 --- a/examples/src/bin/push-descriptors/main.rs +++ b/examples/src/bin/push-descriptors/main.rs @@ -26,6 +26,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -154,6 +155,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -176,7 +179,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -229,6 +232,7 @@ fn main() { reader.next_frame(&mut image_data).unwrap(); let image = ImmutableImage::from_iter( + &memory_allocator, image_data, dimensions, MipmapsCount::One, diff --git a/examples/src/bin/runtime-shader/main.rs b/examples/src/bin/runtime-shader/main.rs index 9e1f9119..08f104b1 100644 --- a/examples/src/bin/runtime-shader/main.rs +++ b/examples/src/bin/runtime-shader/main.rs @@ -33,6 +33,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -221,6 +222,8 @@ fn main() { let mut recreate_swapchain = false; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + let vertices = [ Vertex { position: [-1.0, 1.0], @@ -236,7 +239,7 @@ fn main() { }, ]; let vertex_buffer = 
CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/runtime_array/main.rs b/examples/src/bin/runtime_array/main.rs index fe9db23b..94f03907 100644 --- a/examples/src/bin/runtime_array/main.rs +++ b/examples/src/bin/runtime_array/main.rs @@ -33,6 +33,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -170,6 +171,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -242,7 +245,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -296,6 +299,7 @@ fn main() { reader.next_frame(&mut image_data).unwrap(); let image = ImmutableImage::from_iter( + &memory_allocator, image_data, dimensions, MipmapsCount::One, @@ -323,6 +327,7 @@ fn main() { reader.next_frame(&mut image_data).unwrap(); let image = ImmutableImage::from_iter( + &memory_allocator, image_data, dimensions, MipmapsCount::One, diff --git a/examples/src/bin/self-copy-buffer.rs b/examples/src/bin/self-copy-buffer.rs index 61d12422..120fd023 100644 --- a/examples/src/bin/self-copy-buffer.rs +++ b/examples/src/bin/self-copy-buffer.rs @@ -23,6 +23,7 @@ use vulkano::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -116,6 +117,7 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); @@ -123,7 +125,7 @@ fn main() { // we intitialize half of the array and leave the other half to 0, we will use copy later to fill it let data_iter = (0..65536u32).map(|n| if n < 65536 / 2 { n } else { 0 }); CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, transfer_src: true, diff --git a/examples/src/bin/shader-include/main.rs b/examples/src/bin/shader-include/main.rs index 3f2197df..d27314b8 100644 --- a/examples/src/bin/shader-include/main.rs +++ b/examples/src/bin/shader-include/main.rs @@ -23,6 +23,7 @@ use vulkano::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -124,13 +125,14 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let data_buffer = { let data_iter = 0..65536u32; CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/shader-types-sharing.rs b/examples/src/bin/shader-types-sharing.rs index 
0a601419..38880222 100644 --- a/examples/src/bin/shader-types-sharing.rs +++ b/examples/src/bin/shader-types-sharing.rs @@ -41,6 +41,7 @@ use vulkano::{ QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -237,6 +238,7 @@ fn main() { future.wait(None).unwrap(); } + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); @@ -244,7 +246,7 @@ fn main() { let data_buffer = { let data_iter = 0..65536u32; CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/simple-particles.rs b/examples/src/bin/simple-particles.rs index fa00f274..23ffc0bc 100644 --- a/examples/src/bin/simple-particles.rs +++ b/examples/src/bin/simple-particles.rs @@ -30,6 +30,7 @@ use vulkano::{ image::{view::ImageView, ImageUsage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::{InputAssemblyState, PrimitiveTopology}, @@ -317,6 +318,7 @@ fn main() { let vs = vs::load(device.clone()).unwrap(); let fs = fs::load(device.clone()).unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); @@ -341,7 +343,7 @@ fn main() { // Create a CPU accessible buffer initialized with the vertex data. let temporary_accessible_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() @@ -353,7 +355,7 @@ fn main() { // Create a buffer array on the GPU with enough space for `PARTICLE_COUNT` number of `Vertex`. 
let device_local_buffer = DeviceLocalBuffer::<[Vertex]>::array( - device.clone(), + &memory_allocator, PARTICLE_COUNT as vulkano::DeviceSize, BufferUsage { storage_buffer: true, diff --git a/examples/src/bin/specialization-constants.rs b/examples/src/bin/specialization-constants.rs index fd5f90b3..d9153c5f 100644 --- a/examples/src/bin/specialization-constants.rs +++ b/examples/src/bin/specialization-constants.rs @@ -21,6 +21,7 @@ use vulkano::{ physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, }, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ComputePipeline, Pipeline, PipelineBindPoint}, sync::{self, GpuFuture}, VulkanLibrary, @@ -125,13 +126,14 @@ fn main() { ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let descriptor_set_allocator = StandardDescriptorSetAllocator::new(device.clone()); let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let data_buffer = { let data_iter = 0..65536u32; CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { storage_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/teapot/main.rs b/examples/src/bin/teapot/main.rs index 7088609e..e0c68a80 100644 --- a/examples/src/bin/teapot/main.rs +++ b/examples/src/bin/teapot/main.rs @@ -20,11 +20,13 @@ use vulkano::{ allocator::StandardDescriptorSetAllocator, PersistentDescriptorSet, WriteDescriptorSet, }, device::{ - physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, QueueCreateInfo, + physical::PhysicalDeviceType, Device, DeviceCreateInfo, DeviceExtensions, DeviceOwned, + QueueCreateInfo, }, format::Format, image::{view::ImageView, AttachmentImage, ImageAccess, ImageUsage, SwapchainImage}, instance::{Instance, InstanceCreateInfo}, + memory::allocator::{MemoryUsage, StandardMemoryAllocator}, pipeline::{ graphics::{ depth_stencil::DepthStencilState, @@ -156,8 +158,10 @@ fn main() { .unwrap() }; + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); + let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &*memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -167,7 +171,7 @@ fn main() { ) .unwrap(); let normals_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &*memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -177,7 +181,7 @@ fn main() { ) .unwrap(); let index_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &*memory_allocator, BufferUsage { index_buffer: true, ..BufferUsage::empty() @@ -188,11 +192,12 @@ fn main() { .unwrap(); let uniform_buffer = CpuBufferPool::::new( - device.clone(), + memory_allocator.clone(), BufferUsage { uniform_buffer: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ); let vs = vs::load(device.clone()).unwrap(); @@ -221,7 +226,7 @@ fn main() { .unwrap(); let (mut pipeline, mut framebuffers) = - window_size_dependent_setup(device.clone(), &vs, &fs, &images, render_pass.clone()); + window_size_dependent_setup(&memory_allocator, &vs, &fs, &images, render_pass.clone()); let mut recreate_swapchain = false; let mut previous_frame_end = Some(sync::now(device.clone()).boxed()); @@ -266,7 +271,7 @@ fn main() { swapchain = new_swapchain; let (new_pipeline, new_framebuffers) = window_size_dependent_setup( - device.clone(), + &memory_allocator, &vs, &fs, &new_images, @@ -399,7 +404,7 @@ fn main() { /// This method is called once 
during initialization, then again whenever the window is resized fn window_size_dependent_setup( - device: Arc, + memory_allocator: &StandardMemoryAllocator, vs: &ShaderModule, fs: &ShaderModule, images: &[Arc], @@ -408,7 +413,7 @@ fn window_size_dependent_setup( let dimensions = images[0].dimensions().width_height(); let depth_buffer = ImageView::new_default( - AttachmentImage::transient(device.clone(), dimensions, Format::D16_UNORM).unwrap(), + AttachmentImage::transient(memory_allocator, dimensions, Format::D16_UNORM).unwrap(), ) .unwrap(); @@ -449,7 +454,7 @@ fn window_size_dependent_setup( .fragment_shader(fs.entry_point("main").unwrap(), ()) .depth_stencil_state(DepthStencilState::simple_depth_test()) .render_pass(Subpass::from(render_pass, 0).unwrap()) - .build(device) + .build(memory_allocator.device().clone()) .unwrap(); (pipeline, framebuffers) diff --git a/examples/src/bin/tessellation.rs b/examples/src/bin/tessellation.rs index a6d750a6..4de5ba30 100644 --- a/examples/src/bin/tessellation.rs +++ b/examples/src/bin/tessellation.rs @@ -33,6 +33,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::{InputAssemblyState, PrimitiveTopology}, @@ -262,6 +263,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] #[repr(C)] struct Vertex { @@ -299,7 +302,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/texture_array/main.rs b/examples/src/bin/texture_array/main.rs index 9590cd43..d9bd962a 100644 --- a/examples/src/bin/texture_array/main.rs +++ b/examples/src/bin/texture_array/main.rs @@ -28,6 +28,7 @@ use vulkano::{ }, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ color_blend::ColorBlendState, @@ -160,6 +161,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Vertex { @@ -182,7 +185,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::<[Vertex]>::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() @@ -244,6 +247,7 @@ fn main() { array_layers: 3, }; // Replace with your actual image array dimensions let image = ImmutableImage::from_iter( + &memory_allocator, image_array_data, dimensions, MipmapsCount::Log2, diff --git a/examples/src/bin/triangle-v1_3.rs b/examples/src/bin/triangle-v1_3.rs index 7a48ee48..ffbe1717 100644 --- a/examples/src/bin/triangle-v1_3.rs +++ b/examples/src/bin/triangle-v1_3.rs @@ -36,6 +36,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -279,6 +280,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + // We now create a buffer that will store the shape of our triangle. 
// We use #[repr(C)] here to force rustc to not do anything funky with our data, although for this // particular example, it doesn't actually change the in-memory representation. @@ -301,7 +304,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/examples/src/bin/triangle.rs b/examples/src/bin/triangle.rs index cd99a85d..8edae885 100644 --- a/examples/src/bin/triangle.rs +++ b/examples/src/bin/triangle.rs @@ -30,6 +30,7 @@ use vulkano::{ image::{view::ImageView, ImageAccess, ImageUsage, SwapchainImage}, impl_vertex, instance::{Instance, InstanceCreateInfo}, + memory::allocator::StandardMemoryAllocator, pipeline::{ graphics::{ input_assembly::InputAssemblyState, @@ -259,6 +260,8 @@ fn main() { .unwrap() }; + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); + // We now create a buffer that will store the shape of our triangle. // We use #[repr(C)] here to force rustc to not do anything funky with our data, although for this // particular example, it doesn't actually change the in-memory representation. @@ -281,7 +284,7 @@ fn main() { }, ]; let vertex_buffer = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/vulkano-util/src/context.rs b/vulkano-util/src/context.rs index 1caa0a41..1992ef58 100644 --- a/vulkano-util/src/context.rs +++ b/vulkano-util/src/context.rs @@ -17,6 +17,7 @@ use vulkano::{ debug::{DebugUtilsMessenger, DebugUtilsMessengerCreateInfo}, Instance, InstanceCreateInfo, InstanceExtensions, }, + memory::allocator::StandardMemoryAllocator, Version, VulkanLibrary, }; @@ -105,6 +106,7 @@ pub struct VulkanoContext { device: Arc, graphics_queue: Arc, compute_queue: Arc, + memory_allocator: Arc, } impl Default for VulkanoContext { @@ -173,12 +175,15 @@ impl VulkanoContext { config.device_features, ); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone())); + Self { instance, _debug_utils_messenger, device, graphics_queue, compute_queue, + memory_allocator, } } @@ -292,4 +297,10 @@ impl VulkanoContext { pub fn compute_queue(&self) -> &Arc { &self.compute_queue } + + /// Returns the memory allocator. + #[inline] + pub fn memory_allocator(&self) -> &Arc { + &self.memory_allocator + } } diff --git a/vulkano-util/src/renderer.rs b/vulkano-util/src/renderer.rs index 690c6d3f..f11bfb40 100644 --- a/vulkano-util/src/renderer.rs +++ b/vulkano-util/src/renderer.rs @@ -16,6 +16,7 @@ use vulkano::{ image::{ view::ImageView, ImageAccess, ImageUsage, ImageViewAbstract, StorageImage, SwapchainImage, }, + memory::allocator::StandardMemoryAllocator, swapchain::{ self, AcquireError, Surface, Swapchain, SwapchainCreateInfo, SwapchainCreationError, SwapchainPresentInfo, @@ -46,6 +47,7 @@ pub struct VulkanoWindowRenderer { compute_queue: Arc, swapchain: Arc, final_views: Vec, + memory_allocator: Arc, /// Additional image views that you can add which are resized with the window. /// Use associated functions to get access to these. 
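For context on the vulkano-util changes in this hunk: the context now owns a single `Arc<StandardMemoryAllocator>` and the window renderer borrows it, so applications are expected to share that instance rather than building their own. A minimal usage sketch for the new accessor; `VulkanoContext::new` and `VulkanoConfig::default` are assumed from vulkano-util's existing API, only `memory_allocator()` is new in this patch:

use vulkano_util::context::{VulkanoConfig, VulkanoContext};

fn share_allocator() {
    // Assumed pre-existing constructor; only `memory_allocator()` is added here.
    let context = VulkanoContext::new(VulkanoConfig::default());

    // One allocator per context: clone the `Arc` wherever buffers or
    // images are created instead of constructing a second allocator.
    let memory_allocator = context.memory_allocator().clone();
    let _ = memory_allocator;
}
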
additional_image_views: HashMap, @@ -64,6 +66,7 @@ impl VulkanoWindowRenderer { window: winit::window::Window, descriptor: &WindowDescriptor, swapchain_create_info_modify: fn(&mut SwapchainCreateInfo), + memory_allocator: Arc, ) -> VulkanoWindowRenderer { // Create rendering surface from window let surface = @@ -86,6 +89,7 @@ impl VulkanoWindowRenderer { compute_queue: vulkano_context.compute_queue().clone(), swapchain: swap_chain, final_views, + memory_allocator, additional_image_views: HashMap::default(), recreate_swapchain: false, previous_frame_end, @@ -239,6 +243,7 @@ impl VulkanoWindowRenderer { pub fn add_additional_image_view(&mut self, key: usize, format: Format, usage: ImageUsage) { let size = self.swapchain_image_size(); let image = StorageImage::general_purpose_image_view( + &*self.memory_allocator, self.graphics_queue.clone(), size, format, diff --git a/vulkano-util/src/window.rs b/vulkano-util/src/window.rs index fc014ffc..497f63fb 100644 --- a/vulkano-util/src/window.rs +++ b/vulkano-util/src/window.rs @@ -168,6 +168,7 @@ impl VulkanoWindows { winit_window, window_descriptor, swapchain_create_info_modify, + vulkano_context.memory_allocator().clone(), ), ); diff --git a/vulkano/src/buffer/cpu_access.rs b/vulkano/src/buffer/cpu_access.rs index 82016383..19a14ad4 100644 --- a/vulkano/src/buffer/cpu_access.rs +++ b/vulkano/src/buffer/cpu_access.rs @@ -17,17 +17,18 @@ //! or write and write simultaneously will block. use super::{ - sys::UnsafeBuffer, BufferAccess, BufferAccessObject, BufferContents, BufferInner, BufferUsage, + sys::UnsafeBuffer, BufferAccess, BufferAccessObject, BufferContents, BufferCreationError, + BufferInner, BufferUsage, }; use crate::{ - buffer::{sys::UnsafeBufferCreateInfo, BufferCreationError, TypedBufferAccess}, + buffer::{sys::UnsafeBufferCreateInfo, TypedBufferAccess}, device::{Device, DeviceOwned}, memory::{ - pool::{ - AllocFromRequirementsFilter, AllocLayout, MappingRequirement, MemoryPoolAlloc, - PotentialDedicatedAllocation, StandardMemoryPoolAlloc, + allocator::{ + AllocationCreateInfo, AllocationCreationError, AllocationType, MemoryAlloc, + MemoryAllocatePreference, MemoryAllocator, MemoryUsage, }, - DedicatedAllocation, DeviceMemoryError, MemoryPool, + DedicatedAllocation, }, sync::Sharing, DeviceSize, @@ -51,7 +52,7 @@ use std::{ /// memory caches GPU data on the CPU side. This can be more performant in cases where /// the cpu needs to read data coming off the GPU. #[derive(Debug)] -pub struct CpuAccessibleBuffer> +pub struct CpuAccessibleBuffer where T: BufferContents + ?Sized, { @@ -59,7 +60,7 @@ where inner: Arc, // The memory held by the buffer. - memory: A, + memory: MemoryAlloc, // Queue families allowed to access this buffer. queue_family_indices: SmallVec<[u32; 4]>, @@ -77,17 +78,15 @@ where /// # Panics /// /// - Panics if `T` has zero size. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub fn from_data( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), usage: BufferUsage, host_cached: bool, data: T, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { unsafe { let uninitialized = CpuAccessibleBuffer::raw( - device, + allocator, size_of::() as DeviceSize, usage, host_cached, @@ -113,11 +112,17 @@ where /// /// - Panics if `T` has zero size. 
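The new `from_data` signature shown above in caller-side form. A minimal sketch, assuming a `device` already exists; `host_cached: false` keeps the old default of preferring uncached, host-visible memory, and the error type is now `AllocationCreationError` rather than `DeviceMemoryError`:

use std::sync::Arc;
use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
use vulkano::device::Device;
use vulkano::memory::allocator::StandardMemoryAllocator;

fn upload_counter(device: Arc<Device>) {
    // The allocator replaces the bare `Arc<Device>` argument everywhere.
    let memory_allocator = StandardMemoryAllocator::new_default(device);

    let buffer = CpuAccessibleBuffer::from_data(
        &memory_allocator,
        BufferUsage {
            storage_buffer: true,
            ..BufferUsage::empty()
        },
        false, // host_cached
        0u32,
    )
    .unwrap(); // fails with `AllocationCreationError` on allocation failure
    let _ = buffer;
}
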
pub unsafe fn uninitialized( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), usage: BufferUsage, host_cached: bool, - ) -> Result>, DeviceMemoryError> { - CpuAccessibleBuffer::raw(device, size_of::() as DeviceSize, usage, host_cached, []) + ) -> Result>, AllocationCreationError> { + CpuAccessibleBuffer::raw( + allocator, + size_of::() as DeviceSize, + usage, + host_cached, + [], + ) } } @@ -132,14 +137,12 @@ where /// /// - Panics if `T` has zero size. /// - Panics if `data` is empty. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub fn from_iter( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), usage: BufferUsage, host_cached: bool, data: I, - ) -> Result>, DeviceMemoryError> + ) -> Result>, AllocationCreationError> where I: IntoIterator, I::IntoIter: ExactSizeIterator, @@ -148,7 +151,7 @@ where unsafe { let uninitialized = CpuAccessibleBuffer::uninitialized_array( - device, + allocator, data.len() as DeviceSize, usage, host_cached, @@ -176,16 +179,14 @@ where /// /// - Panics if `T` has zero size. /// - Panics if `len` is zero. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub unsafe fn uninitialized_array( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), len: DeviceSize, usage: BufferUsage, host_cached: bool, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { CpuAccessibleBuffer::raw( - device, + allocator, len * size_of::() as DeviceSize, usage, host_cached, @@ -207,75 +208,70 @@ where /// # Panics /// /// - Panics if `size` is zero. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub unsafe fn raw( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), size: DeviceSize, usage: BufferUsage, host_cached: bool, queue_family_indices: impl IntoIterator, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { let queue_family_indices: SmallVec<[_; 4]> = queue_family_indices.into_iter().collect(); - let buffer = { - match UnsafeBuffer::new( - device.clone(), - UnsafeBufferCreateInfo { - sharing: if queue_family_indices.len() >= 2 { - Sharing::Concurrent(queue_family_indices.clone()) - } else { - Sharing::Exclusive - }, - size, - usage, - ..Default::default() - }, - ) { - Ok(b) => b, - Err(BufferCreationError::AllocError(err)) => return Err(err), - Err(_) => unreachable!(), // We don't use sparse binding, therefore the other - // errors can't happen - } - }; - let mem_reqs = buffer.memory_requirements(); - - let memory = MemoryPool::alloc_from_requirements( - &device.standard_memory_pool(), - &mem_reqs, - AllocLayout::Linear, - MappingRequirement::Map, - Some(DedicatedAllocation::Buffer(&buffer)), - |m| { - if m.property_flags.host_cached { - if host_cached { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } + let buffer = UnsafeBuffer::new( + allocator.device().clone(), + UnsafeBufferCreateInfo { + sharing: if queue_family_indices.len() >= 2 { + Sharing::Concurrent(queue_family_indices.clone()) } else { - if host_cached { - AllocFromRequirementsFilter::Allowed - } else { - AllocFromRequirementsFilter::Preferred - } - } + Sharing::Exclusive + }, + size, + usage, + ..Default::default() }, - )?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - debug_assert!(memory.mapped_memory().is_some()); - buffer.bind_memory(memory.memory(), memory.offset())?; + ) + .map_err(|err| match err { + BufferCreationError::AllocError(err) => err, + // We don't use sparse-binding, 
therefore the other errors can't happen. + _ => unreachable!(), + })?; + let requirements = buffer.memory_requirements(); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::Linear, + usage: if host_cached { + MemoryUsage::Download + } else { + MemoryUsage::Upload + }, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Buffer(&buffer)), + ..Default::default() + }; - Ok(Arc::new(CpuAccessibleBuffer { - inner: buffer, - memory, - queue_family_indices, - marker: PhantomData, - })) + match allocator.allocate_unchecked(create_info) { + Ok(mut alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + // The implementation might require a larger size than we wanted. With this it is + // easier to invalidate and flush the whole buffer. It does not affect the + // allocation in any way. + alloc.shrink(size); + buffer.bind_memory(alloc.device_memory(), alloc.offset())?; + + Ok(Arc::new(CpuAccessibleBuffer { + inner: buffer, + memory: alloc, + queue_family_indices, + marker: PhantomData, + })) + } + Err(err) => Err(err), + } } } -impl CpuAccessibleBuffer +impl CpuAccessibleBuffer where T: BufferContents + ?Sized, { @@ -285,10 +281,9 @@ where } } -impl CpuAccessibleBuffer +impl CpuAccessibleBuffer where T: BufferContents + ?Sized, - A: MemoryPoolAlloc, { /// Locks the buffer in order to read its content from the CPU. /// @@ -299,7 +294,7 @@ where /// After this function successfully locks the buffer, any attempt to submit a command buffer /// that uses it in exclusive mode will fail. You can still submit this buffer for non-exclusive /// accesses (ie. reads). - pub fn read(&self) -> Result, ReadLockError> { + pub fn read(&self) -> Result, ReadLockError> { let mut state = self.inner.state(); let buffer_range = self.inner().offset..self.inner().offset + self.size(); @@ -308,20 +303,14 @@ where state.cpu_read_lock(buffer_range.clone()); } - let mapped_memory = self.memory.mapped_memory().unwrap(); - let offset = self.memory.offset(); - let memory_range = offset..offset + self.inner.size(); - let bytes = unsafe { // If there are other read locks being held at this point, they also called // `invalidate_range` when locking. The GPU can't write data while the CPU holds a read // lock, so there will no new data and this call will do nothing. // TODO: probably still more efficient to call it only if we're the first to acquire a // read lock, but the number of CPU locks isn't currently tracked anywhere. - mapped_memory - .invalidate_range(memory_range.clone()) - .unwrap(); - mapped_memory.read(memory_range).unwrap() + self.memory.invalidate_range(0..self.size()).unwrap(); + self.memory.mapped_slice().unwrap() }; Ok(ReadLock { @@ -339,7 +328,7 @@ where /// /// After this function successfully locks the buffer, any attempt to submit a command buffer /// that uses it and any attempt to call `read()` will return an error. 
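The allocation sequence above (`UnsafeBuffer` creation, `memory_requirements`, `AllocationCreateInfo`, `allocate_unchecked`, `bind_memory`) is the template every buffer type in this patch follows. A condensed sketch of just that flow, assuming an allocator and an already-created `UnsafeBuffer`; `MemoryUsage::Upload` stands in for whatever usage the caller wants:

use std::sync::Arc;
use vulkano::{
    buffer::sys::UnsafeBuffer,
    memory::{
        allocator::{
            AllocationCreateInfo, AllocationType, MemoryAllocatePreference, MemoryAllocator,
            MemoryUsage,
        },
        DedicatedAllocation,
    },
    DeviceSize,
};

unsafe fn bind_new_memory(
    allocator: &(impl MemoryAllocator + ?Sized),
    buffer: &Arc<UnsafeBuffer>,
    size: DeviceSize,
) {
    let requirements = buffer.memory_requirements();
    let create_info = AllocationCreateInfo {
        requirements,
        allocation_type: AllocationType::Linear, // buffers are never image-tiled
        usage: MemoryUsage::Upload,
        allocate_preference: MemoryAllocatePreference::Unknown,
        dedicated_allocation: Some(DedicatedAllocation::Buffer(buffer)),
        ..Default::default()
    };

    let mut alloc = allocator.allocate_unchecked(create_info).unwrap();
    // The suballocator may hand back more than requested; shrinking keeps
    // whole-allocation flush/invalidate ranges in step with the buffer size.
    alloc.shrink(size);
    buffer.bind_memory(alloc.device_memory(), alloc.offset()).unwrap();
}
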
- pub fn write(&self) -> Result, WriteLockError> { + pub fn write(&self) -> Result, WriteLockError> { let mut state = self.inner.state(); let buffer_range = self.inner().offset..self.inner().offset + self.size(); @@ -348,30 +337,22 @@ where state.cpu_write_lock(buffer_range.clone()); } - let mapped_memory = self.memory.mapped_memory().unwrap(); - let offset = self.memory.offset(); - let memory_range = offset..offset + self.size(); - let bytes = unsafe { - mapped_memory - .invalidate_range(memory_range.clone()) - .unwrap(); - mapped_memory.write(memory_range.clone()).unwrap() + self.memory.invalidate_range(0..self.size()).unwrap(); + self.memory.write(0..self.size()).unwrap() }; Ok(WriteLock { inner: self, buffer_range, - memory_range, data: T::from_bytes_mut(bytes).unwrap(), }) } } -unsafe impl BufferAccess for CpuAccessibleBuffer +unsafe impl BufferAccess for CpuAccessibleBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { fn inner(&self) -> BufferInner<'_> { BufferInner { @@ -385,25 +366,23 @@ where } } -impl BufferAccessObject for Arc> +impl BufferAccessObject for Arc> where T: BufferContents + ?Sized, - A: Send + Sync + 'static, { fn as_buffer_access_object(&self) -> Arc { self.clone() } } -unsafe impl TypedBufferAccess for CpuAccessibleBuffer +unsafe impl TypedBufferAccess for CpuAccessibleBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { type Content = T; } -unsafe impl DeviceOwned for CpuAccessibleBuffer +unsafe impl DeviceOwned for CpuAccessibleBuffer where T: BufferContents + ?Sized, { @@ -412,27 +391,20 @@ where } } -impl PartialEq for CpuAccessibleBuffer +impl PartialEq for CpuAccessibleBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() && self.size() == other.size() } } -impl Eq for CpuAccessibleBuffer -where - T: BufferContents + ?Sized, - A: Send + Sync, -{ -} +impl Eq for CpuAccessibleBuffer where T: BufferContents + ?Sized {} -impl Hash for CpuAccessibleBuffer +impl Hash for CpuAccessibleBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { fn hash(&self, state: &mut H) { self.inner().hash(state); @@ -445,20 +417,18 @@ where /// Note that this object holds a rwlock read guard on the chunk. If another thread tries to access /// this buffer's content or tries to submit a GPU command that uses this buffer, it will block. #[derive(Debug)] -pub struct ReadLock<'a, T, A> +pub struct ReadLock<'a, T> where T: BufferContents + ?Sized, - A: MemoryPoolAlloc, { - inner: &'a CpuAccessibleBuffer, + inner: &'a CpuAccessibleBuffer, buffer_range: Range, data: &'a T, } -impl<'a, T, A> Drop for ReadLock<'a, T, A> +impl<'a, T> Drop for ReadLock<'a, T> where T: BufferContents + ?Sized + 'a, - A: MemoryPoolAlloc, { fn drop(&mut self) { unsafe { @@ -468,10 +438,9 @@ where } } -impl<'a, T, A> Deref for ReadLock<'a, T, A> +impl<'a, T> Deref for ReadLock<'a, T> where T: BufferContents + ?Sized + 'a, - A: MemoryPoolAlloc, { type Target = T; @@ -485,30 +454,22 @@ where /// Note that this object holds a rwlock write guard on the chunk. If another thread tries to access /// this buffer's content or tries to submit a GPU command that uses this buffer, it will block. 
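From the caller's side the lock API is unchanged apart from the dropped `A` type parameter; coherence is handled internally, with `invalidate_range` when a lock is taken and `flush_range` when a write guard is dropped. A usage sketch, assuming a `CpuAccessibleBuffer<[u32]>` created as above:

fn bump_first_element(buffer: &vulkano::buffer::CpuAccessibleBuffer<[u32]>) {
    {
        // Taking the write lock invalidates the mapped range so the CPU
        // sees current data, even on non-coherent memory types.
        let mut guard = buffer.write().unwrap();
        guard[0] += 1;
    } // Dropping the guard flushes the whole buffer back to the device.

    let guard = buffer.read().unwrap();
    assert!(guard[0] >= 1);
}
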
#[derive(Debug)] -pub struct WriteLock<'a, T, A> +pub struct WriteLock<'a, T> where T: BufferContents + ?Sized, - A: MemoryPoolAlloc, { - inner: &'a CpuAccessibleBuffer, + inner: &'a CpuAccessibleBuffer, buffer_range: Range, - memory_range: Range, data: &'a mut T, } -impl<'a, T, A> Drop for WriteLock<'a, T, A> +impl<'a, T> Drop for WriteLock<'a, T> where T: BufferContents + ?Sized + 'a, - A: MemoryPoolAlloc, { fn drop(&mut self) { unsafe { - self.inner - .memory - .mapped_memory() - .unwrap() - .flush_range(self.memory_range.clone()) - .unwrap(); + self.inner.memory.flush_range(0..self.inner.size()).unwrap(); let mut state = self.inner.inner.state(); state.cpu_write_unlock(self.buffer_range.clone()); @@ -516,10 +477,9 @@ where } } -impl<'a, T, A> Deref for WriteLock<'a, T, A> +impl<'a, T> Deref for WriteLock<'a, T> where T: BufferContents + ?Sized + 'a, - A: MemoryPoolAlloc, { type Target = T; @@ -528,10 +488,9 @@ where } } -impl<'a, T, A> DerefMut for WriteLock<'a, T, A> +impl<'a, T> DerefMut for WriteLock<'a, T> where T: BufferContents + ?Sized + 'a, - A: MemoryPoolAlloc, { fn deref_mut(&mut self) -> &mut T { self.data @@ -592,17 +551,19 @@ impl Display for WriteLockError { #[cfg(test)] mod tests { - use crate::buffer::{BufferUsage, CpuAccessibleBuffer}; + use super::*; + use crate::memory::allocator::StandardMemoryAllocator; #[test] fn create_empty_buffer() { let (device, _queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); const EMPTY: [i32; 0] = []; assert_should_panic!({ CpuAccessibleBuffer::from_data( - device.clone(), + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() @@ -612,7 +573,7 @@ mod tests { ) .unwrap(); CpuAccessibleBuffer::from_iter( - device, + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() diff --git a/vulkano/src/buffer/cpu_pool.rs b/vulkano/src/buffer/cpu_pool.rs index ffb73757..d7608a6a 100644 --- a/vulkano/src/buffer/cpu_pool.rs +++ b/vulkano/src/buffer/cpu_pool.rs @@ -15,13 +15,13 @@ use super::{ use crate::{ device::{Device, DeviceOwned}, memory::{ - pool::{ - AllocFromRequirementsFilter, AllocLayout, MappingRequirement, MemoryPoolAlloc, - PotentialDedicatedAllocation, StandardMemoryPool, + allocator::{ + AllocationCreateInfo, AllocationCreationError, AllocationType, MemoryAlloc, + MemoryAllocatePreference, MemoryAllocator, MemoryUsage, StandardMemoryAllocator, }, - DedicatedAllocation, DeviceMemoryError, MemoryPool, + DedicatedAllocation, }, - DeviceSize, OomError, + DeviceSize, }; use std::{ hash::{Hash, Hasher}, @@ -65,12 +65,12 @@ use std::{ /// use vulkano::command_buffer::CommandBufferUsage; /// use vulkano::command_buffer::PrimaryCommandBufferAbstract; /// use vulkano::sync::GpuFuture; -/// # let device: std::sync::Arc = return; /// # let queue: std::sync::Arc = return; +/// # let memory_allocator: std::sync::Arc = return; /// # let command_buffer_allocator: vulkano::command_buffer::allocator::StandardCommandBufferAllocator = return; /// /// // Create the ring buffer. -/// let buffer = CpuBufferPool::upload(device.clone()); +/// let buffer = CpuBufferPool::upload(memory_allocator); /// /// for n in 0 .. 25u32 { /// // Each loop grabs a new entry from that ring buffer and stores ` data` in it. @@ -95,22 +95,21 @@ use std::{ /// .unwrap(); /// } /// ``` -pub struct CpuBufferPool> +pub struct CpuBufferPool where [T]: BufferContents, - A: MemoryPool, + A: MemoryAllocator + ?Sized, { - // The device of the pool. 
- device: Arc, - // The memory pool to use for allocations. - pool: A, + allocator: Arc, // Current buffer from which elements are grabbed. - current_buffer: Mutex>>>, + current_buffer: Mutex>>, // Buffer usage. - usage: BufferUsage, + buffer_usage: BufferUsage, + + memory_usage: MemoryUsage, // Necessary to make it compile. marker: PhantomData>, @@ -118,15 +117,12 @@ where // One buffer of the pool. #[derive(Debug)] -struct ActualBuffer -where - A: MemoryPool, -{ +struct ActualBuffer { // Inner content. inner: Arc, // The memory held by the buffer. - memory: PotentialDedicatedAllocation, + memory: MemoryAlloc, // List of the chunks that are reserved. chunks_in_use: Mutex>, @@ -154,12 +150,11 @@ struct ActualBufferChunk { /// A subbuffer allocated from a `CpuBufferPool`. /// /// When this object is destroyed, the subbuffer is automatically reclaimed by the pool. -pub struct CpuBufferPoolChunk +pub struct CpuBufferPoolChunk where [T]: BufferContents, - A: MemoryPool, { - buffer: Arc>, + buffer: Arc, // Index of the subbuffer within `buffer`. In number of elements. index: DeviceSize, @@ -179,37 +174,38 @@ where /// A subbuffer allocated from a `CpuBufferPool`. /// /// When this object is destroyed, the subbuffer is automatically reclaimed by the pool. -pub struct CpuBufferPoolSubbuffer +pub struct CpuBufferPoolSubbuffer where [T]: BufferContents, - A: MemoryPool, { // This struct is just a wrapper around `CpuBufferPoolChunk`. - chunk: CpuBufferPoolChunk, + chunk: CpuBufferPoolChunk, } -impl CpuBufferPool +impl CpuBufferPool where [T]: BufferContents, + A: MemoryAllocator + ?Sized, { /// Builds a `CpuBufferPool`. /// /// # Panics /// /// - Panics if `T` has zero size. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ - #[inline] - pub fn new(device: Arc, usage: BufferUsage) -> CpuBufferPool { + /// - Panics if `memory_usage` is [`MemoryUsage::GpuOnly`]. + pub fn new( + allocator: Arc, + buffer_usage: BufferUsage, + memory_usage: MemoryUsage, + ) -> CpuBufferPool { assert!(size_of::() > 0); - assert!(!usage.shader_device_address); - let pool = device.standard_memory_pool(); + assert!(memory_usage != MemoryUsage::GpuOnly); CpuBufferPool { - device, - pool, + allocator, current_buffer: Mutex::new(None), - usage, + buffer_usage, + memory_usage, marker: PhantomData, } } @@ -222,14 +218,14 @@ where /// # Panics /// /// - Panics if `T` has zero size. - #[inline] - pub fn upload(device: Arc) -> CpuBufferPool { + pub fn upload(allocator: Arc) -> CpuBufferPool { CpuBufferPool::new( - device, + allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ) } @@ -241,14 +237,14 @@ where /// # Panics /// /// - Panics if `T` has zero size. - #[inline] - pub fn download(device: Arc) -> CpuBufferPool { + pub fn download(allocator: Arc) -> CpuBufferPool { CpuBufferPool::new( - device, + allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() }, + MemoryUsage::Download, ) } @@ -260,14 +256,14 @@ where /// # Panics /// /// - Panics if `T` has zero size. - #[inline] - pub fn uniform_buffer(device: Arc) -> CpuBufferPool { + pub fn uniform_buffer(allocator: Arc) -> CpuBufferPool { CpuBufferPool::new( - device, + allocator, BufferUsage { uniform_buffer: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ) } @@ -279,14 +275,14 @@ where /// # Panics /// /// - Panics if `T` has zero size. 
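The extra `MemoryUsage` parameter is the user-visible part of the pool rework: `Upload` selects host-visible memory for CPU-to-GPU traffic, `Download` prefers host-cached memory for readback, and `GpuOnly` is rejected because the pool must be mappable. A sketch of the explicit form that the `upload`/`download` shorthands expand to:

use std::sync::Arc;
use vulkano::buffer::{BufferUsage, CpuBufferPool};
use vulkano::memory::allocator::{MemoryUsage, StandardMemoryAllocator};

fn make_upload_pool(memory_allocator: Arc<StandardMemoryAllocator>) {
    // Equivalent to `CpuBufferPool::<f32>::upload(memory_allocator)`.
    let upload_pool: CpuBufferPool<f32> = CpuBufferPool::new(
        memory_allocator,
        BufferUsage {
            transfer_src: true,
            ..BufferUsage::empty()
        },
        MemoryUsage::Upload,
    );
    let _ = upload_pool;

    // Passing `MemoryUsage::GpuOnly` here would panic: the pool writes
    // its data through a host mapping.
}
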
- #[inline] - pub fn vertex_buffer(device: Arc) -> CpuBufferPool { + pub fn vertex_buffer(allocator: Arc) -> CpuBufferPool { CpuBufferPool::new( - device, + allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ) } @@ -298,14 +294,14 @@ where /// # Panics /// /// - Panics if `T` has zero size. - #[inline] - pub fn indirect_buffer(device: Arc) -> CpuBufferPool { + pub fn indirect_buffer(allocator: Arc) -> CpuBufferPool { CpuBufferPool::new( - device, + allocator, BufferUsage { indirect_buffer: true, ..BufferUsage::empty() }, + MemoryUsage::Upload, ) } } @@ -313,7 +309,7 @@ where impl CpuBufferPool where [T]: BufferContents, - A: MemoryPool, + A: MemoryAllocator + ?Sized, { /// Returns the current capacity of the pool, in number of elements. pub fn capacity(&self) -> DeviceSize { @@ -327,7 +323,7 @@ where /// case. /// /// Since this can involve a memory allocation, an `OomError` can happen. - pub fn reserve(&self, capacity: DeviceSize) -> Result<(), DeviceMemoryError> { + pub fn reserve(&self, capacity: DeviceSize) -> Result<(), AllocationCreationError> { if capacity == 0 { return Ok(()); } @@ -352,11 +348,10 @@ where /// /// > **Note**: You can think of it like a `Vec`. If you insert an element and the `Vec` is not /// > large enough, a new chunk of memory is automatically allocated. - #[inline] pub fn from_data( &self, data: T, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { Ok(Arc::new(CpuBufferPoolSubbuffer { chunk: self.chunk_impl([data].into_iter())?, })) @@ -373,9 +368,10 @@ where /// # Panic /// /// Panics if the length of the iterator didn't match the actual number of elements. - /// - #[inline] - pub fn from_iter(&self, iter: I) -> Result>, DeviceMemoryError> + pub fn from_iter( + &self, + iter: I, + ) -> Result>, AllocationCreationError> where I: IntoIterator, I::IntoIter: ExactSizeIterator, @@ -386,7 +382,7 @@ where fn chunk_impl( &self, data: impl ExactSizeIterator, - ) -> Result, DeviceMemoryError> { + ) -> Result, AllocationCreationError> { let mut mutex = self.current_buffer.lock().unwrap(); let data = match self.try_next_impl(&mut mutex, data) { @@ -413,8 +409,7 @@ where /// /// A `CpuBufferPool` is always empty the first time you use it, so you shouldn't use /// `try_next` the first time you use it. - #[inline] - pub fn try_next(&self, data: T) -> Option>> { + pub fn try_next(&self, data: T) -> Option>> { let mut mutex = self.current_buffer.lock().unwrap(); self.try_next_impl(&mut mutex, [data]) .map(|c| Arc::new(CpuBufferPoolSubbuffer { chunk: c })) @@ -426,50 +421,55 @@ where // `cur_buf_mutex` must be an active lock of `self.current_buffer`. 
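Taken together, `reserve`, `from_data`, and `from_iter` keep their ring-buffer semantics; only the error type changed. A caller-side sketch, assuming an upload pool constructed as above:

use vulkano::buffer::CpuBufferPool;

fn stream_frames(pool: &CpuBufferPool<u32>) {
    // Optional: pre-size the ring so the first frames don't reallocate.
    pool.reserve(1024).unwrap();

    for frame in 0..3u32 {
        // Each call grabs a fresh chunk; the chunk is recycled once the
        // returned subbuffer (and any command buffer using it) is dropped.
        let subbuffer = pool.from_data(frame).unwrap();
        let _ = subbuffer; // would be bound to a descriptor set here
    }
}
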
fn reset_buf( &self, - cur_buf_mutex: &mut MutexGuard<'_, Option>>>, + cur_buf_mutex: &mut MutexGuard<'_, Option>>, capacity: DeviceSize, - ) -> Result<(), DeviceMemoryError> { + ) -> Result<(), AllocationCreationError> { let size = match (size_of::() as DeviceSize).checked_mul(capacity) { Some(s) => s, - None => return Err(DeviceMemoryError::OomError(OomError::OutOfDeviceMemory)), + None => return Err(AllocationCreationError::OutOfDeviceMemory), }; - let buffer = match UnsafeBuffer::new( - self.device.clone(), + + let buffer = UnsafeBuffer::new( + self.device().clone(), UnsafeBufferCreateInfo { size, - usage: self.usage, + usage: self.buffer_usage, ..Default::default() }, - ) { - Ok(b) => b, - Err(BufferCreationError::AllocError(err)) => return Err(err), - Err(_) => unreachable!(), // We don't use sparse binding, therefore the other - // errors can't happen + ) + .map_err(|err| match err { + BufferCreationError::AllocError(err) => err, + // We don't use sparse-binding, therefore the other errors can't happen. + _ => unreachable!(), + })?; + let requirements = buffer.memory_requirements(); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::Linear, + usage: self.memory_usage, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Buffer(&buffer)), + ..Default::default() }; - let mem_reqs = buffer.memory_requirements(); - unsafe { - let mem = MemoryPool::alloc_from_requirements( - &self.pool, - &mem_reqs, - AllocLayout::Linear, - MappingRequirement::Map, - Some(DedicatedAllocation::Buffer(&buffer)), - |_| AllocFromRequirementsFilter::Allowed, - )?; - debug_assert!((mem.offset() % mem_reqs.alignment) == 0); - debug_assert!(mem.mapped_memory().is_some()); - buffer.bind_memory(mem.memory(), mem.offset())?; + match unsafe { self.allocator.allocate_unchecked(create_info) } { + Ok(mut alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + alloc.shrink(size); + unsafe { buffer.bind_memory(alloc.device_memory(), alloc.offset()) }?; - **cur_buf_mutex = Some(Arc::new(ActualBuffer { - inner: buffer, - memory: mem, - chunks_in_use: Mutex::new(vec![]), - next_index: AtomicU64::new(0), - capacity, - })); + **cur_buf_mutex = Some(Arc::new(ActualBuffer { + inner: buffer, + memory: alloc, + chunks_in_use: Mutex::new(vec![]), + next_index: AtomicU64::new(0), + capacity, + })); - Ok(()) + Ok(()) + } + Err(err) => Err(err), } } @@ -482,12 +482,11 @@ where // # Panic // // Panics if the length of the iterator didn't match the actual number of element. - // fn try_next_impl( &self, - cur_buf_mutex: &mut MutexGuard<'_, Option>>>, + cur_buf_mutex: &mut MutexGuard<'_, Option>>, data: I, - ) -> Result, I::IntoIter> + ) -> Result, I::IntoIter> where I: IntoIterator, I::IntoIter: ExactSizeIterator, @@ -533,7 +532,7 @@ where let idx = current_buffer.next_index.load(Ordering::SeqCst); // Find the required alignment in bytes. - let align_uniform = if self.usage.uniform_buffer { + let align_uniform = if self.buffer_usage.uniform_buffer { self.device() .physical_device() .properties() @@ -541,7 +540,7 @@ where } else { 1 }; - let align_storage = if self.usage.storage_buffer { + let align_storage = if self.buffer_usage.storage_buffer { self.device() .physical_device() .properties() @@ -586,12 +585,10 @@ where // Write `data` in the memory. 
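A note on the alignment logic just above: when the pool's buffer can be used as a uniform or storage buffer, each chunk must start at the device's minimum offset alignment, so the candidate byte offset is rounded up before the chunk is placed. An illustrative reconstruction of that rounding, not the exact bookkeeping (the real code also tracks a separate `align_offset` and re-checks remaining capacity); the 256 is a typical `min_uniform_buffer_offset_alignment`, used purely for illustration:

use vulkano::DeviceSize;

/// Round `offset` up to a multiple of `align` (a power of two on real devices).
fn align_up(offset: DeviceSize, align: DeviceSize) -> DeviceSize {
    (offset + align - 1) / align * align
}

fn chunk_byte_start(next_index: DeviceSize, elem_size: DeviceSize) -> DeviceSize {
    let align_uniform: DeviceSize = 256; // illustrative device minimum
    let align_storage: DeviceSize = 1; // pool not used as a storage buffer here
    align_up(next_index * elem_size, align_uniform.max(align_storage))
}
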
unsafe { - let mem_off = current_buffer.memory.offset(); - let range = (index * size_of::() as DeviceSize + align_offset + mem_off) - ..((index + requested_len) * size_of::() as DeviceSize + align_offset + mem_off); + let range = (index * size_of::() as DeviceSize + align_offset) + ..((index + requested_len) * size_of::() as DeviceSize + align_offset); - let mapped_memory = current_buffer.memory.mapped_memory().unwrap(); - let bytes = mapped_memory.write(range.clone()).unwrap(); + let bytes = current_buffer.memory.write(range.clone()).unwrap(); let mapping = <[T]>::from_bytes_mut(bytes).unwrap(); let mut written = 0; @@ -600,12 +597,12 @@ where written += 1; } - mapped_memory.flush_range(range).unwrap(); + current_buffer.memory.flush_range(range).unwrap(); assert_eq!( written, requested_len, "Iterator passed to CpuBufferPool::chunk has a mismatch between reported \ - length and actual number of elements" + length and actual number of elements" ); } @@ -634,16 +631,16 @@ where impl Clone for CpuBufferPool where [T]: BufferContents, - A: MemoryPool + Clone, + A: MemoryAllocator + ?Sized, { fn clone(&self) -> Self { let buf = self.current_buffer.lock().unwrap(); CpuBufferPool { - device: self.device.clone(), - pool: self.pool.clone(), + allocator: self.allocator.clone(), current_buffer: Mutex::new(buf.clone()), - usage: self.usage, + buffer_usage: self.buffer_usage, + memory_usage: self.memory_usage, marker: PhantomData, } } @@ -652,20 +649,18 @@ where unsafe impl DeviceOwned for CpuBufferPool where [T]: BufferContents, - A: MemoryPool, + A: MemoryAllocator + ?Sized, { - #[inline] fn device(&self) -> &Arc { - &self.device + self.allocator.device() } } -impl Clone for CpuBufferPoolChunk +impl Clone for CpuBufferPoolChunk where [T]: BufferContents, - A: MemoryPool, { - fn clone(&self) -> CpuBufferPoolChunk { + fn clone(&self) -> CpuBufferPoolChunk { let mut chunks_in_use_lock = self.buffer.chunks_in_use.lock().unwrap(); let chunk = chunks_in_use_lock .iter_mut() @@ -688,13 +683,11 @@ where } } -unsafe impl BufferAccess for CpuBufferPoolChunk +unsafe impl BufferAccess for CpuBufferPoolChunk where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { - #[inline] fn inner(&self) -> BufferInner<'_> { BufferInner { buffer: &self.buffer.inner, @@ -702,28 +695,24 @@ where } } - #[inline] fn size(&self) -> DeviceSize { self.requested_len * size_of::() as DeviceSize } } -impl BufferAccessObject for Arc> +impl BufferAccessObject for Arc> where T: Send + Sync, [T]: BufferContents, - A: MemoryPool + 'static, { - #[inline] fn as_buffer_access_object(&self) -> Arc { self.clone() } } -impl Drop for CpuBufferPoolChunk +impl Drop for CpuBufferPoolChunk where [T]: BufferContents, - A: MemoryPool, { fn drop(&mut self) { // If `requested_len` is 0, then no entry was added in the chunks. 
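The pattern above (`write` a byte range, reinterpret with `from_bytes_mut`, then `flush_range`) is how `MemoryAlloc` replaces the old `MappedDeviceMemory` plumbing; note that ranges are now relative to the allocation, not to the underlying `DeviceMemory` block, which is why the `mem_off` bookkeeping disappears. A minimal sketch against a mapped allocation; `bytemuck` is assumed only for the byte cast, and the function is unsafe because nothing here synchronizes against GPU access:

use vulkano::{memory::allocator::MemoryAlloc, DeviceSize};

unsafe fn write_u32s(alloc: &MemoryAlloc, values: &[u32]) {
    let len = (values.len() * std::mem::size_of::<u32>()) as DeviceSize;

    // 0..len is relative to this allocation, not to the whole
    // `DeviceMemory` block it was suballocated from.
    let bytes = alloc.write(0..len).unwrap();
    bytes.copy_from_slice(bytemuck::cast_slice(values));

    // Required on non-coherent memory types; effectively a no-op otherwise.
    alloc.flush_range(0..len).unwrap();
}
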
@@ -745,147 +734,125 @@ where } } -unsafe impl TypedBufferAccess for CpuBufferPoolChunk +unsafe impl TypedBufferAccess for CpuBufferPoolChunk where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { type Content = [T]; } -unsafe impl DeviceOwned for CpuBufferPoolChunk +unsafe impl DeviceOwned for CpuBufferPoolChunk where [T]: BufferContents, - A: MemoryPool, { - #[inline] fn device(&self) -> &Arc { self.buffer.inner.device() } } -impl PartialEq for CpuBufferPoolChunk +impl PartialEq for CpuBufferPoolChunk where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { - #[inline] fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() && self.size() == other.size() } } -impl Eq for CpuBufferPoolChunk +impl Eq for CpuBufferPoolChunk where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { } -impl Hash for CpuBufferPoolChunk +impl Hash for CpuBufferPoolChunk where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { - #[inline] fn hash(&self, state: &mut H) { self.inner().hash(state); self.size().hash(state); } } -impl Clone for CpuBufferPoolSubbuffer +impl Clone for CpuBufferPoolSubbuffer where [T]: BufferContents, - A: MemoryPool, { - fn clone(&self) -> CpuBufferPoolSubbuffer { + fn clone(&self) -> CpuBufferPoolSubbuffer { CpuBufferPoolSubbuffer { chunk: self.chunk.clone(), } } } -unsafe impl BufferAccess for CpuBufferPoolSubbuffer +unsafe impl BufferAccess for CpuBufferPoolSubbuffer where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { - #[inline] fn inner(&self) -> BufferInner<'_> { self.chunk.inner() } - #[inline] fn size(&self) -> DeviceSize { self.chunk.size() } } -impl BufferAccessObject for Arc> +impl BufferAccessObject for Arc> where T: Send + Sync, [T]: BufferContents, - A: MemoryPool + 'static, { - #[inline] fn as_buffer_access_object(&self) -> Arc { self.clone() } } -unsafe impl TypedBufferAccess for CpuBufferPoolSubbuffer +unsafe impl TypedBufferAccess for CpuBufferPoolSubbuffer where T: BufferContents, [T]: BufferContents, - A: MemoryPool, { type Content = T; } -unsafe impl DeviceOwned for CpuBufferPoolSubbuffer +unsafe impl DeviceOwned for CpuBufferPoolSubbuffer where [T]: BufferContents, - A: MemoryPool, { - #[inline] fn device(&self) -> &Arc { self.chunk.buffer.inner.device() } } -impl PartialEq for CpuBufferPoolSubbuffer +impl PartialEq for CpuBufferPoolSubbuffer where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { - #[inline] fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() && self.size() == other.size() } } -impl Eq for CpuBufferPoolSubbuffer +impl Eq for CpuBufferPoolSubbuffer where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { } -impl Hash for CpuBufferPoolSubbuffer +impl Hash for CpuBufferPoolSubbuffer where T: Send + Sync, [T]: BufferContents, - A: MemoryPool, { - #[inline] fn hash(&self, state: &mut H) { self.inner().hash(state); self.size().hash(state); @@ -894,20 +861,22 @@ where #[cfg(test)] mod tests { - use crate::buffer::CpuBufferPool; + use super::*; use std::mem; #[test] fn basic_create() { let (device, _) = gfx_dev_and_queue!(); - let _ = CpuBufferPool::::upload(device); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); + let _ = CpuBufferPool::::upload(memory_allocator); } #[test] fn reserve() { let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - let pool = CpuBufferPool::::upload(device); + let pool = CpuBufferPool::::upload(memory_allocator); assert_eq!(pool.capacity(), 0); 
pool.reserve(83).unwrap(); @@ -917,8 +886,9 @@ mod tests { #[test] fn capacity_increase() { let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - let pool = CpuBufferPool::upload(device); + let pool = CpuBufferPool::upload(memory_allocator); assert_eq!(pool.capacity(), 0); pool.from_data(12).unwrap(); @@ -935,8 +905,9 @@ mod tests { #[test] fn reuse_subbuffers() { let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - let pool = CpuBufferPool::upload(device); + let pool = CpuBufferPool::upload(memory_allocator); assert_eq!(pool.capacity(), 0); let mut capacity = None; @@ -955,8 +926,9 @@ mod tests { #[test] fn chunk_loopback() { let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - let pool = CpuBufferPool::::upload(device); + let pool = CpuBufferPool::::upload(memory_allocator); pool.reserve(5).unwrap(); let a = pool.from_iter(vec![0, 0]).unwrap(); @@ -973,8 +945,9 @@ mod tests { #[test] fn chunk_0_elems_doesnt_pollute() { let (device, _) = gfx_dev_and_queue!(); + let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device)); - let pool = CpuBufferPool::::upload(device); + let pool = CpuBufferPool::::upload(memory_allocator); let _ = pool.from_iter(vec![]).unwrap(); let _ = pool.from_iter(vec![0, 0]).unwrap(); diff --git a/vulkano/src/buffer/device_local.rs b/vulkano/src/buffer/device_local.rs index 6def52cd..aed1590c 100644 --- a/vulkano/src/buffer/device_local.rs +++ b/vulkano/src/buffer/device_local.rs @@ -16,31 +16,26 @@ use super::{ sys::{UnsafeBuffer, UnsafeBufferCreateInfo}, - BufferAccess, BufferAccessObject, BufferContents, BufferCreationError, BufferInner, - BufferUsage, CpuAccessibleBuffer, TypedBufferAccess, + BufferAccess, BufferAccessObject, BufferContents, BufferInner, BufferUsage, + CpuAccessibleBuffer, TypedBufferAccess, }; use crate::{ - command_buffer::{ - allocator::CommandBufferAllocator, AutoCommandBufferBuilder, CommandBufferBeginError, - CopyBufferInfo, - }, + buffer::{BufferCreationError, ExternalBufferInfo}, + command_buffer::{allocator::CommandBufferAllocator, AutoCommandBufferBuilder, CopyBufferInfo}, device::{Device, DeviceOwned}, memory::{ - pool::{ - alloc_dedicated_with_exportable_fd, AllocFromRequirementsFilter, AllocLayout, - MappingRequirement, MemoryPoolAlloc, PotentialDedicatedAllocation, - StandardMemoryPoolAlloc, + allocator::{ + AllocationCreateInfo, AllocationCreationError, AllocationType, MemoryAlloc, + MemoryAllocatePreference, MemoryAllocator, MemoryUsage, }, - DedicatedAllocation, DeviceMemoryError, ExternalMemoryHandleType, MemoryPool, - MemoryRequirements, + DedicatedAllocation, DeviceMemoryError, ExternalMemoryHandleType, + ExternalMemoryHandleTypes, }, sync::Sharing, DeviceSize, }; use smallvec::SmallVec; use std::{ - error::Error, - fmt::{Display, Error as FmtError, Formatter}, fs::File, hash::{Hash, Hasher}, marker::PhantomData, @@ -79,6 +74,7 @@ use std::{ /// use vulkano::sync::GpuFuture; /// # let device: std::sync::Arc = return; /// # let queue: std::sync::Arc = return; +/// # let memory_allocator: vulkano::memory::allocator::StandardMemoryAllocator = return; /// # let command_buffer_allocator: vulkano::command_buffer::allocator::StandardCommandBufferAllocator = return; /// /// // Simple iterator to construct test data. @@ -86,7 +82,7 @@ use std::{ /// /// // Create a CPU accessible buffer initialized with the data. 
/// let temporary_accessible_buffer = CpuAccessibleBuffer::from_iter( -/// device.clone(), +/// &memory_allocator, /// BufferUsage { transfer_src: true, ..BufferUsage::empty() }, // Specify this buffer will be used as a transfer source. /// false, /// data, @@ -95,7 +91,7 @@ use std::{ /// /// // Create a buffer array on the GPU with enough space for `10_000` floats. /// let device_local_buffer = DeviceLocalBuffer::<[f32]>::array( -/// device.clone(), +/// &memory_allocator, /// 10_000 as vulkano::DeviceSize, /// BufferUsage { /// storage_buffer: true, @@ -129,7 +125,7 @@ use std::{ /// .unwrap() /// ``` #[derive(Debug)] -pub struct DeviceLocalBuffer> +pub struct DeviceLocalBuffer where T: BufferContents + ?Sized, { @@ -137,7 +133,7 @@ where inner: Arc, // The memory held by the buffer. - memory: A, + memory: MemoryAlloc, // Queue families allowed to access this buffer. queue_family_indices: SmallVec<[u32; 4]>, @@ -156,13 +152,13 @@ where /// /// - Panics if `T` has zero size. pub fn new( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), usage: BufferUsage, queue_family_indices: impl IntoIterator, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { unsafe { DeviceLocalBuffer::raw( - device, + allocator, size_of::() as DeviceSize, usage, queue_family_indices, @@ -183,16 +179,12 @@ where /// /// `command_buffer_builder` can then be used to record other commands, built, and executed as /// normal. If it is not executed, the buffer contents will be left undefined. - /// - /// # Panics - /// - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub fn from_buffer( + allocator: &(impl MemoryAllocator + ?Sized), source: Arc, usage: BufferUsage, command_buffer_builder: &mut AutoCommandBufferBuilder, - ) -> Result>, DeviceLocalBufferCreationError> + ) -> Result>, AllocationCreationError> where B: TypedBufferAccess + 'static, A: CommandBufferAllocator, @@ -205,7 +197,7 @@ where }; let buffer = DeviceLocalBuffer::raw( - source.device().clone(), + allocator, source.size(), actual_usage, source @@ -237,18 +229,17 @@ where /// # Panics /// /// - Panics if `T` has zero size. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub fn from_data( + allocator: &(impl MemoryAllocator + ?Sized), data: T, usage: BufferUsage, command_buffer_builder: &mut AutoCommandBufferBuilder, - ) -> Result>, DeviceLocalBufferCreationError> + ) -> Result>, AllocationCreationError> where A: CommandBufferAllocator, { let source = CpuAccessibleBuffer::from_data( - command_buffer_builder.device().clone(), + allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() @@ -256,7 +247,7 @@ where false, data, )?; - DeviceLocalBuffer::from_buffer(source, usage, command_buffer_builder) + DeviceLocalBuffer::from_buffer(allocator, source, usage, command_buffer_builder) } } @@ -274,20 +265,19 @@ where /// /// - Panics if `T` has zero size. /// - Panics if `data` is empty. - /// - Panics if `usage.shader_device_address` is `true`. 
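`from_data` now threads the allocator through both the staging `CpuAccessibleBuffer` and the device-local destination, and the `DeviceLocalBufferCreationError` wrapper disappears in favour of `AllocationCreationError` (the enum is deleted further down). A caller-side sketch mirroring the updated unit tests; the copy is only recorded into the builder, so the data reaches the GPU buffer when the command buffer is later built and executed:

use vulkano::{
    buffer::{BufferUsage, DeviceLocalBuffer},
    command_buffer::{AutoCommandBufferBuilder, PrimaryAutoCommandBuffer},
    memory::allocator::StandardMemoryAllocator,
};

fn upload_constant(
    memory_allocator: &StandardMemoryAllocator,
    builder: &mut AutoCommandBufferBuilder<PrimaryAutoCommandBuffer>,
) {
    let buffer = DeviceLocalBuffer::from_data(
        memory_allocator,
        12u32,
        BufferUsage {
            transfer_src: true,
            ..BufferUsage::empty()
        },
        builder, // a copy command is recorded here
    )
    .unwrap();
    let _ = buffer;
}
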
- // TODO: ^ pub fn from_iter( + allocator: &(impl MemoryAllocator + ?Sized), data: D, usage: BufferUsage, command_buffer_builder: &mut AutoCommandBufferBuilder, - ) -> Result>, DeviceLocalBufferCreationError> + ) -> Result>, AllocationCreationError> where D: IntoIterator, D::IntoIter: ExactSizeIterator, A: CommandBufferAllocator, { let source = CpuAccessibleBuffer::from_iter( - command_buffer_builder.device().clone(), + allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() @@ -295,7 +285,7 @@ where false, data, )?; - DeviceLocalBuffer::from_buffer(source, usage, command_buffer_builder) + DeviceLocalBuffer::from_buffer(allocator, source, usage, command_buffer_builder) } } @@ -309,17 +299,15 @@ where /// /// - Panics if `T` has zero size. /// - Panics if `len` is zero. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub fn array( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), len: DeviceSize, usage: BufferUsage, queue_family_indices: impl IntoIterator, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { unsafe { DeviceLocalBuffer::raw( - device, + allocator, len * size_of::() as DeviceSize, usage, queue_family_indices, @@ -341,41 +329,57 @@ where /// # Panics /// /// - Panics if `size` is zero. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub unsafe fn raw( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), size: DeviceSize, usage: BufferUsage, queue_family_indices: impl IntoIterator, - ) -> Result>, DeviceMemoryError> { + ) -> Result>, AllocationCreationError> { let queue_family_indices: SmallVec<[_; 4]> = queue_family_indices.into_iter().collect(); - let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_family_indices)?; - - let memory = MemoryPool::alloc_from_requirements( - &device.standard_memory_pool(), - &mem_reqs, - AllocLayout::Linear, - MappingRequirement::DoNotMap, - Some(DedicatedAllocation::Buffer(&buffer)), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred + let buffer = UnsafeBuffer::new( + allocator.device().clone(), + UnsafeBufferCreateInfo { + sharing: if queue_family_indices.len() >= 2 { + Sharing::Concurrent(queue_family_indices.clone()) } else { - AllocFromRequirementsFilter::Allowed - } + Sharing::Exclusive + }, + size, + usage, + ..Default::default() }, - )?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - buffer.bind_memory(memory.memory(), memory.offset())?; + ) + .map_err(|err| match err { + BufferCreationError::AllocError(err) => err, + // We don't use sparse-binding, therefore the other errors can't happen. 
+ _ => unreachable!(), + })?; + let requirements = buffer.memory_requirements(); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::Linear, + usage: MemoryUsage::GpuOnly, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Buffer(&buffer)), + ..Default::default() + }; - Ok(Arc::new(DeviceLocalBuffer { - inner: buffer, - memory, - queue_family_indices, - marker: PhantomData, - })) + match allocator.allocate_unchecked(create_info) { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + buffer.bind_memory(alloc.device_memory(), alloc.offset())?; + + Ok(Arc::new(DeviceLocalBuffer { + inner: buffer, + memory: alloc, + queue_family_indices, + marker: PhantomData, + })) + } + Err(err) => Err(err), + } } /// Same as `raw` but with exportable fd option for the allocated memory on Linux/BSD @@ -383,76 +387,86 @@ where /// # Panics /// /// - Panics if `size` is zero. - /// - Panics if `usage.shader_device_address` is `true`. - // TODO: ^ pub unsafe fn raw_with_exportable_fd( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), size: DeviceSize, usage: BufferUsage, queue_family_indices: impl IntoIterator, - ) -> Result>, DeviceMemoryError> { - assert!(device.enabled_extensions().khr_external_memory_fd); - assert!(device.enabled_extensions().khr_external_memory); + ) -> Result>, AllocationCreationError> { + let enabled_extensions = allocator.device().enabled_extensions(); + assert!(enabled_extensions.khr_external_memory_fd); + assert!(enabled_extensions.khr_external_memory); let queue_family_indices: SmallVec<[_; 4]> = queue_family_indices.into_iter().collect(); - let (buffer, mem_reqs) = Self::build_buffer(&device, size, usage, &queue_family_indices)?; + let external_memory_properties = allocator + .device() + .physical_device() + .external_buffer_properties(ExternalBufferInfo { + usage, + ..ExternalBufferInfo::handle_type(ExternalMemoryHandleType::OpaqueFd) + }) + .unwrap() + .external_memory_properties; + // VUID-VkExportMemoryAllocateInfo-handleTypes-00656 + assert!(external_memory_properties.exportable); - let memory = alloc_dedicated_with_exportable_fd( - device, - &mem_reqs, - AllocLayout::Linear, - MappingRequirement::DoNotMap, - DedicatedAllocation::Buffer(&buffer), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } - }, - )?; - let mem_offset = memory.offset(); - debug_assert!((mem_offset % mem_reqs.alignment) == 0); - buffer.bind_memory(memory.memory(), mem_offset)?; + // VUID-VkMemoryAllocateInfo-pNext-00639 + // Guaranteed because we always create a dedicated allocation - Ok(Arc::new(DeviceLocalBuffer { - inner: buffer, - memory, - queue_family_indices, - marker: PhantomData, - })) - } - - unsafe fn build_buffer( - device: &Arc, - size: DeviceSize, - usage: BufferUsage, - queue_family_indices: &SmallVec<[u32; 4]>, - ) -> Result<(Arc, MemoryRequirements), DeviceMemoryError> { - let buffer = { - match UnsafeBuffer::new( - device.clone(), - UnsafeBufferCreateInfo { - sharing: if queue_family_indices.len() >= 2 { - Sharing::Concurrent(queue_family_indices.clone()) - } else { - Sharing::Exclusive - }, - size, - usage, - ..Default::default() - }, - ) { - Ok(b) => b, - Err(BufferCreationError::AllocError(err)) => return Err(err), - Err(_) => unreachable!(), // We don't use sparse binding, therefore the other - // errors 
can't happen - } + let external_memory_handle_types = ExternalMemoryHandleTypes { + opaque_fd: true, + ..ExternalMemoryHandleTypes::empty() }; - let mem_reqs = buffer.memory_requirements(); + let buffer = UnsafeBuffer::new( + allocator.device().clone(), + UnsafeBufferCreateInfo { + sharing: if queue_family_indices.len() >= 2 { + Sharing::Concurrent(queue_family_indices.clone()) + } else { + Sharing::Exclusive + }, + size, + usage, + external_memory_handle_types, + ..Default::default() + }, + ) + .map_err(|err| match err { + BufferCreationError::AllocError(err) => err, + // We don't use sparse-binding, therefore the other errors can't happen. + _ => unreachable!(), + })?; + let requirements = buffer.memory_requirements(); + let memory_type_index = allocator + .find_memory_type_index(requirements.memory_type_bits, MemoryUsage::GpuOnly.into()) + .expect("failed to find a suitable memory type"); - Ok((buffer, mem_reqs)) + let memory_properties = allocator.device().physical_device().memory_properties(); + let heap_index = memory_properties.memory_types[memory_type_index as usize].heap_index; + // VUID-vkAllocateMemory-pAllocateInfo-01713 + assert!(size <= memory_properties.memory_heaps[heap_index as usize].size); + + match allocator.allocate_dedicated_unchecked( + memory_type_index, + requirements.size, + Some(DedicatedAllocation::Buffer(&buffer)), + external_memory_handle_types, + ) { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + buffer.bind_memory(alloc.device_memory(), alloc.offset())?; + + Ok(Arc::new(DeviceLocalBuffer { + inner: buffer, + memory: alloc, + queue_family_indices, + marker: PhantomData, + })) + } + Err(err) => Err(err), + } } /// Exports posix file descriptor for the allocated memory @@ -460,12 +474,12 @@ where /// Only works on Linux/BSD. 
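Exportable allocations bypass the suballocators entirely: the memory type is chosen by hand with `find_memory_type_index` and the block comes from `allocate_dedicated_unchecked`, since a file descriptor must correspond to a whole `DeviceMemory` object. A usage sketch combining `raw_with_exportable_fd` with the `export_posix_fd` helper defined just below, assuming the `khr_external_memory` extensions asserted above are enabled; the size and usage are arbitrary:

use std::fs::File;
use vulkano::{
    buffer::{BufferUsage, DeviceLocalBuffer},
    memory::allocator::StandardMemoryAllocator,
};

unsafe fn export_buffer_fd(
    memory_allocator: &StandardMemoryAllocator,
    queue_family_index: u32,
) -> File {
    let buffer = DeviceLocalBuffer::<[u32]>::raw_with_exportable_fd(
        memory_allocator,
        1024, // size in bytes, illustrative
        BufferUsage {
            transfer_dst: true,
            ..BufferUsage::empty()
        },
        [queue_family_index],
    )
    .unwrap();

    // Hands out a POSIX fd for the dedicated `DeviceMemory` backing the
    // buffer, e.g. for sharing with another API or process (Linux/BSD only).
    buffer.export_posix_fd().unwrap()
}
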
pub fn export_posix_fd(&self) -> Result { self.memory - .memory() + .device_memory() .export_fd(ExternalMemoryHandleType::OpaqueFd) } } -impl DeviceLocalBuffer +impl DeviceLocalBuffer where T: BufferContents + ?Sized, { @@ -475,7 +489,7 @@ where } } -unsafe impl DeviceOwned for DeviceLocalBuffer +unsafe impl DeviceOwned for DeviceLocalBuffer where T: BufferContents + ?Sized, { @@ -484,10 +498,9 @@ where } } -unsafe impl BufferAccess for DeviceLocalBuffer +unsafe impl BufferAccess for DeviceLocalBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { fn inner(&self) -> BufferInner<'_> { BufferInner { @@ -501,45 +514,36 @@ where } } -impl BufferAccessObject for Arc> +impl BufferAccessObject for Arc> where T: BufferContents + ?Sized, - A: Send + Sync + 'static, { fn as_buffer_access_object(&self) -> Arc { self.clone() } } -unsafe impl TypedBufferAccess for DeviceLocalBuffer +unsafe impl TypedBufferAccess for DeviceLocalBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { type Content = T; } -impl PartialEq for DeviceLocalBuffer +impl PartialEq for DeviceLocalBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() && self.size() == other.size() } } -impl Eq for DeviceLocalBuffer -where - T: BufferContents + ?Sized, - A: Send + Sync, -{ -} +impl Eq for DeviceLocalBuffer where T: BufferContents + ?Sized {} -impl Hash for DeviceLocalBuffer +impl Hash for DeviceLocalBuffer where T: BufferContents + ?Sized, - A: Send + Sync, { fn hash(&self, state: &mut H) { self.inner().hash(state); @@ -547,42 +551,6 @@ where } } -#[derive(Clone, Debug)] -pub enum DeviceLocalBufferCreationError { - DeviceMemoryAllocationError(DeviceMemoryError), - CommandBufferBeginError(CommandBufferBeginError), -} - -impl Error for DeviceLocalBufferCreationError { - fn source(&self) -> Option<&(dyn Error + 'static)> { - match self { - Self::DeviceMemoryAllocationError(err) => Some(err), - Self::CommandBufferBeginError(err) => Some(err), - } - } -} - -impl Display for DeviceLocalBufferCreationError { - fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { - match self { - Self::DeviceMemoryAllocationError(err) => err.fmt(f), - Self::CommandBufferBeginError(err) => err.fmt(f), - } - } -} - -impl From for DeviceLocalBufferCreationError { - fn from(e: DeviceMemoryError) -> Self { - Self::DeviceMemoryAllocationError(e) - } -} - -impl From for DeviceLocalBufferCreationError { - fn from(e: CommandBufferBeginError) -> Self { - Self::CommandBufferBeginError(e) - } -} - #[cfg(test)] mod tests { use super::*; @@ -591,6 +559,7 @@ mod tests { allocator::StandardCommandBufferAllocator, CommandBufferUsage, PrimaryCommandBufferAbstract, }, + memory::allocator::StandardMemoryAllocator, sync::GpuFuture, }; @@ -605,8 +574,10 @@ mod tests { CommandBufferUsage::OneTimeSubmit, ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let buffer = DeviceLocalBuffer::from_data( + &memory_allocator, 12u32, BufferUsage { transfer_src: true, @@ -617,7 +588,7 @@ mod tests { .unwrap(); let destination = CpuAccessibleBuffer::from_data( - device, + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() @@ -653,8 +624,10 @@ mod tests { CommandBufferUsage::OneTimeSubmit, ) .unwrap(); + let allocator = StandardMemoryAllocator::new_default(device); let buffer = DeviceLocalBuffer::from_iter( + &allocator, (0..512u32).map(|n| n * 2), BufferUsage { transfer_src: true, @@ -665,7 +638,7 @@ mod tests { .unwrap(); let destination = 
CpuAccessibleBuffer::from_iter( - device, + &allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() @@ -697,16 +670,18 @@ mod tests { fn create_buffer_zero_size_data() { let (device, queue) = gfx_dev_and_queue!(); - let command_buffer_allocator = StandardCommandBufferAllocator::new(device); + let command_buffer_allocator = StandardCommandBufferAllocator::new(device.clone()); let mut command_buffer_builder = AutoCommandBufferBuilder::primary( &command_buffer_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) .unwrap(); + let allocator = StandardMemoryAllocator::new_default(device); assert_should_panic!({ DeviceLocalBuffer::from_data( + &allocator, (), BufferUsage { transfer_dst: true, diff --git a/vulkano/src/buffer/sys.rs b/vulkano/src/buffer/sys.rs index 39ef59b6..e8db8c91 100644 --- a/vulkano/src/buffer/sys.rs +++ b/vulkano/src/buffer/sys.rs @@ -31,7 +31,10 @@ use super::{ use crate::{ device::{Device, DeviceOwned}, macros::vulkan_bitflags, - memory::{DeviceMemory, DeviceMemoryError, ExternalMemoryHandleTypes, MemoryRequirements}, + memory::{ + allocator::AllocationCreationError, DeviceMemory, ExternalMemoryHandleTypes, + MemoryRequirements, + }, range_map::RangeMap, sync::{AccessError, CurrentAccess, Sharing}, DeviceSize, OomError, RequirementNotMet, RequiresOneOf, Version, VulkanError, VulkanObject, @@ -584,7 +587,7 @@ impl Default for UnsafeBufferCreateInfo { #[derive(Clone, Debug, PartialEq, Eq)] pub enum BufferCreationError { /// Allocating memory failed. - AllocError(DeviceMemoryError), + AllocError(AllocationCreationError), RequirementNotMet { required_for: &'static str, @@ -645,11 +648,11 @@ impl From for BufferCreationError { impl From for BufferCreationError { fn from(err: VulkanError) -> BufferCreationError { match err { - err @ VulkanError::OutOfHostMemory => { - BufferCreationError::AllocError(DeviceMemoryError::from(err)) + VulkanError::OutOfHostMemory => { + BufferCreationError::AllocError(AllocationCreationError::OutOfHostMemory) } - err @ VulkanError::OutOfDeviceMemory => { - BufferCreationError::AllocError(DeviceMemoryError::from(err)) + VulkanError::OutOfDeviceMemory => { + BufferCreationError::AllocError(AllocationCreationError::OutOfDeviceMemory) } _ => panic!("unexpected error: {:?}", err), } diff --git a/vulkano/src/buffer/view.rs b/vulkano/src/buffer/view.rs index 622fd479..1a92ea28 100644 --- a/vulkano/src/buffer/view.rs +++ b/vulkano/src/buffer/view.rs @@ -24,26 +24,28 @@ //! use vulkano::buffer::view::{BufferView, BufferViewCreateInfo}; //! use vulkano::format::Format; //! -//! # let device: Arc = return; //! # let queue: Arc = return; +//! # let memory_allocator: vulkano::memory::allocator::StandardMemoryAllocator = return; //! let usage = BufferUsage { //! storage_texel_buffer: true, //! ..BufferUsage::empty() //! }; //! //! let buffer = DeviceLocalBuffer::<[u32]>::array( -//! device.clone(), +//! &memory_allocator, //! 128, //! usage, //! [queue.queue_family_index()], -//! ).unwrap(); +//! ) +//! .unwrap(); //! let _view = BufferView::new( //! buffer, //! BufferViewCreateInfo { //! format: Some(Format::R32_UINT), //! ..Default::default() //! }, -//! ).unwrap(); +//! ) +//! .unwrap(); //! 
``` use super::{BufferAccess, BufferAccessObject, BufferInner}; @@ -477,27 +479,31 @@ impl Hash for dyn BufferViewAbstract { #[cfg(test)] mod tests { + use super::{BufferView, BufferViewCreateInfo, BufferViewCreationError}; use crate::{ - buffer::{ - view::{BufferView, BufferViewCreateInfo, BufferViewCreationError}, - BufferUsage, DeviceLocalBuffer, - }, + buffer::{BufferUsage, DeviceLocalBuffer}, format::Format, + memory::allocator::StandardMemoryAllocator, }; #[test] fn create_uniform() { // `VK_FORMAT_R8G8B8A8_UNORM` guaranteed to be a supported format let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let usage = BufferUsage { uniform_texel_buffer: true, ..BufferUsage::empty() }; - let buffer = - DeviceLocalBuffer::<[[u8; 4]]>::array(device, 128, usage, [queue.queue_family_index()]) - .unwrap(); + let buffer = DeviceLocalBuffer::<[[u8; 4]]>::array( + &memory_allocator, + 128, + usage, + [queue.queue_family_index()], + ) + .unwrap(); BufferView::new( buffer, BufferViewCreateInfo { @@ -512,15 +518,20 @@ mod tests { fn create_storage() { // `VK_FORMAT_R8G8B8A8_UNORM` guaranteed to be a supported format let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let usage = BufferUsage { storage_texel_buffer: true, ..BufferUsage::empty() }; - let buffer = - DeviceLocalBuffer::<[[u8; 4]]>::array(device, 128, usage, [queue.queue_family_index()]) - .unwrap(); + let buffer = DeviceLocalBuffer::<[[u8; 4]]>::array( + &memory_allocator, + 128, + usage, + [queue.queue_family_index()], + ) + .unwrap(); BufferView::new( buffer, BufferViewCreateInfo { @@ -535,15 +546,20 @@ mod tests { fn create_storage_atomic() { // `VK_FORMAT_R32_UINT` guaranteed to be a supported format for atomics let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let usage = BufferUsage { storage_texel_buffer: true, ..BufferUsage::empty() }; - let buffer = - DeviceLocalBuffer::<[u32]>::array(device, 128, usage, [queue.queue_family_index()]) - .unwrap(); + let buffer = DeviceLocalBuffer::<[u32]>::array( + &memory_allocator, + 128, + usage, + [queue.queue_family_index()], + ) + .unwrap(); BufferView::new( buffer, BufferViewCreateInfo { @@ -558,9 +574,10 @@ mod tests { fn wrong_usage() { // `VK_FORMAT_R8G8B8A8_UNORM` guaranteed to be a supported format let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let buffer = DeviceLocalBuffer::<[[u8; 4]]>::array( - device, + &memory_allocator, 128, BufferUsage { transfer_dst: true, // Dummy value @@ -585,6 +602,7 @@ mod tests { #[test] fn unsupported_format() { let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let usage = BufferUsage { uniform_texel_buffer: true, @@ -593,7 +611,7 @@ mod tests { }; let buffer = DeviceLocalBuffer::<[[f64; 4]]>::array( - device, + &memory_allocator, 128, usage, [queue.queue_family_index()], diff --git a/vulkano/src/command_buffer/auto.rs b/vulkano/src/command_buffer/auto.rs index 0739017b..acdecdda 100644 --- a/vulkano/src/command_buffer/auto.rs +++ b/vulkano/src/command_buffer/auto.rs @@ -727,6 +727,12 @@ pub struct PrimaryAutoCommandBuffer { state: Mutex, } +unsafe impl DeviceOwned for PrimaryAutoCommandBuffer { + fn device(&self) -> &Arc { + self.inner.device() + } +} + unsafe impl VulkanObject for PrimaryAutoCommandBuffer { type Handle = 
ash::vk::CommandBuffer; @@ -735,12 +741,6 @@ unsafe impl VulkanObject for PrimaryAutoCommandBuffer { } } -unsafe impl DeviceOwned for PrimaryAutoCommandBuffer { - fn device(&self) -> &Arc { - self.inner.device() - } -} - unsafe impl PrimaryCommandBufferAbstract for PrimaryAutoCommandBuffer where A: CommandBufferAlloc, @@ -918,6 +918,7 @@ mod tests { ExecuteCommandsError, }, device::{DeviceCreateInfo, QueueCreateInfo}, + memory::allocator::StandardMemoryAllocator, sync::GpuFuture, }; @@ -943,9 +944,10 @@ mod tests { .unwrap(); let queue = queues.next().unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let source = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() @@ -956,7 +958,7 @@ mod tests { .unwrap(); let destination = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_dst: true, ..BufferUsage::empty() @@ -966,9 +968,9 @@ mod tests { ) .unwrap(); - let allocator = StandardCommandBufferAllocator::new(device); + let cb_allocator = StandardCommandBufferAllocator::new(device); let mut cbb = AutoCommandBufferBuilder::primary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) @@ -1004,11 +1006,11 @@ mod tests { fn secondary_nonconcurrent_conflict() { let (device, queue) = gfx_dev_and_queue!(); - let allocator = StandardCommandBufferAllocator::new(device); + let cb_allocator = StandardCommandBufferAllocator::new(device); // Make a secondary CB that doesn't support simultaneous use. let builder = AutoCommandBufferBuilder::secondary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::MultipleSubmit, Default::default(), @@ -1018,7 +1020,7 @@ mod tests { { let mut builder = AutoCommandBufferBuilder::primary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::SimultaneousUse, ) @@ -1041,7 +1043,7 @@ mod tests { { let mut builder = AutoCommandBufferBuilder::primary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::SimultaneousUse, ) @@ -1050,7 +1052,7 @@ mod tests { let cb1 = builder.build().unwrap(); let mut builder = AutoCommandBufferBuilder::primary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::SimultaneousUse, ) @@ -1078,8 +1080,9 @@ mod tests { fn buffer_self_copy_overlapping() { let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let source = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_src: true, transfer_dst: true, @@ -1090,9 +1093,9 @@ mod tests { ) .unwrap(); - let allocator = StandardCommandBufferAllocator::new(device); + let cb_allocator = StandardCommandBufferAllocator::new(device); let mut builder = AutoCommandBufferBuilder::primary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) @@ -1129,8 +1132,9 @@ mod tests { fn buffer_self_copy_not_overlapping() { let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device.clone()); let source = CpuAccessibleBuffer::from_iter( - device.clone(), + &memory_allocator, BufferUsage { transfer_src: true, transfer_dst: true, @@ -1141,9 +1145,9 @@ mod tests { ) .unwrap(); - let allocator = StandardCommandBufferAllocator::new(device); + let cb_allocator = StandardCommandBufferAllocator::new(device); let mut builder = 
AutoCommandBufferBuilder::primary( - &allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) diff --git a/vulkano/src/command_buffer/synced/mod.rs b/vulkano/src/command_buffer/synced/mod.rs index 48224cb5..ee2fcbf5 100644 --- a/vulkano/src/command_buffer/synced/mod.rs +++ b/vulkano/src/command_buffer/synced/mod.rs @@ -377,6 +377,7 @@ mod tests { }, PersistentDescriptorSet, WriteDescriptorSet, }, + memory::allocator::StandardMemoryAllocator, pipeline::{layout::PipelineLayoutCreateInfo, PipelineBindPoint, PipelineLayout}, sampler::{Sampler, SamplerCreateInfo}, shader::ShaderStages, @@ -412,27 +413,28 @@ mod tests { unsafe { let (device, queue) = gfx_dev_and_queue!(); - let command_buffer_allocator = StandardCommandBufferAllocator::new(device); - let mut command_buffer_builder = AutoCommandBufferBuilder::primary( - &command_buffer_allocator, + let cb_allocator = StandardCommandBufferAllocator::new(device.clone()); + let mut cbb = AutoCommandBufferBuilder::primary( + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device); // Create a tiny test buffer let buffer = DeviceLocalBuffer::from_data( + &memory_allocator, 0u32, BufferUsage { transfer_dst: true, ..BufferUsage::empty() }, - &mut command_buffer_builder, + &mut cbb, ) .unwrap(); - command_buffer_builder - .build() + cbb.build() .unwrap() .execute(queue.clone()) .unwrap() @@ -445,7 +447,7 @@ mod tests { let secondary = (0..2) .map(|_| { let mut builder = AutoCommandBufferBuilder::secondary( - &command_buffer_allocator, + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::SimultaneousUse, Default::default(), @@ -461,7 +463,7 @@ mod tests { }) .collect::>(); - let allocs = command_buffer_allocator + let allocs = cb_allocator .allocate(queue.queue_family_index(), CommandBufferLevel::Primary, 2) .unwrap() .collect::>(); @@ -520,9 +522,8 @@ mod tests { unsafe { let (device, queue) = gfx_dev_and_queue!(); - let allocator = StandardCommandBufferAllocator::new(device.clone()); - - let builder_alloc = allocator + let cb_allocator = StandardCommandBufferAllocator::new(device.clone()); + let builder_alloc = cb_allocator .allocate(queue.queue_family_index(), CommandBufferLevel::Primary, 1) .unwrap() .next() @@ -535,8 +536,10 @@ mod tests { }, ) .unwrap(); + + let memory_allocator = StandardMemoryAllocator::new_default(device); let buf = CpuAccessibleBuffer::from_data( - device, + &memory_allocator, BufferUsage { vertex_buffer: true, ..BufferUsage::empty() diff --git a/vulkano/src/device/mod.rs b/vulkano/src/device/mod.rs index 6371ce5e..11202e36 100644 --- a/vulkano/src/device/mod.rs +++ b/vulkano/src/device/mod.rs @@ -114,9 +114,8 @@ pub use crate::{ fns::DeviceFunctions, }; use crate::{ - instance::Instance, - memory::{pool::StandardMemoryPool, ExternalMemoryHandleType}, - OomError, RequirementNotMet, RequiresOneOf, Version, VulkanError, VulkanObject, + instance::Instance, memory::ExternalMemoryHandleType, OomError, RequirementNotMet, + RequiresOneOf, Version, VulkanError, VulkanObject, }; use ash::vk::Handle; use parking_lot::Mutex; @@ -132,7 +131,7 @@ use std::{ ptr, sync::{ atomic::{AtomicU32, Ordering}, - Arc, Weak, + Arc, }, }; @@ -153,7 +152,6 @@ pub struct Device { api_version: Version, fns: DeviceFunctions, - standard_memory_pool: Mutex>, enabled_extensions: DeviceExtensions, enabled_features: Features, active_queue_family_indices: SmallVec<[u32; 2]>, @@ -410,7 +408,6 @@ impl Device { 
physical_device, api_version, fns, - standard_memory_pool: Mutex::new(Weak::new()), enabled_extensions, enabled_features, active_queue_family_indices, @@ -491,21 +488,6 @@ impl Device { &self.enabled_features } - /// Returns the standard memory pool used by default if you don't provide any other pool. - pub fn standard_memory_pool(self: &Arc) -> Arc { - let mut pool = self.standard_memory_pool.lock(); - - if let Some(p) = pool.upgrade() { - return p; - } - - // The weak pointer is empty, so we create the pool. - let new_pool = StandardMemoryPool::new(self.clone()); - *pool = Arc::downgrade(&new_pool); - - new_pool - } - /// Returns the current number of active [`DeviceMemory`] allocations the device has. /// /// [`DeviceMemory`]: crate::memory::DeviceMemory diff --git a/vulkano/src/image/attachment.rs b/vulkano/src/image/attachment.rs index 39e95219..55244837 100644 --- a/vulkano/src/image/attachment.rs +++ b/vulkano/src/image/attachment.rs @@ -14,15 +14,14 @@ use super::{ use crate::{ device::{Device, DeviceOwned}, format::Format, - image::{sys::UnsafeImageCreateInfo, ImageDimensions}, + image::{sys::UnsafeImageCreateInfo, ImageDimensions, ImageFormatInfo}, memory::{ - pool::{ - alloc_dedicated_with_exportable_fd, AllocFromRequirementsFilter, AllocLayout, - MappingRequirement, MemoryPoolAlloc, PotentialDedicatedAllocation, - StandardMemoryPoolAlloc, + allocator::{ + AllocationCreateInfo, AllocationType, MemoryAlloc, MemoryAllocatePreference, + MemoryAllocator, MemoryUsage, }, DedicatedAllocation, DeviceMemoryError, ExternalMemoryHandleType, - ExternalMemoryHandleTypes, MemoryPool, + ExternalMemoryHandleTypes, }, DeviceSize, }; @@ -65,12 +64,12 @@ use std::{ /// // TODO: forbid reading transient images outside render passes? #[derive(Debug)] -pub struct AttachmentImage> { +pub struct AttachmentImage { // Inner implementation. image: Arc, // Memory used to back the image. - memory: A, + memory: MemoryAlloc, // Layout to use when the image is used as a framebuffer attachment. // Must be either "depth-stencil optimal" or "color optimal". @@ -88,12 +87,12 @@ impl AttachmentImage { /// format as a framebuffer attachment. #[inline] pub fn new( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, ) -> Result, ImageCreationError> { AttachmentImage::new_impl( - device, + allocator, dimensions, 1, format, @@ -107,7 +106,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `with_usage`. #[inline] pub fn input_attachment( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, ) -> Result, ImageCreationError> { @@ -117,7 +116,7 @@ impl AttachmentImage { }; AttachmentImage::new_impl( - device, + allocator, dimensions, 1, format, @@ -132,12 +131,19 @@ impl AttachmentImage { /// > want a regular image. #[inline] pub fn multisampled( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, ) -> Result, ImageCreationError> { - AttachmentImage::new_impl(device, dimensions, 1, format, ImageUsage::empty(), samples) + AttachmentImage::new_impl( + allocator, + dimensions, + 1, + format, + ImageUsage::empty(), + samples, + ) } /// Same as `multisampled`, but creates an image that can be used as an input attachment. @@ -145,7 +151,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `multisampled_with_usage`. 
#[inline] pub fn multisampled_input_attachment( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, @@ -155,7 +161,7 @@ impl AttachmentImage { ..ImageUsage::empty() }; - AttachmentImage::new_impl(device, dimensions, 1, format, base_usage, samples) + AttachmentImage::new_impl(allocator, dimensions, 1, format, base_usage, samples) } /// Same as `new`, but lets you specify additional usages. @@ -165,12 +171,19 @@ impl AttachmentImage { /// addition to these two. #[inline] pub fn with_usage( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, usage: ImageUsage, ) -> Result, ImageCreationError> { - AttachmentImage::new_impl(device, dimensions, 1, format, usage, SampleCount::Sample1) + AttachmentImage::new_impl( + allocator, + dimensions, + 1, + format, + usage, + SampleCount::Sample1, + ) } /// Same as `with_usage`, but creates a multisampled image. @@ -179,13 +192,13 @@ impl AttachmentImage { /// > want a regular image. #[inline] pub fn multisampled_with_usage( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, usage: ImageUsage, ) -> Result, ImageCreationError> { - AttachmentImage::new_impl(device, dimensions, 1, format, usage, samples) + AttachmentImage::new_impl(allocator, dimensions, 1, format, usage, samples) } /// Same as `multisampled_with_usage`, but creates an image with multiple layers. @@ -194,14 +207,14 @@ impl AttachmentImage { /// > want a regular image. #[inline] pub fn multisampled_with_usage_with_layers( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], array_layers: u32, samples: SampleCount, format: Format, usage: ImageUsage, ) -> Result, ImageCreationError> { - AttachmentImage::new_impl(device, dimensions, array_layers, format, usage, samples) + AttachmentImage::new_impl(allocator, dimensions, array_layers, format, usage, samples) } /// Same as `new`, except that the image can later be sampled. @@ -209,7 +222,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `with_usage`. #[inline] pub fn sampled( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, ) -> Result, ImageCreationError> { @@ -219,7 +232,7 @@ impl AttachmentImage { }; AttachmentImage::new_impl( - device, + allocator, dimensions, 1, format, @@ -233,7 +246,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `with_usage`. #[inline] pub fn sampled_input_attachment( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, ) -> Result, ImageCreationError> { @@ -244,7 +257,7 @@ impl AttachmentImage { }; AttachmentImage::new_impl( - device, + allocator, dimensions, 1, format, @@ -261,7 +274,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `multisampled_with_usage`. 
#[inline] pub fn sampled_multisampled( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, @@ -271,7 +284,7 @@ impl AttachmentImage { ..ImageUsage::empty() }; - AttachmentImage::new_impl(device, dimensions, 1, format, base_usage, samples) + AttachmentImage::new_impl(allocator, dimensions, 1, format, base_usage, samples) } /// Same as `sampled_multisampled`, but creates an image that can be used as an input @@ -280,7 +293,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `multisampled_with_usage`. #[inline] pub fn sampled_multisampled_input_attachment( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, @@ -291,7 +304,7 @@ impl AttachmentImage { ..ImageUsage::empty() }; - AttachmentImage::new_impl(device, dimensions, 1, format, base_usage, samples) + AttachmentImage::new_impl(allocator, dimensions, 1, format, base_usage, samples) } /// Same as `new`, except that the image will be transient. @@ -302,7 +315,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `with_usage`. #[inline] pub fn transient( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, ) -> Result, ImageCreationError> { @@ -312,7 +325,7 @@ impl AttachmentImage { }; AttachmentImage::new_impl( - device, + allocator, dimensions, 1, format, @@ -326,7 +339,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `with_usage`. #[inline] pub fn transient_input_attachment( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], format: Format, ) -> Result, ImageCreationError> { @@ -337,7 +350,7 @@ impl AttachmentImage { }; AttachmentImage::new_impl( - device, + allocator, dimensions, 1, format, @@ -354,7 +367,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `multisampled_with_usage`. #[inline] pub fn transient_multisampled( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, @@ -364,7 +377,7 @@ impl AttachmentImage { ..ImageUsage::empty() }; - AttachmentImage::new_impl(device, dimensions, 1, format, base_usage, samples) + AttachmentImage::new_impl(allocator, dimensions, 1, format, base_usage, samples) } /// Same as `transient_multisampled`, but creates an image that can be used as an input @@ -373,7 +386,7 @@ impl AttachmentImage { /// > **Note**: This function is just a convenient shortcut for `multisampled_with_usage`. #[inline] pub fn transient_multisampled_input_attachment( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], samples: SampleCount, format: Format, @@ -384,19 +397,19 @@ impl AttachmentImage { ..ImageUsage::empty() }; - AttachmentImage::new_impl(device, dimensions, 1, format, base_usage, samples) + AttachmentImage::new_impl(allocator, dimensions, 1, format, base_usage, samples) } // All constructors dispatch to this one. 
fn new_impl( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], array_layers: u32, format: Format, base_usage: ImageUsage, samples: SampleCount, ) -> Result, ImageCreationError> { - let physical_device = device.physical_device(); + let physical_device = allocator.device().physical_device(); let device_properties = physical_device.properties(); if dimensions[0] > device_properties.max_framebuffer_height { @@ -417,7 +430,7 @@ impl AttachmentImage { } let image = UnsafeImage::new( - device.clone(), + allocator.device().clone(), UnsafeImageCreateInfo { dimensions: ImageDimensions::Dim2d { width: dimensions[0], @@ -434,48 +447,46 @@ impl AttachmentImage { ..Default::default() }, )?; + let requirements = image.memory_requirements(); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::NonLinear, + usage: MemoryUsage::GpuOnly, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Image(&image)), + ..Default::default() + }; - let mem_reqs = image.memory_requirements(); - let memory = MemoryPool::alloc_from_requirements( - &device.standard_memory_pool(), - &mem_reqs, - AllocLayout::Optimal, - MappingRequirement::DoNotMap, - Some(DedicatedAllocation::Image(&image)), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } - }, - )?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - unsafe { - image.bind_memory(memory.memory(), memory.offset())?; + match unsafe { allocator.allocate_unchecked(create_info) } { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + unsafe { image.bind_memory(alloc.device_memory(), alloc.offset()) }?; + + Ok(Arc::new(AttachmentImage { + image, + memory: alloc, + attachment_layout: if is_depth { + ImageLayout::DepthStencilAttachmentOptimal + } else { + ImageLayout::ColorAttachmentOptimal + }, + initialized: AtomicBool::new(false), + })) + } + Err(err) => Err(err.into()), } - - Ok(Arc::new(AttachmentImage { - image, - memory, - attachment_layout: if is_depth { - ImageLayout::DepthStencilAttachmentOptimal - } else { - ImageLayout::ColorAttachmentOptimal - }, - initialized: AtomicBool::new(false), - })) } pub fn new_with_exportable_fd( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: [u32; 2], array_layers: u32, format: Format, base_usage: ImageUsage, samples: SampleCount, ) -> Result, ImageCreationError> { - let physical_device = device.physical_device(); + let physical_device = allocator.device().physical_device(); let device_properties = physical_device.properties(); if dimensions[0] > device_properties.max_framebuffer_height { @@ -490,9 +501,37 @@ impl AttachmentImage { let aspects = format.aspects(); let is_depth = aspects.depth || aspects.stencil; + let usage = ImageUsage { + color_attachment: !is_depth, + depth_stencil_attachment: is_depth, + ..base_usage + }; + let external_memory_properties = allocator + .device() + .physical_device() + .image_format_properties(ImageFormatInfo { + format: Some(format), + usage, + external_memory_handle_type: Some(ExternalMemoryHandleType::OpaqueFd), + mutable_format: true, + ..Default::default() + }) + .unwrap() + .unwrap() + .external_memory_properties; + // VUID-VkExportMemoryAllocateInfo-handleTypes-00656 + assert!(external_memory_properties.exportable); + + // VUID-VkMemoryAllocateInfo-pNext-00639 + // 
Guaranteed because we always create a dedicated allocation + + let external_memory_handle_types = ExternalMemoryHandleTypes { + opaque_fd: true, + ..ExternalMemoryHandleTypes::empty() + }; let image = UnsafeImage::new( - device.clone(), + allocator.device().clone(), UnsafeImageCreateInfo { dimensions: ImageDimensions::Dim2d { width: dimensions[0], @@ -501,51 +540,43 @@ impl AttachmentImage { }, format: Some(format), samples, - usage: ImageUsage { - color_attachment: !is_depth, - depth_stencil_attachment: is_depth, - ..base_usage - }, - external_memory_handle_types: ExternalMemoryHandleTypes { - opaque_fd: true, - ..ExternalMemoryHandleTypes::empty() - }, + usage, + external_memory_handle_types, mutable_format: true, ..Default::default() }, )?; + let requirements = image.memory_requirements(); + let memory_type_index = allocator + .find_memory_type_index(requirements.memory_type_bits, MemoryUsage::GpuOnly.into()) + .expect("failed to find a suitable memory type"); - let mem_reqs = image.memory_requirements(); - let memory = alloc_dedicated_with_exportable_fd( - device.clone(), - &mem_reqs, - AllocLayout::Optimal, - MappingRequirement::DoNotMap, - DedicatedAllocation::Image(&image), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } - }, - )?; + match unsafe { + allocator.allocate_dedicated_unchecked( + memory_type_index, + requirements.size, + Some(DedicatedAllocation::Image(&image)), + external_memory_handle_types, + ) + } { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + unsafe { image.bind_memory(alloc.device_memory(), alloc.offset()) }?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - unsafe { - image.bind_memory(memory.memory(), memory.offset())?; + Ok(Arc::new(AttachmentImage { + image, + memory: alloc, + attachment_layout: if is_depth { + ImageLayout::DepthStencilAttachmentOptimal + } else { + ImageLayout::ColorAttachmentOptimal + }, + initialized: AtomicBool::new(false), + })) + } + Err(err) => Err(err.into()), } - - Ok(Arc::new(AttachmentImage { - image, - memory, - attachment_layout: if is_depth { - ImageLayout::DepthStencilAttachmentOptimal - } else { - ImageLayout::ColorAttachmentOptimal - }, - initialized: AtomicBool::new(false), - })) } /// Exports posix file descriptor for the allocated memory. @@ -553,21 +584,19 @@ impl AttachmentImage { #[inline] pub fn export_posix_fd(&self) -> Result { self.memory - .memory() + .device_memory() .export_fd(ExternalMemoryHandleType::OpaqueFd) } /// Return the size of the allocated memory (used e.g. with cuda). 
#[inline] pub fn mem_size(&self) -> DeviceSize { - self.memory.memory().allocation_size() + self.memory.device_memory().allocation_size() } } -unsafe impl ImageAccess for AttachmentImage -where - A: MemoryPoolAlloc, -{ +unsafe impl ImageAccess for AttachmentImage { + #[inline] fn inner(&self) -> ImageInner<'_> { ImageInner { image: &self.image, @@ -578,14 +607,17 @@ where } } + #[inline] fn initial_layout_requirement(&self) -> ImageLayout { self.attachment_layout } + #[inline] fn final_layout_requirement(&self) -> ImageLayout { self.attachment_layout } + #[inline] fn descriptor_layouts(&self) -> Option { Some(ImageDescriptorLayouts { storage_image: ImageLayout::General, @@ -595,45 +627,40 @@ where }) } + #[inline] unsafe fn layout_initialized(&self) { self.initialized.store(true, Ordering::SeqCst); } + #[inline] fn is_layout_initialized(&self) -> bool { self.initialized.load(Ordering::SeqCst) } } -unsafe impl DeviceOwned for AttachmentImage { +unsafe impl DeviceOwned for AttachmentImage { + #[inline] fn device(&self) -> &Arc { self.image.device() } } -unsafe impl ImageContent

<P> for AttachmentImage<A>
-where
-    A: MemoryPoolAlloc,
-{
+unsafe impl<P> ImageContent<P>
for AttachmentImage { fn matches_format(&self) -> bool { true // FIXME: } } -impl PartialEq for AttachmentImage -where - A: MemoryPoolAlloc, -{ +impl PartialEq for AttachmentImage { + #[inline] fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() } } -impl Eq for AttachmentImage where A: MemoryPoolAlloc {} +impl Eq for AttachmentImage {} -impl Hash for AttachmentImage -where - A: MemoryPoolAlloc, -{ +impl Hash for AttachmentImage { fn hash(&self, state: &mut H) { self.inner().hash(state); } @@ -641,24 +668,29 @@ where #[cfg(test)] mod tests { - use super::AttachmentImage; - use crate::format::Format; + use super::*; + use crate::memory::allocator::StandardMemoryAllocator; #[test] fn create_regular() { let (device, _) = gfx_dev_and_queue!(); - let _img = AttachmentImage::new(device, [32, 32], Format::R8G8B8A8_UNORM).unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device); + let _img = + AttachmentImage::new(&memory_allocator, [32, 32], Format::R8G8B8A8_UNORM).unwrap(); } #[test] fn create_transient() { let (device, _) = gfx_dev_and_queue!(); - let _img = AttachmentImage::transient(device, [32, 32], Format::R8G8B8A8_UNORM).unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device); + let _img = AttachmentImage::transient(&memory_allocator, [32, 32], Format::R8G8B8A8_UNORM) + .unwrap(); } #[test] fn d16_unorm_always_supported() { let (device, _) = gfx_dev_and_queue!(); - let _img = AttachmentImage::new(device, [32, 32], Format::D16_UNORM).unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device); + let _img = AttachmentImage::new(&memory_allocator, [32, 32], Format::D16_UNORM).unwrap(); } } diff --git a/vulkano/src/image/immutable.rs b/vulkano/src/image/immutable.rs index f9cd3b80..76192ef1 100644 --- a/vulkano/src/image/immutable.rs +++ b/vulkano/src/image/immutable.rs @@ -22,11 +22,11 @@ use crate::{ format::Format, image::sys::UnsafeImageCreateInfo, memory::{ - pool::{ - AllocFromRequirementsFilter, AllocLayout, MappingRequirement, MemoryPoolAlloc, - PotentialDedicatedAllocation, StandardMemoryPoolAlloc, + allocator::{ + AllocationCreateInfo, AllocationCreationError, AllocationType, MemoryAlloc, + MemoryAllocatePreference, MemoryAllocator, MemoryUsage, }, - DedicatedAllocation, DeviceMemoryError, MemoryPool, + DedicatedAllocation, }, sampler::Filter, sync::Sharing, @@ -44,10 +44,10 @@ use std::{ /// but then you must only ever read from it. // TODO: type (2D, 3D, array, etc.) as template parameter #[derive(Debug)] -pub struct ImmutableImage> { +pub struct ImmutableImage { image: Arc, dimensions: ImageDimensions, - _memory: A, + _memory: MemoryAlloc, layout: ImageLayout, } @@ -105,7 +105,7 @@ impl ImmutableImage { /// Returns two things: the image, and a special access that should be used for the initial /// upload to the image. 
pub fn uninitialized( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: ImageDimensions, format: Format, mip_levels: impl Into, @@ -118,7 +118,7 @@ impl ImmutableImage { let queue_family_indices: SmallVec<[_; 4]> = queue_family_indices.into_iter().collect(); let image = UnsafeImage::new( - device.clone(), + allocator.device().clone(), UnsafeImageCreateInfo { dimensions, format: Some(format), @@ -140,39 +140,37 @@ impl ImmutableImage { ..Default::default() }, )?; + let requirements = image.memory_requirements(); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::NonLinear, + usage: MemoryUsage::GpuOnly, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Image(&image)), + ..Default::default() + }; - let mem_reqs = image.memory_requirements(); - let memory = MemoryPool::alloc_from_requirements( - &device.standard_memory_pool(), - &mem_reqs, - AllocLayout::Optimal, - MappingRequirement::DoNotMap, - Some(DedicatedAllocation::Image(&image)), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } - }, - )?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - unsafe { - image.bind_memory(memory.memory(), memory.offset())?; + match unsafe { allocator.allocate_unchecked(create_info) } { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + unsafe { image.bind_memory(alloc.device_memory(), alloc.offset()) }?; + + let image = Arc::new(ImmutableImage { + image, + _memory: alloc, + dimensions, + layout, + }); + + let init = Arc::new(ImmutableImageInitialization { + image: image.clone(), + }); + + Ok((image, init)) + } + Err(err) => Err(err.into()), } - - let image = Arc::new(ImmutableImage { - image, - _memory: memory, - dimensions, - layout, - }); - - let init = Arc::new(ImmutableImageInitialization { - image: image.clone(), - }); - - Ok((image, init)) } /// Construct an ImmutableImage from the contents of `iter`. @@ -181,6 +179,7 @@ impl ImmutableImage { /// `iter` to it, then calling [`from_buffer`](ImmutableImage::from_buffer) to copy the data /// over. pub fn from_iter( + allocator: &(impl MemoryAllocator + ?Sized), iter: I, dimensions: ImageDimensions, mip_levels: MipmapsCount, @@ -194,7 +193,7 @@ impl ImmutableImage { A: CommandBufferAllocator, { let source = CpuAccessibleBuffer::from_iter( - command_buffer_builder.device().clone(), + allocator, BufferUsage { transfer_src: true, ..BufferUsage::empty() @@ -202,7 +201,9 @@ impl ImmutableImage { false, iter, )?; + ImmutableImage::from_buffer( + allocator, source, dimensions, mip_levels, @@ -221,6 +222,7 @@ impl ImmutableImage { /// `command_buffer_builder` can then be used to record other commands, built, and executed as /// normal. If it is not executed, the image contents will be left undefined. 
pub fn from_buffer( + allocator: &(impl MemoryAllocator + ?Sized), source: Arc, dimensions: ImageDimensions, mip_levels: MipmapsCount, @@ -258,7 +260,7 @@ impl ImmutableImage { let layout = ImageLayout::ShaderReadOnlyOptimal; let (image, initializer) = ImmutableImage::uninitialized( - source.device().clone(), + allocator, dimensions, format, mip_levels, @@ -292,16 +294,15 @@ impl ImmutableImage { } } -unsafe impl DeviceOwned for ImmutableImage { +unsafe impl DeviceOwned for ImmutableImage { + #[inline] fn device(&self) -> &Arc { self.image.device() } } -unsafe impl ImageAccess for ImmutableImage -where - A: MemoryPoolAlloc, -{ +unsafe impl ImageAccess for ImmutableImage { + #[inline] fn inner(&self) -> ImageInner<'_> { ImageInner { image: &self.image, @@ -312,18 +313,22 @@ where } } + #[inline] fn is_layout_initialized(&self) -> bool { true } + #[inline] fn initial_layout_requirement(&self) -> ImageLayout { self.layout } + #[inline] fn final_layout_requirement(&self) -> ImageLayout { self.layout } + #[inline] fn descriptor_layouts(&self) -> Option { Some(ImageDescriptorLayouts { storage_image: ImageLayout::General, @@ -334,82 +339,71 @@ where } } -unsafe impl ImageContent

<P> for ImmutableImage<A>
-where
-    A: MemoryPoolAlloc,
-{
+unsafe impl<P> ImageContent<P>
for ImmutableImage { fn matches_format(&self) -> bool { true // FIXME: } } -impl PartialEq for ImmutableImage -where - A: MemoryPoolAlloc, -{ +impl PartialEq for ImmutableImage { + #[inline] fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() } } -impl Eq for ImmutableImage where A: MemoryPoolAlloc {} +impl Eq for ImmutableImage {} -impl Hash for ImmutableImage -where - A: MemoryPoolAlloc, -{ +impl Hash for ImmutableImage { fn hash(&self, state: &mut H) { self.inner().hash(state); } } // Must not implement Clone, as that would lead to multiple `used` values. -pub struct ImmutableImageInitialization> { - image: Arc>, +pub struct ImmutableImageInitialization { + image: Arc, } -unsafe impl DeviceOwned for ImmutableImageInitialization { +unsafe impl DeviceOwned for ImmutableImageInitialization { + #[inline] fn device(&self) -> &Arc { self.image.device() } } -unsafe impl ImageAccess for ImmutableImageInitialization -where - A: MemoryPoolAlloc, -{ +unsafe impl ImageAccess for ImmutableImageInitialization { + #[inline] fn inner(&self) -> ImageInner<'_> { self.image.inner() } + #[inline] fn initial_layout_requirement(&self) -> ImageLayout { ImageLayout::Undefined } + #[inline] fn final_layout_requirement(&self) -> ImageLayout { self.image.layout } + #[inline] fn descriptor_layouts(&self) -> Option { None } } -impl PartialEq for ImmutableImageInitialization -where - A: MemoryPoolAlloc, -{ +impl PartialEq for ImmutableImageInitialization { + #[inline] fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() } } -impl Eq for ImmutableImageInitialization where A: MemoryPoolAlloc {} +impl Eq for ImmutableImageInitialization {} -impl Hash for ImmutableImageInitialization -where - A: MemoryPoolAlloc, -{ +impl Hash for ImmutableImageInitialization { fn hash(&self, state: &mut H) { self.inner().hash(state); } @@ -419,7 +413,7 @@ where #[derive(Clone, Debug)] pub enum ImmutableImageCreationError { ImageCreationError(ImageCreationError), - DeviceMemoryAllocationError(DeviceMemoryError), + AllocError(AllocationCreationError), CommandBufferBeginError(CommandBufferBeginError), /// The size of the provided source data is less than the required size for an image with the @@ -434,7 +428,7 @@ impl Error for ImmutableImageCreationError { fn source(&self) -> Option<&(dyn Error + 'static)> { match self { Self::ImageCreationError(err) => Some(err), - Self::DeviceMemoryAllocationError(err) => Some(err), + Self::AllocError(err) => Some(err), Self::CommandBufferBeginError(err) => Some(err), _ => None, } @@ -445,15 +439,15 @@ impl Display for ImmutableImageCreationError { fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), FmtError> { match self { Self::ImageCreationError(err) => err.fmt(f), - Self::DeviceMemoryAllocationError(err) => err.fmt(f), + Self::AllocError(err) => err.fmt(f), Self::CommandBufferBeginError(err) => err.fmt(f), - Self::SourceTooSmall { source_size, required_size, } => write!( f, - "the size of the provided source data ({} bytes) is less than the required size for an image of the given format and dimensions ({} bytes)", + "the size of the provided source data ({} bytes) is less than the required size \ + for an image of the given format and dimensions ({} bytes)", source_size, required_size, ), } @@ -466,15 +460,15 @@ impl From for ImmutableImageCreationError { } } -impl From for ImmutableImageCreationError { - fn from(err: DeviceMemoryError) -> Self { - Self::DeviceMemoryAllocationError(err) +impl From for ImmutableImageCreationError { + fn from(err: AllocationCreationError) 
-> Self { + Self::AllocError(err) } } impl From for ImmutableImageCreationError { fn from(err: OomError) -> Self { - Self::DeviceMemoryAllocationError(err.into()) + Self::AllocError(err.into()) } } diff --git a/vulkano/src/image/mod.rs b/vulkano/src/image/mod.rs index 38cdc64e..9dcb7575 100644 --- a/vulkano/src/image/mod.rs +++ b/vulkano/src/image/mod.rs @@ -915,6 +915,7 @@ mod tests { }, format::Format, image::{ImageAccess, ImageDimensions, ImmutableImage, MipmapsCount}, + memory::allocator::StandardMemoryAllocator, }; #[test] @@ -1021,14 +1022,15 @@ mod tests { fn mipmap_working_immutable_image() { let (device, queue) = gfx_dev_and_queue!(); - let command_buffer_allocator = StandardCommandBufferAllocator::new(device); - let mut command_buffer_builder = AutoCommandBufferBuilder::primary( - &command_buffer_allocator, + let cb_allocator = StandardCommandBufferAllocator::new(device.clone()); + let mut cbb = AutoCommandBufferBuilder::primary( + &cb_allocator, queue.queue_family_index(), CommandBufferUsage::OneTimeSubmit, ) .unwrap(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let dimensions = ImageDimensions::Dim2d { width: 512, height: 512, @@ -1040,11 +1042,12 @@ mod tests { vec.resize(512 * 512, 0u8); let image = ImmutableImage::from_iter( + &memory_allocator, vec.into_iter(), dimensions, MipmapsCount::One, Format::R8_UNORM, - &mut command_buffer_builder, + &mut cbb, ) .unwrap(); assert_eq!(image.mip_levels(), 1); @@ -1055,11 +1058,12 @@ mod tests { vec.resize(512 * 512, 0u8); let image = ImmutableImage::from_iter( + &memory_allocator, vec.into_iter(), dimensions, MipmapsCount::Log2, Format::R8_UNORM, - &mut command_buffer_builder, + &mut cbb, ) .unwrap(); assert_eq!(image.mip_levels(), 10); diff --git a/vulkano/src/image/storage.rs b/vulkano/src/image/storage.rs index cb8cc291..971273ca 100644 --- a/vulkano/src/image/storage.rs +++ b/vulkano/src/image/storage.rs @@ -14,14 +14,14 @@ use super::{ use crate::{ device::{Device, DeviceOwned, Queue}, format::Format, - image::{sys::UnsafeImageCreateInfo, view::ImageView}, + image::{sys::UnsafeImageCreateInfo, view::ImageView, ImageFormatInfo}, memory::{ - pool::{ - alloc_dedicated_with_exportable_fd, AllocFromRequirementsFilter, AllocLayout, - MappingRequirement, MemoryPoolAlloc, PotentialDedicatedAllocation, StandardMemoryPool, + allocator::{ + AllocationCreateInfo, AllocationType, MemoryAlloc, MemoryAllocatePreference, + MemoryAllocator, MemoryUsage, }, DedicatedAllocation, DeviceMemoryError, ExternalMemoryHandleType, - ExternalMemoryHandleTypes, MemoryPool, + ExternalMemoryHandleTypes, }, sync::Sharing, DeviceSize, @@ -36,15 +36,12 @@ use std::{ /// General-purpose image in device memory. Can be used for any usage, but will be slower than a /// specialized image. #[derive(Debug)] -pub struct StorageImage> -where - A: MemoryPool, -{ +pub struct StorageImage { // Inner implementation. image: Arc, // Memory used to back the image. - memory: PotentialDedicatedAllocation, + memory: MemoryAlloc, // Dimensions of the image. dimensions: ImageDimensions, @@ -53,7 +50,7 @@ where impl StorageImage { /// Creates a new image with the given dimensions and format. 
pub fn new( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: ImageDimensions, format: Format, queue_family_indices: impl IntoIterator, @@ -78,7 +75,7 @@ impl StorageImage { let flags = ImageCreateFlags::empty(); StorageImage::with_usage( - device, + allocator, dimensions, format, usage, @@ -89,7 +86,7 @@ impl StorageImage { /// Same as `new`, but allows specifying the usage. pub fn with_usage( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: ImageDimensions, format: Format, usage: ImageUsage, @@ -99,7 +96,7 @@ impl StorageImage { let queue_family_indices: SmallVec<[_; 4]> = queue_family_indices.into_iter().collect(); let image = UnsafeImage::new( - device.clone(), + allocator.device().clone(), UnsafeImageCreateInfo { dimensions, format: Some(format), @@ -116,36 +113,34 @@ impl StorageImage { ..Default::default() }, )?; + let requirements = image.memory_requirements(); + let create_info = AllocationCreateInfo { + requirements, + allocation_type: AllocationType::NonLinear, + usage: MemoryUsage::GpuOnly, + allocate_preference: MemoryAllocatePreference::Unknown, + dedicated_allocation: Some(DedicatedAllocation::Image(&image)), + ..Default::default() + }; - let mem_reqs = image.memory_requirements(); - let memory = MemoryPool::alloc_from_requirements( - &device.standard_memory_pool(), - &mem_reqs, - AllocLayout::Optimal, - MappingRequirement::DoNotMap, - Some(DedicatedAllocation::Image(&image)), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } - }, - )?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - unsafe { - image.bind_memory(memory.memory(), memory.offset())?; + match unsafe { allocator.allocate_unchecked(create_info) } { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + unsafe { image.bind_memory(alloc.device_memory(), alloc.offset()) }?; + + Ok(Arc::new(StorageImage { + image, + memory: alloc, + dimensions, + })) + } + Err(err) => Err(err.into()), } - - Ok(Arc::new(StorageImage { - image, - memory, - dimensions, - })) } pub fn new_with_exportable_fd( - device: Arc, + allocator: &(impl MemoryAllocator + ?Sized), dimensions: ImageDimensions, format: Format, usage: ImageUsage, @@ -154,8 +149,35 @@ impl StorageImage { ) -> Result, ImageCreationError> { let queue_family_indices: SmallVec<[_; 4]> = queue_family_indices.into_iter().collect(); + let external_memory_properties = allocator + .device() + .physical_device() + .image_format_properties(ImageFormatInfo { + format: Some(format), + image_type: dimensions.image_type(), + usage, + external_memory_handle_type: Some(ExternalMemoryHandleType::OpaqueFd), + mutable_format: flags.mutable_format, + cube_compatible: flags.cube_compatible, + array_2d_compatible: flags.array_2d_compatible, + block_texel_view_compatible: flags.block_texel_view_compatible, + ..Default::default() + }) + .unwrap() + .unwrap() + .external_memory_properties; + // VUID-VkExportMemoryAllocateInfo-handleTypes-00656 + assert!(external_memory_properties.exportable); + + // VUID-VkMemoryAllocateInfo-pNext-00639 + // Guaranteed because we always create a dedicated allocation + + let external_memory_handle_types = ExternalMemoryHandleTypes { + opaque_fd: true, + ..ExternalMemoryHandleTypes::empty() + }; let image = UnsafeImage::new( - device.clone(), + allocator.device().clone(), UnsafeImageCreateInfo { dimensions, format: Some(format), @@ 
-165,10 +187,7 @@ impl StorageImage { } else { Sharing::Exclusive }, - external_memory_handle_types: ExternalMemoryHandleTypes { - opaque_fd: true, - ..ExternalMemoryHandleTypes::empty() - }, + external_memory_handle_types, mutable_format: flags.mutable_format, cube_compatible: flags.cube_compatible, array_2d_compatible: flags.array_2d_compatible, @@ -176,37 +195,38 @@ impl StorageImage { ..Default::default() }, )?; + let requirements = image.memory_requirements(); + let memory_type_index = allocator + .find_memory_type_index(requirements.memory_type_bits, MemoryUsage::GpuOnly.into()) + .expect("failed to find a suitable memory type"); - let mem_reqs = image.memory_requirements(); - let memory = alloc_dedicated_with_exportable_fd( - device, - &mem_reqs, - AllocLayout::Optimal, - MappingRequirement::DoNotMap, - DedicatedAllocation::Image(&image), - |t| { - if t.property_flags.device_local { - AllocFromRequirementsFilter::Preferred - } else { - AllocFromRequirementsFilter::Allowed - } - }, - )?; - debug_assert!((memory.offset() % mem_reqs.alignment) == 0); - unsafe { - image.bind_memory(memory.memory(), memory.offset())?; + match unsafe { + allocator.allocate_dedicated_unchecked( + memory_type_index, + requirements.size, + Some(DedicatedAllocation::Image(&image)), + external_memory_handle_types, + ) + } { + Ok(alloc) => { + debug_assert!(alloc.offset() % requirements.alignment == 0); + debug_assert!(alloc.size() == requirements.size); + unsafe { image.bind_memory(alloc.device_memory(), alloc.offset()) }?; + + Ok(Arc::new(StorageImage { + image, + memory: alloc, + dimensions, + })) + } + Err(err) => Err(err.into()), } - - Ok(Arc::new(StorageImage { - image, - memory, - dimensions, - })) } /// Allows the creation of a simple 2D general purpose image view from `StorageImage`. #[inline] pub fn general_purpose_image_view( + allocator: &(impl MemoryAllocator + ?Sized), queue: Arc, size: [u32; 2], format: Format, @@ -219,7 +239,7 @@ impl StorageImage { }; let flags = ImageCreateFlags::empty(); let image_result = StorageImage::with_usage( - queue.device().clone(), + allocator, dims, format, usage, @@ -244,30 +264,26 @@ impl StorageImage { #[inline] pub fn export_posix_fd(&self) -> Result { self.memory - .memory() + .device_memory() .export_fd(ExternalMemoryHandleType::OpaqueFd) } /// Return the size of the allocated memory (used e.g. with cuda). #[inline] pub fn mem_size(&self) -> DeviceSize { - self.memory.memory().allocation_size() + self.memory.device_memory().allocation_size() } } -unsafe impl DeviceOwned for StorageImage -where - A: MemoryPool, -{ +unsafe impl DeviceOwned for StorageImage { + #[inline] fn device(&self) -> &Arc { self.image.device() } } -unsafe impl ImageAccess for StorageImage -where - A: MemoryPool, -{ +unsafe impl ImageAccess for StorageImage { + #[inline] fn inner(&self) -> ImageInner<'_> { ImageInner { image: &self.image, @@ -278,14 +294,17 @@ where } } + #[inline] fn initial_layout_requirement(&self) -> ImageLayout { ImageLayout::General } + #[inline] fn final_layout_requirement(&self) -> ImageLayout { ImageLayout::General } + #[inline] fn descriptor_layouts(&self) -> Option { Some(ImageDescriptorLayouts { storage_image: ImageLayout::General, @@ -296,30 +315,22 @@ where } } -unsafe impl ImageContent

<P> for StorageImage<A>
-where
-    A: MemoryPool,
-{
+unsafe impl<P> ImageContent<P>
for StorageImage { fn matches_format(&self) -> bool { true // FIXME: } } -impl PartialEq for StorageImage -where - A: MemoryPool, -{ +impl PartialEq for StorageImage { + #[inline] fn eq(&self, other: &Self) -> bool { self.inner() == other.inner() } } -impl Eq for StorageImage where A: MemoryPool {} +impl Eq for StorageImage {} -impl Hash for StorageImage -where - A: MemoryPool, -{ +impl Hash for StorageImage { fn hash(&self, state: &mut H) { self.inner().hash(state); } @@ -327,20 +338,15 @@ where #[cfg(test)] mod tests { - use super::StorageImage; - use crate::{ - format::Format, - image::{ - view::ImageViewCreationError, ImageAccess, ImageCreationError, ImageDimensions, - ImageUsage, - }, - }; + use super::*; + use crate::{image::view::ImageViewCreationError, memory::allocator::StandardMemoryAllocator}; #[test] fn create() { let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let _img = StorageImage::new( - device, + &memory_allocator, ImageDimensions::Dim2d { width: 32, height: 32, @@ -354,7 +360,8 @@ mod tests { #[test] fn create_general_purpose_image_view() { - let (_device, queue) = gfx_dev_and_queue!(); + let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); let usage = ImageUsage { transfer_src: true, transfer_dst: true, @@ -362,6 +369,7 @@ mod tests { ..ImageUsage::empty() }; let img_view = StorageImage::general_purpose_image_view( + &memory_allocator, queue, [32, 32], Format::R8G8B8A8_UNORM, @@ -373,13 +381,15 @@ mod tests { #[test] fn create_general_purpose_image_view_failed() { - let (_device, queue) = gfx_dev_and_queue!(); + let (device, queue) = gfx_dev_and_queue!(); + let memory_allocator = StandardMemoryAllocator::new_default(device); // Not valid for image view... let usage = ImageUsage { transfer_src: true, ..ImageUsage::empty() }; let img_result = StorageImage::general_purpose_image_view( + &memory_allocator, queue, [32, 32], Format::R8G8B8A8_UNORM, diff --git a/vulkano/src/image/sys.rs b/vulkano/src/image/sys.rs index a89da51c..3bc0f381 100644 --- a/vulkano/src/image/sys.rs +++ b/vulkano/src/image/sys.rs @@ -27,8 +27,8 @@ use crate::{ SparseImageFormatProperties, }, memory::{ - DeviceMemory, DeviceMemoryError, ExternalMemoryHandleType, ExternalMemoryHandleTypes, - MemoryRequirements, + allocator::AllocationCreationError, DeviceMemory, ExternalMemoryHandleType, + ExternalMemoryHandleTypes, MemoryRequirements, }, range_map::RangeMap, sync::{AccessError, CurrentAccess, Sharing}, @@ -1917,11 +1917,11 @@ impl Default for UnsafeImageCreateInfo { } } -/// Error that can happen when creating an instance. +/// Error that can happen when creating an image. #[derive(Clone, Debug, PartialEq, Eq)] pub enum ImageCreationError { /// Allocating memory failed. 
- AllocError(DeviceMemoryError), + AllocError(AllocationCreationError), RequirementNotMet { required_for: &'static str, @@ -2174,12 +2174,12 @@ impl Display for ImageCreationError { impl From for ImageCreationError { fn from(err: OomError) -> Self { - Self::AllocError(DeviceMemoryError::OomError(err)) + Self::AllocError(err.into()) } } -impl From for ImageCreationError { - fn from(err: DeviceMemoryError) -> Self { +impl From for ImageCreationError { + fn from(err: AllocationCreationError) -> Self { Self::AllocError(err) } } @@ -2187,8 +2187,12 @@ impl From for ImageCreationError { impl From for ImageCreationError { fn from(err: VulkanError) -> Self { match err { - err @ VulkanError::OutOfHostMemory => Self::AllocError(err.into()), - err @ VulkanError::OutOfDeviceMemory => Self::AllocError(err.into()), + VulkanError::OutOfHostMemory => { + Self::AllocError(AllocationCreationError::OutOfHostMemory) + } + VulkanError::OutOfDeviceMemory => { + Self::AllocError(AllocationCreationError::OutOfDeviceMemory) + } _ => panic!("unexpected error: {:?}", err), } } diff --git a/vulkano/src/memory/allocator/mod.rs b/vulkano/src/memory/allocator/mod.rs new file mode 100644 index 00000000..933a7f70 --- /dev/null +++ b/vulkano/src/memory/allocator/mod.rs @@ -0,0 +1,1593 @@ +// Copyright (c) 2016 The vulkano developers +// Licensed under the Apache License, Version 2.0 +// or the MIT +// license , +// at your option. All files in the project carrying such +// notice may not be copied, modified, or distributed except +// according to those terms. + +//! In Vulkan, suballocation of [`DeviceMemory`] is left to the application, because every +//! application has slightly different needs and one can not incorporate an allocator into the +//! driver that would perform well in all cases. Vulkano stays true to this sentiment, but aims to +//! reduce the burden on the user as much as possible. You have a toolbox of configurable +//! [suballocators] to choose from that cover all allocation algorithms, which you can compose into +//! any kind of [hierarchy] you wish. This way you have maximum flexibility while still only using +//! a few `DeviceMemory` blocks and not writing any of the very error-prone code. +//! +//! If you just want to allocate memory and don't have any special needs, look no further than the +//! [`StandardMemoryAllocator`]. +//! +//! # Why not just allocate `DeviceMemory`? +//! +//! But the driver has an allocator! Otherwise you wouldn't be able to allocate `DeviceMemory`, +//! right? Indeed, but that allocation is very expensive. Not only that, there is also a pretty low +//! limit on the number of allocations by the drivers. See, everything in Vulkan tries to keep you +//! away from allocating `DeviceMemory` too much. These limits are used by the implementation to +//! optimize on its end, while the application optimizes on the other end. +//! +//! # Alignment +//! +//! At the end of the day, memory needs to be backed by hardware somehow. A *memory cell* stores a +//! single *bit*, bits are grouped into *bytes* and bytes are grouped into *words*. Intuitively, it +//! should make sense that accessing single bits at a time would be very inefficient. That is why +//! computers always access a whole word of memory at once, at least. That means that if you tried +//! to do an unaligned access, you would need to access twice the number of memory locations. +//! +//! Example aligned access, performing bitwise NOT on the (64-bit) word at offset 0x08: +//! +//! ```plain +//! | 08 | 10 | 18 +//! 
----+-------------------------+-------------------------+----
+//! ••• | 35 35 35 35 35 35 35 35 | 01 23 45 67 89 ab cd ef | •••
+//! ----+-------------------------+-------------------------+----
+//!     ,            |            ,
+//!     +------------|------------+
+//!     '            v            '
+//! ----+-------------------------+-------------------------+----
+//! ••• | ca ca ca ca ca ca ca ca | 01 23 45 67 89 ab cd ef | •••
+//! ----+-------------------------+-------------------------+----
+//! ```
+//!
+//! Same example as above, but this time unaligned with a word at offset 0x0a:
+//!
+//! ```plain
+//!     | 08    0a                | 10                      | 18
+//! ----+-------------------------+-------------------------+----
+//! ••• | cd ef 35 35 35 35 35 35 | 35 35 01 23 45 67 89 ab | •••
+//! ----+-------------------------+-------------------------+----
+//!                  ,            |            ,
+//!                  +------------|------------+
+//!                  '            v            '
+//! ----+-------------------------+-------------------------+----
+//! ••• | cd ef ca ca ca ca ca ca | ca ca 01 23 45 67 89 ab | •••
+//! ----+-------------------------+-------------------------+----
+//! ```
+//!
+//! As you can see, in the unaligned case the hardware would need to read both the word at offset
+//! 0x08 and the word at offset 0x10, and then shift the bits from one register into the other.
+//! Safe to say it should be avoided, and this is why we need alignment. This example also goes to
+//! show how inefficient unaligned writes are. Say you pieced together your word as described, and
+//! now you want to perform the bitwise NOT and write the result back. Difficult, isn't it? That's
+//! because even though the chunks occupy different ranges in memory, they are still said to
+//! *alias* each other: writing to one memory location would overwrite 2 or more different chunks
+//! of data.
+//!
+//! ## Pages
+//!
+//! It doesn't stop at the word, though. Words are further grouped into *pages*. These are
+//! typically power-of-two multiples of the word size, much like words are typically powers of two
+//! themselves. You can easily extend the concepts from the previous examples to pages if you
+//! think of the examples as having a page size of 1 word. Two resources are said to alias if they
+//! share a page, and should therefore be aligned to the page size. What the page size is depends
+//! on the context, and a computer might have multiple different ones for different parts of
+//! hardware.
+//!
+//! ## Memory requirements
+//!
+//! A Vulkan device might have any number of reasons it would want certain alignments for certain
+//! resources. For example, the device might have different caches for different types of
+//! resources, which have different page sizes. Maybe the device wants to store images in a
+//! different cache than buffers, one which needs a different alignment. Or maybe images of
+//! different layouts require different alignment, or buffers with different usage/mapping do. The
+//! specifics don't matter in the end; this just goes to illustrate the point. This is why memory
+//! requirements in Vulkan vary not only with the Vulkan implementation, but also with the type of
+//! resource.
+//!
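+//! To put a number on it, satisfying an alignment requirement comes down to rounding an offset
+//! up to the next multiple of a power-of-two alignment, which is simple bit arithmetic. The
+//! following is a minimal sketch for illustration only; the `align_up` helper is hypothetical and
+//! not an item provided by this module:
+//!
+//! ```
+//! /// Rounds `offset` up to the nearest multiple of `alignment`, which
+//! /// must be a power of two (Vulkan alignments always are).
+//! fn align_up(offset: u64, alignment: u64) -> u64 {
+//!     debug_assert!(alignment.is_power_of_two());
+//!     (offset + alignment - 1) & !(alignment - 1)
+//! }
+//!
+//! // A suballocation requested at offset 13 with an alignment of 8 must start
+//! // at offset 16, leaving 3 bytes of padding before it.
+//! assert_eq!(align_up(13, 8), 16);
+//! // An already-aligned offset needs no padding.
+//! assert_eq!(align_up(16, 8), 16);
+//! ```
+//!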
+//! the memory requirements only apply to the resource they are for, while the buffer-image
+//! granularity applies to two neighboring resources. For example, you might create two buffers,
+//! which might have two different memory requirements, but as long as those are satisfied, you
+//! can put these buffers cheek to cheek. On the other hand, if one of them is an (optimal layout)
+//! image, then they must not share any page, whose size is given by this granularity. The Vulkan
+//! implementation can use this for additional optimizations if it needs to, or report a
+//! granularity of 1.
+//!
+//! # Fragmentation
+//!
+//! Memory fragmentation refers to the wastage of memory that results from alignment requirements
+//! and/or dynamic memory allocation. As such, some level of fragmentation is inevitable.
+//! Different allocation algorithms each have their own characteristics and trade-offs in
+//! relation to fragmentation.
+//!
+//! ## Internal fragmentation
+//!
+//! This type of fragmentation arises from alignment requirements. These might be imposed by the
+//! Vulkan implementation or the application itself.
+//!
+//! Say for example your allocations need to be aligned to 64 B, then any allocation whose size
+//! is not a multiple of the alignment will need padding at the end:
+//!
+//! ```plain
+//!     | 0x040            | 0x080            | 0x0c0            | 0x100
+//! ----+------------------+------------------+------------------+--------
+//!     | ############     | ################ | ########         | #######
+//! ••• | ### 48 B ###     | ##### 64 B ##### | # 32 B #         | ### •••
+//!     | ############     | ################ | ########         | #######
+//! ----+------------------+------------------+------------------+--------
+//! ```
+//!
+//! If this alignment is imposed by the Vulkan implementation, then there's nothing one can do
+//! about this. Simply put, that space is unusable. Nor should one want to do anything about it:
+//! these requirements exist for very good reasons, as described in further detail in the
+//! previous sections. They prevent resources from aliasing so that performance is optimal.
+//!
+//! It might seem strange that the application would want to cause internal fragmentation itself,
+//! but this is often a good trade-off to reduce or even completely eliminate external
+//! fragmentation. Internal fragmentation is very predictable, which makes it easier to deal
+//! with.
+//!
+//! ## External fragmentation
+//!
+//! With external fragmentation, what happens is that while the allocations might be using their
+//! own memory totally efficiently, the way they are arranged in relation to each other would
+//! prevent a new contiguous chunk of memory from being allocated even though there is enough
+//! free space left. That is why this fragmentation is said to be external to the allocations.
+//! Also, the allocations together with the fragments in-between add overhead both in terms of
+//! space and time to the allocator, because it needs to keep track of more things overall.
+//!
+//! As an example, take these 4 allocations within some block, with the rest of the block assumed
+//! to be full:
+//!
+//! ```plain
+//! +-----+-------------------+-------+-----------+-- - - --+
+//! |     |                   |       |           |         |
+//! |  A  |         B         |   C   |     D     |   •••   |
+//! |     |                   |       |           |         |
+//! +-----+-------------------+-------+-----------+-- - - --+
+//! ```
+//!
+//! The allocations were all done in order, and naturally there is no fragmentation at this point.
+//! Now if we free B and D, since these are done out of order, we will be left with holes between
the other allocations, and we won't be able to fit allocation E anywhere: +//! +//! ```plain +//! +-----+-------------------+-------+-----------+-- - - --+ +-------------------------+ +//! | | | | | | ? | | +//! | A | | C | | ••• | <== | E | +//! | | | | | | | | +//! +-----+-------------------+-------+-----------+-- - - --+ +-------------------------+ +//! ``` +//! +//! So fine, we use a different block for E, and just use this block for allocations that fit: +//! +//! ```plain +//! +-----+---+-----+---------+-------+-----+-----+-- - - --+ +//! | | | | | | | | | +//! | A | H | I | J | C | F | G | ••• | +//! | | | | | | | | | +//! +-----+---+-----+---------+-------+-----+-----+-- - - --+ +//! ``` +//! +//! Sure, now let's free some shall we? And voilà, the problem just became much worse: +//! +//! ```plain +//! +-----+---+-----+---------+-------+-----+-----+-- - - --+ +//! | | | | | | | | | +//! | A | | I | J | | F | | ••• | +//! | | | | | | | | | +//! +-----+---+-----+---------+-------+-----+-----+-- - - --+ +//! ``` +//! +//! # Leakage +//! +//! Memory leaks happen when allocations are kept alive past their shelf life. This most often +//! occurs because of [cyclic references]. If you have structures that have cycles, then make sure +//! you read the documentation for [`Arc`]/[`Rc`] carefully to avoid memory leaks. You can also +//! introduce memory leaks willingly by using [`mem::forget`] or [`Box::leak`] to name a few. In +//! all of these examples the memory can never be reclaimed, but that doesn't have to be the case +//! for something to be considered a leak. Say for example you have a [region] which you +//! suballocate, and at some point you drop all the suballocations. When that happens, the region +//! can be returned (freed) to the next level up the hierarchy, or it can be reused by another +//! suballocator. But if you happen to keep alive just one suballocation for the duration of the +//! program for instance, then the whole region is also kept as it is for that time (and keep in +//! mind this bubbles up the hierarchy). Therefore, for the program, that memory might be a leak +//! depending on the allocator, because some allocators wouldn't be able to reuse the entire rest +//! of the region. You must always consider the lifetime of your resources when choosing the +//! appropriate allocator. +//! +//! [suballocators]: Suballocator +//! [hierarchy]: Suballocator#memory-hierarchies +//! [buffer-image granularity]: crate::device::Properties::buffer_image_granularity +//! [cyclic references]: Arc#breaking-cycles-with-weak +//! [`Rc`]: std::rc::Rc +//! [`mem::forget`]: std::mem::forget +//! 
[region]: Suballocator#regions
+
+pub mod suballocator;
+
+use self::array_vec::ArrayVec;
+pub use self::suballocator::{
+    AllocationType, BuddyAllocator, BumpAllocator, FreeListAllocator, MemoryAlloc, PoolAllocator,
+    SuballocationCreateInfo, SuballocationCreationError, Suballocator,
+};
+use super::{
+    DedicatedAllocation, DeviceMemory, ExternalMemoryHandleTypes, MemoryAllocateFlags,
+    MemoryAllocateInfo, MemoryProperties, MemoryPropertyFlags, MemoryRequirements, MemoryType,
+};
+use crate::{
+    device::{Device, DeviceOwned},
+    DeviceSize, OomError, RequirementNotMet, RequiresOneOf, Version, VulkanError,
+};
+use ash::vk::{MAX_MEMORY_HEAPS, MAX_MEMORY_TYPES};
+use parking_lot::RwLock;
+use std::{
+    error::Error,
+    fmt::{self, Display},
+    sync::Arc,
+};
+
+const B: DeviceSize = 1;
+const K: DeviceSize = 1024 * B;
+const M: DeviceSize = 1024 * K;
+const G: DeviceSize = 1024 * M;
+
+/// General-purpose memory allocators which allocate from any memory type dynamically as needed.
+pub unsafe trait MemoryAllocator: DeviceOwned {
+    /// Finds the most suitable memory type index based on a filter. Returns [`None`] if the
+    /// requirements are too strict and no memory type is able to satisfy them.
+    fn find_memory_type_index(
+        &self,
+        memory_type_bits: u32,
+        requirements: MemoryTypeFilter,
+    ) -> Option<u32>;
+
+    /// Allocates memory from a specific memory type.
+    fn allocate_from_type(
+        &self,
+        memory_type_index: u32,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, AllocationCreationError>;
+
+    /// Allocates memory from a specific memory type without checking the parameters.
+    ///
+    /// # Safety
+    ///
+    /// - `create_info.size` must not be zero.
+    /// - `create_info.alignment` must not be zero.
+    /// - `create_info.alignment` must be a power of two.
+    #[cfg_attr(not(feature = "document_unchecked"), doc(hidden))]
+    unsafe fn allocate_from_type_unchecked(
+        &self,
+        memory_type_index: u32,
+        create_info: SuballocationCreateInfo,
+        never_allocate: bool,
+    ) -> Result<MemoryAlloc, AllocationCreationError>;
+
+    /// Allocates memory according to requirements.
+    fn allocate(
+        &self,
+        create_info: AllocationCreateInfo<'_>,
+    ) -> Result<MemoryAlloc, AllocationCreationError>;
+
+    /// Allocates memory according to requirements without checking the parameters.
+    ///
+    /// # Safety
+    ///
+    /// - `create_info.requirements.size` must not be zero.
+    /// - `create_info.requirements.alignment` must not be zero.
+    /// - `create_info.requirements.alignment` must be a power of two.
+    /// - If you are going to bind this allocation to a resource, then `create_info.requirements`
+    ///   must match the memory requirements of the resource.
+    /// - If `create_info.dedicated_allocation` is `Some` then `create_info.requirements.size`
+    ///   must match the memory requirements of the resource.
+    /// - If `create_info.dedicated_allocation` is `Some` then the device the resource was
+    ///   created with must match the device the allocator was created with.
+    #[cfg_attr(not(feature = "document_unchecked"), doc(hidden))]
+    unsafe fn allocate_unchecked(
+        &self,
+        create_info: AllocationCreateInfo<'_>,
+    ) -> Result<MemoryAlloc, AllocationCreationError>;
+
+    /// Creates a root allocation/dedicated allocation without checking the parameters.
+    ///
+    /// # Safety
+    ///
+    /// - `allocation_size` must not exceed the size of the heap that the memory type
+    ///   corresponding to `memory_type_index` resides in.
+    /// - The handle types in `export_handle_types` must be supported and compatible, as reported
+    ///   by [`ExternalBufferProperties`] or [`ImageFormatProperties`].
+    /// - If any of the handle types in `export_handle_types` require a dedicated allocation, as
+    ///   reported by [`ExternalBufferProperties::external_memory_properties`] or
+    ///   [`ImageFormatProperties::external_memory_properties`], then `dedicated_allocation`
+    ///   must not be `None`.
+    ///
+    /// [`ExternalBufferProperties`]: crate::buffer::ExternalBufferProperties
+    /// [`ImageFormatProperties`]: crate::image::ImageFormatProperties
+    /// [`ExternalBufferProperties::external_memory_properties`]: crate::buffer::ExternalBufferProperties::external_memory_properties
+    /// [`ImageFormatProperties::external_memory_properties`]: crate::image::ImageFormatProperties::external_memory_properties
+    #[cfg_attr(not(feature = "document_unchecked"), doc(hidden))]
+    unsafe fn allocate_dedicated_unchecked(
+        &self,
+        memory_type_index: u32,
+        allocation_size: DeviceSize,
+        dedicated_allocation: Option<DedicatedAllocation<'_>>,
+        export_handle_types: ExternalMemoryHandleTypes,
+    ) -> Result<MemoryAlloc, AllocationCreationError>;
+}
+
+/// Describes what memory property flags are required, preferred and not preferred when picking a
+/// memory type index.
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
+pub struct MemoryTypeFilter {
+    pub required_flags: MemoryPropertyFlags,
+    pub preferred_flags: MemoryPropertyFlags,
+    pub not_preferred_flags: MemoryPropertyFlags,
+}
+
+impl From<MemoryUsage> for MemoryTypeFilter {
+    #[inline]
+    fn from(usage: MemoryUsage) -> Self {
+        let mut requirements = Self::default();
+
+        match usage {
+            MemoryUsage::GpuOnly => {
+                requirements.preferred_flags.device_local = true;
+                requirements.not_preferred_flags.host_visible = true;
+            }
+            MemoryUsage::Upload => {
+                requirements.required_flags.host_visible = true;
+                requirements.preferred_flags.device_local = true;
+                requirements.not_preferred_flags.host_cached = true;
+            }
+            MemoryUsage::Download => {
+                requirements.required_flags.host_visible = true;
+                requirements.preferred_flags.host_cached = true;
+            }
+        }
+
+        requirements
+    }
+}
+
+/// Parameters to create a new [allocation] using a [memory allocator].
+///
+/// [allocation]: MemoryAlloc
+/// [memory allocator]: MemoryAllocator
+#[derive(Clone, Debug)]
+pub struct AllocationCreateInfo<'d> {
+    /// Requirements of the resource you want to allocate memory for.
+    ///
+    /// If you plan to bind this memory directly to a non-sparse resource, then this must
+    /// correspond to the value returned by either [`UnsafeBuffer::memory_requirements`] or
+    /// [`UnsafeImage::memory_requirements`] for the respective buffer or image.
+    ///
+    /// All of the fields must be non-zero, [`alignment`] must be a power of two, and
+    /// [`memory_type_bits`] must be below 2<sup>*n*</sup> where *n* is the number of available
+    /// memory types.
+    ///
+    /// The default is all zeros, which must be overridden.
+    ///
+    /// [`alignment`]: MemoryRequirements::alignment
+    /// [`memory_type_bits`]: MemoryRequirements::memory_type_bits
+    ///
+    /// [`UnsafeBuffer::memory_requirements`]: crate::buffer::sys::UnsafeBuffer::memory_requirements
+    /// [`UnsafeImage::memory_requirements`]: crate::image::sys::UnsafeImage::memory_requirements
+    pub requirements: MemoryRequirements,
+
+    /// What type of resource this allocation will be used for.
+    ///
+    /// This should be [`Linear`] for buffers and linear images, and [`NonLinear`] for optimal
+    /// images. You cannot bind memory allocated with the [`Linear`] type to optimal images or
+    /// bind memory allocated with the [`NonLinear`] type to buffers and linear images. You
+    /// should never use the [`Unknown`] type unless you have to, as that can be less memory
+    /// efficient.
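+    ///
+    /// As a rough sketch of filling this in for a buffer (the `buffer_requirements` variable is
+    /// assumed to be in scope):
+    ///
+    /// ```ignore
+    /// let create_info = AllocationCreateInfo {
+    ///     requirements: buffer_requirements,
+    ///     // Buffers are always linear resources.
+    ///     allocation_type: AllocationType::Linear,
+    ///     usage: MemoryUsage::Upload,
+    ///     ..Default::default()
+    /// };
+    /// ```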
+    ///
+    /// The default value is [`AllocationType::Unknown`].
+    ///
+    /// [`Linear`]: AllocationType::Linear
+    /// [`NonLinear`]: AllocationType::NonLinear
+    /// [`Unknown`]: AllocationType::Unknown
+    pub allocation_type: AllocationType,
+
+    /// The intended usage for the allocation.
+    ///
+    /// The default value is [`MemoryUsage::GpuOnly`].
+    pub usage: MemoryUsage,
+
+    /// How eager the allocator should be to allocate [`DeviceMemory`].
+    ///
+    /// The default value is [`MemoryAllocatePreference::Unknown`].
+    pub allocate_preference: MemoryAllocatePreference,
+
+    /// Allows a dedicated allocation to be created.
+    ///
+    /// You should always fill this field in if you are allocating memory for a non-sparse
+    /// resource, otherwise the allocator won't be able to create a dedicated allocation if one
+    /// is recommended.
+    ///
+    /// This option is silently ignored (treated as `None`) if the device API version is below
+    /// 1.1 and the [`khr_dedicated_allocation`] extension is not enabled on the device.
+    ///
+    /// The default value is [`None`].
+    ///
+    /// [`requirements.prefer_dedicated`]: MemoryRequirements::prefer_dedicated
+    /// [`khr_dedicated_allocation`]: crate::device::DeviceExtensions::khr_dedicated_allocation
+    pub dedicated_allocation: Option<DedicatedAllocation<'d>>,
+
+    pub _ne: crate::NonExhaustive,
+}
+
+impl Default for AllocationCreateInfo<'_> {
+    #[inline]
+    fn default() -> Self {
+        AllocationCreateInfo {
+            requirements: MemoryRequirements {
+                size: 0,
+                alignment: 0,
+                memory_type_bits: 0,
+                prefer_dedicated: false,
+            },
+            allocation_type: AllocationType::Unknown,
+            usage: MemoryUsage::GpuOnly,
+            allocate_preference: MemoryAllocatePreference::Unknown,
+            dedicated_allocation: None,
+            _ne: crate::NonExhaustive(()),
+        }
+    }
+}
+
+/// Describes how a memory allocation is going to be used.
+///
+/// This is mostly an optimization, except for `MemoryUsage::GpuOnly` which will pick a memory
+/// type that is not CPU-accessible if such a type exists.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum MemoryUsage {
+    /// The memory is intended to only be used by the GPU.
+    ///
+    /// Prefers picking a memory type with the [`device_local`] flag and without the
+    /// [`host_visible`] flag.
+    ///
+    /// This option is what you will always want to use unless the memory needs to be accessed by
+    /// the CPU, because a memory type that can only be accessed by the GPU is going to give the
+    /// best performance. Example use cases would be textures and other maps which are written to
+    /// once and then never again, or resources that are only written and read by the GPU, like
+    /// render targets and intermediary buffers.
+    ///
+    /// [`device_local`]: super::MemoryPropertyFlags::device_local
+    /// [`host_visible`]: super::MemoryPropertyFlags::host_visible
+    GpuOnly,
+
+    /// The memory is intended for upload to the GPU.
+    ///
+    /// Guarantees picking a memory type with the [`host_visible`] flag. Prefers picking one
+    /// without the [`host_cached`] flag and with the [`device_local`] flag.
+    ///
+    /// This option is best suited for resources that need to be constantly updated by the CPU,
+    /// like vertex and index buffers for example. It is also necessary for *staging buffers*,
+    /// whose only purpose in life is to get data into `device_local` memory or texels into an
+    /// optimal image.
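+    ///
+    /// As a sketch of what this maps to (based on the `From<MemoryUsage>` implementation for
+    /// [`MemoryTypeFilter`] above):
+    ///
+    /// ```ignore
+    /// let filter = MemoryTypeFilter::from(MemoryUsage::Upload);
+    /// assert!(filter.required_flags.host_visible);
+    /// assert!(filter.preferred_flags.device_local);
+    /// assert!(filter.not_preferred_flags.host_cached);
+    /// ```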
+    ///
+    /// [`host_visible`]: super::MemoryPropertyFlags::host_visible
+    /// [`host_cached`]: super::MemoryPropertyFlags::host_cached
+    /// [`device_local`]: super::MemoryPropertyFlags::device_local
+    Upload,
+
+    /// The memory is intended for download from the GPU.
+    ///
+    /// Guarantees picking a memory type with the [`host_visible`] flag. Prefers picking one with
+    /// the [`host_cached`] flag and without the [`device_local`] flag.
+    ///
+    /// This option is best suited if you're using the GPU for things other than rendering and
+    /// you need to get the results back to the CPU. That might be compute shading, or image or
+    /// video manipulation, or screenshotting for example.
+    ///
+    /// [`host_visible`]: super::MemoryPropertyFlags::host_visible
+    /// [`host_cached`]: super::MemoryPropertyFlags::host_cached
+    /// [`device_local`]: super::MemoryPropertyFlags::device_local
+    Download,
+}
+
+/// Describes whether allocating [`DeviceMemory`] is desired.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum MemoryAllocatePreference {
+    /// There is no known preference, let the allocator decide.
+    Unknown,
+
+    /// The allocator should never allocate `DeviceMemory` and should instead only suballocate
+    /// from existing blocks.
+    ///
+    /// This option is best suited if you cannot afford the overhead of allocating
+    /// `DeviceMemory`.
+    NeverAllocate,
+
+    /// The allocator should always allocate `DeviceMemory`.
+    ///
+    /// This option is best suited if you are allocating a long-lived resource that you know
+    /// could benefit from having a dedicated allocation.
+    AlwaysAllocate,
+}
+
+/// Error that can be returned when creating an [allocation] using a [memory allocator].
+///
+/// [allocation]: MemoryAlloc
+/// [memory allocator]: MemoryAllocator
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum AllocationCreationError {
+    /// There is not enough memory on the host.
+    OutOfHostMemory,
+
+    /// There is not enough memory on the device.
+    OutOfDeviceMemory,
+
+    /// Too many [`DeviceMemory`] allocations exist already.
+    TooManyObjects,
+
+    /// Failed to map memory.
+    MemoryMapFailed,
+
+    /// There is not enough memory in the pool.
+    ///
+    /// This is returned when using [`MemoryAllocatePreference::NeverAllocate`] and there is not
+    /// enough memory in the pool.
+    OutOfPoolMemory,
+
+    /// The block size for the allocator was exceeded.
+    ///
+    /// This is returned when using [`MemoryAllocatePreference::NeverAllocate`] and the
+    /// allocation size exceeded the block size for all heaps of suitable memory types.
+    BlockSizeExceeded,
+
+    /// The block size for the suballocator was exceeded.
+    ///
+    /// This is returned when using [`GenericMemoryAllocator<Arc<PoolAllocator<BLOCK_SIZE>>>`] if
+    /// the allocation size exceeded `BLOCK_SIZE`.
+    SuballocatorBlockSizeExceeded,
+}
+
+impl Error for AllocationCreationError {}
+
+impl Display for AllocationCreationError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                Self::OutOfHostMemory => "out of host memory",
+                Self::OutOfDeviceMemory => "out of device memory",
+                Self::TooManyObjects => "too many `DeviceMemory` allocations exist already",
+                Self::MemoryMapFailed => "failed to map memory",
+                Self::OutOfPoolMemory => "the pool doesn't have enough free space",
+                Self::BlockSizeExceeded =>
+                    "the allocation size was greater than the block size for all heaps of \
+                    suitable memory types and dedicated allocations were explicitly forbidden",
+                Self::SuballocatorBlockSizeExceeded =>
+                    "the allocation size was greater than the suballocator's block size",
+            }
+        )
+    }
+}
+
+impl From<OomError> for AllocationCreationError {
+    fn from(err: OomError) -> Self {
+        match err {
+            OomError::OutOfHostMemory => AllocationCreationError::OutOfHostMemory,
+            OomError::OutOfDeviceMemory => AllocationCreationError::OutOfDeviceMemory,
+        }
+    }
+}
+
+/// Standard memory allocator intended as a global and general-purpose allocator.
+///
+/// This type of allocator should work well in most cases; it is, however, **not** to be used
+/// when allocations need to be made very frequently (say, once or more per frame). For that
+/// purpose, use [`FastMemoryAllocator`].
+///
+/// See [`FreeListAllocator`] for details about the allocation algorithm and example usage.
+pub type StandardMemoryAllocator = GenericMemoryAllocator<Arc<FreeListAllocator>>;
+
+impl StandardMemoryAllocator {
+    /// Creates a new `StandardMemoryAllocator` with default configuration.
+    pub fn new_default(device: Arc<Device>) -> Self {
+        #[allow(clippy::erasing_op, clippy::identity_op)]
+        let create_info = GenericMemoryAllocatorCreateInfo {
+            #[rustfmt::skip]
+            block_sizes: &[
+                (0 * B, 64 * M),
+                (1 * G, 256 * M),
+            ],
+            ..Default::default()
+        };
+
+        unsafe { Self::new_unchecked(device, create_info) }
+    }
+}
+
+/// Fast memory allocator intended as a local and special-purpose allocator.
+///
+/// This type of allocator is only useful when you need to allocate a lot, for example once or
+/// more per frame. It is **not** to be used when allocations are long-lived. For that purpose
+/// use [`StandardMemoryAllocator`].
+///
+/// See [`BumpAllocator`] for details about the allocation algorithm.
+pub type FastMemoryAllocator = GenericMemoryAllocator<Arc<BumpAllocator>>;
+
+impl FastMemoryAllocator {
+    /// Creates a new `FastMemoryAllocator` with default configuration.
+    pub fn new_default(device: Arc<Device>) -> Self {
+        #[allow(clippy::erasing_op, clippy::identity_op)]
+        let create_info = GenericMemoryAllocatorCreateInfo {
+            #[rustfmt::skip]
+            block_sizes: &[
+                (  0 * B, 16 * M),
+                (512 * M, 32 * M),
+                (  1 * G, 64 * M),
+            ],
+            ..Default::default()
+        };
+
+        unsafe { Self::new_unchecked(device, create_info) }
+    }
+}
+
+/// A generic implementation of a [memory allocator].
+///
+/// The allocator keeps a pool of [`DeviceMemory`] blocks for each memory type and uses the type
+/// parameter `S` to [suballocate] these blocks. You can also configure the sizes of these
+/// blocks. This means that you can have as many `GenericMemoryAllocator`s as you want for
+/// different needs, or for performance reasons, as long as the block sizes are configured
+/// properly so that too much memory isn't wasted.
+///
+/// See also [the `MemoryAllocator` implementation].
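+///
+/// # Examples
+///
+/// A sketch of creating a custom allocator (the `device` variable, an `Arc<Device>`, is assumed
+/// to be in scope):
+///
+/// ```ignore
+/// use vulkano::memory::allocator::{
+///     FreeListAllocator, GenericMemoryAllocator, GenericMemoryAllocatorCreateInfo,
+/// };
+///
+/// let allocator = GenericMemoryAllocator::<Arc<FreeListAllocator>>::new(
+///     device.clone(),
+///     GenericMemoryAllocatorCreateInfo {
+///         // 16 MiB blocks for small heaps, 128 MiB blocks for heaps of 1 GiB and up.
+///         block_sizes: &[(0, 16 * 1024 * 1024), (1024 * 1024 * 1024, 128 * 1024 * 1024)],
+///         ..Default::default()
+///     },
+/// )
+/// .unwrap();
+/// ```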
+///
+/// # `DeviceMemory` allocation
+///
+/// If an allocation is created with the [`MemoryAllocatePreference::Unknown`] option, and the
+/// allocator deems the allocation too big for suballocation (larger than half the block size),
+/// or the implementation prefers a dedicated allocation, then that allocation is made a
+/// dedicated allocation. Using [`MemoryAllocatePreference::NeverAllocate`], a dedicated
+/// allocation is never created, even if the allocation is larger than the block size. In such a
+/// case an error is returned instead. Using [`MemoryAllocatePreference::AlwaysAllocate`], a
+/// dedicated allocation is always created.
+///
+/// In all other cases, `DeviceMemory` is only allocated if a pool runs out of memory and needs
+/// another block. No `DeviceMemory` is allocated when the allocator is created, the blocks are
+/// only allocated once they are needed.
+///
+/// # Locking behavior
+///
+/// The allocator never needs to lock while suballocating unless `S` needs to lock. The only time
+/// when a pool must be locked is when a new `DeviceMemory` block is allocated for the pool. This
+/// means that the allocator is suited to both locking and lock-free (sub)allocation algorithms.
+///
+/// [memory allocator]: MemoryAllocator
+/// [suballocate]: Suballocator
+/// [the `MemoryAllocator` implementation]: Self#impl-MemoryAllocator-for-GenericMemoryAllocator
+#[derive(Debug)]
+pub struct GenericMemoryAllocator<S: Suballocator> {
+    device: Arc<Device>,
+    // Each memory type has a pool of `DeviceMemory` blocks.
+    pools: ArrayVec<Pool<S>, MAX_MEMORY_TYPES>,
+    // Each memory heap has its own block size.
+    block_sizes: ArrayVec<DeviceSize, MAX_MEMORY_HEAPS>,
+    allocation_type: AllocationType,
+    export_handle_types: ArrayVec<ExternalMemoryHandleTypes, MAX_MEMORY_TYPES>,
+    dedicated_allocation: bool,
+    flags: MemoryAllocateFlags,
+    // Global mask of memory types.
+    memory_type_bits: u32,
+    // How many `DeviceMemory` allocations should be allowed before restricting them.
+    max_allocations: u32,
+}
+
+#[derive(Debug)]
+struct Pool<S> {
+    blocks: RwLock<Vec<S>>,
+    // This is cached here for faster access, so we don't need to hop through 3 pointers.
+    memory_type: ash::vk::MemoryType,
+}
+
+impl<S: Suballocator> GenericMemoryAllocator<S> {
+    // This is a false-positive, we only use this const for static initialization.
+    #[allow(clippy::declare_interior_mutable_const)]
+    const EMPTY_POOL: Pool<S> = Pool {
+        blocks: RwLock::new(Vec::new()),
+        memory_type: ash::vk::MemoryType {
+            property_flags: ash::vk::MemoryPropertyFlags::empty(),
+            heap_index: 0,
+        },
+    };
+
+    /// Creates a new `GenericMemoryAllocator<S>` using the provided suballocator `S` for
+    /// suballocation of [`DeviceMemory`] blocks.
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `create_info.block_sizes` is not sorted by threshold.
+    /// - Panics if `create_info.block_sizes` contains duplicate thresholds.
+    /// - Panics if `create_info.block_sizes` does not contain a baseline threshold of `0`.
+    /// - Panics if the block size for a heap exceeds the size of the heap.
+ pub fn new( + device: Arc, + create_info: GenericMemoryAllocatorCreateInfo<'_, '_>, + ) -> Result { + Self::validate_new(&device, &create_info)?; + + Ok(unsafe { Self::new_unchecked(device, create_info) }) + } + + fn validate_new( + device: &Device, + create_info: &GenericMemoryAllocatorCreateInfo<'_, '_>, + ) -> Result<(), GenericMemoryAllocatorCreationError> { + let &GenericMemoryAllocatorCreateInfo { + block_sizes, + allocation_type: _, + dedicated_allocation: _, + export_handle_types, + device_address: _, + _ne: _, + } = create_info; + + assert!( + block_sizes.windows(2).all(|win| win[0].0 < win[1].0), + "`create_info.block_sizes` must be sorted by threshold without duplicates", + ); + assert!( + matches!(block_sizes.first(), Some((0, _))), + "`create_info.block_sizes` must contain a baseline threshold `0`", + ); + + if !export_handle_types.is_empty() { + if !(device.api_version() >= Version::V1_1 + && device.enabled_extensions().khr_external_memory) + { + return Err(GenericMemoryAllocatorCreationError::RequirementNotMet { + required_for: "`create_info.export_handle_types` was not empty", + requires_one_of: RequiresOneOf { + api_version: Some(Version::V1_1), + device_extensions: &["khr_external_memory"], + ..Default::default() + }, + }); + } + + assert!( + export_handle_types.len() + == device + .physical_device() + .memory_properties() + .memory_types + .len(), + "`create_info.export_handle_types` must contain as many elements as the number of \ + memory types if not empty", + ); + + for export_handle_types in export_handle_types { + // VUID-VkExportMemoryAllocateInfo-handleTypes-parameter + export_handle_types.validate_device(device)?; + } + } + + Ok(()) + } + + #[cfg_attr(not(feature = "document_unchecked"), doc(hidden))] + pub unsafe fn new_unchecked( + device: Arc, + create_info: GenericMemoryAllocatorCreateInfo<'_, '_>, + ) -> Self { + let GenericMemoryAllocatorCreateInfo { + block_sizes, + allocation_type, + dedicated_allocation, + export_handle_types, + mut device_address, + _ne: _, + } = create_info; + + let MemoryProperties { + memory_types, + memory_heaps, + } = device.physical_device().memory_properties(); + + let mut pools = ArrayVec::new(memory_types.len(), [Self::EMPTY_POOL; MAX_MEMORY_TYPES]); + for (i, memory_type) in memory_types.iter().enumerate() { + pools[i].memory_type = ash::vk::MemoryType { + property_flags: memory_type.property_flags.into(), + heap_index: memory_type.heap_index, + }; + } + + let block_sizes = { + let mut sizes = ArrayVec::new(memory_heaps.len(), [0; MAX_MEMORY_HEAPS]); + for (i, memory_heap) in memory_heaps.iter().enumerate() { + let idx = match block_sizes.binary_search_by_key(&memory_heap.size, |&(t, _)| t) { + Ok(idx) => idx, + Err(idx) => idx.saturating_sub(1), + }; + sizes[i] = block_sizes[idx].1; + + // VUID-vkAllocateMemory-pAllocateInfo-01713 + assert!(sizes[i] <= memory_heap.size); + } + + sizes + }; + + let export_handle_types = { + let mut types = ArrayVec::new( + export_handle_types.len(), + [ExternalMemoryHandleTypes::empty(); MAX_MEMORY_TYPES], + ); + types.copy_from_slice(export_handle_types); + + types + }; + + // VUID-VkMemoryAllocateInfo-flags-03331 + device_address &= device.enabled_features().buffer_device_address + && !device.enabled_extensions().ext_buffer_device_address; + // Providers of `VkMemoryAllocateFlags` + device_address &= + device.api_version() >= Version::V1_1 || device.enabled_extensions().khr_device_group; + + let mut memory_type_bits = u32::MAX; + for (index, MemoryType { property_flags, .. 
}) in memory_types.iter().enumerate() { + if property_flags.lazily_allocated + || property_flags.protected + || property_flags.device_coherent + || property_flags.device_uncached + || property_flags.rdma_capable + { + // VUID-VkMemoryAllocateInfo-memoryTypeIndex-01872 + // VUID-vkAllocateMemory-deviceCoherentMemory-02790 + // Lazily allocated memory would just cause problems for suballocation in general. + memory_type_bits &= !(1 << index); + } + } + + let max_memory_allocation_count = device + .physical_device() + .properties() + .max_memory_allocation_count; + let max_allocations = max_memory_allocation_count / 4 * 3; + + GenericMemoryAllocator { + device, + pools, + block_sizes, + allocation_type, + export_handle_types, + dedicated_allocation, + flags: MemoryAllocateFlags { + device_address, + ..Default::default() + }, + memory_type_bits, + max_allocations, + } + } + + fn validate_allocate_from_type( + &self, + memory_type_index: u32, + create_info: &SuballocationCreateInfo, + ) { + let memory_type = &self.pools[memory_type_index as usize].memory_type; + // VUID-VkMemoryAllocateInfo-memoryTypeIndex-01872 + assert!( + !memory_type + .property_flags + .contains(ash::vk::MemoryPropertyFlags::PROTECTED) + || self.device.enabled_features().protected_memory, + "attempted to allocate from a protected memory type without the `protected_memory` \ + feature being enabled on the device", + ); + + // VUID-vkAllocateMemory-deviceCoherentMemory-02790 + assert!( + !memory_type.property_flags.intersects( + ash::vk::MemoryPropertyFlags::DEVICE_COHERENT_AMD + | ash::vk::MemoryPropertyFlags::DEVICE_UNCACHED_AMD + ) || self.device.enabled_features().device_coherent_memory, + "attempted to allocate memory from a device-coherent/device-uncached memory type \ + without the `device_coherent_memory` feature being enabled on the device", + ); + + let block_size = self.block_sizes[memory_type.heap_index as usize]; + // VUID-vkAllocateMemory-pAllocateInfo-01713 + assert!( + create_info.size <= block_size, + "attempted to create an allocation larger than the block size for the memory heap", + ); + + create_info.validate(); + } + + fn validate_allocate(&self, create_info: &AllocationCreateInfo<'_>) { + let &AllocationCreateInfo { + requirements, + allocation_type: _, + usage: _, + allocate_preference: _, + dedicated_allocation, + _ne: _, + } = create_info; + + SuballocationCreateInfo::from(create_info.clone()).validate(); + + assert!(requirements.memory_type_bits != 0); + assert!(requirements.memory_type_bits < 1 << self.pools.len()); + + if let Some(dedicated_allocation) = dedicated_allocation { + match dedicated_allocation { + DedicatedAllocation::Buffer(buffer) => { + // VUID-VkMemoryDedicatedAllocateInfo-commonparent + assert_eq!(&self.device, buffer.device()); + + let required_size = buffer.memory_requirements().size; + + // VUID-VkMemoryDedicatedAllocateInfo-buffer-02965 + assert!(requirements.size != required_size); + } + DedicatedAllocation::Image(image) => { + // VUID-VkMemoryDedicatedAllocateInfo-commonparent + assert_eq!(&self.device, image.device()); + + let required_size = image.memory_requirements().size; + + // VUID-VkMemoryDedicatedAllocateInfo-image-02964 + assert!(requirements.size != required_size); + } + } + } + + // VUID-VkMemoryAllocateInfo-pNext-00639 + // VUID-VkExportMemoryAllocateInfo-handleTypes-00656 + // Can't validate, must be ensured by user + } +} + +unsafe impl MemoryAllocator for GenericMemoryAllocator { + fn find_memory_type_index( + &self, + memory_type_bits: u32, + requirements: 
MemoryTypeFilter, + ) -> Option { + let required_flags = requirements.required_flags.into(); + let preferred_flags = requirements.preferred_flags.into(); + let not_preferred_flags = requirements.not_preferred_flags.into(); + + self.pools + .iter() + .map(|pool| pool.memory_type.property_flags) + .enumerate() + // Filter out memory types which are supported by the memory type bits and have the + // required flags set. + .filter(|&(index, flags)| { + memory_type_bits & (1 << index) != 0 && flags & required_flags == required_flags + }) + // Rank memory types with more of the preferred flags higher, and ones with more of the + // not preferred flags lower. + .min_by_key(|&(_, flags)| { + (!flags & preferred_flags).as_raw().count_ones() + + (flags & not_preferred_flags).as_raw().count_ones() + }) + .map(|(index, _)| index as u32) + } + + /// Allocates memory from a specific memory type. + /// + /// # Panics + /// + /// - Panics if `memory_type_index` is not less than the number of available memory types. + /// - Panics if `memory_type_index` refers to a memory type which has the [`protected`] flag set + /// and the [`protected_memory`] feature is not enabled on the device. + /// - Panics if `create_info.size` is greater than the block size corresponding to the heap that + /// the memory type corresponding to `memory_type_index` resides in. + /// - Panics if `create_info.size` is zero. + /// - Panics if `create_info.alignment` is zero. + /// - Panics if `create_info.alignment` is not a power of two. + /// + /// # Errors + /// + /// - Returns an error if allocating a new block is required and failed. This can be one of the + /// OOM errors or [`TooManyObjects`]. + /// - Returns [`BlockSizeExceeded`] if `S` is `PoolAllocator` and `create_info.size` + /// is greater than `BLOCK_SIZE`. + /// + /// [`protected`]: super::MemoryPropertyFlags::protected + /// [`protected_memory`]: crate::device::Features::protected_memory + /// [`TooManyObjects`]: AllocationCreationError::TooManyObjects + /// [`BlockSizeExceeded`]: AllocationCreationError::BlockSizeExceeded + fn allocate_from_type( + &self, + memory_type_index: u32, + create_info: SuballocationCreateInfo, + ) -> Result { + self.validate_allocate_from_type(memory_type_index, &create_info); + + if self.pools[memory_type_index as usize] + .memory_type + .property_flags + .contains(ash::vk::MemoryPropertyFlags::LAZILY_ALLOCATED) + { + return unsafe { + self.allocate_dedicated_unchecked( + memory_type_index, + create_info.size, + None, + self.export_handle_types[memory_type_index as usize], + ) + }; + } + + unsafe { self.allocate_from_type_unchecked(memory_type_index, create_info, false) } + } + + unsafe fn allocate_from_type_unchecked( + &self, + memory_type_index: u32, + create_info: SuballocationCreateInfo, + never_allocate: bool, + ) -> Result { + let SuballocationCreateInfo { + size, + alignment: _, + allocation_type: _, + _ne: _, + } = create_info; + + let pool = &self.pools[memory_type_index as usize]; + + let mut blocks = if S::IS_BLOCKING { + // If the allocation algorithm needs to block, then there's no point in trying to avoid + // locks here either. In that case the best strategy is to take full advantage of it by + // always taking an exclusive lock, which lets us sort the blocks by free size. If you + // as a user want to avoid locks, simply don't share the allocator between threads. 
You
+            // can create as many allocators as you wish, but keep in mind that this will waste a
+            // huge amount of memory unless you configure your block sizes properly!
+
+            let mut blocks = pool.blocks.write();
+            blocks.sort_by_key(Suballocator::free_size);
+            let (Ok(idx) | Err(idx)) = blocks.binary_search_by_key(&size, Suballocator::free_size);
+            for block in &blocks[idx..] {
+                match block.allocate_unchecked(create_info.clone()) {
+                    Ok(alloc) => return Ok(alloc),
+                    Err(SuballocationCreationError::BlockSizeExceeded) => {
+                        return Err(AllocationCreationError::SuballocatorBlockSizeExceeded);
+                    }
+                    Err(_) => {}
+                }
+            }
+
+            blocks
+        } else {
+            // If the allocation algorithm is lock-free, then we should avoid taking an exclusive
+            // lock unless it is absolutely necessary (meaning, only when allocating a new
+            // `DeviceMemory` block and inserting it into a pool). This has the disadvantage that
+            // traversing the pool is O(n), which is not a problem since the number of blocks is
+            // expected to be small. If there are more than 10 blocks in a pool then that's a
+            // configuration error. Also, sorting the blocks before each allocation would be less
+            // efficient, because getting the free size of the `PoolAllocator` and `BumpAllocator`
+            // costs the same as trying to allocate.
+
+            let blocks = pool.blocks.read();
+            // Search in reverse order because we always append new blocks at the end.
+            for block in blocks.iter().rev() {
+                match block.allocate_unchecked(create_info.clone()) {
+                    Ok(alloc) => return Ok(alloc),
+                    // This can happen when using the `PoolAllocator<BLOCK_SIZE>` if the
+                    // allocation size is greater than `BLOCK_SIZE`.
+                    Err(SuballocationCreationError::BlockSizeExceeded) => {
+                        return Err(AllocationCreationError::SuballocatorBlockSizeExceeded);
+                    }
+                    Err(_) => {}
+                }
+            }
+
+            let len = blocks.len();
+            drop(blocks);
+            let blocks = pool.blocks.write();
+            if blocks.len() > len {
+                // Another thread beat us to it and inserted a fresh block, try to allocate from
+                // it.
+                match blocks[len].allocate_unchecked(create_info.clone()) {
+                    Ok(alloc) => return Ok(alloc),
+                    // This can happen if this is the first block that was inserted and when
+                    // using the `PoolAllocator<BLOCK_SIZE>` if the allocation size is greater
+                    // than `BLOCK_SIZE`.
+                    Err(SuballocationCreationError::BlockSizeExceeded) => {
+                        return Err(AllocationCreationError::SuballocatorBlockSizeExceeded);
+                    }
+                    Err(_) => {}
+                }
+            }
+
+            blocks
+        };
+
+        // For bump allocators, first do a garbage sweep and try to allocate again.
+        if S::NEEDS_CLEANUP {
+            blocks.iter_mut().for_each(Suballocator::cleanup);
+            blocks.sort_unstable_by_key(Suballocator::free_size);
+
+            if let Some(block) = blocks.last() {
+                if let Ok(alloc) = block.allocate_unchecked(create_info.clone()) {
+                    return Ok(alloc);
+                }
+            }
+        }
+
+        if never_allocate {
+            return Err(AllocationCreationError::OutOfPoolMemory);
+        }
+
+        // The pool doesn't have enough real estate, so we need a new block.
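+        // As a sketch of the fallback that follows: with a configured block size of 64 MiB, the
+        // attempted `DeviceMemory` allocation sizes would be 64 MiB, 32 MiB, 16 MiB and finally
+        // 8 MiB before the error is returned.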
+ let block = { + let block_size = self.block_sizes[pool.memory_type.heap_index as usize]; + let export_handle_types = if !self.export_handle_types.is_empty() { + self.export_handle_types[memory_type_index as usize] + } else { + ExternalMemoryHandleTypes::empty() + }; + let mut i = 0; + + loop { + let allocate_info = MemoryAllocateInfo { + allocation_size: block_size >> i, + memory_type_index, + export_handle_types, + dedicated_allocation: None, + flags: self.flags, + ..Default::default() + }; + match DeviceMemory::allocate_unchecked(self.device.clone(), allocate_info, None) { + Ok(device_memory) => { + break S::new(MemoryAlloc::new_root(device_memory)?); + } + // Retry up to 3 times, halving the allocation size each time. + Err(VulkanError::OutOfHostMemory | VulkanError::OutOfDeviceMemory) if i < 3 => { + i += 1; + } + Err(VulkanError::OutOfHostMemory) => { + return Err(AllocationCreationError::OutOfHostMemory); + } + Err(VulkanError::OutOfDeviceMemory) => { + return Err(AllocationCreationError::OutOfDeviceMemory); + } + Err(VulkanError::TooManyObjects) => { + return Err(AllocationCreationError::TooManyObjects); + } + Err(_) => unreachable!(), + } + } + }; + + blocks.push(block); + let block = blocks.last().unwrap(); + + match block.allocate_unchecked(create_info) { + Ok(alloc) => Ok(alloc), + // This can happen if the block ended up smaller than advertised because there wasn't + // enough memory. + Err(SuballocationCreationError::OutOfRegionMemory) => { + Err(AllocationCreationError::OutOfDeviceMemory) + } + // This can not happen as the block is fresher than Febreze and we're still holding an + // exclusive lock. + Err(SuballocationCreationError::FragmentedRegion) => unreachable!(), + // This can happen if this is the first block that was inserted and when using the + // `PoolAllocator` if the allocation size is greater than `BLOCK_SIZE`. + Err(SuballocationCreationError::BlockSizeExceeded) => { + Err(AllocationCreationError::SuballocatorBlockSizeExceeded) + } + } + } + + /// Allocates memory according to requirements. + /// + /// # Panics + /// + /// - Panics if `create_info.requirements.size` is zero. + /// - Panics if `create_info.requirements.alignment` is zero. + /// - Panics if `create_info.requirements.alignment` is not a power of two. + /// - Panics if `create_info.requirements.memory_type_bits` is zero. + /// - Panics if `create_info.requirements.memory_type_bits` is not less than 2*n* + /// where *n* is the number of available memory types. + /// - Panics if `create_info.dedicated_allocation` is `Some` and + /// `create_info.requirements.size` doesn't match the memory requirements of the resource. + /// - Panics if finding a suitable memory type failed. This only happens if the + /// `create_info.requirements` correspond to those of an optimal image but + /// `create_info.usage` is not [`MemoryUsage::GpuOnly`]. + /// + /// # Errors + /// + /// - Returns an error if allocating a new block is required and failed. This can be one of the + /// OOM errors or [`TooManyObjects`]. + /// - Returns [`OutOfPoolMemory`] if `create_info.allocate_preference` is + /// [`MemoryAllocatePreference::NeverAllocate`] and `create_info.requirements.size` is greater + /// than the block size for all heaps of suitable memory types. + /// - Returns [`BlockSizeExceeded`] if `create_info.allocate_preference` is + /// [`MemoryAllocatePreference::NeverAllocate`] and none of the pools of suitable memory + /// types have enough free space. 
+ /// - Returns [`SuballocatorBlockSizeExceeded`] if `S` is `PoolAllocator` and + /// `create_info.size` is greater than `BLOCK_SIZE` and a dedicated allocation was not + /// created. + /// + /// [`device_local`]: MemoryPropertyFlags::device_local + /// [`host_visible`]: MemoryPropertyFlags::host_visible + /// [`NoSuitableMemoryTypes`]: AllocationCreationError::NoSuitableMemoryTypes + /// [`TooManyObjects`]: AllocationCreationError::TooManyObjects + /// [`SuballocatorBlockSizeExceeded`]: AllocationCreationError::SuballocatorBlockSizeExceeded + /// [`OutOfPoolMemory`]: AllocationCreationError::OutOfPoolMemory + /// [`BlockSizeExceeded`]: AllocationCreationError::BlockSizeExceeded + fn allocate( + &self, + create_info: AllocationCreateInfo<'_>, + ) -> Result { + self.validate_allocate(&create_info); + + unsafe { self.allocate_unchecked(create_info) } + } + + unsafe fn allocate_unchecked( + &self, + create_info: AllocationCreateInfo<'_>, + ) -> Result { + let AllocationCreateInfo { + requirements: + MemoryRequirements { + size, + alignment: _, + mut memory_type_bits, + mut prefer_dedicated, + }, + allocation_type: _, + usage, + allocate_preference, + mut dedicated_allocation, + _ne: _, + } = create_info; + + let create_info = SuballocationCreateInfo::from(create_info); + + memory_type_bits &= self.memory_type_bits; + + let requirements = usage.into(); + + let mut memory_type_index = self + .find_memory_type_index(memory_type_bits, requirements) + .expect("couldn't find a suitable memory type"); + if !self.dedicated_allocation { + dedicated_allocation = None; + } + let export_handle_types = if self.export_handle_types.is_empty() { + ExternalMemoryHandleTypes::empty() + } else { + self.export_handle_types[memory_type_index as usize] + }; + + loop { + let memory_type = self.pools[memory_type_index as usize].memory_type; + let block_size = self.block_sizes[memory_type.heap_index as usize]; + + let res = match allocate_preference { + MemoryAllocatePreference::Unknown => { + if size > block_size / 2 { + prefer_dedicated = true; + } + if self.device.allocation_count() > self.max_allocations && size < block_size { + prefer_dedicated = false; + } + + if prefer_dedicated { + self.allocate_dedicated_unchecked( + memory_type_index, + size, + dedicated_allocation, + export_handle_types, + ) + // Fall back to suballocation. + .or_else(|err| { + if size < block_size { + self.allocate_from_type_unchecked( + memory_type_index, + create_info.clone(), + true, // A dedicated allocation already failed. + ) + .map_err(|_| err) + } else { + Err(err) + } + }) + } else { + self.allocate_from_type_unchecked( + memory_type_index, + create_info.clone(), + false, + ) + // Fall back to dedicated allocation. It is possible that the 1/8 block size + // that was tried was greater than the allocation size, so there's hope. + .or_else(|_| { + self.allocate_dedicated_unchecked( + memory_type_index, + size, + dedicated_allocation, + export_handle_types, + ) + }) + } + } + MemoryAllocatePreference::AlwaysAllocate => self.allocate_dedicated_unchecked( + memory_type_index, + size, + dedicated_allocation, + export_handle_types, + ), + MemoryAllocatePreference::NeverAllocate => { + if size <= block_size { + self.allocate_from_type_unchecked( + memory_type_index, + create_info.clone(), + true, + ) + } else { + Err(AllocationCreationError::BlockSizeExceeded) + } + } + }; + + match res { + Ok(alloc) => return Ok(alloc), + // This is not recoverable. 
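+                // (The suballocator's block size is the same in every pool, so retrying with a
+                // different memory type could not succeed either.)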
+ Err(AllocationCreationError::SuballocatorBlockSizeExceeded) => { + return Err(AllocationCreationError::SuballocatorBlockSizeExceeded); + } + // Try a different memory type. + Err(err) => { + memory_type_bits &= !(1 << memory_type_index); + memory_type_index = self + .find_memory_type_index(memory_type_bits, requirements) + .ok_or(err)?; + } + } + } + } + + unsafe fn allocate_dedicated_unchecked( + &self, + memory_type_index: u32, + allocation_size: DeviceSize, + mut dedicated_allocation: Option>, + export_handle_types: ExternalMemoryHandleTypes, + ) -> Result { + // Providers of `VkMemoryDedicatedAllocateInfo` + if !(self.device.api_version() >= Version::V1_1 + || self.device.enabled_extensions().khr_dedicated_allocation) + { + dedicated_allocation = None; + } + + let is_dedicated = dedicated_allocation.is_some(); + let allocate_info = MemoryAllocateInfo { + allocation_size, + memory_type_index, + dedicated_allocation, + export_handle_types, + flags: self.flags, + ..Default::default() + }; + let device_memory = + DeviceMemory::allocate_unchecked(self.device.clone(), allocate_info, None).map_err( + |err| match err { + VulkanError::OutOfHostMemory => AllocationCreationError::OutOfHostMemory, + VulkanError::OutOfDeviceMemory => AllocationCreationError::OutOfDeviceMemory, + VulkanError::TooManyObjects => AllocationCreationError::TooManyObjects, + _ => unreachable!(), + }, + )?; + + MemoryAlloc::new_inner(device_memory, is_dedicated).map(|mut alloc| { + alloc.set_allocation_type(self.allocation_type); + alloc + }) + } +} + +unsafe impl DeviceOwned for GenericMemoryAllocator { + fn device(&self) -> &Arc { + &self.device + } +} + +/// Parameters to create a new [`GenericMemoryAllocator`]. +#[derive(Clone, Debug)] +pub struct GenericMemoryAllocatorCreateInfo<'b, 'e> { + /// Lets you configure the block sizes for various heap size classes. + /// + /// Each entry is a pair of the threshold for the heap size and the block size that should be + /// used for that heap. Must be sorted by threshold and all thresholds must be unique. Must + /// contain a baseline threshold of 0. + /// + /// The allocator keeps a pool of [`DeviceMemory`] blocks for each memory type, so each memory + /// type that resides in a heap whose size crosses one of the thresholds will use the + /// corresponding block size. If multiple thresholds apply to a given heap, the block size + /// corresponding to the largest threshold is chosen. + /// + /// The block size is going to be the maximum size of a `DeviceMemory` block that is tried. If + /// allocating a block with the size fails, the allocator tries 1/2, 1/4 and 1/8 of the block + /// size in that order until one succeeds, else a dedicated allocation is attempted for the + /// allocation. If an allocation is created with a size greater than half the block size it is + /// always made a dedicated allocation. All of this doesn't apply when using + /// [`MemoryAllocatePreference::NeverAllocate`] however. + /// + /// The default value is `&[]`, which must be overridden. + pub block_sizes: &'b [(Threshold, BlockSize)], + + /// The allocation type that should be used for root allocations. + /// + /// You only need to worry about this if you're using [`PoolAllocator`] as the suballocator, as + /// all suballocations that the pool allocator makes inherit their allocation type from the + /// parent allocation. In all other cases it doesn't matter what this is. + /// + /// The default value is [`AllocationType::Unknown`]. 
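+    ///
+    /// As a sketch, an allocator meant to suballocate buffers from a pool could use:
+    ///
+    /// ```ignore
+    /// GenericMemoryAllocatorCreateInfo {
+    ///     block_sizes: &[(0 * B, 64 * M)],
+    ///     allocation_type: AllocationType::Linear,
+    ///     ..Default::default()
+    /// }
+    /// ```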
+ pub allocation_type: AllocationType, + + /// Whether the allocator should use the dedicated allocation APIs. + /// + /// This means that when the allocator decides that an allocation should not be suballocated, + /// but rather have its own block of [`DeviceMemory`], that that allocation will be made a + /// dedicated allocation. Otherwise they are still made free-standing ([root]) allocations, + /// just not [dedicated] ones. + /// + /// Dedicated allocations are an optimization which may result in better performance, so there + /// really is no reason to disable this option, unless the restrictions that they bring with + /// them are a problem. Namely, a dedicated allocation must only be used for the resource it + /// was created for. Meaning that [reusing the memory] for something else is not possible, + /// [suballocating it] is not possible, and [aliasing it] is also not possible. + /// + /// This option is silently ignored (treated as `false`) if the device API version is below 1.1 + /// and the [`khr_dedicated_allocation`] extension is not enabled on the device. + /// + /// The default value is `true`. + /// + /// [root]: MemoryAlloc::is_root + /// [dedicated]: MemoryAlloc::is_dedicated + /// [reusing the memory]: MemoryAlloc::try_unwrap + /// [suballocating it]: Suballocator + /// [aliasing it]: MemoryAlloc::alias + /// [`khr_dedicated_allocation`]: crate::device::DeviceExtensions::khr_dedicated_allocation + pub dedicated_allocation: bool, + + /// Lets you configure the external memory handle types that the [`DeviceMemory`] blocks will + /// be allocated with. + /// + /// Must be either empty or contain one element for each memory type. When `DeviceMemory` is + /// allocated, the external handle types corresponding to the memory type index are looked up + /// here and used for the allocation. + /// + /// The default value is `&[]`. + pub export_handle_types: &'e [ExternalMemoryHandleTypes], + + /// Whether the allocator should allocate the [`DeviceMemory`] blocks with the + /// [`device_address`] flag set. + /// + /// This is required if you want to allocate memory for buffers that have the + /// [`shader_device_address`] usage set. For this option too, there is no reason to disable it. + /// + /// This option is silently ignored (treated as `false`) if the [`buffer_device_address`] + /// feature is not enabled on the device or if the [`ext_buffer_device_address`] extension is + /// enabled on the device. It is also ignored if the device API version is below 1.1 and the + /// [`khr_device_group`] extension is not enabled on the device. + /// + /// The default value is `true`. 
+ /// + /// [`device_address`]: MemoryAllocateFlags::device_address + /// [`shader_device_address`]: crate::buffer::BufferUsage::shader_device_address + /// [`buffer_device_address`]: crate::device::Features::buffer_device_address + /// [`ext_buffer_device_address`]: crate::device::DeviceExtensions::ext_buffer_device_address + /// [`khr_device_group`]: crate::device::DeviceExtensions::khr_device_group + pub device_address: bool, + + pub _ne: crate::NonExhaustive, +} + +pub type Threshold = DeviceSize; + +pub type BlockSize = DeviceSize; + +impl Default for GenericMemoryAllocatorCreateInfo<'_, '_> { + #[inline] + fn default() -> Self { + GenericMemoryAllocatorCreateInfo { + block_sizes: &[], + allocation_type: AllocationType::Unknown, + dedicated_allocation: true, + export_handle_types: &[], + device_address: true, + _ne: crate::NonExhaustive(()), + } + } +} + +/// Error that can be returned when creating a [`GenericMemoryAllocator`]. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum GenericMemoryAllocatorCreationError { + RequirementNotMet { + required_for: &'static str, + requires_one_of: RequiresOneOf, + }, +} + +impl Error for GenericMemoryAllocatorCreationError {} + +impl Display for GenericMemoryAllocatorCreationError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::RequirementNotMet { + required_for, + requires_one_of, + } => write!( + f, + "a requirement was not met for: {}; requires one of: {}", + required_for, requires_one_of, + ), + } + } +} + +impl From for GenericMemoryAllocatorCreationError { + fn from(err: RequirementNotMet) -> Self { + Self::RequirementNotMet { + required_for: err.required_for, + requires_one_of: err.requires_one_of, + } + } +} + +mod array_vec { + use std::ops::{Deref, DerefMut}; + + /// Minimal implementation of an `ArrayVec`. Useful when a `Vec` is needed but there is a known + /// limit on the number of elements, so that it can occupy real estate on the stack. + #[derive(Clone, Copy, Debug)] + pub(super) struct ArrayVec { + len: usize, + data: [T; N], + } + + impl ArrayVec { + pub fn new(len: usize, data: [T; N]) -> Self { + assert!(len <= N); + + ArrayVec { len, data } + } + } + + impl Deref for ArrayVec { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + // SAFETY: `self.len <= N`. + unsafe { self.data.get_unchecked(0..self.len) } + } + } + + impl DerefMut for ArrayVec { + fn deref_mut(&mut self) -> &mut Self::Target { + // SAFETY: `self.len <= N`. + unsafe { self.data.get_unchecked_mut(0..self.len) } + } + } +} diff --git a/vulkano/src/memory/allocator/suballocator.rs b/vulkano/src/memory/allocator/suballocator.rs new file mode 100644 index 00000000..977f96d0 --- /dev/null +++ b/vulkano/src/memory/allocator/suballocator.rs @@ -0,0 +1,3090 @@ +//! Suballocators are used to divide a *region* into smaller *suballocations*. +//! +//! See also [the parent module] for details about memory allocation in Vulkan. +//! +//! 
[the parent module]: super
+
+use self::host::SlotId;
+use super::{array_vec::ArrayVec, AllocationCreateInfo, AllocationCreationError};
+use crate::{
+    device::{Device, DeviceOwned},
+    image::ImageTiling,
+    memory::DeviceMemory,
+    DeviceSize, OomError, VulkanError, VulkanObject,
+};
+use crossbeam_queue::ArrayQueue;
+use parking_lot::Mutex;
+use std::{
+    cell::Cell,
+    error::Error,
+    ffi::c_void,
+    fmt::{self, Display},
+    mem::{self, ManuallyDrop, MaybeUninit},
+    num::NonZeroU64,
+    ops::Range,
+    ptr::{self, NonNull},
+    slice,
+    sync::{
+        atomic::{AtomicU64, Ordering},
+        Arc,
+    },
+};
+
+/// Memory allocations are portions of memory that are reserved for a specific resource or
+/// purpose.
+///
+/// There are a few ways you can obtain a `MemoryAlloc` in Vulkano. Most commonly you will
+/// probably want to use a [memory allocator]. If you already have a [`DeviceMemory`] block on
+/// hand that you would like to turn into an allocation, you can use one of the constructors.
+/// Lastly, you can use a [suballocator] if you want to create multiple smaller allocations out
+/// of a bigger one.
+///
+/// [memory allocator]: super::MemoryAllocator
+/// [suballocator]: Suballocator
+#[derive(Debug)]
+pub struct MemoryAlloc {
+    offset: DeviceSize,
+    size: DeviceSize,
+    // Needed when binding resources to the allocation in order to avoid aliasing memory.
+    allocation_type: AllocationType,
+    // Mapped pointer to the start of the allocation, or `None` if the memory is not
+    // host-visible.
+    mapped_ptr: Option<NonNull<c_void>>,
+    // Used by the suballocators to align allocations to the non-coherent atom size when the
+    // memory type is host-visible but not host-coherent. This will be `None` for any other
+    // memory type.
+    atom_size: Option<NonZeroU64>,
+    // Used in the `Drop` impl to free the allocation if required.
+    parent: AllocParent,
+}
+
+#[derive(Debug)]
+enum AllocParent {
+    FreeList {
+        allocator: Arc<FreeListAllocator>,
+        id: SlotId,
+    },
+    Buddy {
+        allocator: Arc<BuddyAllocator>,
+        order: usize,
+        offset: DeviceSize,
+    },
+    Pool {
+        allocator: Arc<PoolAllocatorInner>,
+        index: DeviceSize,
+    },
+    Bump(Arc<BumpAllocator>),
+    Root(Arc<DeviceMemory>),
+    Dedicated(DeviceMemory),
+}
+
+// It is safe to share `mapped_ptr` between threads because the user would have to use unsafe
+// code themselves to get UB in the first place.
+unsafe impl Send for MemoryAlloc {}
+unsafe impl Sync for MemoryAlloc {}
+
+impl MemoryAlloc {
+    /// Creates a new root allocation.
+    ///
+    /// The memory is mapped automatically if it's host-visible.
+    #[inline]
+    pub fn new_root(device_memory: DeviceMemory) -> Result<Self, AllocationCreationError> {
+        Self::new_inner(device_memory, false)
+    }
+
+    /// Creates a new dedicated allocation.
+    ///
+    /// The memory is mapped automatically if it's host-visible.
+    #[inline]
+    pub fn new_dedicated(device_memory: DeviceMemory) -> Result<Self, AllocationCreationError> {
+        Self::new_inner(device_memory, true)
+    }
+
+    pub(super) fn new_inner(
+        device_memory: DeviceMemory,
+        dedicated: bool,
+    ) -> Result<Self, AllocationCreationError> {
+        let device = device_memory.device();
+        let physical_device = device.physical_device();
+        let memory_type_index = device_memory.memory_type_index();
+        let property_flags = &physical_device.memory_properties().memory_types
+            [memory_type_index as usize]
+            .property_flags;
+
+        let mapped_ptr = if property_flags.host_visible {
+            let fns = device.fns();
+            let mut output = MaybeUninit::uninit();
+            // This is always valid because we are mapping the whole range.
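+            // (Hence `offset` 0 and `ash::vk::WHOLE_SIZE` in the call below.)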
+            unsafe {
+                (fns.v1_0.map_memory)(
+                    device.handle(),
+                    device_memory.handle(),
+                    0,
+                    ash::vk::WHOLE_SIZE,
+                    ash::vk::MemoryMapFlags::empty(),
+                    output.as_mut_ptr(),
+                )
+                .result()
+                .map_err(|err| match err.into() {
+                    VulkanError::OutOfHostMemory => AllocationCreationError::OutOfHostMemory,
+                    VulkanError::OutOfDeviceMemory => AllocationCreationError::OutOfDeviceMemory,
+                    VulkanError::MemoryMapFailed => AllocationCreationError::MemoryMapFailed,
+                    _ => unreachable!(),
+                })?;
+
+                NonNull::new(output.assume_init())
+            }
+        } else {
+            None
+        };
+
+        let atom_size = (property_flags.host_visible && !property_flags.host_coherent)
+            .then_some(physical_device.properties().non_coherent_atom_size)
+            .and_then(NonZeroU64::new);
+
+        Ok(MemoryAlloc {
+            offset: 0,
+            size: device_memory.allocation_size(),
+            allocation_type: AllocationType::Unknown,
+            mapped_ptr,
+            atom_size,
+            parent: if dedicated {
+                AllocParent::Dedicated(device_memory)
+            } else {
+                AllocParent::Root(Arc::new(device_memory))
+            },
+        })
+    }
+
+    /// Returns the offset of the allocation within the [`DeviceMemory`] block.
+    #[inline]
+    pub fn offset(&self) -> DeviceSize {
+        self.offset
+    }
+
+    /// Returns the size of the allocation.
+    #[inline]
+    pub fn size(&self) -> DeviceSize {
+        self.size
+    }
+
+    /// Returns the type of resources that can be bound to this allocation.
+    #[inline]
+    pub fn allocation_type(&self) -> AllocationType {
+        self.allocation_type
+    }
+
+    /// Returns the mapped pointer to the start of the allocation if the memory is host-visible,
+    /// otherwise returns [`None`].
+    #[inline]
+    pub fn mapped_ptr(&self) -> Option<NonNull<c_void>> {
+        self.mapped_ptr
+    }
+
+    /// Returns a mapped slice to the data within the allocation if the memory is host-visible,
+    /// otherwise returns [`None`].
+    ///
+    /// # Safety
+    ///
+    /// - While the returned slice exists, there must be no operations pending or executing in a
+    ///   GPU queue that write to the same memory.
+    #[inline]
+    pub unsafe fn mapped_slice(&self) -> Option<&[u8]> {
+        self.mapped_ptr
+            .map(|ptr| slice::from_raw_parts(ptr.as_ptr().cast(), self.size as usize))
+    }
+
+    /// Returns a mapped mutable slice to the data within the allocation if the memory is
+    /// host-visible, otherwise returns [`None`].
+    ///
+    /// # Safety
+    ///
+    /// - While the returned slice exists, there must be no operations pending or executing in a
+    ///   GPU queue that access the same memory.
+    #[inline]
+    pub unsafe fn mapped_slice_mut(&mut self) -> Option<&mut [u8]> {
+        self.mapped_ptr
+            .map(|ptr| slice::from_raw_parts_mut(ptr.as_ptr().cast(), self.size as usize))
+    }
+
+    pub(crate) unsafe fn write(&self, range: Range<DeviceSize>) -> Option<&mut [u8]> {
+        debug_assert!(!range.is_empty() && range.end <= self.size);
+
+        self.mapped_ptr.map(|ptr| {
+            slice::from_raw_parts_mut(
+                ptr.as_ptr().add(range.start as usize).cast(),
+                (range.end - range.start) as usize,
+            )
+        })
+    }
+
+    /// Invalidates the host (CPU) cache for a range of the allocation.
+    ///
+    /// You must call this method before the memory is read by the host, if the device previously
+    /// wrote to the memory. It has no effect if the memory is not mapped or if the memory is
+    /// [host-coherent].
+    ///
+    /// `range` is specified in bytes relative to the start of the allocation. The start and end of
+    /// `range` must be a multiple of the [`non_coherent_atom_size`] device property, but
+    /// `range.end` can also equal `self.size()`.
+    ///
+    /// # Safety
+    ///
+    /// - If there are memory writes by the GPU that have not been propagated into the CPU cache,
+    ///   then there must not be any references in Rust code to the specified `range` of the memory.
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `range` is empty.
+    /// - Panics if `range.end` exceeds `self.size`.
+    /// - Panics if `range.start` or `range.end` are not a multiple of the `non_coherent_atom_size`.
+    ///
+    /// [host-coherent]: super::MemoryPropertyFlags::host_coherent
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
+    #[inline]
+    pub unsafe fn invalidate_range(&self, range: Range<DeviceSize>) -> Result<(), OomError> {
+        // VUID-VkMappedMemoryRange-memory-00684
+        if let Some(atom_size) = self.atom_size {
+            let range = self.create_memory_range(range, atom_size.get());
+            let device = self.device();
+            let fns = device.fns();
+            (fns.v1_0.invalidate_mapped_memory_ranges)(device.handle(), 1, &range)
+                .result()
+                .map_err(VulkanError::from)?;
+        } else {
+            // FIXME:
+            // self.debug_validate_memory_range(&range);
+        }
+
+        Ok(())
+    }
+
+    /// Flushes the host (CPU) cache for a range of the allocation.
+    ///
+    /// You must call this method after writing to the memory from the host, if the device is going
+    /// to read the memory. It has no effect if the memory is not mapped or if the memory is
+    /// [host-coherent].
+    ///
+    /// `range` is specified in bytes relative to the start of the allocation. The start and end of
+    /// `range` must be a multiple of the [`non_coherent_atom_size`] device property, but
+    /// `range.end` can also equal `self.size()`.
+    ///
+    /// # Safety
+    ///
+    /// - There must be no operations pending or executing in a GPU queue that access the specified
+    ///   `range` of the memory.
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `range` is empty.
+    /// - Panics if `range.end` exceeds `self.size`.
+    /// - Panics if `range.start` or `range.end` are not a multiple of the `non_coherent_atom_size`.
+    ///
+    /// [host-coherent]: super::MemoryPropertyFlags::host_coherent
+    /// [`non_coherent_atom_size`]: crate::device::Properties::non_coherent_atom_size
+    #[inline]
+    pub unsafe fn flush_range(&self, range: Range<DeviceSize>) -> Result<(), OomError> {
+        // VUID-VkMappedMemoryRange-memory-00684
+        if let Some(atom_size) = self.atom_size {
+            let range = self.create_memory_range(range, atom_size.get());
+            let device = self.device();
+            let fns = device.fns();
+            (fns.v1_0.flush_mapped_memory_ranges)(device.handle(), 1, &range)
+                .result()
+                .map_err(VulkanError::from)?;
+        } else {
+            // FIXME:
+            // self.debug_validate_memory_range(&range);
+        }
+
+        Ok(())
+    }
+
+    fn create_memory_range(
+        &self,
+        range: Range<DeviceSize>,
+        atom_size: DeviceSize,
+    ) -> ash::vk::MappedMemoryRange {
+        assert!(!range.is_empty() && range.end <= self.size);
+
+        // VUID-VkMappedMemoryRange-size-00685
+        // Guaranteed because we always map the entire `DeviceMemory`.
+
+        // VUID-VkMappedMemoryRange-offset-00687
+        // VUID-VkMappedMemoryRange-size-01390
+        assert!(
+            range.start % atom_size == 0 && (range.end % atom_size == 0 || range.end == self.size)
+        );
+
+        // VUID-VkMappedMemoryRange-offset-00687
+        // Guaranteed as long as `range.start` is aligned because the suballocators always align
+        // `self.offset` to the non-coherent atom size for non-coherent host-visible memory.
+        let offset = self.offset + range.start;
+
+        let mut size = range.end - range.start;
+        let device_memory = self.device_memory();
+
+        // VUID-VkMappedMemoryRange-size-01390
+        if offset + size < device_memory.allocation_size() {
+            // We align the size in case `range.end == self.size`. We can do this without aliasing
+            // other allocations because the suballocators ensure that all allocations are aligned
+            // to the atom size for non-coherent host-visible memory.
+            size = align_up(size, atom_size);
+        }
+
+        ash::vk::MappedMemoryRange {
+            memory: device_memory.handle(),
+            offset,
+            size,
+            ..Default::default()
+        }
+    }
+
+    /// This exists because even if no cache control is required, the parameters should still be
+    /// valid, otherwise you might have bugs in your code forever just because your memory happens
+    /// to be host-coherent.
+    #[allow(dead_code)]
+    fn debug_validate_memory_range(&self, range: &Range<DeviceSize>) {
+        debug_assert!(!range.is_empty() && range.end <= self.size);
+        debug_assert!({
+            let atom_size = self
+                .device()
+                .physical_device()
+                .properties()
+                .non_coherent_atom_size;
+
+            range.start % atom_size == 0 && (range.end % atom_size == 0 || range.end == self.size)
+        });
+    }
+
+    /// Returns the underlying block of [`DeviceMemory`].
+    #[inline]
+    pub fn device_memory(&self) -> &DeviceMemory {
+        match &self.parent {
+            AllocParent::FreeList { allocator, .. } => &allocator.device_memory,
+            AllocParent::Buddy { allocator, .. } => &allocator.device_memory,
+            AllocParent::Pool { allocator, .. } => &allocator.device_memory,
+            AllocParent::Bump(allocator) => &allocator.device_memory,
+            AllocParent::Root(device_memory) => device_memory,
+            AllocParent::Dedicated(device_memory) => device_memory,
+        }
+    }
+
+    /// Returns the parent allocation if this allocation is a [suballocation], otherwise returns
+    /// [`None`].
+    ///
+    /// [suballocation]: Suballocator
+    #[inline]
+    pub fn parent_allocation(&self) -> Option<&Self> {
+        match &self.parent {
+            AllocParent::FreeList { allocator, .. } => Some(&allocator.region),
+            AllocParent::Buddy { allocator, .. } => Some(&allocator.region),
+            AllocParent::Pool { allocator, .. } => Some(&allocator.region),
+            AllocParent::Bump(allocator) => Some(&allocator.region),
+            AllocParent::Root(_) => None,
+            AllocParent::Dedicated(_) => None,
+        }
+    }
+
+    /// Returns `true` if this allocation is the root of the [memory hierarchy].
+    ///
+    /// [memory hierarchy]: Suballocator#memory-hierarchies
+    #[inline]
+    pub fn is_root(&self) -> bool {
+        matches!(&self.parent, AllocParent::Root(_))
+    }
+
+    /// Returns `true` if this allocation is a [dedicated allocation].
+    ///
+    /// [dedicated allocation]: crate::memory::MemoryAllocateInfo#structfield.dedicated_allocation
+    #[inline]
+    pub fn is_dedicated(&self) -> bool {
+        matches!(&self.parent, AllocParent::Dedicated(_))
+    }
+
+    /// Returns the underlying block of [`DeviceMemory`] if this allocation [is the root
+    /// allocation] and is not [aliased], otherwise returns the allocation back wrapped in [`Err`].
+    ///
+    /// [is the root allocation]: Self::is_root
+    /// [aliased]: Self::alias
+    #[inline]
+    pub fn try_unwrap(self) -> Result<DeviceMemory, Self> {
+        let this = ManuallyDrop::new(self);
+
+        // SAFETY: This is safe because even if a panic happens, `self.parent` cannot be
+        // double-freed since `self` was wrapped in `ManuallyDrop`. If we fail to unwrap the
+        // `DeviceMemory`, the copy of `self.parent` is forgotten and only then is the
+        // `ManuallyDrop` wrapper removed from `self`.
+        match unsafe { ptr::read(&this.parent) } {
+            AllocParent::Root(device_memory) => {
+                Arc::try_unwrap(device_memory).map_err(|device_memory| {
+                    mem::forget(device_memory);
+                    ManuallyDrop::into_inner(this)
+                })
+            }
+            parent => {
+                mem::forget(parent);
+                Err(ManuallyDrop::into_inner(this))
+            }
+        }
+    }
+
+    /// Duplicates the allocation, creating aliased memory. Returns [`None`] if the allocation [is
+    /// a dedicated allocation].
+    ///
+    /// You might consider using this method if you want to optimize memory usage by aliasing
+    /// render targets for example, in which case you will have to double and triple check that the
+    /// memory is not used concurrently unless it only involves reading. You are highly discouraged
+    /// from doing this unless you have a reason to.
+    ///
+    /// # Safety
+    ///
+    /// - You must ensure memory accesses are synchronized yourself.
+    ///
+    /// [memory hierarchy]: Suballocator#memory-hierarchies
+    /// [is a dedicated allocation]: Self::is_dedicated
+    #[inline]
+    pub unsafe fn alias(&self) -> Option<Self> {
+        self.root().map(|device_memory| MemoryAlloc {
+            parent: AllocParent::Root(device_memory.clone()),
+            ..*self
+        })
+    }
+
+    fn root(&self) -> Option<&Arc<DeviceMemory>> {
+        match &self.parent {
+            AllocParent::FreeList { allocator, .. } => Some(&allocator.device_memory),
+            AllocParent::Buddy { allocator, .. } => Some(&allocator.device_memory),
+            AllocParent::Pool { allocator, .. } => Some(&allocator.device_memory),
+            AllocParent::Bump(allocator) => Some(&allocator.device_memory),
+            AllocParent::Root(device_memory) => Some(device_memory),
+            AllocParent::Dedicated(_) => None,
+        }
+    }
+
+    /// Increases the offset of the allocation by the specified `amount` and shrinks its size by
+    /// the same amount.
+    ///
+    /// # Panics
+    ///
+    /// - Panics if the `amount` exceeds the size of the allocation.
+    #[inline]
+    pub fn shift(&mut self, amount: DeviceSize) {
+        assert!(amount <= self.size);
+
+        self.offset += amount;
+        self.size -= amount;
+    }
+
+    /// Shrinks the size of the allocation to the specified `new_size`.
+    ///
+    /// # Panics
+    ///
+    /// - Panics if the `new_size` exceeds the current size of the allocation.
+    #[inline]
+    pub fn shrink(&mut self, new_size: DeviceSize) {
+        assert!(new_size <= self.size);
+
+        self.size = new_size;
+    }
+
+    /// Sets the offset of the allocation without checking for memory aliasing.
+    ///
+    /// See also [`shift`], which moves the offset safely.
+    ///
+    /// # Safety
+    ///
+    /// - You must ensure that the allocation doesn't alias any other allocations within the
+    ///   [`DeviceMemory`] block, and if it does, then you must ensure memory accesses are
+    ///   synchronized yourself.
+    /// - You must ensure the allocation still fits inside the `DeviceMemory` block.
+    ///
+    /// [`shift`]: Self::shift
+    #[inline]
+    pub unsafe fn set_offset(&mut self, new_offset: DeviceSize) {
+        self.offset = new_offset;
+    }
+
+    /// Sets the size of the allocation without checking for memory aliasing.
+    ///
+    /// See also [`shrink`], which sets the size safely.
+    ///
+    /// # Safety
+    ///
+    /// - You must ensure that the allocation doesn't alias any other allocations within the
+    ///   [`DeviceMemory`] block, and if it does, then you must ensure memory accesses are
+    ///   synchronized yourself.
+    /// - You must ensure the allocation still fits inside the `DeviceMemory` block.
+    ///
+    /// [`shrink`]: Self::shrink
+    #[inline]
+    pub unsafe fn set_size(&mut self, new_size: DeviceSize) {
+        self.size = new_size;
+    }
+
+    /// Sets the allocation type.
+    ///
+    /// This might cause memory aliasing due to [buffer-image granularity] conflicts if the
+    /// allocation type is [`Linear`] or [`NonLinear`] and is changed to a different one.
+    ///
+    /// # Safety
+    ///
+    /// - You must ensure that the allocation doesn't alias any other allocations within the
+    ///   [`DeviceMemory`] block, and if it does, then you must ensure memory accesses are
+    ///   synchronized yourself.
+    ///
+    /// [buffer-image granularity]: super#buffer-image-granularity
+    /// [`Linear`]: AllocationType::Linear
+    /// [`NonLinear`]: AllocationType::NonLinear
+    #[inline]
+    pub unsafe fn set_allocation_type(&mut self, new_type: AllocationType) {
+        self.allocation_type = new_type;
+    }
+}
+
+impl Drop for MemoryAlloc {
+    #[inline]
+    fn drop(&mut self) {
+        match &self.parent {
+            AllocParent::FreeList { allocator, id } => {
+                allocator.free(*id);
+            }
+            AllocParent::Buddy {
+                allocator,
+                order,
+                offset,
+            } => {
+                allocator.free(*order, *offset);
+            }
+            AllocParent::Pool { allocator, index } => {
+                allocator.free(*index);
+            }
+            // The bump allocator can't free individually, but we need to keep a reference to it so
+            // it doesn't get reset or dropped while in use.
+            AllocParent::Bump(_) => {}
+            // A root allocation frees itself once all references to the `DeviceMemory` are dropped.
+            AllocParent::Root(_) => {}
+            // Dedicated allocations free themselves when the `DeviceMemory` is dropped.
+            AllocParent::Dedicated(_) => {}
+        }
+    }
+}
+
+unsafe impl DeviceOwned for MemoryAlloc {
+    #[inline]
+    fn device(&self) -> &Arc<Device> {
+        self.device_memory().device()
+    }
+}
+
+/// Suballocators are used to divide a *region* into smaller *suballocations*.
+///
+/// # Regions
+///
+/// As the name implies, a region is a contiguous portion of memory. It may be the whole dedicated
+/// block of [`DeviceMemory`], or only a part of it. Regions are just [allocations] like any other,
+/// but we use this term to refer specifically to an allocation that is to be suballocated. Every
+/// suballocator is created with a region to work with.
+///
+/// # Free-lists
+///
+/// A free-list, also kind of predictably, refers to a list of (sub)allocations within a region
+/// that are currently free. Every (sub)allocator that can free allocations dynamically (in any
+/// order) needs to keep a free-list of some sort. This list is then consulted when new allocations
+/// are made, and can be used to coalesce neighboring allocations that are free into bigger ones.
+///
+/// # Memory hierarchies
+///
+/// Different applications have wildly different allocation needs, and there's no way to cover them
+/// all with a single type of allocator. Furthermore, different allocators have different
+/// trade-offs and are best suited to specific tasks. To account for all possible use-cases,
+/// Vulkano offers the ability to create *memory hierarchies*. We refer to the [`DeviceMemory`] as
+/// the root of any such hierarchy, even though technically the driver has levels that are further
+/// up; those `DeviceMemory` blocks need to be allocated from physical memory [pages] themselves,
+/// but since those levels are not accessible to us, we don't need to consider them. You can create
+/// any number of levels/branches from there, bounded only by the amount of available memory within
+/// a `DeviceMemory` block. You can suballocate the root into regions, which are then suballocated
+/// into further regions and so on, creating hierarchies of arbitrary height.
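To make the hierarchy concrete, here is a minimal sketch of such a composition. It is illustrative only: it assumes the allocator types and the `Suballocator` trait are importable from `vulkano::memory::allocator` as in the other examples in this patch, and that `root` is a `MemoryAlloc` created with `MemoryAlloc::new_root` beforehand.

```rust
use vulkano::memory::allocator::{
    AllocationType, BuddyAllocator, FreeListAllocator, MemoryAlloc, SuballocationCreateInfo,
    Suballocator,
};

// `root` is assumed to be a root allocation, as in the example further below.
fn two_level_hierarchy(root: MemoryAlloc) {
    // Level 1: suballocate the root into a 1 MiB region using a free-list.
    let free_list_allocator = FreeListAllocator::new(root);
    let region = free_list_allocator
        .allocate(SuballocationCreateInfo {
            size: 1024 * 1024, // A power of two, so the buddy allocator will accept it.
            alignment: 256,
            allocation_type: AllocationType::Unknown,
            ..Default::default()
        })
        .unwrap();

    // Level 2: feed that region into a buddy allocator and suballocate it further.
    let buddy_allocator = BuddyAllocator::new(region);
    let _allocation = buddy_allocator
        .allocate(SuballocationCreateInfo {
            size: 14 * 1024,
            alignment: 64,
            allocation_type: AllocationType::Linear,
            ..Default::default()
        })
        .unwrap();
}
```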
+///
+/// As an added bonus, memory hierarchies lend themselves perfectly to the concept of composability
+/// we all love so much, making them a natural fit for Rust. For one, a region can be allocated any
+/// way, and fed into any suballocator. Also, once you are done with a branch of a hierarchy,
+/// meaning there are no more suballocations in use within the region of that branch, and you would
+/// like to reuse the region, you can do so safely! All suballocators have a `try_into_region`
+/// method for this purpose. This means that you can replace one suballocator with another without
+/// consulting any of the higher levels in the hierarchy.
+///
+/// # Examples
+///
+/// Allocating a region to suballocate:
+///
+/// ```
+/// use vulkano::memory::{DeviceMemory, MemoryAllocateInfo, MemoryType};
+/// use vulkano::memory::allocator::MemoryAlloc;
+/// # let device: std::sync::Arc<vulkano::device::Device> = return;
+///
+/// // First you need to find a suitable memory type.
+/// let memory_type_index = device
+///     .physical_device()
+///     .memory_properties()
+///     .memory_types
+///     .iter()
+///     .enumerate()
+///     // In a real-world scenario, you would probably want to rank the memory types based on your
+///     // requirements, instead of picking the first one that satisfies them. Also, you have to
+///     // take the requirements of the resources you want to allocate memory for into consideration.
+///     .find_map(|(index, MemoryType { property_flags, .. })| {
+///         property_flags.device_local.then_some(index)
+///     })
+///     .unwrap() as u32;
+///
+/// let region = MemoryAlloc::new_root(
+///     DeviceMemory::allocate(
+///         device.clone(),
+///         MemoryAllocateInfo {
+///             allocation_size: 64 * 1024 * 1024,
+///             memory_type_index,
+///             ..Default::default()
+///         },
+///     )
+///     .unwrap(),
+/// )
+/// .unwrap();
+///
+/// // You can now feed `region` into any suballocator.
+/// ```
+///
+/// # Implementing the trait
+///
+/// Please don't.
+///
+/// [allocations]: MemoryAlloc
+/// [pages]: super#pages
+pub unsafe trait Suballocator: DeviceOwned {
+    /// Whether this allocator needs to block or not.
+    ///
+    /// This is used by the [`GenericMemoryAllocator`] to specialize the allocation strategy to the
+    /// suballocator at compile time.
+    ///
+    /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
+    const IS_BLOCKING: bool;
+
+    /// Whether the allocator needs [`cleanup`] to be called before memory can be released.
+    ///
+    /// This is used by the [`GenericMemoryAllocator`] to specialize the allocation strategy to the
+    /// suballocator at compile time.
+    ///
+    /// [`cleanup`]: Self::cleanup
+    /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
+    const NEEDS_CLEANUP: bool;
+
+    /// Creates a new suballocator for the given [region].
+    ///
+    /// [region]: Self#regions
+    fn new(region: MemoryAlloc) -> Self
+    where
+        Self: Sized;
+
+    /// Creates a new suballocation within the [region].
+    ///
+    /// [region]: Self#regions
+    fn allocate(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError>;
+
+    /// Creates a new suballocation within the [region] without checking the parameters.
+    ///
+    /// # Safety
+    ///
+    /// - `create_info.size` must not be zero.
+    /// - `create_info.alignment` must not be zero.
+    /// - `create_info.alignment` must be a power of two.
+    ///
+    /// [region]: Self#regions
+    /// [`allocate`]: Self::allocate
+    #[cfg_attr(not(feature = "document_unchecked"), doc(hidden))]
+    unsafe fn allocate_unchecked(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError>;
+
+    /// Returns a reference to the underlying [region].
+    ///
+    /// [region]: Self#regions
+    fn region(&self) -> &MemoryAlloc;
+
+    /// Returns the underlying [region] if there are no other strong references to the allocator,
+    /// otherwise hands you back the allocator wrapped in [`Err`]. Allocations made with the
+    /// allocator count as references for as long as they are alive.
+    ///
+    /// [region]: Self#regions
+    fn try_into_region(self) -> Result<MemoryAlloc, Self>
+    where
+        Self: Sized;
+
+    /// Returns the total amount of free space that is left in the [region].
+    ///
+    /// [region]: Self#regions
+    fn free_size(&self) -> DeviceSize;
+
+    /// Tries to free some space, if applicable.
+    fn cleanup(&mut self);
+}
+
+/// Parameters to create a new [allocation] using a [suballocator].
+///
+/// [allocation]: MemoryAlloc
+/// [suballocator]: Suballocator
+#[derive(Clone, Debug)]
+pub struct SuballocationCreateInfo {
+    /// Size of the allocation in bytes.
+    ///
+    /// The default value is `0`, which must be overridden.
+    pub size: DeviceSize,
+
+    /// [Alignment] of the allocation in bytes. Must be a power of 2.
+    ///
+    /// The default value is `0`, which must be overridden.
+    ///
+    /// [Alignment]: super#alignment
+    pub alignment: DeviceSize,
+
+    /// Type of resources that can be bound to the allocation.
+    ///
+    /// The default value is [`AllocationType::Unknown`].
+    pub allocation_type: AllocationType,
+
+    pub _ne: crate::NonExhaustive,
+}
+
+impl Default for SuballocationCreateInfo {
+    #[inline]
+    fn default() -> Self {
+        SuballocationCreateInfo {
+            size: 0,
+            alignment: 0,
+            allocation_type: AllocationType::Unknown,
+            _ne: crate::NonExhaustive(()),
+        }
+    }
+}
+
+impl From<AllocationCreateInfo<'_>> for SuballocationCreateInfo {
+    #[inline]
+    fn from(create_info: AllocationCreateInfo<'_>) -> Self {
+        SuballocationCreateInfo {
+            size: create_info.requirements.size,
+            alignment: create_info.requirements.alignment,
+            allocation_type: create_info.allocation_type,
+            _ne: crate::NonExhaustive(()),
+        }
+    }
+}
+
+impl SuballocationCreateInfo {
+    pub(super) fn validate(&self) {
+        assert!(self.size > 0);
+        assert!(self.alignment > 0);
+        assert!(self.alignment.is_power_of_two());
+    }
+}
+
+/// Tells the [suballocator] what type of resource will be bound to the allocation, so that it can
+/// optimize memory usage while still respecting the [buffer-image granularity].
+///
+/// [suballocator]: Suballocator
+/// [buffer-image granularity]: super#buffer-image-granularity
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
+pub enum AllocationType {
+    /// The type of resource is unknown; it might be either linear or non-linear. What this means
+    /// is that allocations created with this type must always be aligned to the buffer-image
+    /// granularity.
+    Unknown = 0,
+
+    /// The resource is linear, e.g. buffers, linear images. A linear allocation following another
+    /// linear allocation never needs to be aligned to the buffer-image granularity.
+    Linear = 1,
+
+    /// The resource is non-linear, e.g. optimal images. A non-linear allocation following another
+    /// non-linear allocation never needs to be aligned to the buffer-image granularity.
+    NonLinear = 2,
+}
+
+impl From<ImageTiling> for AllocationType {
+    #[inline]
+    fn from(tiling: ImageTiling) -> Self {
+        match tiling {
+            ImageTiling::Optimal => AllocationType::NonLinear,
+            ImageTiling::Linear => AllocationType::Linear,
+        }
+    }
+}
+
+/// Error that can be returned when using a [suballocator].
+///
+/// [suballocator]: Suballocator
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum SuballocationCreationError {
+    /// There is no more space available in the region.
+    OutOfRegionMemory,
+
+    /// The region has enough free space to satisfy the request but is too fragmented.
+    FragmentedRegion,
+
+    /// The allocation was larger than the allocator's block size, meaning that this error would
+    /// arise with the parameters no matter the state the allocator was in.
+    ///
+    /// This can be used to let the [`GenericMemoryAllocator`] know that allocating a new block of
+    /// [`DeviceMemory`] and trying to suballocate it with the same parameters would not solve the
+    /// issue.
+    ///
+    /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
+    BlockSizeExceeded,
+}
+
+impl Error for SuballocationCreationError {}
+
+impl Display for SuballocationCreationError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{}",
+            match self {
+                Self::OutOfRegionMemory => "out of region memory",
+                Self::FragmentedRegion => "the region is too fragmented",
+                Self::BlockSizeExceeded =>
+                    "the allocation size was greater than the suballocator's block size",
+            }
+        )
+    }
+}
+
+/// A [suballocator] that uses the most generic [free-list].
+///
+/// The strength of this allocator is that it can create and free allocations completely
+/// dynamically, which means they can be any size and created/freed in any order. The downside is
+/// that this always leads to horrific [external fragmentation] the more such dynamic allocations
+/// are made. Therefore, this allocator is best suited for long-lived allocations. If you need
+/// to create allocations of various sizes, but can't afford this fragmentation, then the
+/// [`BuddyAllocator`] is your best buddy. If you need to create allocations which share a similar
+/// size, consider the [`PoolAllocator`]. Lastly, if you need to allocate very often, then
+/// [`BumpAllocator`] is best suited.
+///
+/// See also [the `Suballocator` implementation].
+///
+/// # Algorithm
+///
+/// The free-list stores suballocations which can have any offset and size. When an allocation
+/// request is made, the list is searched using the best-fit strategy, meaning that the smallest
+/// suballocation that fits the request is chosen. If required, the chosen suballocation is trimmed
+/// at the ends and the ends are returned to the free-list. As such, no [internal fragmentation]
+/// occurs. The front might need to be trimmed because of [alignment requirements] and the end
+/// because of a larger than required size. When an allocation is freed, the allocator checks if
+/// the adjacent suballocations are free, and if so it coalesces them into a bigger one before
+/// putting it in the free-list.
+///
+/// # Efficiency
+///
+/// The allocator is synchronized internally with a lock, which is held only for a very short
+/// period each time an allocation is created and freed. The free-list is sorted by size, which
+/// means that when allocating, finding a best-fit is always possible in *O*(log(*n*)) time in the
+/// worst case. When freeing, the coalescing requires us to remove the adjacent free suballocations
+/// from the free-list, which is *O*(log(*n*)), and insert the possibly coalesced suballocation
+/// into the free-list, which has the same time complexity, so in total freeing is *O*(log(*n*)).
+///
+/// There is one notable edge-case: after the allocator finds a best-fit, it is possible that it
+/// needs to align the suballocation's offset to a higher value, after which the requested size
+/// might no longer fit. In such a case, the next free suballocation in sorted order is tried until
+/// a fit is successful. If this issue is encountered with all candidates, then the time complexity
+/// would be *O*(*n*). However, this scenario is extremely unlikely, which is why we are not
+/// considering it in the above analysis. Additionally, if your free-list is filled with
+/// allocations that all have the same size then that seems pretty sus. Sounds like you're in dire
+/// need of a `PoolAllocator`.
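The best-fit lookup described above boils down to a binary search over a size-sorted list. A standalone sketch of just that step (not the allocator's actual code, which additionally tracks node IDs):

```rust
/// Returns the index of the smallest entry in a size-sorted list that can hold
/// `request`, if any. With duplicates, any matching index may be returned,
/// which is why the real allocator then scans neighbors for the exact node.
fn best_fit_index(sizes_sorted: &[u64], request: u64) -> Option<usize> {
    match sizes_sorted.binary_search(&request) {
        // Exact fit.
        Ok(index) => Some(index),
        // Next-best fit, if any entry that large exists.
        Err(index) if index < sizes_sorted.len() => Some(index),
        Err(_) => None,
    }
}

fn main() {
    assert_eq!(best_fit_index(&[64, 128, 512], 100), Some(1));
    assert_eq!(best_fit_index(&[64, 128, 512], 1024), None);
}
```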
+///
+/// # Examples
+///
+/// Most commonly you will not want to use this suballocator directly but rather use it within
+/// [`GenericMemoryAllocator`], having one global [`StandardMemoryAllocator`] for most if not all
+/// of your allocation needs.
+///
+/// Basic usage as a global allocator for long-lived resources:
+///
+/// ```
+/// use vulkano::format::Format;
+/// use vulkano::image::{ImageDimensions, ImmutableImage};
+/// use vulkano::memory::allocator::StandardMemoryAllocator;
+/// # let device: std::sync::Arc<vulkano::device::Device> = return;
+///
+/// let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
+///
+/// # fn read_textures() -> Vec<Vec<u8>> { Vec::new() }
+/// # let mut command_buffer_builder: vulkano::command_buffer::AutoCommandBufferBuilder<vulkano::command_buffer::PrimaryAutoCommandBuffer> = return;
+/// // Allocate some resources.
+/// let textures_data: Vec<Vec<u8>> = read_textures();
+/// let textures = textures_data.into_iter().map(|data| {
+///     ImmutableImage::from_iter(
+///         &memory_allocator,
+///         data,
+///         ImageDimensions::Dim2d {
+///             width: 1024,
+///             height: 1024,
+///             array_layers: 1,
+///         },
+///         1.into(),
+///         Format::R8G8B8A8_UNORM,
+///         &mut command_buffer_builder,
+///     )
+///     .unwrap()
+/// });
+/// ```
+///
+/// For use in allocating buffers for [`CpuBufferPool`]:
+///
+/// ```
+/// use std::sync::Arc;
+/// use vulkano::buffer::CpuBufferPool;
+/// use vulkano::memory::allocator::StandardMemoryAllocator;
+/// # let device: std::sync::Arc<vulkano::device::Device> = return;
+///
+/// // We need to wrap the allocator in an `Arc` so that we can share ownership of it.
+/// let memory_allocator = Arc::new(StandardMemoryAllocator::new_default(device.clone()));
+/// let buffer_pool = CpuBufferPool::<u32>::upload(memory_allocator.clone());
+///
+/// // You can continue using `memory_allocator` for other things.
+/// ```
+///
+/// Sometimes, it is necessary to suballocate an allocation.
+/// If you don't want to allocate new
+/// [`DeviceMemory`] blocks to suballocate, perhaps because of concerns of memory wastage or
+/// allocation efficiency, you can use your existing global `StandardMemoryAllocator` to allocate
+/// regions for your suballocation needs:
+///
+/// ```
+/// use vulkano::memory::allocator::{MemoryAllocator, StandardMemoryAllocator, SuballocationCreateInfo};
+///
+/// # let device: std::sync::Arc<vulkano::device::Device> = return;
+/// let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
+///
+/// # let memory_type_index = 0;
+/// let region = memory_allocator.allocate_from_type(
+///     // When choosing the index, you have to make sure that the memory type is allowed for the
+///     // type of resource that you want to bind the suballocations to.
+///     memory_type_index,
+///     SuballocationCreateInfo {
+///         // This will be the size of your region.
+///         size: 16 * 1024 * 1024,
+///         // It generally does not matter what the alignment is, because you're going to
+///         // suballocate the allocation anyway, and not bind it directly.
+///         alignment: 1,
+///         ..Default::default()
+///     },
+/// )
+/// .unwrap();
+///
+/// // You can now feed the `region` into any suballocator.
+/// ```
+///
+/// [suballocator]: Suballocator
+/// [free-list]: Suballocator#free-lists
+/// [external fragmentation]: super#external-fragmentation
+/// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<FreeListAllocator>
+/// [internal fragmentation]: super#internal-fragmentation
+/// [alignment requirements]: super#alignment
+/// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
+/// [`StandardMemoryAllocator`]: super::StandardMemoryAllocator
+/// [`CpuBufferPool`]: crate::buffer::CpuBufferPool
+#[derive(Debug)]
+pub struct FreeListAllocator {
+    region: MemoryAlloc,
+    device_memory: Arc<DeviceMemory>,
+    buffer_image_granularity: DeviceSize,
+    atom_size: DeviceSize,
+    // Total memory remaining in the region.
+    free_size: AtomicU64,
+    inner: Mutex<FreeListAllocatorInner>,
+}
+
+impl FreeListAllocator {
+    /// Creates a new `FreeListAllocator` for the given [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `region.allocation_type` is not [`AllocationType::Unknown`]. This is done to
+    ///   avoid checking for a special case of [buffer-image granularity] conflict.
+    /// - Panics if `region` is a [dedicated allocation].
+    ///
+    /// [region]: Suballocator#regions
+    /// [buffer-image granularity]: super#buffer-image-granularity
+    /// [dedicated allocation]: MemoryAlloc::is_dedicated
+    #[inline]
+    pub fn new(region: MemoryAlloc) -> Arc<Self> {
+        // NOTE(Marc): This number was pulled straight out of my a-
+        const AVERAGE_ALLOCATION_SIZE: DeviceSize = 64 * 1024;
+
+        assert!(region.allocation_type == AllocationType::Unknown);
+
+        let device_memory = region
+            .root()
+            .expect("dedicated allocations can't be suballocated")
+            .clone();
+        let buffer_image_granularity = device_memory
+            .device()
+            .physical_device()
+            .properties()
+            .buffer_image_granularity;
+        let atom_size = region.atom_size.map(NonZeroU64::get).unwrap_or(1);
+        let free_size = AtomicU64::new(region.size);
+
+        let capacity = (region.size / AVERAGE_ALLOCATION_SIZE) as usize;
+        let mut nodes = host::PoolAllocator::new(capacity + 64);
+        let mut free_list = Vec::with_capacity(capacity / 16 + 16);
+        let root_id = nodes.allocate(SuballocationListNode {
+            prev: None,
+            next: None,
+            offset: 0,
+            size: region.size,
+            ty: SuballocationType::Free,
+        });
+        free_list.push(root_id);
+        let inner = Mutex::new(FreeListAllocatorInner { nodes, free_list });
+
+        Arc::new(FreeListAllocator {
+            region,
+            device_memory,
+            buffer_image_granularity,
+            atom_size,
+            free_size,
+            inner,
+        })
+    }
+
+    fn free(&self, id: SlotId) {
+        let mut inner = self.inner.lock();
+        self.free_size
+            .fetch_add(inner.nodes.get(id).size, Ordering::Release);
+        inner.nodes.get_mut(id).ty = SuballocationType::Free;
+        inner.coalesce(id);
+        inner.free(id);
+    }
+}
+
+unsafe impl Suballocator for Arc<FreeListAllocator> {
+    const IS_BLOCKING: bool = true;
+
+    const NEEDS_CLEANUP: bool = false;
+
+    #[inline]
+    fn new(region: MemoryAlloc) -> Self {
+        FreeListAllocator::new(region)
+    }
+
+    /// Creates a new suballocation within the [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `create_info.size` is zero.
+    /// - Panics if `create_info.alignment` is zero.
+    /// - Panics if `create_info.alignment` is not a power of two.
+    ///
+    /// # Errors
+    ///
+    /// - Returns [`OutOfRegionMemory`] if there are no free suballocations large enough to satisfy
+    ///   the request.
+    /// - Returns [`FragmentedRegion`] if a suballocation large enough to satisfy the request could
+    ///   have been formed, but wasn't because of [external fragmentation].
+    ///
+    /// [region]: Suballocator#regions
+    /// [`allocate`]: Suballocator::allocate
+    /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
+    /// [`FragmentedRegion`]: SuballocationCreationError::FragmentedRegion
+    /// [external fragmentation]: super#external-fragmentation
+    #[inline]
+    fn allocate(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        create_info.validate();
+
+        unsafe { self.allocate_unchecked(create_info) }
+    }
+
+    #[inline]
+    unsafe fn allocate_unchecked(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        fn has_granularity_conflict(prev_ty: SuballocationType, ty: AllocationType) -> bool {
+            if prev_ty == SuballocationType::Free {
+                false
+            } else if prev_ty == SuballocationType::Unknown {
+                true
+            } else {
+                prev_ty != ty.into()
+            }
+        }
+
+        let SuballocationCreateInfo {
+            size,
+            alignment,
+            allocation_type,
+            _ne: _,
+        } = create_info;
+
+        let alignment = DeviceSize::max(alignment, self.atom_size);
+        let mut inner = self.inner.lock();
+
+        match inner.free_list.last() {
+            Some(&last) if inner.nodes.get(last).size >= size => {
+                let index = match inner
+                    .free_list
+                    .binary_search_by_key(&size, |&x| inner.nodes.get(x).size)
+                {
+                    // Exact fit.
+                    Ok(index) => index,
+                    // Next-best fit. Note that `index == free_list.len()` cannot be the case,
+                    // because we checked that the free-list contains a suballocation that is big
+                    // enough.
+                    Err(index) => index,
+                };
+
+                for &id in &inner.free_list[index..] {
+                    let suballoc = inner.nodes.get(id);
+                    let mut offset = align_up(self.region.offset + suballoc.offset, alignment);
+
+                    if let Some(prev_id) = suballoc.prev {
+                        let prev = inner.nodes.get(prev_id);
+
+                        if are_blocks_on_same_page(
+                            prev.offset,
+                            prev.size,
+                            offset,
+                            self.buffer_image_granularity,
+                        ) && has_granularity_conflict(prev.ty, allocation_type)
+                        {
+                            offset = align_up(offset, self.buffer_image_granularity);
+                        }
+                    }
+
+                    if offset + size <= suballoc.offset + suballoc.size {
+                        inner.allocate(id);
+                        inner.split(id, offset, size);
+                        inner.nodes.get_mut(id).ty = allocation_type.into();
+                        self.free_size.fetch_sub(size, Ordering::Release);
+
+                        return Ok(MemoryAlloc {
+                            offset,
+                            size,
+                            allocation_type,
+                            mapped_ptr: self.region.mapped_ptr.and_then(|ptr| {
+                                NonNull::new(
+                                    ptr.as_ptr().add((offset - self.region.offset) as usize),
+                                )
+                            }),
+                            atom_size: self.region.atom_size,
+                            parent: AllocParent::FreeList {
+                                allocator: self.clone(),
+                                id,
+                            },
+                        });
+                    }
+                }
+
+                // There is not enough space due to alignment requirements.
+                Err(SuballocationCreationError::OutOfRegionMemory)
+            }
+            // There would be enough space if the region wasn't so fragmented. :(
+            Some(_) if self.free_size() >= size => {
+                Err(SuballocationCreationError::FragmentedRegion)
+            }
+            // There is not enough space.
+            Some(_) => Err(SuballocationCreationError::OutOfRegionMemory),
+            // There is no space at all.
+            None => Err(SuballocationCreationError::OutOfRegionMemory),
+        }
+    }
+
+    #[inline]
+    fn region(&self) -> &MemoryAlloc {
+        &self.region
+    }
+
+    #[inline]
+    fn try_into_region(self) -> Result<MemoryAlloc, Self> {
+        Arc::try_unwrap(self).map(|allocator| allocator.region)
+    }
+
+    #[inline]
+    fn free_size(&self) -> DeviceSize {
+        self.free_size.load(Ordering::Acquire)
+    }
+
+    #[inline]
+    fn cleanup(&mut self) {}
+}
+
+unsafe impl DeviceOwned for FreeListAllocator {
+    #[inline]
+    fn device(&self) -> &Arc<Device> {
+        self.device_memory.device()
+    }
+}
+
+#[derive(Debug)]
+struct FreeListAllocatorInner {
+    nodes: host::PoolAllocator<SuballocationListNode>,
+    // Free suballocations sorted by size in ascending order.
+    // This means we can always find a best-fit in *O*(log(*n*)) time in the worst case, and
+    // iterating in order is very efficient.
+    free_list: Vec<SlotId>,
+}
+
+#[derive(Clone, Copy, Debug)]
+struct SuballocationListNode {
+    prev: Option<SlotId>,
+    next: Option<SlotId>,
+    offset: DeviceSize,
+    size: DeviceSize,
+    ty: SuballocationType,
+}
+
+/// Tells us if a suballocation is free, and if not, whether it is linear or not. This is needed in
+/// order to be able to respect the buffer-image granularity.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum SuballocationType {
+    Unknown,
+    Linear,
+    NonLinear,
+    Free,
+}
+
+impl From<AllocationType> for SuballocationType {
+    fn from(ty: AllocationType) -> Self {
+        match ty {
+            AllocationType::Unknown => SuballocationType::Unknown,
+            AllocationType::Linear => SuballocationType::Linear,
+            AllocationType::NonLinear => SuballocationType::NonLinear,
+        }
+    }
+}
+
+impl FreeListAllocatorInner {
+    /// Removes the target suballocation from the free-list. The free-list must contain it.
+    fn allocate(&mut self, node_id: SlotId) {
+        debug_assert!(self.free_list.contains(&node_id));
+
+        let node = self.nodes.get(node_id);
+
+        match self
+            .free_list
+            .binary_search_by_key(&node.size, |&x| self.nodes.get(x).size)
+        {
+            Ok(index) => {
+                // If there are multiple free suballocations with the same size, the search might
+                // have returned any one, so we need to find the one corresponding to the target ID.
+                if self.free_list[index] == node_id {
+                    self.free_list.remove(index);
+                    return;
+                }
+
+                // Check all previous indices that point to suballocations with the same size.
+                {
+                    let mut index = index;
+                    loop {
+                        index = index.wrapping_sub(1);
+                        if let Some(&id) = self.free_list.get(index) {
+                            if id == node_id {
+                                self.free_list.remove(index);
+                                return;
+                            }
+                            if self.nodes.get(id).size != node.size {
+                                break;
+                            }
+                        } else {
+                            break;
+                        }
+                    }
+                }
+
+                // Check all next indices that point to suballocations with the same size.
+                {
+                    let mut index = index;
+                    loop {
+                        index += 1;
+                        if let Some(&id) = self.free_list.get(index) {
+                            if id == node_id {
+                                self.free_list.remove(index);
+                                return;
+                            }
+                            if self.nodes.get(id).size != node.size {
+                                break;
+                            }
+                        } else {
+                            break;
+                        }
+                    }
+                }
+
+                unreachable!();
+            }
+            Err(_) => unreachable!(),
+        }
+    }
+
+    /// Fits a suballocation inside the target one, splitting the target at the ends if required.
+ fn split(&mut self, node_id: SlotId, offset: DeviceSize, size: DeviceSize) { + let node = self.nodes.get(node_id); + + debug_assert!(node.ty == SuballocationType::Free); + debug_assert!(offset >= node.offset); + debug_assert!(offset + size <= node.offset + node.size); + + let padding_front = offset - node.offset; + let padding_back = node.offset + node.size - offset - size; + + if padding_front > 0 { + let padding = SuballocationListNode { + prev: node.prev, + next: Some(node_id), + offset: node.offset, + size: padding_front, + ty: SuballocationType::Free, + }; + let padding_id = self.nodes.allocate(padding); + + if let Some(prev_id) = padding.prev { + self.nodes.get_mut(prev_id).next = Some(padding_id); + } + + let node = self.nodes.get_mut(node_id); + node.prev = Some(padding_id); + node.offset = offset; + node.size -= padding.size; + + self.free(padding_id); + } + + if padding_back > 0 { + let padding = SuballocationListNode { + prev: Some(node_id), + next: node.next, + offset: offset + size, + size: padding_back, + ty: SuballocationType::Free, + }; + let padding_id = self.nodes.allocate(padding); + + if let Some(next_id) = padding.next { + self.nodes.get_mut(next_id).prev = Some(padding_id); + } + + let node = self.nodes.get_mut(node_id); + node.next = Some(padding_id); + node.size -= padding.size; + + self.free(padding_id); + } + } + + /// Inserts the target suballocation into the free-list. The free-list must not contain it + /// already. + fn free(&mut self, node_id: SlotId) { + debug_assert!(!self.free_list.contains(&node_id)); + + let node = self.nodes.get(node_id); + let index = match self + .free_list + .binary_search_by_key(&node.size, |&x| self.nodes.get(x).size) + { + Ok(index) => index, + Err(index) => index, + }; + self.free_list.insert(index, node_id); + } + + /// Coalesces the target (free) suballocation with adjacent ones that are also free. + fn coalesce(&mut self, node_id: SlotId) { + let node = self.nodes.get(node_id); + + debug_assert!(node.ty == SuballocationType::Free); + + if let Some(prev_id) = node.prev { + let prev = self.nodes.get(prev_id); + + if prev.ty == SuballocationType::Free { + self.allocate(prev_id); + self.nodes.free(prev_id); + + let node = self.nodes.get_mut(node_id); + node.prev = prev.prev; + node.offset = prev.offset; + node.size += prev.size; // nom nom nom + + if let Some(prev_id) = node.prev { + self.nodes.get_mut(prev_id).next = Some(node_id); + } + } + } + + if let Some(next_id) = node.next { + let next = self.nodes.get(next_id); + + if next.ty == SuballocationType::Free { + self.allocate(next_id); + self.nodes.free(next_id); + + let node = self.nodes.get_mut(node_id); + node.next = next.next; + node.size += next.size; + + if let Some(next_id) = node.next { + self.nodes.get_mut(next_id).prev = Some(node_id); + } + } + } + } +} + +/// A [suballocator] whose structure forms a binary tree of power-of-two-sized suballocations. +/// +/// That is, all allocation sizes are rounded up to the next power of two. This helps reduce +/// [external fragmentation] by a lot, at the expense of possibly severe [internal fragmentation] +/// if you're not careful. For example, if you needed an allocation size of 64MiB, you would be +/// wasting no memory. But with an allocation size of 70MiB, you would use a whole 128MiB instead, +/// wasting 45% of the memory. Use this algorithm if you need to create and free a lot of +/// allocations, which would cause too much external fragmentation when using +/// [`FreeListAllocator`]. 
+/// However, if the sizes of your allocations are more or less the same, then the
+/// [`PoolAllocator`] would be a better choice and would eliminate external fragmentation
+/// completely.
+///
+/// See also [the `Suballocator` implementation].
+///
+/// # Algorithm
+///
+/// Say you have a [region] of size 256MiB, and you want to allocate 14MiB. Assuming there are no
+/// existing allocations, the `BuddyAllocator` would split the 256MiB root *node* into two 128MiB
+/// nodes. These two nodes are called *buddies*. The allocator would then proceed to split the left
+/// node recursively until it wouldn't be able to fit the allocation anymore. In this example, that
+/// would happen after 4 splits and end up with a node size of 16MiB. Since the allocation
+/// requested was 14MiB, 2MiB would become internal fragmentation and be unusable for the lifetime
+/// of the allocation. When an allocation is freed, this process is done backwards, checking if the
+/// buddy of each node on the way up is free and if so they are coalesced.
+///
+/// Each possible node size has an *order*, with the smallest node size being of order 0 and the
+/// largest of the highest order. With this notion, node sizes are proportional to 2<sup>*n*</sup>
+/// where *n* is the order. The highest order is determined from the size of the region and a
+/// constant minimum node size, which we chose to be 16B: log(*region size* / 16) or
+/// equivalently log(*region size*) - 4 (assuming
+/// *region size* ≥ 16).
+///
+/// It's safe to say that this algorithm works best if you have some level of control over your
+/// allocation sizes, so that you don't end up allocating twice as much memory. An example of this
+/// would be when you need to allocate regions for other allocators, such as the `PoolAllocator` or
+/// the [`BumpAllocator`].
+///
+/// # Efficiency
+///
+/// The allocator is synchronized internally with a lock, which is held only for a very short
+/// period each time an allocation is created and freed. The time complexity of both allocation and
+/// freeing is *O*(*m*) in the worst case where *m* is the highest order, which equates to *O*(log
+/// (*n*)) where *n* is the size of the region.
+///
+/// # Examples
+///
+/// Basic usage together with [`GenericMemoryAllocator`], to allocate resources that have a
+/// moderately low life span (for example if you have a lot of images, each of which needs to be
+/// resized every now and then):
+///
+/// ```
+/// use std::sync::Arc;
+/// use vulkano::memory::allocator::{
+///     BuddyAllocator, GenericMemoryAllocator, GenericMemoryAllocatorCreateInfo,
+/// };
+///
+/// # let device: std::sync::Arc<vulkano::device::Device> = return;
+/// let memory_allocator = GenericMemoryAllocator::<Arc<BuddyAllocator>>::new(
+///     device.clone(),
+///     GenericMemoryAllocatorCreateInfo {
+///         // Your block sizes must be powers of two, because `BuddyAllocator` only accepts
+///         // power-of-two-sized regions.
+///         block_sizes: &[(0, 64 * 1024 * 1024)],
+///         ..Default::default()
+///     },
+/// );
+///
+/// // Now you can use `memory_allocator` to allocate whatever it is you need.
+/// ```
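The order arithmetic described in the algorithm section is compact enough to sketch standalone. The constant mirrors the 16-byte minimum node size mentioned above, but this is an illustration, not the allocator's own code:

```rust
const MIN_NODE_SIZE: u64 = 16;

/// Order of the node that a request of `size` bytes ends up in: round the size
/// up to a power of two, then count how many doublings of the minimum node
/// size that is.
fn order_of(size: u64) -> u32 {
    (size.max(MIN_NODE_SIZE).next_power_of_two() / MIN_NODE_SIZE).trailing_zeros()
}

fn main() {
    assert_eq!(order_of(16), 0); // The smallest node.
    assert_eq!(order_of(14 * 1024 * 1024), 20); // 14MiB rounds up to 16MiB = 2^20 * 16B.
}
```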
+///
+/// [suballocator]: Suballocator
+/// [internal fragmentation]: super#internal-fragmentation
+/// [external fragmentation]: super#external-fragmentation
+/// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<BuddyAllocator>
+/// [region]: Suballocator#regions
+/// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
+#[derive(Debug)]
+pub struct BuddyAllocator {
+    region: MemoryAlloc,
+    device_memory: Arc<DeviceMemory>,
+    buffer_image_granularity: DeviceSize,
+    atom_size: DeviceSize,
+    // Total memory remaining in the region.
+    free_size: AtomicU64,
+    inner: Mutex<BuddyAllocatorInner>,
+}
+
+impl BuddyAllocator {
+    const MIN_NODE_SIZE: DeviceSize = 16;
+
+    /// Arbitrary maximum number of orders, used to avoid a 2D `Vec`. Together with a minimum node
+    /// size of 16, this is enough for a 64GiB region.
+    const MAX_ORDERS: usize = 32;
+
+    /// Creates a new `BuddyAllocator` for the given [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `region.allocation_type` is not [`AllocationType::Unknown`]. This is done to
+    ///   avoid checking for a special case of [buffer-image granularity] conflict.
+    /// - Panics if `region.size` is not a power of two.
+    /// - Panics if `region.size` is not in the range \[16B, 64GiB\].
+    /// - Panics if `region` is a [dedicated allocation].
+    ///
+    /// [region]: Suballocator#regions
+    /// [buffer-image granularity]: super#buffer-image-granularity
+    /// [dedicated allocation]: MemoryAlloc::is_dedicated
+    #[inline]
+    pub fn new(region: MemoryAlloc) -> Arc<Self> {
+        const EMPTY_FREE_LIST: Vec<DeviceSize> = Vec::new();
+
+        let max_order = (region.size / Self::MIN_NODE_SIZE).trailing_zeros() as usize;
+
+        assert!(region.allocation_type == AllocationType::Unknown);
+        assert!(region.size.is_power_of_two());
+        assert!(region.size >= Self::MIN_NODE_SIZE && max_order < Self::MAX_ORDERS);
+
+        let device_memory = region
+            .root()
+            .expect("dedicated allocations can't be suballocated")
+            .clone();
+        let buffer_image_granularity = device_memory
+            .device()
+            .physical_device()
+            .properties()
+            .buffer_image_granularity;
+        let atom_size = region.atom_size.map(NonZeroU64::get).unwrap_or(1);
+        let free_size = AtomicU64::new(region.size);
+
+        let mut free_list = ArrayVec::new(max_order + 1, [EMPTY_FREE_LIST; Self::MAX_ORDERS]);
+        // The root node has the lowest offset and highest order, so it's the whole region.
+        free_list[max_order].push(region.offset);
+        let inner = Mutex::new(BuddyAllocatorInner { free_list });
+
+        Arc::new(BuddyAllocator {
+            region,
+            device_memory,
+            buffer_image_granularity,
+            atom_size,
+            free_size,
+            inner,
+        })
+    }
+
+    fn free(&self, min_order: usize, mut offset: DeviceSize) {
+        let mut inner = self.inner.lock();
+
+        // Try to coalesce nodes while incrementing the order.
+        for (order, free_list) in inner.free_list.iter_mut().enumerate().skip(min_order) {
+            let size = Self::MIN_NODE_SIZE << order;
+            let buddy_offset = ((offset - self.region.offset) ^ size) + self.region.offset;
+
+            match free_list.binary_search(&buddy_offset) {
+                // If the buddy is in the free-list, we can coalesce.
+                Ok(index) => {
+                    free_list.remove(index);
+                    offset = DeviceSize::min(offset, buddy_offset);
+                }
+                // Otherwise free the node.
+                Err(_) => {
+                    let index = match free_list.binary_search(&offset) {
+                        Ok(index) => index,
+                        Err(index) => index,
+                    };
+                    free_list.insert(index, offset);
+                    self.free_size
+                        .fetch_add(Self::MIN_NODE_SIZE << min_order, Ordering::Release);
+
+                    break;
+                }
+            }
+        }
+    }
+}
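The `free` method above relies on the classic XOR trick to locate a node's buddy: relative to the region start, two buddies with node size `size` differ only in the bit that `size` contributes to the offset. A tiny standalone illustration of just that arithmetic:

```rust
/// The buddy of the node at `offset` (relative to the region start) with node
/// size `size`: XOR flips between the left and right sibling.
fn buddy_offset(offset: u64, size: u64) -> u64 {
    offset ^ size
}

fn main() {
    assert_eq!(buddy_offset(0, 16), 16); // Left child maps to its right sibling.
    assert_eq!(buddy_offset(48, 16), 32); // And vice versa.
}
```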
+
+unsafe impl Suballocator for Arc<BuddyAllocator> {
+    const IS_BLOCKING: bool = true;
+
+    const NEEDS_CLEANUP: bool = false;
+
+    #[inline]
+    fn new(region: MemoryAlloc) -> Self {
+        BuddyAllocator::new(region)
+    }
+
+    /// Creates a new suballocation within the [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `create_info.size` is zero.
+    /// - Panics if `create_info.alignment` is zero.
+    /// - Panics if `create_info.alignment` is not a power of two.
+    ///
+    /// # Errors
+    ///
+    /// - Returns [`OutOfRegionMemory`] if there are no free nodes large enough to satisfy the
+    ///   request.
+    /// - Returns [`FragmentedRegion`] if a node large enough to satisfy the request could have
+    ///   been formed, but wasn't because of [external fragmentation].
+    ///
+    /// [region]: Suballocator#regions
+    /// [`allocate`]: Suballocator::allocate
+    /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
+    /// [`FragmentedRegion`]: SuballocationCreationError::FragmentedRegion
+    /// [external fragmentation]: super#external-fragmentation
+    #[inline]
+    fn allocate(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        create_info.validate();
+
+        unsafe { self.allocate_unchecked(create_info) }
+    }
+
+    #[inline]
+    unsafe fn allocate_unchecked(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        /// Returns the largest power of two smaller or equal to the input.
+        fn prev_power_of_two(val: DeviceSize) -> DeviceSize {
+            const MAX_POWER_OF_TWO: DeviceSize = 1 << (DeviceSize::BITS - 1);
+
+            MAX_POWER_OF_TWO
+                .checked_shr(val.leading_zeros())
+                .unwrap_or(0)
+        }
+
+        let SuballocationCreateInfo {
+            mut size,
+            mut alignment,
+            allocation_type,
+            _ne: _,
+        } = create_info;
+
+        if allocation_type == AllocationType::Unknown
+            || allocation_type == AllocationType::NonLinear
+        {
+            size = align_up(size, self.buffer_image_granularity);
+            alignment = DeviceSize::max(alignment, self.buffer_image_granularity);
+        }
+
+        let size = DeviceSize::max(size, BuddyAllocator::MIN_NODE_SIZE).next_power_of_two();
+        let alignment = DeviceSize::max(alignment, self.atom_size);
+        let min_order = (size / BuddyAllocator::MIN_NODE_SIZE).trailing_zeros() as usize;
+        let mut inner = self.inner.lock();
+
+        // Start searching at the lowest possible order going up.
+        for (order, free_list) in inner.free_list.iter_mut().enumerate().skip(min_order) {
+            for (index, &offset) in free_list.iter().enumerate() {
+                if offset % alignment == 0 {
+                    free_list.remove(index);
+
+                    // Go in the opposite direction, splitting nodes from higher orders. The lowest
+                    // order doesn't need any splitting.
+                    for (order, free_list) in inner
+                        .free_list
+                        .iter_mut()
+                        .enumerate()
+                        .skip(min_order)
+                        .take(order - min_order)
+                        .rev()
+                    {
+                        let size = BuddyAllocator::MIN_NODE_SIZE << order;
+                        let right_child = offset + size;
+
+                        // Insert the right child in sorted order.
+                        let (Ok(index) | Err(index)) = free_list.binary_search(&right_child);
+                        free_list.insert(index, right_child);
+
+                        // Repeat splitting for the left child if required in the next loop turn.
+                    }
+
+                    self.free_size.fetch_sub(size, Ordering::Release);
+
+                    return Ok(MemoryAlloc {
+                        offset,
+                        size: create_info.size,
+                        allocation_type,
+                        mapped_ptr: self.region.mapped_ptr.and_then(|ptr| {
+                            NonNull::new(ptr.as_ptr().add((offset - self.region.offset) as usize))
+                        }),
+                        atom_size: self.region.atom_size,
+                        parent: AllocParent::Buddy {
+                            allocator: self.clone(),
+                            order: min_order,
+                            offset, // The offset in the alloc itself can change.
+                        },
+                    });
+                }
+            }
+        }
+
+        if prev_power_of_two(self.free_size()) >= create_info.size {
+            // A node large enough could be formed if the region wasn't so fragmented.
+            Err(SuballocationCreationError::FragmentedRegion)
+        } else {
+            Err(SuballocationCreationError::OutOfRegionMemory)
+        }
+    }
+
+    #[inline]
+    fn region(&self) -> &MemoryAlloc {
+        &self.region
+    }
+
+    #[inline]
+    fn try_into_region(self) -> Result<MemoryAlloc, Self> {
+        Arc::try_unwrap(self).map(|allocator| allocator.region)
+    }
+
+    /// Returns the total amount of free space left in the [region] that is available to the
+    /// allocator, which means that [internal fragmentation] is excluded.
+    ///
+    /// [region]: Suballocator#regions
+    /// [internal fragmentation]: super#internal-fragmentation
+    #[inline]
+    fn free_size(&self) -> DeviceSize {
+        self.free_size.load(Ordering::Acquire)
+    }
+
+    #[inline]
+    fn cleanup(&mut self) {}
+}
+
+unsafe impl DeviceOwned for BuddyAllocator {
+    #[inline]
+    fn device(&self) -> &Arc<Device> {
+        self.device_memory.device()
+    }
+}
+
+#[derive(Debug)]
+struct BuddyAllocatorInner {
+    // Every order has its own free-list for convenience, so that we don't have to traverse a tree.
+    // Each free-list is sorted by offset because we want to find the first-fit as this strategy
+    // minimizes external fragmentation.
+    free_list: ArrayVec<Vec<DeviceSize>, { BuddyAllocator::MAX_ORDERS }>,
+}
+
+/// A [suballocator] using a pool of fixed-size blocks as a [free-list].
+///
+/// Since the size of the blocks is fixed, you cannot create allocations bigger than that. You can
+/// create smaller ones, though, which leads to more and more [internal fragmentation] the smaller
+/// the allocations get. This is generally a good trade-off, as internal fragmentation is nowhere
+/// near as hard to deal with as [external fragmentation].
+///
+/// See also [the `Suballocator` implementation].
+///
+/// # Algorithm
+///
+/// The free-list contains indices of blocks in the region that are available, so allocation
+/// consists merely of popping an index from the free-list. The same goes for freeing: all that is
+/// required is to push the index of the block into the free-list. Note that this is only possible
+/// because the blocks have a fixed size. Due to this one fact, the free-list doesn't need to be
+/// sorted or traversed. As long as there is a free block, it will do, no matter which block it is.
+///
+/// Since the `PoolAllocator` doesn't keep a list of suballocations that are currently in use,
+/// resolving [buffer-image granularity] conflicts on a case-by-case basis is not possible.
+/// Therefore, it is an all-or-nothing situation:
+///
+/// - you use the allocator for only one type of allocation, [`Linear`] or [`NonLinear`], or
+/// - you allow both but align the blocks to the granularity so that no conflicts can happen.
+///
+/// The way this is done is that every suballocation inherits the allocation type of the region.
+/// The latter is done by using a region whose allocation type is [`Unknown`]. You are discouraged
+/// from using this type if you can avoid it.
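As a hedged sketch of the first option (a pool dedicated to one allocation type), assuming `linear_region` is a hypothetical region that was created with `AllocationType::Linear`, and the same module re-exports as the other examples in this patch:

```rust
use vulkano::memory::allocator::{
    AllocationType, MemoryAlloc, PoolAllocator, SuballocationCreateInfo, Suballocator,
};

// `linear_region` is assumed to have allocation type `Linear`, so every block
// inherits that type and no buffer-image-granularity padding is needed.
fn linear_pool(linear_region: MemoryAlloc) {
    // A pool of 64 KiB blocks.
    let pool = PoolAllocator::<{ 64 * 1024 }>::new(linear_region);

    let _block = pool
        .allocate(SuballocationCreateInfo {
            size: 64 * 1024,
            alignment: 256,
            allocation_type: AllocationType::Linear,
            ..Default::default()
        })
        .unwrap();
}
```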
+///
+/// The block size can end up bigger than specified if the allocator is created with a region whose
+/// allocation type is `Unknown`. In that case all blocks are aligned to the buffer-image
+/// granularity, which may or may not significantly increase memory usage. Say for example
+/// your driver reports a granularity of 4KiB. If you need a block size of 8KiB, you would waste no
+/// memory. On the other hand, if you needed a block size of 6KiB, you would be wasting 25% of the
+/// memory. In such a scenario you are highly encouraged to use a different allocation type.
+///
+/// The reverse is also true: with an allocation type other than `Unknown`, not all memory within a
+/// block may be usable depending on the requested [suballocation]. For instance, with a block size
+/// of 1152B (9 * 128B) and a suballocation with `alignment: 256`, a block at an odd index could
+/// not utilize its first 128B, reducing its effective size to 1024B. This is usually only relevant
+/// with small block sizes, as [alignment requirements] are usually rather small, but it completely
+/// depends on the resource and driver.
+///
+/// In summary, the block size you choose has a significant impact on internal fragmentation due
+/// to the two reasons described above. You need to choose your block size carefully, *especially*
+/// if you require small allocations. Some rough guidelines:
+///
+/// - Always [align] your blocks to a sufficiently large power of 2. This does **not** mean your
+///   block size must be a power of two. For example with a block size of 3KiB, your blocks would
+///   be aligned to 1KiB.
+/// - Prefer not using the allocation type `Unknown`. You can always create as many
+///   `PoolAllocator`s as you like for different allocation types and sizes, and they can all work
+///   within the same memory block. You should be safe from fragmentation if your blocks are
+///   aligned to 1KiB.
+/// - If you must use the allocation type `Unknown`, then you should be safe from fragmentation on
+///   pretty much any driver if your blocks are aligned to 64KiB. Keep in mind that this might
+///   change any time as new devices appear or new drivers come out. Always look at the properties
+///   of the devices you want to support before relying on any such data.
+///
+/// # Efficiency
+///
+/// In theory, a pool allocator is the ideal one because it causes no external fragmentation, and
+/// both allocation and freeing are *O*(1). It also never needs to lock and hence also lends itself
+/// perfectly to concurrency. But of course, there is the trade-off that block sizes are not
+/// dynamic.
+///
+/// As you can imagine, the `PoolAllocator` is the perfect fit if you know the sizes of the
+/// allocations you will be making, and they are more or less in the same size class. But this
+/// allocation algorithm really shines when combined with others, as most do. For one, nothing is
+/// stopping you from having multiple `PoolAllocator`s for many different size classes. You could
+/// consider a pool of pools, by layering `PoolAllocator` with itself, but this would have the
+/// downside that the regions of the pools for all size classes would have to match. Usually this
+/// is not desired. If you want pools for different size classes to all have about the same number
+/// of blocks, or you even know that some size classes require more or fewer blocks (because of how
+/// many resources you will be allocating for each), then you need an allocator that can allocate
+/// regions of different sizes.
+/// You can use the [`FreeListAllocator`] for this if external
+/// fragmentation is not an issue; otherwise, you might consider using the [`BuddyAllocator`]. On
+/// the other hand, you might also want to consider having a `PoolAllocator` at the top of a
+/// [hierarchy]. Again, this allocator never needs to lock, making it *the* perfect fit for a
+/// global concurrent allocator, which hands out large regions which can then be suballocated
+/// locally on a thread, by the [`BumpAllocator`] for example.
+///
+/// # Examples
+///
+/// Basic usage together with [`GenericMemoryAllocator`]:
+///
+/// ```
+/// use std::sync::Arc;
+/// use vulkano::memory::allocator::{
+///     GenericMemoryAllocator, GenericMemoryAllocatorCreateInfo, PoolAllocator,
+/// };
+///
+/// # let device: std::sync::Arc<vulkano::device::Device> = return;
+/// let memory_allocator = GenericMemoryAllocator::<Arc<PoolAllocator<{ 64 * 1024 }>>>::new(
+///     device.clone(),
+///     GenericMemoryAllocatorCreateInfo {
+///         block_sizes: &[(0, 64 * 1024 * 1024)],
+///         ..Default::default()
+///     },
+/// );
+///
+/// // Now you can use `memory_allocator` to allocate whatever it is you need.
+/// ```
+///
+/// [suballocator]: Suballocator
+/// [free-list]: Suballocator#free-lists
+/// [internal fragmentation]: super#internal-fragmentation
+/// [external fragmentation]: super#external-fragmentation
+/// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<PoolAllocator<BLOCK_SIZE>>
+/// [region]: Suballocator#regions
+/// [buffer-image granularity]: super#buffer-image-granularity
+/// [`Linear`]: AllocationType::Linear
+/// [`NonLinear`]: AllocationType::NonLinear
+/// [`Unknown`]: AllocationType::Unknown
+/// [suballocation]: SuballocationCreateInfo
+/// [alignment requirements]: super#memory-requirements
+/// [align]: super#alignment
+/// [hierarchy]: Suballocator#memory-hierarchies
+/// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct PoolAllocator<const BLOCK_SIZE: DeviceSize> {
+    inner: PoolAllocatorInner,
+}
+
+impl<const BLOCK_SIZE: DeviceSize> PoolAllocator<BLOCK_SIZE> {
+    /// Creates a new `PoolAllocator` for the given [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `region.size < BLOCK_SIZE`.
+    /// - Panics if `region` is a [dedicated allocation].
+    ///
+    /// [region]: Suballocator#regions
+    /// [dedicated allocation]: MemoryAlloc::is_dedicated
+    #[inline]
+    pub fn new(
+        region: MemoryAlloc,
+        #[cfg(test)] buffer_image_granularity: DeviceSize,
+    ) -> Arc<Self> {
+        Arc::new(PoolAllocator {
+            inner: PoolAllocatorInner::new(
+                region,
+                BLOCK_SIZE,
+                #[cfg(test)]
+                buffer_image_granularity,
+            ),
+        })
+    }
+
+    /// Size of a block. Can be bigger than `BLOCK_SIZE` due to alignment requirements.
+    #[inline]
+    pub fn block_size(&self) -> DeviceSize {
+        self.inner.block_size
+    }
+
+    /// Total number of blocks available to the allocator. This is always equal to
+    /// `self.region().size() / self.block_size()`.
+    #[inline]
+    pub fn block_count(&self) -> usize {
+        self.inner.free_list.capacity()
+    }
+
+    /// Number of free blocks.
+    #[inline]
+    pub fn free_count(&self) -> usize {
+        self.inner.free_list.len()
+    }
+}
+
+unsafe impl<const BLOCK_SIZE: DeviceSize> Suballocator for Arc<PoolAllocator<BLOCK_SIZE>> {
+    const IS_BLOCKING: bool = false;
+
+    const NEEDS_CLEANUP: bool = false;
+
+    #[inline]
+    fn new(region: MemoryAlloc) -> Self {
+        PoolAllocator::new(
+            region,
+            #[cfg(test)]
+            1,
+        )
+    }
+
+    /// Creates a new suballocation within the [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `create_info.size` is zero.
+    /// - Panics if `create_info.alignment` is zero.
+    /// - Panics if `create_info.alignment` is not a power of two.
+    ///
+    /// # Errors
+    ///
+    /// - Returns [`OutOfRegionMemory`] if the [free-list] is empty.
+    /// - Returns [`BlockSizeExceeded`] if the allocation can't fit inside a block. Only the first
+    ///   block in the free-list is tried, which means that if one block isn't usable due to
+    ///   [internal fragmentation] but a different one would be, you still get this error. See the
+    ///   [type-level documentation] for details on how to properly configure your allocator.
+    ///
+    /// [region]: Suballocator#regions
+    /// [`allocate`]: Suballocator::allocate
+    /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
+    /// [`BlockSizeExceeded`]: SuballocationCreationError::BlockSizeExceeded
+    /// [free-list]: Suballocator#free-lists
+    /// [internal fragmentation]: super#internal-fragmentation
+    /// [type-level documentation]: PoolAllocator
+    #[inline]
+    fn allocate(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        create_info.validate();
+
+        unsafe { self.allocate_unchecked(create_info) }
+    }
+
+    #[inline]
+    unsafe fn allocate_unchecked(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        // SAFETY: `PoolAllocator<BLOCK_SIZE>` and `PoolAllocatorInner` have the same layout.
+        //
+        // This is not quite optimal, because we are always cloning the `Arc` even if allocation
+        // fails, in which case the `Arc` gets cloned and dropped for no reason. Unfortunately,
+        // there is currently no way to turn `&Arc<T>` into `&Arc<U>` that is sound.
+        Arc::from_raw(Arc::into_raw(self.clone()).cast::<PoolAllocatorInner>())
+            .allocate_unchecked(create_info)
+    }
+
+    #[inline]
+    fn region(&self) -> &MemoryAlloc {
+        &self.inner.region
+    }
+
+    #[inline]
+    fn try_into_region(self) -> Result<MemoryAlloc, Self> {
+        Arc::try_unwrap(self).map(|allocator| allocator.inner.region)
+    }
+
+    #[inline]
+    fn free_size(&self) -> DeviceSize {
+        self.free_count() as DeviceSize * self.block_size()
+    }
+
+    #[inline]
+    fn cleanup(&mut self) {}
+}
+
+unsafe impl<const BLOCK_SIZE: DeviceSize> DeviceOwned for PoolAllocator<BLOCK_SIZE> {
+    #[inline]
+    fn device(&self) -> &Arc<Device> {
+        self.inner.device_memory.device()
+    }
+}
+
+#[derive(Debug)]
+struct PoolAllocatorInner {
+    region: MemoryAlloc,
+    device_memory: Arc<DeviceMemory>,
+    atom_size: DeviceSize,
+    block_size: DeviceSize,
+    // Unsorted list of free block indices.
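+    // The `ArrayQueue` is crossbeam's bounded lock-free queue, which is what makes both
+    // allocating (a `pop`) and freeing (a `push`) entirely lock-free for this allocator.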
+    free_list: ArrayQueue<DeviceSize>,
+}
+
+impl PoolAllocatorInner {
+    fn new(
+        region: MemoryAlloc,
+        mut block_size: DeviceSize,
+        #[cfg(test)] buffer_image_granularity: DeviceSize,
+    ) -> Self {
+        let device_memory = region
+            .root()
+            .expect("dedicated allocations can't be suballocated")
+            .clone();
+        #[cfg(not(test))]
+        let buffer_image_granularity = device_memory
+            .device()
+            .physical_device()
+            .properties()
+            .buffer_image_granularity;
+        let atom_size = region.atom_size.map(NonZeroU64::get).unwrap_or(1);
+        if region.allocation_type == AllocationType::Unknown {
+            block_size = align_up(block_size, buffer_image_granularity);
+        }
+
+        let block_count = region.size / block_size;
+        let free_list = ArrayQueue::new(block_count as usize);
+        for i in 0..block_count {
+            free_list.push(i).unwrap();
+        }
+
+        PoolAllocatorInner {
+            region,
+            device_memory,
+            atom_size,
+            block_size,
+            free_list,
+        }
+    }
+
+    unsafe fn allocate_unchecked(
+        self: Arc<Self>,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        let SuballocationCreateInfo {
+            size,
+            alignment,
+            allocation_type: _,
+            _ne: _,
+        } = create_info;
+
+        let alignment = DeviceSize::max(alignment, self.atom_size);
+        let index = self
+            .free_list
+            .pop()
+            .ok_or(SuballocationCreationError::OutOfRegionMemory)?;
+        let unaligned_offset = index * self.block_size;
+        let offset = align_up(unaligned_offset, alignment);
+
+        if offset + size > unaligned_offset + self.block_size {
+            self.free_list.push(index).unwrap();
+
+            return Err(SuballocationCreationError::BlockSizeExceeded);
+        }
+
+        Ok(MemoryAlloc {
+            offset,
+            size,
+            allocation_type: self.region.allocation_type,
+            mapped_ptr: self.region.mapped_ptr.and_then(|ptr| {
+                NonNull::new(ptr.as_ptr().add((offset - self.region.offset) as usize))
+            }),
+            atom_size: self.region.atom_size,
+            parent: AllocParent::Pool {
+                allocator: self,
+                index,
+            },
+        })
+    }
+
+    fn free(&self, index: DeviceSize) {
+        self.free_list.push(index).unwrap();
+    }
+}
+
+/// A [suballocator] which can allocate dynamically, but can only free all allocations at once.
+///
+/// With bump allocation, the used-up space increases linearly as allocations are made, and
+/// allocations can never be freed individually, which is why this algorithm is also called *linear
+/// allocation*. It is also known as *arena allocation*.
+///
+/// `BumpAllocator`s are best suited for very short-lived (say a few frames at best) resources that
+/// need to be allocated often (say each frame), to really take advantage of the performance gains.
+/// For creating long-lived allocations, [`FreeListAllocator`] is best suited. The way you would
+/// typically use this allocator is to have one for each frame in flight. At the start of a frame,
+/// you reset it and allocate your resources with it. You write to the resources, render with them,
+/// and drop them at the end of the frame.
+///
+/// See also [the `Suballocator` implementation].
+///
+/// # Algorithm
+///
+/// What happens is that every time you make an allocation, you receive one with an offset
+/// corresponding to the *free start* within the [region], and then the free start is *bumped*, so
+/// that following allocations wouldn't alias it. As you can imagine, this is **extremely fast**,
+/// because it doesn't need to keep a [free-list]. It only needs to do a few additions and
+/// comparisons. But beware, **fast is about all this is**. It is horribly memory inefficient when
+/// used wrong, and is very susceptible to [memory leaks].
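+///
+/// Stripped of allocation types and granularity handling, the bookkeeping amounts to a couple of
+/// additions and comparisons. The following is a simplified, single-threaded sketch of the idea,
+/// not the actual implementation, which packs this state into an atomic; `Bump` and its fields
+/// are illustrative names only:
+///
+/// ```
+/// fn align_up(val: u64, alignment: u64) -> u64 {
+///     // `alignment` must be a power of two.
+///     (val + alignment - 1) & !(alignment - 1)
+/// }
+///
+/// struct Bump {
+///     free_start: u64,
+///     size: u64,
+/// }
+///
+/// impl Bump {
+///     fn allocate(&mut self, size: u64, alignment: u64) -> Option<u64> {
+///         let offset = align_up(self.free_start, alignment);
+///         if offset + size > self.size {
+///             return None; // Out of region memory.
+///         }
+///         self.free_start = offset + size; // *Bump* the free start.
+///         Some(offset)
+///     }
+/// }
+///
+/// let mut bump = Bump { free_start: 0, size: 1024 };
+/// assert_eq!(bump.allocate(100, 64), Some(0));
+/// assert_eq!(bump.allocate(100, 64), Some(128)); // 100 rounded up to the next multiple of 64.
+/// ```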
+///
+/// Once you know that you are done with the allocations, meaning you know they have all been
+/// dropped, you can safely reset the allocator using the [`try_reset`] method as long as the
+/// allocator is not shared between threads. It is hard to safely reset a bump allocator that is
+/// used concurrently. In such a scenario it's best not to reset it at all and instead drop it once
+/// it reaches the end of the [region], freeing the region to a higher level in the [hierarchy]
+/// once all threads have dropped their reference to the allocator. This is one of the reasons you
+/// are generally advised to use one `BumpAllocator` per thread if you can.
+///
+/// # Efficiency
+///
+/// Allocation is *O*(1), and so is resetting the allocator (freeing all allocations). Allocation
+/// is always lock-free, and most of the time even wait-free. The only case in which it is not
+/// wait-free is if a lot of allocations are made concurrently, which results in CPU-level
+/// contention. Therefore, if you for example need to allocate a lot of buffers each frame from
+/// multiple threads, you might get better performance by using one `BumpAllocator` per thread.
+///
+/// The reason synchronization can be avoided entirely is that the created allocations can be
+/// dropped without needing to talk back to the allocator to free anything. The other allocation
+/// algorithms all have a free-list which needs to be modified once an allocation is dropped. Since
+/// Vulkano's buffers and images are `Sync`, that means that even if the allocator only allocates
+/// from one thread, it can still be used to free from multiple threads.
+///
+/// [suballocator]: Suballocator
+/// [the `Suballocator` implementation]: Suballocator#impl-Suballocator-for-Arc<BumpAllocator>
+/// [region]: Suballocator#regions
+/// [free-list]: Suballocator#free-lists
+/// [memory leaks]: super#leakage
+/// [`try_reset`]: Self::try_reset
+/// [hierarchy]: Suballocator#memory-hierarchies
+#[derive(Debug)]
+pub struct BumpAllocator {
+    region: MemoryAlloc,
+    device_memory: Arc<DeviceMemory>,
+    buffer_image_granularity: DeviceSize,
+    atom_size: DeviceSize,
+    // Encodes the previous allocation type in the 2 least significant bits and the free start in
+    // the rest.
+    state: AtomicU64,
+}
+
+impl BumpAllocator {
+    /// Creates a new `BumpAllocator` for the given [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `region` is a [dedicated allocation].
+    ///
+    /// [region]: Suballocator#regions
+    /// [dedicated allocation]: MemoryAlloc::is_dedicated
+    #[inline]
+    pub fn new(region: MemoryAlloc) -> Arc<Self> {
+        let device_memory = region
+            .root()
+            .expect("dedicated allocations can't be suballocated")
+            .clone();
+        let buffer_image_granularity = device_memory
+            .device()
+            .physical_device()
+            .properties()
+            .buffer_image_granularity;
+        let atom_size = region.atom_size.map(NonZeroU64::get).unwrap_or(1);
+        let state = AtomicU64::new(region.allocation_type as u64);
+
+        Arc::new(BumpAllocator {
+            region,
+            device_memory,
+            buffer_image_granularity,
+            atom_size,
+            state,
+        })
+    }
+
+    /// Resets the free start back to the beginning of the [region] if there are no other strong
+    /// references to the allocator.
+    ///
+    /// [region]: Suballocator#regions
+    #[inline]
+    pub fn try_reset(self: &mut Arc<Self>) -> Result<(), BumpAllocatorResetError> {
+        Arc::get_mut(self)
+            .map(|allocator| {
+                *allocator.state.get_mut() = allocator.region.allocation_type as u64;
+            })
+            .ok_or(BumpAllocatorResetError)
+    }
+
+    /// Resets the free start to the beginning of the [region] without checking if there are other
+    /// strong references to the allocator.
+    ///
+    /// This could be useful if you cloned the [`Arc`] yourself, and can guarantee that no
+    /// allocations currently hold a reference to it.
+    ///
+    /// As a safe alternative, you can let the `Arc` do all the work. Simply drop it once it
+    /// reaches the end of the region. After all threads do that, the region will be freed to the
+    /// next level up the [hierarchy]. If you only use the allocator on one thread and need shared
+    /// ownership, you can use `Rc<RefCell<Arc<BumpAllocator>>>` together with [`try_reset`] for a
+    /// safe alternative as well.
+    ///
+    /// # Safety
+    ///
+    /// - All allocations made with the allocator must have been dropped.
+    ///
+    /// [region]: Suballocator#regions
+    /// [hierarchy]: Suballocator#memory-hierarchies
+    /// [`try_reset`]: Self::try_reset
+    #[inline]
+    pub unsafe fn reset_unchecked(&self) {
+        self.state
+            .store(self.region.allocation_type as u64, Ordering::Relaxed);
+    }
+}
+
+unsafe impl Suballocator for Arc<BumpAllocator> {
+    const IS_BLOCKING: bool = false;
+
+    const NEEDS_CLEANUP: bool = true;
+
+    #[inline]
+    fn new(region: MemoryAlloc) -> Self {
+        BumpAllocator::new(region)
+    }
+
+    /// Creates a new suballocation within the [region].
+    ///
+    /// # Panics
+    ///
+    /// - Panics if `create_info.size` is zero.
+    /// - Panics if `create_info.alignment` is zero.
+    /// - Panics if `create_info.alignment` is not a power of two.
+    ///
+    /// # Errors
+    ///
+    /// - Returns [`OutOfRegionMemory`] if the requested allocation can't fit in the free space
+    ///   remaining in the region.
+    ///
+    /// [region]: Suballocator#regions
+    /// [`allocate`]: Suballocator::allocate
+    /// [`OutOfRegionMemory`]: SuballocationCreationError::OutOfRegionMemory
+    #[inline]
+    fn allocate(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        create_info.validate();
+
+        unsafe { self.allocate_unchecked(create_info) }
+    }
+
+    #[inline]
+    unsafe fn allocate_unchecked(
+        &self,
+        create_info: SuballocationCreateInfo,
+    ) -> Result<MemoryAlloc, SuballocationCreationError> {
+        const SPIN_LIMIT: u32 = 6;
+
+        // NOTE(Marc): The following code is a minimal version of `Backoff` taken from
+        // crossbeam_utils v0.8.11, because we didn't want to add a dependency for a couple of
+        // lines that are used in one place only.
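+        //
+        // The backoff spins for 2^step iterations (capped at 2^SPIN_LIMIT) between failed CAS
+        // attempts below, which reduces contention on the cache line holding `state`.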
+        /// Original documentation:
+        /// https://docs.rs/crossbeam-utils/0.8.11/crossbeam_utils/struct.Backoff.html
+        struct Backoff {
+            step: Cell<u32>,
+        }
+
+        impl Backoff {
+            fn new() -> Self {
+                Backoff { step: Cell::new(0) }
+            }
+
+            fn spin(&self) {
+                for _ in 0..1 << self.step.get().min(SPIN_LIMIT) {
+                    core::hint::spin_loop();
+                }
+
+                if self.step.get() <= SPIN_LIMIT {
+                    self.step.set(self.step.get() + 1);
+                }
+            }
+        }
+
+        fn has_granularity_conflict(prev_ty: AllocationType, ty: AllocationType) -> bool {
+            prev_ty == AllocationType::Unknown || prev_ty != ty
+        }
+
+        let SuballocationCreateInfo {
+            size,
+            alignment,
+            allocation_type,
+            _ne: _,
+        } = create_info;
+
+        let alignment = DeviceSize::max(alignment, self.atom_size);
+        let backoff = Backoff::new();
+        let mut state = self.state.load(Ordering::Relaxed);
+
+        loop {
+            let free_start = state >> 2;
+            let prev_alloc_type = match state & 0b11 {
+                0 => AllocationType::Unknown,
+                1 => AllocationType::Linear,
+                2 => AllocationType::NonLinear,
+                _ => unreachable!(),
+            };
+            let prev_end = self.region.offset + free_start;
+            let mut offset = align_up(prev_end, alignment);
+
+            if prev_end > 0
+                && are_blocks_on_same_page(prev_end, 0, offset, self.buffer_image_granularity)
+                && has_granularity_conflict(prev_alloc_type, allocation_type)
+            {
+                offset = align_up(offset, self.buffer_image_granularity);
+            }
+
+            let free_start = offset - self.region.offset + size;
+
+            if free_start > self.region.size {
+                return Err(SuballocationCreationError::OutOfRegionMemory);
+            }
+
+            let new_state = free_start << 2 | allocation_type as u64;
+
+            match self.state.compare_exchange_weak(
+                state,
+                new_state,
+                Ordering::Release,
+                Ordering::Relaxed,
+            ) {
+                Ok(_) => {
+                    return Ok(MemoryAlloc {
+                        offset,
+                        size,
+                        allocation_type,
+                        mapped_ptr: self.region.mapped_ptr.and_then(|ptr| {
+                            NonNull::new(ptr.as_ptr().add((offset - self.region.offset) as usize))
+                        }),
+                        atom_size: self.region.atom_size,
+                        parent: AllocParent::Bump(self.clone()),
+                    });
+                }
+                Err(new_state) => {
+                    state = new_state;
+                    backoff.spin();
+                }
+            }
+        }
+    }
+
+    #[inline]
+    fn region(&self) -> &MemoryAlloc {
+        &self.region
+    }
+
+    #[inline]
+    fn try_into_region(self) -> Result<MemoryAlloc, Self> {
+        Arc::try_unwrap(self).map(|allocator| allocator.region)
+    }
+
+    #[inline]
+    fn free_size(&self) -> DeviceSize {
+        self.region.size - (self.state.load(Ordering::Acquire) >> 2)
+    }
+
+    #[inline]
+    fn cleanup(&mut self) {
+        let _ = self.try_reset();
+    }
+}
+
+unsafe impl DeviceOwned for BumpAllocator {
+    #[inline]
+    fn device(&self) -> &Arc<Device> {
+        self.device_memory.device()
+    }
+}
+
+/// Error that can be returned when resetting the [`BumpAllocator`].
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub struct BumpAllocatorResetError;
+
+impl Error for BumpAllocatorResetError {}
+
+impl Display for BumpAllocatorResetError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str("the allocator is still in use")
+    }
+}
+
+fn align_up(val: DeviceSize, alignment: DeviceSize) -> DeviceSize {
+    align_down(val + alignment - 1, alignment)
+}
+
+fn align_down(val: DeviceSize, alignment: DeviceSize) -> DeviceSize {
+    debug_assert!(alignment.is_power_of_two());
+
+    val & !(alignment - 1)
+}
+
+/// Checks if resources A and B share a page.
+///
+/// > **Note**: Assumes `a_offset + a_size > 0` and `a_offset + a_size <= b_offset`.
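+///
+/// For example, with a page size of 1024, a resource ending at byte 999 and one starting at byte
+/// 1024 do not share a page, whereas one starting at byte 1000 would.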
+fn are_blocks_on_same_page(
+    a_offset: DeviceSize,
+    a_size: DeviceSize,
+    b_offset: DeviceSize,
+    page_size: DeviceSize,
+) -> bool {
+    debug_assert!(a_offset + a_size > 0);
+    debug_assert!(a_offset + a_size <= b_offset);
+
+    let a_end = a_offset + a_size - 1;
+    let a_end_page = align_down(a_end, page_size);
+    let b_start_page = align_down(b_offset, page_size);
+
+    a_end_page == b_start_page
+}
+
+/// Allocators for memory on the host, used to speed up the allocators for the device.
+mod host {
+    use std::num::NonZeroUsize;
+
+    /// Allocates objects from a pool on the host, which has the following benefits:
+    ///
+    /// - Allocation is much faster because there is no need to consult the global allocator or
+    ///   even worse, the operating system, each time a small object needs to be created.
+    /// - Freeing is extremely fast, because the whole pool can be dropped at once. This is
+    ///   particularly useful for linked structures, whose nodes need to be freed one-by-one by
+    ///   traversing the whole structure otherwise.
+    /// - Cache locality is somewhat improved for linked structures with few nodes.
+    ///
+    /// The allocator doesn't hand out pointers but rather IDs that are relative to the pool. This
+    /// simplifies the logic because the pool can easily be moved and hence also resized, but the
+    /// downside is that the whole pool and possibly also the free-list must be copied when it runs
+    /// out of memory. It is therefore best to start out with a sufficiently large capacity.
+    #[derive(Debug)]
+    pub(super) struct PoolAllocator<T> {
+        pool: Vec<T>,
+        // LIFO list of free allocations, which means that newly freed allocations are always
+        // reused first before bumping the free start.
+        free_list: Vec<SlotId>,
+    }
+
+    impl<T> PoolAllocator<T> {
+        pub fn new(capacity: usize) -> Self {
+            debug_assert!(capacity > 0);
+
+            let mut pool = Vec::new();
+            let mut free_list = Vec::new();
+            pool.reserve_exact(capacity);
+            free_list.reserve_exact(capacity);
+            // All IDs are free at the start.
+            for index in (1..=capacity).rev() {
+                free_list.push(SlotId(NonZeroUsize::new(index).unwrap()));
+            }
+
+            PoolAllocator { pool, free_list }
+        }
+
+        /// Allocates a slot and initializes it with the provided value. Returns the ID of the
+        /// slot.
+        pub fn allocate(&mut self, val: T) -> SlotId {
+            let id = self.free_list.pop().unwrap_or_else(|| {
+                // The free-list is empty; grow the pool.
+                let new_len = self.pool.len() * 3 / 2;
+                let additional = new_len - self.pool.len();
+                self.pool.reserve_exact(additional);
+                self.free_list.reserve_exact(additional);
+
+                // Add the new IDs to the free-list.
+                let len = self.pool.len();
+                let cap = self.pool.capacity();
+                for id in (len + 2..=cap).rev() {
+                    // SAFETY: The `new_unchecked` is safe because:
+                    // - `id` is bound to the range [len + 2, cap].
+                    // - There is no way to add 2 to an unsigned integer (`len`) such that the
+                    //   result is 0, except for an overflow, which is why rustc can't optimize
+                    //   this out (unlike in the above loop where the range has a constant start).
+                    // - `Vec::reserve_exact` panics if the new capacity exceeds `isize::MAX`
+                    //   bytes, so the length of the pool can not be `usize::MAX - 1`.
+                    let id = SlotId(unsafe { NonZeroUsize::new_unchecked(id) });
+                    self.free_list.push(id);
+                }
+
+                // Smallest free ID.
+                SlotId(NonZeroUsize::new(len + 1).unwrap())
+            });
+
+            if let Some(x) = self.pool.get_mut(id.0.get() - 1) {
+                // We're reusing a slot, initialize it with the new value.
+                *x = val;
+            } else {
+                // We're using a fresh slot. We always put IDs in order into the free-list, so
+                // the next free ID must be for the slot right after the end of the occupied
+                // slots.
+                debug_assert!(id.0.get() - 1 == self.pool.len());
+                self.pool.push(val);
+            }
+
+            id
+        }
+
+        /// Returns the slot with the given ID to the allocator to be reused. The [`SlotId`]
+        /// should not be used again afterward.
+        pub fn free(&mut self, id: SlotId) {
+            debug_assert!(!self.free_list.contains(&id));
+            self.free_list.push(id);
+        }
+
+        /// Returns a mutable reference to the slot with the given ID.
+        pub fn get_mut(&mut self, id: SlotId) -> &mut T {
+            debug_assert!(!self.free_list.contains(&id));
+
+            // SAFETY: This is safe because:
+            // - The only way to obtain a `SlotId` is through `Self::allocate`.
+            // - `Self::allocate` returns `SlotId`s in the range [1, self.pool.len()].
+            // - `self.pool` only grows and never shrinks.
+            unsafe { self.pool.get_unchecked_mut(id.0.get() - 1) }
+        }
+    }
+
+    impl<T: Copy> PoolAllocator<T> {
+        /// Returns a copy of the slot with the given ID.
+        pub fn get(&self, id: SlotId) -> T {
+            debug_assert!(!self.free_list.contains(&id));
+
+            // SAFETY: Same as the `get_unchecked_mut` above.
+            *unsafe { self.pool.get_unchecked(id.0.get() - 1) }
+        }
+    }
+
+    /// ID of a slot in the pool of the `host::PoolAllocator`. This is used to limit the
+    /// visibility of the actual ID to this `host` module, making it easier to reason about unsafe
+    /// code.
+    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
+    pub(super) struct SlotId(NonZeroUsize);
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::thread;
+
+    const DUMMY_INFO: SuballocationCreateInfo = SuballocationCreateInfo {
+        size: 1,
+        alignment: 1,
+        allocation_type: AllocationType::Unknown,
+        _ne: crate::NonExhaustive(()),
+    };
+
+    const DUMMY_INFO_LINEAR: SuballocationCreateInfo = SuballocationCreateInfo {
+        allocation_type: AllocationType::Linear,
+        ..DUMMY_INFO
+    };
+
+    #[test]
+    fn free_list_allocator_capacity() {
+        const THREADS: DeviceSize = 12;
+        const ALLOCATIONS_PER_THREAD: DeviceSize = 100;
+        const ALLOCATION_STEP: DeviceSize = 117;
+        const REGION_SIZE: DeviceSize =
+            (ALLOCATION_STEP * (THREADS + 1) * THREADS / 2) * ALLOCATIONS_PER_THREAD;
+
+        let allocator = dummy_allocator!(FreeListAllocator, REGION_SIZE);
+        let allocs = ArrayQueue::new((ALLOCATIONS_PER_THREAD * THREADS) as usize);
+
+        // Using threads to randomize allocation order.
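+        // Every thread i allocates ALLOCATIONS_PER_THREAD chunks of size i * ALLOCATION_STEP;
+        // summed over i = 1..=THREADS that is exactly REGION_SIZE, so the region must end up
+        // completely full afterwards.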
+        thread::scope(|scope| {
+            for i in 1..=THREADS {
+                let (allocator, allocs) = (&allocator, &allocs);
+
+                scope.spawn(move || {
+                    let size = i * ALLOCATION_STEP;
+
+                    for _ in 0..ALLOCATIONS_PER_THREAD {
+                        allocs
+                            .push(
+                                allocator
+                                    .allocate(SuballocationCreateInfo { size, ..DUMMY_INFO })
+                                    .unwrap(),
+                            )
+                            .unwrap();
+                    }
+                });
+            }
+        });
+
+        assert!(allocator.allocate(DUMMY_INFO).is_err());
+        assert!(allocator.free_size() == 0);
+
+        drop(allocs);
+        assert!(allocator.free_size() == REGION_SIZE);
+        assert!(allocator
+            .allocate(SuballocationCreateInfo {
+                size: REGION_SIZE,
+                ..DUMMY_INFO
+            })
+            .is_ok());
+    }
+
+    #[test]
+    fn free_list_allocator_respects_alignment() {
+        const INFO: SuballocationCreateInfo = SuballocationCreateInfo {
+            alignment: 256,
+            ..DUMMY_INFO
+        };
+        const REGION_SIZE: DeviceSize = 10 * INFO.alignment;
+
+        let allocator = dummy_allocator!(FreeListAllocator, REGION_SIZE);
+        let mut allocs = Vec::with_capacity(10);
+
+        for _ in 0..10 {
+            allocs.push(allocator.allocate(INFO).unwrap());
+        }
+
+        assert!(allocator.allocate(INFO).is_err());
+        assert!(allocator.free_size() == REGION_SIZE - 10);
+    }
+
+    #[test]
+    fn free_list_allocator_respects_granularity() {
+        const GRANULARITY: DeviceSize = 16;
+        const REGION_SIZE: DeviceSize = 2 * GRANULARITY;
+
+        let allocator = dummy_allocator!(FreeListAllocator, REGION_SIZE, GRANULARITY);
+        let mut linear_allocs = Vec::with_capacity(GRANULARITY as usize);
+        let mut non_linear_allocs = Vec::with_capacity(GRANULARITY as usize);
+
+        for i in 0..REGION_SIZE {
+            if i % 2 == 0 {
+                linear_allocs.push(
+                    allocator
+                        .allocate(SuballocationCreateInfo {
+                            allocation_type: AllocationType::Linear,
+                            ..DUMMY_INFO
+                        })
+                        .unwrap(),
+                );
+            } else {
+                non_linear_allocs.push(
+                    allocator
+                        .allocate(SuballocationCreateInfo {
+                            allocation_type: AllocationType::NonLinear,
+                            ..DUMMY_INFO
+                        })
+                        .unwrap(),
+                );
+            }
+        }
+
+        assert!(allocator.allocate(DUMMY_INFO_LINEAR).is_err());
+        assert!(allocator.free_size() == 0);
+
+        drop(linear_allocs);
+        assert!(allocator
+            .allocate(SuballocationCreateInfo {
+                size: GRANULARITY,
+                ..DUMMY_INFO
+            })
+            .is_ok());
+
+        let _alloc = allocator.allocate(DUMMY_INFO).unwrap();
+        assert!(allocator.allocate(DUMMY_INFO).is_err());
+        assert!(allocator.allocate(DUMMY_INFO_LINEAR).is_err());
+    }
+
+    #[test]
+    fn pool_allocator_capacity() {
+        const BLOCK_SIZE: DeviceSize = 1024;
+
+        fn dummy_allocator(
+            device: Arc<Device>,
+            allocation_size: DeviceSize,
+        ) -> Arc<PoolAllocator<BLOCK_SIZE>> {
+            let device_memory = DeviceMemory::allocate(
+                device,
+                MemoryAllocateInfo {
+                    allocation_size,
+                    memory_type_index: 0,
+                    ..Default::default()
+                },
+            )
+            .unwrap();
+
+            PoolAllocator::new(MemoryAlloc::new_root(device_memory).unwrap(), 1)
+        }
+
+        let (device, _) = gfx_dev_and_queue!();
+
+        assert_should_panic!({ dummy_allocator(device.clone(), BLOCK_SIZE - 1) });
+
+        let allocator = dummy_allocator(device.clone(), 2 * BLOCK_SIZE - 1);
+        {
+            let alloc = allocator.allocate(DUMMY_INFO).unwrap();
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+
+            drop(alloc);
+            let _alloc = allocator.allocate(DUMMY_INFO).unwrap();
+        }
+
+        let allocator = dummy_allocator(device, 2 * BLOCK_SIZE);
+        {
+            let alloc1 = allocator.allocate(DUMMY_INFO).unwrap();
+            let alloc2 = allocator.allocate(DUMMY_INFO).unwrap();
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+
+            drop(alloc1);
+            let alloc1 = allocator.allocate(DUMMY_INFO).unwrap();
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+
+            drop(alloc1);
+            drop(alloc2);
+            let _alloc1 = allocator.allocate(DUMMY_INFO).unwrap();
+            let _alloc2 = allocator.allocate(DUMMY_INFO).unwrap();
+        }
+    }
+
+    #[test]
+    fn pool_allocator_respects_alignment() {
+        const BLOCK_SIZE: DeviceSize = 1024 + 128;
+        const INFO_A: SuballocationCreateInfo = SuballocationCreateInfo {
+            size: BLOCK_SIZE,
+            alignment: 256,
+            ..DUMMY_INFO
+        };
+        const INFO_B: SuballocationCreateInfo = SuballocationCreateInfo {
+            size: 1024,
+            ..INFO_A
+        };
+
+        let allocator = {
+            let (device, _) = gfx_dev_and_queue!();
+            let device_memory = DeviceMemory::allocate(
+                device,
+                MemoryAllocateInfo {
+                    allocation_size: 10 * BLOCK_SIZE,
+                    memory_type_index: 0,
+                    ..Default::default()
+                },
+            )
+            .unwrap();
+
+            PoolAllocator::<BLOCK_SIZE>::new(MemoryAlloc::new_root(device_memory).unwrap(), 1)
+        };
+
+        // This uses the fact that block indices are inserted into the free-list in order, so
+        // the first allocation succeeds because the block has an even index, while the second
+        // has an odd index.
+        allocator.allocate(INFO_A).unwrap();
+        assert!(allocator.allocate(INFO_A).is_err());
+        allocator.allocate(INFO_A).unwrap();
+        assert!(allocator.allocate(INFO_A).is_err());
+
+        for _ in 0..10 {
+            allocator.allocate(INFO_B).unwrap();
+        }
+    }
+
+    #[test]
+    fn pool_allocator_respects_granularity() {
+        const BLOCK_SIZE: DeviceSize = 128;
+
+        fn dummy_allocator(
+            device: Arc<Device>,
+            allocation_type: AllocationType,
+        ) -> Arc<PoolAllocator<BLOCK_SIZE>> {
+            let device_memory = DeviceMemory::allocate(
+                device,
+                MemoryAllocateInfo {
+                    allocation_size: 1024,
+                    memory_type_index: 0,
+                    ..Default::default()
+                },
+            )
+            .unwrap();
+            let mut region = MemoryAlloc::new_root(device_memory).unwrap();
+            unsafe { region.set_allocation_type(allocation_type) };
+
+            PoolAllocator::new(region, 256)
+        }
+
+        let (device, _) = gfx_dev_and_queue!();
+
+        let allocator = dummy_allocator(device.clone(), AllocationType::Unknown);
+        assert!(allocator.block_count() == 4);
+
+        let allocator = dummy_allocator(device.clone(), AllocationType::Linear);
+        assert!(allocator.block_count() == 8);
+
+        let allocator = dummy_allocator(device, AllocationType::NonLinear);
+        assert!(allocator.block_count() == 8);
+    }
+
+    #[test]
+    fn buddy_allocator_capacity() {
+        const MAX_ORDER: usize = 10;
+        const REGION_SIZE: DeviceSize = BuddyAllocator::MIN_NODE_SIZE << MAX_ORDER;
+
+        let allocator = dummy_allocator!(BuddyAllocator, REGION_SIZE);
+        let mut allocs = Vec::with_capacity(1 << MAX_ORDER);
+
+        for order in 0..=MAX_ORDER {
+            let size = BuddyAllocator::MIN_NODE_SIZE << order;
+
+            for _ in 0..1 << (MAX_ORDER - order) {
+                allocs.push(
+                    allocator
+                        .allocate(SuballocationCreateInfo { size, ..DUMMY_INFO })
+                        .unwrap(),
+                );
+            }
+
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+            assert!(allocator.free_size() == 0);
+            allocs.clear();
+        }
+
+        let mut orders = (0..MAX_ORDER).collect::<Vec<_>>();
+
+        for mid in 0..MAX_ORDER {
+            orders.rotate_left(mid);
+
+            for &order in &orders {
+                let size = BuddyAllocator::MIN_NODE_SIZE << order;
+                allocs.push(
+                    allocator
+                        .allocate(SuballocationCreateInfo { size, ..DUMMY_INFO })
+                        .unwrap(),
+                );
+            }
+
+            let _alloc = allocator.allocate(DUMMY_INFO).unwrap();
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+            assert!(allocator.free_size() == 0);
+            allocs.clear();
+        }
+    }
+
+    #[test]
+    fn buddy_allocator_respects_alignment() {
+        const REGION_SIZE: DeviceSize = 4096;
+
+        let allocator = dummy_allocator!(BuddyAllocator, REGION_SIZE);
+
+        {
+            const INFO: SuballocationCreateInfo = SuballocationCreateInfo {
+                alignment: 4096,
+                ..DUMMY_INFO
+            };
+
+            let _alloc = allocator.allocate(INFO).unwrap();
+            assert!(allocator.allocate(INFO).is_err());
+            assert!(allocator.free_size() == REGION_SIZE - BuddyAllocator::MIN_NODE_SIZE);
+        }
+
+        {
+            const INFO_A: SuballocationCreateInfo = SuballocationCreateInfo {
+                alignment: 256,
+                ..DUMMY_INFO
+            };
+            const ALLOCATIONS_A: DeviceSize = REGION_SIZE / INFO_A.alignment;
+            const INFO_B: SuballocationCreateInfo = SuballocationCreateInfo {
+                alignment: 16,
+                ..DUMMY_INFO
+            };
+            const ALLOCATIONS_B: DeviceSize = REGION_SIZE / INFO_B.alignment - ALLOCATIONS_A;
+
+            let mut allocs =
+                Vec::with_capacity((REGION_SIZE / BuddyAllocator::MIN_NODE_SIZE) as usize);
+
+            for _ in 0..ALLOCATIONS_A {
+                allocs.push(allocator.allocate(INFO_A).unwrap());
+            }
+
+            assert!(allocator.allocate(INFO_A).is_err());
+            assert!(
+                allocator.free_size()
+                    == REGION_SIZE - ALLOCATIONS_A * BuddyAllocator::MIN_NODE_SIZE
+            );
+
+            for _ in 0..ALLOCATIONS_B {
+                allocs.push(allocator.allocate(INFO_B).unwrap());
+            }
+
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+            assert!(allocator.free_size() == 0);
+        }
+    }
+
+    #[test]
+    fn buddy_allocator_respects_granularity() {
+        const GRANULARITY: DeviceSize = 256;
+        const REGION_SIZE: DeviceSize = 2 * GRANULARITY;
+
+        let allocator = dummy_allocator!(BuddyAllocator, REGION_SIZE, GRANULARITY);
+
+        {
+            const ALLOCATIONS: DeviceSize = REGION_SIZE / BuddyAllocator::MIN_NODE_SIZE;
+
+            let mut allocs = Vec::with_capacity(ALLOCATIONS as usize);
+            for _ in 0..ALLOCATIONS {
+                allocs.push(allocator.allocate(DUMMY_INFO_LINEAR).unwrap());
+            }
+
+            assert!(allocator.allocate(DUMMY_INFO_LINEAR).is_err());
+            assert!(allocator.free_size() == 0);
+        }
+
+        {
+            let _alloc1 = allocator.allocate(DUMMY_INFO).unwrap();
+            let _alloc2 = allocator.allocate(DUMMY_INFO).unwrap();
+            assert!(allocator.allocate(DUMMY_INFO).is_err());
+            assert!(allocator.free_size() == 0);
+        }
+    }
+
+    #[test]
+    fn bump_allocator_respects_alignment() {
+        const INFO: SuballocationCreateInfo = SuballocationCreateInfo {
+            alignment: 16,
+            ..DUMMY_INFO
+        };
+
+        let allocator = dummy_allocator!(BumpAllocator, INFO.alignment * 10);
+
+        for _ in 0..10 {
+            allocator.allocate(INFO).unwrap();
+        }
+
+        assert!(allocator.allocate(INFO).is_err());
+
+        for _ in 0..INFO.alignment - 1 {
+            allocator.allocate(DUMMY_INFO).unwrap();
+        }
+
+        assert!(allocator.allocate(INFO).is_err());
+        assert!(allocator.free_size() == 0);
+    }
+
+    #[test]
+    fn bump_allocator_respects_granularity() {
+        const ALLOCATIONS: DeviceSize = 10;
+        const GRANULARITY: DeviceSize = 1024;
+
+        let mut allocator = dummy_allocator!(BumpAllocator, GRANULARITY * ALLOCATIONS, GRANULARITY);
+
+        for i in 0..ALLOCATIONS {
+            for _ in 0..GRANULARITY {
+                allocator
+                    .allocate(SuballocationCreateInfo {
+                        allocation_type: if i % 2 == 0 {
+                            AllocationType::NonLinear
+                        } else {
+                            AllocationType::Linear
+                        },
+                        ..DUMMY_INFO
+                    })
+                    .unwrap();
+            }
+        }
+
+        assert!(allocator.allocate(DUMMY_INFO_LINEAR).is_err());
+        assert!(allocator.free_size() == 0);
+
+        allocator.try_reset().unwrap();
+
+        for i in 0..ALLOCATIONS {
+            allocator
+                .allocate(SuballocationCreateInfo {
+                    allocation_type: if i % 2 == 0 {
+                        AllocationType::Linear
+                    } else {
+                        AllocationType::NonLinear
+                    },
+                    ..DUMMY_INFO
+                })
+                .unwrap();
+        }
+
+        assert!(allocator.allocate(DUMMY_INFO_LINEAR).is_err());
+        assert!(allocator.free_size() == GRANULARITY - 1);
+    }
+
+    #[test]
+    fn bump_allocator_syncness() {
+        const THREADS: DeviceSize = 12;
+        const ALLOCATIONS_PER_THREAD: DeviceSize = 100_000;
+        const ALLOCATION_STEP: DeviceSize = 117;
+        const REGION_SIZE: DeviceSize =
+            (ALLOCATION_STEP * (THREADS + 1) * THREADS / 2) * ALLOCATIONS_PER_THREAD;
+
+        let mut allocator = dummy_allocator!(BumpAllocator, REGION_SIZE);
+
+        thread::scope(|scope| {
+            for i in 1..=THREADS {
+                let allocator = &allocator;
+
+                scope.spawn(move || {
+                    let size = i * ALLOCATION_STEP;
+
+                    for _ in 0..ALLOCATIONS_PER_THREAD {
+                        allocator
+                            .allocate(SuballocationCreateInfo { size, ..DUMMY_INFO })
+                            .unwrap();
+                    }
+                });
+            }
+        });
+
+        assert!(allocator.allocate(DUMMY_INFO).is_err());
+        assert!(allocator.free_size() == 0);
+
+        allocator.try_reset().unwrap();
+        assert!(allocator.free_size() == REGION_SIZE);
+    }
+
+    macro_rules! dummy_allocator {
+        ($type:ty, $size:expr) => {
+            dummy_allocator!($type, $size, 1)
+        };
+        ($type:ty, $size:expr, $granularity:expr) => {
+            dummy_allocator!($type, $size, $granularity, AllocationType::Unknown)
+        };
+        ($type:ty, $size:expr, $granularity:expr, $allocation_type:expr) => {{
+            let (device, _) = gfx_dev_and_queue!();
+            let device_memory = DeviceMemory::allocate(
+                device,
+                MemoryAllocateInfo {
+                    allocation_size: $size,
+                    memory_type_index: 0,
+                    ..Default::default()
+                },
+            )
+            .unwrap();
+            let mut allocator = <$type>::new(MemoryAlloc::new_root(device_memory).unwrap());
+            Arc::get_mut(&mut allocator)
+                .unwrap()
+                .buffer_image_granularity = $granularity;
+
+            allocator
+        }};
+    }
+
+    use crate::memory::MemoryAllocateInfo;
+    pub(self) use dummy_allocator;
+}
diff --git a/vulkano/src/memory/mod.rs b/vulkano/src/memory/mod.rs
index fe9bd282..7a5618a6 100644
--- a/vulkano/src/memory/mod.rs
+++ b/vulkano/src/memory/mod.rs
@@ -92,13 +92,9 @@
 //! get memory from that pool. By default if you don't specify any pool when creating a buffer or
 //! an image, an instance of `StandardMemoryPool` that is shared by the `Device` object is used.
 
-pub use self::{
-    device_memory::{
-        DeviceMemory, DeviceMemoryError, ExternalMemoryHandleType, ExternalMemoryHandleTypes,
-        MappedDeviceMemory, MemoryAllocateFlags, MemoryAllocateInfo, MemoryImportInfo,
-        MemoryMapError,
-    },
-    pool::MemoryPool,
+pub use self::device_memory::{
+    DeviceMemory, DeviceMemoryError, ExternalMemoryHandleType, ExternalMemoryHandleTypes,
+    MappedDeviceMemory, MemoryAllocateFlags, MemoryAllocateInfo, MemoryImportInfo, MemoryMapError,
 };
 use crate::{
     buffer::{sys::UnsafeBuffer, BufferAccess},
@@ -109,8 +105,8 @@ use crate::{
 };
 use std::sync::Arc;
 
+pub mod allocator;
 mod device_memory;
-pub mod pool;
 
 /// Properties of the memory in a physical device.
 #[derive(Clone, Debug)]
diff --git a/vulkano/src/memory/pool/host_visible.rs b/vulkano/src/memory/pool/host_visible.rs
deleted file mode 100644
index c04ebbb7..00000000
--- a/vulkano/src/memory/pool/host_visible.rs
+++ /dev/null
@@ -1,181 +0,0 @@
-// Copyright (c) 2016 The vulkano developers
-// Licensed under the Apache License, Version 2.0
-// <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0>
-// or the MIT
-// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
-// at your option. All files in the project carrying such
-// notice may not be copied, modified, or distributed except
-// according to those terms.
-
-use crate::{
-    device::Device,
-    memory::{
-        device_memory::MemoryAllocateInfo, DeviceMemory, DeviceMemoryError, MappedDeviceMemory,
-    },
-    DeviceSize,
-};
-use parking_lot::Mutex;
-use std::{cmp, ops::Range, sync::Arc};
-
-/// Memory pool that operates on a given memory type.
-#[derive(Debug)]
-pub struct StandardHostVisibleMemoryTypePool {
-    device: Arc<Device>,
-    memory_type_index: u32,
-    // TODO: obviously very inefficient
-    occupied: Mutex<Vec<(Arc<MappedDeviceMemory>, Vec<Range<DeviceSize>>)>>,
-}
-
-impl StandardHostVisibleMemoryTypePool {
-    /// Creates a new pool that will operate on the given memory type.
-    ///
-    /// # Panic
-    ///
-    /// - Panics if `memory_type_index` is out of range.
-    /// - Panics if `memory_type_index` refers to a memory type that is not host-visible.
-    ///
-    #[inline]
-    pub fn new(
-        device: Arc<Device>,
-        memory_type_index: u32,
-    ) -> Arc<StandardHostVisibleMemoryTypePool> {
-        let memory_type =
-            &device.physical_device().memory_properties().memory_types[memory_type_index as usize];
-        assert!(memory_type.property_flags.host_visible);
-
-        Arc::new(StandardHostVisibleMemoryTypePool {
-            device,
-            memory_type_index,
-            occupied: Mutex::new(Vec::new()),
-        })
-    }
-
-    /// Allocates memory from the pool.
-    ///
-    /// # Panic
-    ///
-    /// - Panics if `size` is 0.
-    /// - Panics if `alignment` is 0.
-    ///
-    pub fn alloc(
-        self: &Arc<Self>,
-        size: DeviceSize,
-        alignment: DeviceSize,
-    ) -> Result<StandardHostVisibleMemoryTypePoolAlloc, DeviceMemoryError> {
-        assert!(size != 0);
-        assert!(alignment != 0);
-
-        #[inline]
-        fn align(val: DeviceSize, al: DeviceSize) -> DeviceSize {
-            al * (1 + (val - 1) / al)
-        }
-
-        // Find a location.
-        let mut occupied = self.occupied.lock();
-
-        // Try finding an entry in already-allocated chunks.
-        for &mut (ref dev_mem, ref mut entries) in occupied.iter_mut() {
-            // Try find some free space in-between two entries.
-            for i in 0..entries.len().saturating_sub(1) {
-                let entry1 = entries[i].clone();
-                let entry1_end = align(entry1.end, alignment);
-                let entry2 = entries[i + 1].clone();
-                if entry1_end + size <= entry2.start {
-                    entries.insert(i + 1, entry1_end..entry1_end + size);
-                    return Ok(StandardHostVisibleMemoryTypePoolAlloc {
-                        pool: self.clone(),
-                        memory: dev_mem.clone(),
-                        offset: entry1_end,
-                        size,
-                    });
-                }
-            }
-
-            // Try append at the end.
-            let last_end = entries.last().map(|e| align(e.end, alignment)).unwrap_or(0);
-            if last_end + size <= (**dev_mem).as_ref().allocation_size() {
-                entries.push(last_end..last_end + size);
-                return Ok(StandardHostVisibleMemoryTypePoolAlloc {
-                    pool: self.clone(),
-                    memory: dev_mem.clone(),
-                    offset: last_end,
-                    size,
-                });
-            }
-        }
-
-        // We need to allocate a new block.
-        let new_block = {
-            const MIN_BLOCK_SIZE: DeviceSize = 8 * 1024 * 1024; // 8 MB
-            let allocation_size = cmp::max(MIN_BLOCK_SIZE, size.next_power_of_two());
-            let memory = DeviceMemory::allocate(
-                self.device.clone(),
-                MemoryAllocateInfo {
-                    allocation_size,
-                    memory_type_index: self.memory_type_index,
-                    ..Default::default()
-                },
-            )?;
-            let new_block = MappedDeviceMemory::new(memory, 0..allocation_size)?;
-            Arc::new(new_block)
-        };
-
-        occupied.push((new_block.clone(), vec![0..size]));
-        Ok(StandardHostVisibleMemoryTypePoolAlloc {
-            pool: self.clone(),
-            memory: new_block,
-            offset: 0,
-            size,
-        })
-    }
-
-    /// Returns the device this pool operates on.
-    #[inline]
-    pub fn device(&self) -> &Arc<Device> {
-        &self.device
-    }
-
-    /// Returns the index of the memory type this pool operates on.
-    #[inline]
-    pub fn memory_type_index(&self) -> u32 {
-        self.memory_type_index
-    }
-}
-
-#[derive(Debug)]
-pub struct StandardHostVisibleMemoryTypePoolAlloc {
-    pool: Arc<StandardHostVisibleMemoryTypePool>,
-    memory: Arc<MappedDeviceMemory>,
-    offset: DeviceSize,
-    size: DeviceSize,
-}
-
-impl StandardHostVisibleMemoryTypePoolAlloc {
-    #[inline]
-    pub fn memory(&self) -> &MappedDeviceMemory {
-        &self.memory
-    }
-
-    #[inline]
-    pub fn offset(&self) -> DeviceSize {
-        self.offset
-    }
-
-    #[inline]
-    pub fn size(&self) -> DeviceSize {
-        self.size
-    }
-}
-
-impl Drop for StandardHostVisibleMemoryTypePoolAlloc {
-    fn drop(&mut self) {
-        let mut occupied = self.pool.occupied.lock();
-
-        let entries = occupied
-            .iter_mut()
-            .find(|e| &*e.0 as *const MappedDeviceMemory == &*self.memory)
-            .unwrap();
-
-        entries.1.retain(|e| e.start != self.offset);
-    }
-}
diff --git a/vulkano/src/memory/pool/mod.rs b/vulkano/src/memory/pool/mod.rs
deleted file mode 100644
index ac4e0aa6..00000000
--- a/vulkano/src/memory/pool/mod.rs
+++ /dev/null
@@ -1,322 +0,0 @@
-// Copyright (c) 2016 The vulkano developers
-// Licensed under the Apache License, Version 2.0
-// <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0>
-// or the MIT
-// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
-// at your option. All files in the project carrying such
-// notice may not be copied, modified, or distributed except
-// according to those terms.
-
-pub use self::{
-    host_visible::{StandardHostVisibleMemoryTypePool, StandardHostVisibleMemoryTypePoolAlloc},
-    non_host_visible::{
-        StandardNonHostVisibleMemoryTypePool, StandardNonHostVisibleMemoryTypePoolAlloc,
-    },
-    pool::{StandardMemoryPool, StandardMemoryPoolAlloc},
-};
-use super::MemoryType;
-use crate::{
-    device::{Device, DeviceOwned},
-    memory::{
-        device_memory::MemoryAllocateInfo, DedicatedAllocation, DeviceMemory, DeviceMemoryError,
-        ExternalMemoryHandleTypes, MappedDeviceMemory, MemoryRequirements,
-    },
-    DeviceSize,
-};
-use std::sync::Arc;
-
-mod host_visible;
-mod non_host_visible;
-mod pool;
-
-// If the allocation size goes beyond this, then we perform a dedicated allocation which bypasses
-// the pool. This prevents the pool from overallocating a significant amount of memory.
-const MAX_POOL_ALLOC: DeviceSize = 256 * 1024 * 1024;
-
-fn choose_allocation_memory_type<F>(
-    device: &Arc<Device>,
-    requirements: &MemoryRequirements,
-    mut filter: F,
-    map: MappingRequirement,
-) -> u32
-where
-    F: FnMut(&MemoryType) -> AllocFromRequirementsFilter,
-{
-    let mem_ty = {
-        let mut filter = |ty: &MemoryType| {
-            if map == MappingRequirement::Map && !ty.property_flags.host_visible {
-                return AllocFromRequirementsFilter::Forbidden;
-            }
-            filter(ty)
-        };
-        let first_loop = device
-            .physical_device()
-            .memory_properties()
-            .memory_types
-            .iter()
-            .enumerate()
-            .map(|(i, t)| (i as u32, t, AllocFromRequirementsFilter::Preferred));
-        let second_loop = device
-            .physical_device()
-            .memory_properties()
-            .memory_types
-            .iter()
-            .enumerate()
-            .map(|(i, t)| (i as u32, t, AllocFromRequirementsFilter::Allowed));
-        first_loop
-            .chain(second_loop)
-            .filter(|(i, _, _)| (requirements.memory_type_bits & (1 << *i)) != 0)
-            .find(|&(_, t, rq)| filter(t) == rq)
-            .expect("Couldn't find a memory type to allocate from")
-            .0
-    };
-    mem_ty
-}
-
-/// Allocate dedicated memory with exportable fd.
-/// Memory pool memory always exports the same fd, thus dedicated is preferred.
-pub(crate) fn alloc_dedicated_with_exportable_fd<F>(
-    device: Arc<Device>,
-    requirements: &MemoryRequirements,
-    _layout: AllocLayout,
-    map: MappingRequirement,
-    dedicated_allocation: DedicatedAllocation<'_>,
-    filter: F,
-) -> Result<PotentialDedicatedAllocation<StandardMemoryPoolAlloc>, DeviceMemoryError>
-where
-    F: FnMut(&MemoryType) -> AllocFromRequirementsFilter,
-{
-    assert!(device.enabled_extensions().khr_external_memory_fd);
-    assert!(device.enabled_extensions().khr_external_memory);
-
-    let memory_type_index = choose_allocation_memory_type(&device, requirements, filter, map);
-    let memory = DeviceMemory::allocate(
-        device,
-        MemoryAllocateInfo {
-            allocation_size: requirements.size,
-            memory_type_index,
-            export_handle_types: ExternalMemoryHandleTypes {
-                opaque_fd: true,
-                ..ExternalMemoryHandleTypes::empty()
-            },
-            ..MemoryAllocateInfo::dedicated_allocation(dedicated_allocation)
-        },
-    )?;
-
-    match map {
-        MappingRequirement::Map => {
-            let mapped_memory = MappedDeviceMemory::new(memory, 0..requirements.size)?;
-            Ok(PotentialDedicatedAllocation::DedicatedMapped(mapped_memory))
-        }
-        MappingRequirement::DoNotMap => Ok(PotentialDedicatedAllocation::Dedicated(memory)),
-    }
-}
-
-/// Pool of GPU-visible memory that can be allocated from.
-pub unsafe trait MemoryPool: DeviceOwned {
-    /// Object that represents a single allocation. Its destructor should free the chunk.
-    type Alloc: MemoryPoolAlloc;
-
-    /// Allocates memory from the pool.
-    ///
-    /// # Safety
-    ///
-    /// Implementation safety:
-    ///
-    /// - The returned object must match the requirements.
-    /// - When a linear object is allocated next to an optimal object, it is mandatory that
-    ///   the boundary is aligned to the value of the `buffer_image_granularity` limit.
-    ///
-    /// Note that it is not unsafe to *call* this function, but it is unsafe to bind the memory
-    /// returned by this function to a resource.
-    ///
-    /// # Panic
-    ///
-    /// - Panics if `memory_type` doesn't belong to the same physical device as the device which
-    ///   was used to create this pool.
-    /// - Panics if the memory type is not host-visible and `map` is `MappingRequirement::Map`.
-    /// - Panics if `size` is 0.
-    /// - Panics if `alignment` is 0.
-    ///
-    fn alloc_generic(
-        &self,
-        memory_type_index: u32,
-        size: DeviceSize,
-        alignment: DeviceSize,
-        layout: AllocLayout,
-        map: MappingRequirement,
-    ) -> Result<Self::Alloc, DeviceMemoryError>;
-
-    /// Chooses a memory type and allocates memory from it.
-    ///
-    /// Contrary to `alloc_generic`, this function may allocate a whole new block of memory
-    /// dedicated to a resource based on `requirements.prefer_dedicated`.
-    ///
-    /// `filter` can be used to restrict the memory types and to indicate which are preferred.
-    /// If `map` is `MappingRequirement::Map`, then non-host-visible memory types will
-    /// automatically be filtered out.
-    ///
-    /// # Safety
-    ///
-    /// Implementation safety:
-    ///
-    /// - The returned object must match the requirements.
-    /// - When a linear object is allocated next to an optimal object, it is mandatory that
-    ///   the boundary is aligned to the value of the `buffer_image_granularity` limit.
-    /// - If `dedicated` is not `None`, the returned memory must either not be dedicated or be
-    ///   dedicated to the resource that was passed.
-    ///
-    /// Note that it is not unsafe to *call* this function, but it is unsafe to bind the memory
-    /// returned by this function to a resource.
-    ///
-    /// # Panic
-    ///
-    /// - Panics if no memory type could be found, which can happen if `filter` is too restrictive.
-    // TODO: ^ is this a good idea?
-    /// - Panics if `size` is 0.
-    /// - Panics if `alignment` is 0.
-    ///
-    fn alloc_from_requirements<F>(
-        &self,
-        requirements: &MemoryRequirements,
-        layout: AllocLayout,
-        map: MappingRequirement,
-        dedicated_allocation: Option<DedicatedAllocation<'_>>,
-        filter: F,
-    ) -> Result<PotentialDedicatedAllocation<Self::Alloc>, DeviceMemoryError>
-    where
-        F: FnMut(&MemoryType) -> AllocFromRequirementsFilter,
-    {
-        // Choose a suitable memory type.
-        let memory_type_index =
-            choose_allocation_memory_type(self.device(), requirements, filter, map);
-
-        // Redirect to `self.alloc_generic` if we don't perform a dedicated allocation.
-        if !requirements.prefer_dedicated && requirements.size <= MAX_POOL_ALLOC {
-            let alloc = self.alloc_generic(
-                memory_type_index,
-                requirements.size,
-                requirements.alignment,
-                layout,
-                map,
-            )?;
-            return Ok(alloc.into());
-        }
-        if dedicated_allocation.is_none() {
-            let alloc = self.alloc_generic(
-                memory_type_index,
-                requirements.size,
-                requirements.alignment,
-                layout,
-                map,
-            )?;
-            return Ok(alloc.into());
-        }
-
-        // If we reach here, then we perform a dedicated alloc.
-        let memory = DeviceMemory::allocate(
-            self.device().clone(),
-            MemoryAllocateInfo {
-                allocation_size: requirements.size,
-                memory_type_index,
-                dedicated_allocation,
-                ..Default::default()
-            },
-        )?;
-
-        match map {
-            MappingRequirement::Map => {
-                let mapped_memory = MappedDeviceMemory::new(memory, 0..requirements.size)?;
-                Ok(PotentialDedicatedAllocation::DedicatedMapped(mapped_memory))
-            }
-            MappingRequirement::DoNotMap => Ok(PotentialDedicatedAllocation::Dedicated(memory)),
-        }
-    }
-}
-
-#[derive(Debug, Copy, Clone, PartialEq, Eq)]
-pub enum AllocFromRequirementsFilter {
-    Preferred,
-    Allowed,
-    Forbidden,
-}
-
-/// Object that represents a single allocation. Its destructor should free the chunk.
-pub unsafe trait MemoryPoolAlloc: Send + Sync {
-    /// Returns the memory object from which this is allocated. Returns `None` if the memory is
-    /// not mapped.
-    fn mapped_memory(&self) -> Option<&MappedDeviceMemory>;
-
-    /// Returns the memory object from which this is allocated.
-    fn memory(&self) -> &DeviceMemory;
-
-    /// Returns the offset at the start of the memory where the first byte of this allocation
-    /// resides.
-    fn offset(&self) -> DeviceSize;
-}
-
-/// Whether an allocation should map the memory or not.
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-pub enum MappingRequirement {
-    /// Should map.
-    Map,
-    /// Shouldn't map.
-    DoNotMap,
-}
-
-/// Layout of the object being allocated.
-#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
-pub enum AllocLayout {
-    /// The object has a linear layout.
-    Linear,
-    /// The object has an optimal layout.
-    Optimal,
-}
-
-/// Enumeration that can contain either a generic allocation coming from a pool, or a dedicated
-/// allocation for one specific resource.
-#[derive(Debug)]
-pub enum PotentialDedicatedAllocation<A> {
-    Generic(A),
-    Dedicated(DeviceMemory),
-    DedicatedMapped(MappedDeviceMemory),
-}
-
-unsafe impl<A> MemoryPoolAlloc for PotentialDedicatedAllocation<A>
-where
-    A: MemoryPoolAlloc,
-{
-    #[inline]
-    fn mapped_memory(&self) -> Option<&MappedDeviceMemory> {
-        match *self {
-            PotentialDedicatedAllocation::Generic(ref alloc) => alloc.mapped_memory(),
-            PotentialDedicatedAllocation::Dedicated(_) => None,
-            PotentialDedicatedAllocation::DedicatedMapped(ref mem) => Some(mem),
-        }
-    }
-
-    #[inline]
-    fn memory(&self) -> &DeviceMemory {
-        match *self {
-            PotentialDedicatedAllocation::Generic(ref alloc) => alloc.memory(),
-            PotentialDedicatedAllocation::Dedicated(ref mem) => mem,
-            PotentialDedicatedAllocation::DedicatedMapped(ref mem) => mem.as_ref(),
-        }
-    }
-
-    #[inline]
-    fn offset(&self) -> DeviceSize {
-        match *self {
-            PotentialDedicatedAllocation::Generic(ref alloc) => alloc.offset(),
-            PotentialDedicatedAllocation::Dedicated(_) => 0,
-            PotentialDedicatedAllocation::DedicatedMapped(_) => 0,
-        }
-    }
-}
-
-impl<A> From<A> for PotentialDedicatedAllocation<A> {
-    #[inline]
-    fn from(alloc: A) -> PotentialDedicatedAllocation<A> {
-        PotentialDedicatedAllocation::Generic(alloc)
-    }
-}
diff --git a/vulkano/src/memory/pool/non_host_visible.rs b/vulkano/src/memory/pool/non_host_visible.rs
deleted file mode 100644
index 54c0e900..00000000
--- a/vulkano/src/memory/pool/non_host_visible.rs
+++ /dev/null
@@ -1,169 +0,0 @@
-// Copyright (c) 2016 The vulkano developers
-// Licensed under the Apache License, Version 2.0
-// <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0>
-// or the MIT
-// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
-// at your option. All files in the project carrying such
-// notice may not be copied, modified, or distributed except
-// according to those terms.
-
-use crate::{
-    device::Device,
-    memory::{device_memory::MemoryAllocateInfo, DeviceMemory, DeviceMemoryError},
-    DeviceSize,
-};
-use parking_lot::Mutex;
-use std::{cmp, ops::Range, sync::Arc};
-
-/// Memory pool that operates on a given memory type.
-#[derive(Debug)]
-pub struct StandardNonHostVisibleMemoryTypePool {
-    device: Arc<Device>,
-    memory_type_index: u32,
-    // TODO: obviously very inefficient
-    occupied: Mutex<Vec<(Arc<DeviceMemory>, Vec<Range<DeviceSize>>)>>,
-}
-
-impl StandardNonHostVisibleMemoryTypePool {
-    /// Creates a new pool that will operate on the given memory type.
-    ///
-    /// # Panic
-    ///
-    /// - Panics if `memory_type_index` is out of range.
-    #[inline]
-    pub fn new(
-        device: Arc<Device>,
-        memory_type_index: u32,
-    ) -> Arc<StandardNonHostVisibleMemoryTypePool> {
-        let _ =
-            &device.physical_device().memory_properties().memory_types[memory_type_index as usize];
-
-        Arc::new(StandardNonHostVisibleMemoryTypePool {
-            device,
-            memory_type_index,
-            occupied: Mutex::new(Vec::new()),
-        })
-    }
-
-    /// Allocates memory from the pool.
-    ///
-    /// # Panic
-    ///
-    /// - Panics if `size` is 0.
-    /// - Panics if `alignment` is 0.
-    ///
-    pub fn alloc(
-        self: &Arc<Self>,
-        size: DeviceSize,
-        alignment: DeviceSize,
-    ) -> Result<StandardNonHostVisibleMemoryTypePoolAlloc, DeviceMemoryError> {
-        assert!(size != 0);
-        assert!(alignment != 0);
-
-        #[inline]
-        fn align(val: DeviceSize, al: DeviceSize) -> DeviceSize {
-            al * (1 + (val - 1) / al)
-        }
-
-        // Find a location.
-        let mut occupied = self.occupied.lock();
-
-        // Try finding an entry in already-allocated chunks.
-        for &mut (ref dev_mem, ref mut entries) in occupied.iter_mut() {
-            // Try find some free space in-between two entries.
diff --git a/vulkano/src/memory/pool/pool.rs b/vulkano/src/memory/pool/pool.rs
deleted file mode 100644
index 496c2b6a..00000000
--- a/vulkano/src/memory/pool/pool.rs
+++ /dev/null
@@ -1,206 +0,0 @@
-// Copyright (c) 2016 The vulkano developers
-// Licensed under the Apache License, Version 2.0
-// <LICENSE-APACHE or https://www.apache.org/licenses/LICENSE-2.0>,
-// or the MIT
-// license <LICENSE-MIT or https://opensource.org/licenses/MIT>,
-// at your option. All files in the project carrying such
-// notice may not be copied, modified, or distributed except
-// according to those terms.
-
-use crate::{
-    device::{Device, DeviceOwned},
-    memory::{
-        pool::{
-            AllocLayout, MappingRequirement, MemoryPool, MemoryPoolAlloc,
-            StandardHostVisibleMemoryTypePool, StandardHostVisibleMemoryTypePoolAlloc,
-            StandardNonHostVisibleMemoryTypePool, StandardNonHostVisibleMemoryTypePoolAlloc,
-        },
-        DeviceMemory, DeviceMemoryError, MappedDeviceMemory,
-    },
-    DeviceSize,
-};
-use parking_lot::Mutex;
-use std::{
-    collections::{hash_map::Entry, HashMap},
-    sync::Arc,
-};
-
-#[derive(Debug)]
-pub struct StandardMemoryPool {
-    device: Arc<Device>,
-
-    // For each memory type index, stores the associated pool.
-    pools: Mutex<HashMap<(u32, AllocLayout, MappingRequirement), Pool>>,
-}
-
-impl StandardMemoryPool {
-    /// Creates a new pool.
-    #[inline]
-    pub fn new(device: Arc<Device>) -> Arc<StandardMemoryPool> {
-        let cap = device
-            .physical_device()
-            .memory_properties()
-            .memory_types
-            .len();
-
-        Arc::new(StandardMemoryPool {
-            device,
-            pools: Mutex::new(HashMap::with_capacity(cap)),
-        })
-    }
-}
-
-fn generic_allocation(
-    mem_pool: Arc<StandardMemoryPool>,
-    memory_type_index: u32,
-    size: DeviceSize,
-    alignment: DeviceSize,
-    layout: AllocLayout,
-    map: MappingRequirement,
-) -> Result<StandardMemoryPoolAlloc, DeviceMemoryError> {
-    let mut pools = mem_pool.pools.lock();
-
-    let memory_properties = mem_pool.device().physical_device().memory_properties();
-    let memory_type = memory_properties
-        .memory_types
-        .get(memory_type_index as usize)
-        .ok_or(DeviceMemoryError::MemoryTypeIndexOutOfRange {
-            memory_type_index,
-            memory_type_count: memory_properties.memory_types.len() as u32,
-        })?;
-
-    let memory_type_host_visible = memory_type.property_flags.host_visible;
-    assert!(memory_type_host_visible || map == MappingRequirement::DoNotMap);
-
-    match pools.entry((memory_type_index, layout, map)) {
-        Entry::Occupied(entry) => match *entry.get() {
-            Pool::HostVisible(ref pool) => {
-                let alloc = pool.alloc(size, alignment)?;
-                let inner = StandardMemoryPoolAllocInner::HostVisible(alloc);
-                Ok(StandardMemoryPoolAlloc {
-                    inner,
-                    _pool: mem_pool.clone(),
-                })
-            }
-            Pool::NonHostVisible(ref pool) => {
-                let alloc = pool.alloc(size, alignment)?;
-                let inner = StandardMemoryPoolAllocInner::NonHostVisible(alloc);
-                Ok(StandardMemoryPoolAlloc {
-                    inner,
-                    _pool: mem_pool.clone(),
-                })
-            }
-        },
-
-        Entry::Vacant(entry) => {
-            if memory_type_host_visible {
-                let pool = StandardHostVisibleMemoryTypePool::new(
-                    mem_pool.device.clone(),
-                    memory_type_index,
-                );
-                entry.insert(Pool::HostVisible(pool.clone()));
-                let alloc = pool.alloc(size, alignment)?;
-                let inner = StandardMemoryPoolAllocInner::HostVisible(alloc);
-                Ok(StandardMemoryPoolAlloc {
-                    inner,
-                    _pool: mem_pool.clone(),
-                })
-            } else {
-                let pool = StandardNonHostVisibleMemoryTypePool::new(
-                    mem_pool.device.clone(),
-                    memory_type_index,
-                );
-                entry.insert(Pool::NonHostVisible(pool.clone()));
-                let alloc = pool.alloc(size, alignment)?;
-                let inner = StandardMemoryPoolAllocInner::NonHostVisible(alloc);
-                Ok(StandardMemoryPoolAlloc {
-                    inner,
-                    _pool: mem_pool.clone(),
-                })
-            }
-        }
-    }
-}
-
-unsafe impl MemoryPool for Arc<StandardMemoryPool> {
-    type Alloc = StandardMemoryPoolAlloc;
-
-    fn alloc_generic(
-        &self,
-        memory_type_index: u32,
-        size: DeviceSize,
-        alignment: DeviceSize,
-        layout: AllocLayout,
-        map: MappingRequirement,
-    ) -> Result<StandardMemoryPoolAlloc, DeviceMemoryError> {
-        generic_allocation(
-            self.clone(),
-            memory_type_index,
-            size,
-            alignment,
-            layout,
-            map,
-        )
-    }
-}
-
-unsafe impl DeviceOwned for StandardMemoryPool {
-    #[inline]
-    fn device(&self) -> &Arc<Device> {
-        &self.device
-    }
-}
-
-#[derive(Debug)]
-enum Pool {
-    HostVisible(Arc<StandardHostVisibleMemoryTypePool>),
-    NonHostVisible(Arc<StandardNonHostVisibleMemoryTypePool>),
-}
-
-#[derive(Debug)]
-pub struct StandardMemoryPoolAlloc {
-    inner: StandardMemoryPoolAllocInner,
-    _pool: Arc<StandardMemoryPool>,
-}
-
-impl StandardMemoryPoolAlloc {
-    #[inline]
-    pub fn size(&self) -> DeviceSize {
-        match self.inner {
-            StandardMemoryPoolAllocInner::NonHostVisible(ref mem) => mem.size(),
-            StandardMemoryPoolAllocInner::HostVisible(ref mem) => mem.size(),
-        }
-    }
-}
-
-unsafe impl MemoryPoolAlloc for StandardMemoryPoolAlloc {
-    #[inline]
-    fn memory(&self) -> &DeviceMemory {
-        match self.inner {
-            StandardMemoryPoolAllocInner::NonHostVisible(ref mem) => mem.memory(),
-            StandardMemoryPoolAllocInner::HostVisible(ref mem) => mem.memory().as_ref(),
-        }
-    }
-
-    #[inline]
-    fn mapped_memory(&self) -> Option<&MappedDeviceMemory> {
-        match self.inner {
-            StandardMemoryPoolAllocInner::NonHostVisible(_) => None,
-            StandardMemoryPoolAllocInner::HostVisible(ref mem) => Some(mem.memory()),
-        }
-    }
-
-    #[inline]
-    fn offset(&self) -> DeviceSize {
-        match self.inner {
-            StandardMemoryPoolAllocInner::NonHostVisible(ref mem) => mem.offset(),
-            StandardMemoryPoolAllocInner::HostVisible(ref mem) => mem.offset(),
-        }
-    }
-}
-
-#[derive(Debug)]
-enum StandardMemoryPoolAllocInner {
-    NonHostVisible(StandardNonHostVisibleMemoryTypePoolAlloc),
-    HostVisible(StandardHostVisibleMemoryTypePoolAlloc),
-}
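`StandardMemoryPool` keys its sub-pools on `(memory_type_index, AllocLayout, MappingRequirement)`, so linear and optimal resources, and mapped and unmapped allocations, never share a block. A toy sketch of that keying pattern, with all names illustrative rather than vulkano API:

```rust
use std::collections::HashMap;

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum Layout {
    Linear,
    Optimal,
}

#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum Mapping {
    Map,
    DoNotMap,
}

#[derive(Default)]
struct SubPool; // stands in for a per-memory-type pool with its own blocks

#[derive(Default)]
struct Pools {
    // One sub-pool per (memory type index, layout, mapping) combination.
    map: HashMap<(u32, Layout, Mapping), SubPool>,
}

impl Pools {
    fn get_or_create(&mut self, key: (u32, Layout, Mapping)) -> &mut SubPool {
        // Mirrors the Entry::Occupied / Entry::Vacant split in the deleted code.
        self.map.entry(key).or_default()
    }
}
```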
diff --git a/vulkano/src/pipeline/compute.rs b/vulkano/src/pipeline/compute.rs
index dc332d27..36342e61 100644
--- a/vulkano/src/pipeline/compute.rs
+++ b/vulkano/src/pipeline/compute.rs
@@ -409,6 +409,7 @@ mod tests {
         descriptor_set::{
             allocator::StandardDescriptorSetAllocator, PersistentDescriptorSet, WriteDescriptorSet,
         },
+        memory::allocator::StandardMemoryAllocator,
         pipeline::{ComputePipeline, Pipeline, PipelineBindPoint},
         shader::{ShaderModule, SpecializationConstants, SpecializationMapEntry},
         sync::{now, GpuFuture},
@@ -491,8 +492,9 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device.clone());
         let data_buffer = CpuAccessibleBuffer::from_data(
-            device.clone(),
+            &memory_allocator,
             BufferUsage {
                 storage_buffer: true,
                 ..BufferUsage::empty()
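The test change above is the migration pattern this patch applies everywhere: create a `StandardMemoryAllocator` once per device and pass references to it where constructors previously took the `Device`. Condensed into a standalone sketch (assuming an existing `device: Arc<Device>`):

```rust
use std::sync::Arc;
use vulkano::buffer::{BufferUsage, CpuAccessibleBuffer};
use vulkano::device::Device;
use vulkano::memory::allocator::StandardMemoryAllocator;

fn make_storage_buffer(device: Arc<Device>) {
    // One allocator per device, shared by all resource creation.
    let memory_allocator = StandardMemoryAllocator::new_default(device);
    let _data_buffer = CpuAccessibleBuffer::from_data(
        &memory_allocator,
        BufferUsage {
            storage_buffer: true,
            ..BufferUsage::empty()
        },
        false, // host_cached
        12u32, // initial contents
    )
    .unwrap();
}
```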
diff --git a/vulkano/src/render_pass/framebuffer.rs b/vulkano/src/render_pass/framebuffer.rs
index f88287e8..2b8805c4 100644
--- a/vulkano/src/render_pass/framebuffer.rs
+++ b/vulkano/src/render_pass/framebuffer.rs
@@ -720,6 +720,7 @@ mod tests {
     use crate::{
         format::Format,
         image::{attachment::AttachmentImage, view::ImageView},
+        memory::allocator::StandardMemoryAllocator,
         render_pass::{Framebuffer, FramebufferCreateInfo, FramebufferCreationError, RenderPass},
     };
 
@@ -743,8 +744,9 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let view = ImageView::new_default(
-            AttachmentImage::new(device, [1024, 768], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [1024, 768], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
         let _ = Framebuffer::new(
@@ -810,8 +812,9 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let view = ImageView::new_default(
-            AttachmentImage::new(device, [1024, 768], Format::R8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [1024, 768], Format::R8_UNORM).unwrap(),
         )
         .unwrap();
 
@@ -849,8 +852,9 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let view = ImageView::new_default(
-            AttachmentImage::new(device, [600, 600], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [600, 600], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
 
@@ -886,8 +890,9 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let view = ImageView::new_default(
-            AttachmentImage::new(device, [512, 700], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [512, 700], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
 
@@ -931,12 +936,13 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let a = ImageView::new_default(
-            AttachmentImage::new(device.clone(), [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
         let b = ImageView::new_default(
-            AttachmentImage::new(device, [512, 128], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [512, 128], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
 
@@ -981,8 +987,9 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let view = ImageView::new_default(
-            AttachmentImage::new(device, [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
 
@@ -1023,12 +1030,13 @@ mod tests {
         )
         .unwrap();
 
+        let memory_allocator = StandardMemoryAllocator::new_default(device);
         let a = ImageView::new_default(
-            AttachmentImage::new(device.clone(), [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
         let b = ImageView::new_default(
-            AttachmentImage::new(device, [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
+            AttachmentImage::new(&memory_allocator, [256, 512], Format::R8G8B8A8_UNORM).unwrap(),
         )
         .unwrap();
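The framebuffer tests migrate the same way for images: `AttachmentImage::new` now takes the allocator rather than the device. A standalone sketch under the same assumption of an existing `device: Arc<Device>`:

```rust
use std::sync::Arc;
use vulkano::device::Device;
use vulkano::format::Format;
use vulkano::image::{attachment::AttachmentImage, view::ImageView};
use vulkano::memory::allocator::StandardMemoryAllocator;

fn make_attachment_view(device: Arc<Device>) {
    let memory_allocator = StandardMemoryAllocator::new_default(device);
    // The allocator is passed by reference, so one allocator can serve
    // any number of attachments.
    let _view = ImageView::new_default(
        AttachmentImage::new(&memory_allocator, [1024, 768], Format::R8G8B8A8_UNORM).unwrap(),
    )
    .unwrap();
}
```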
diff --git a/vulkano/src/sampler/ycbcr.rs b/vulkano/src/sampler/ycbcr.rs
index 456980b0..3376312f 100644
--- a/vulkano/src/sampler/ycbcr.rs
+++ b/vulkano/src/sampler/ycbcr.rs
@@ -26,6 +26,7 @@
 //! # let device: std::sync::Arc<vulkano::device::Device> = return;
 //! # let image_data: Vec<u8> = return;
 //! # let queue: std::sync::Arc<vulkano::device::Queue> = return;
+//! # let memory_allocator: vulkano::memory::allocator::StandardMemoryAllocator = return;
 //! # let descriptor_set_allocator: vulkano::descriptor_set::allocator::StandardDescriptorSetAllocator = return;
 //! # let mut command_buffer_builder: vulkano::command_buffer::AutoCommandBufferBuilder<vulkano::command_buffer::PrimaryAutoCommandBuffer> = return;
 //! use vulkano::descriptor_set::{PersistentDescriptorSet, WriteDescriptorSet};
@@ -67,6 +68,7 @@
 //! ).unwrap();
 //!
 //! let image = ImmutableImage::from_iter(
+//!     &memory_allocator,
 //!     image_data,
 //!     ImageDimensions::Dim2d { width: 1920, height: 1080, array_layers: 1 },
 //!     MipmapsCount::One,