mirror of https://github.com/vulkano-rs/vulkano.git
Make the suballocators !Sync (#2317)

parent 64ea44c25e
commit c93d71e064
@@ -239,7 +239,7 @@ use crate::{
     VulkanError,
 };
 use ash::vk::{MAX_MEMORY_HEAPS, MAX_MEMORY_TYPES};
-use parking_lot::RwLock;
+use parking_lot::Mutex;
 use std::{
     error::Error,
     fmt::{Debug, Display, Error as FmtError, Formatter},
@@ -878,7 +878,7 @@ pub struct GenericMemoryAllocator<S> {

 #[derive(Debug)]
 struct Pool<S> {
-    blocks: RwLock<Vec<Box<Block<S>>>>,
+    blocks: Mutex<Vec<Box<Block<S>>>>,
     // This is cached here for faster access, so we don't need to hop through 3 pointers.
     memory_type: ash::vk::MemoryType,
     atom_size: DeviceAlignment,
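Note (illustration, not part of the diff): `Mutex<T>` is `Sync` whenever `T` is `Send`, so guarding the blocks with a `Mutex` keeps the pool shareable across threads even though the suballocators themselves are no longer `Sync`; this is also why the `MemoryAllocator` impl further down can drop its `Sync` bound on `S`. A minimal sketch with standard-library types and a made-up suballocator:

use std::cell::Cell;
use std::sync::Mutex;

// Hypothetical stand-in for a suballocator that uses interior mutability,
// which makes it `!Sync` (a `Cell` cannot be shared between threads).
struct ToySuballocator {
    free_start: Cell<u64>,
}

// A pool in the spirit of the patch: the `Mutex` provides the synchronization,
// so only `Send` is required of the suballocator, not `Sync`.
struct ToyPool {
    blocks: Mutex<Vec<ToySuballocator>>,
}

fn assert_send_sync<T: Send + Sync>() {}

fn main() {
    // Compiles because `Mutex<T>: Sync` only needs `T: Send`.
    assert_send_sync::<ToyPool>();
    // `assert_send_sync::<ToySuballocator>()` would fail: `Cell` is not `Sync`.

    let pool = ToyPool {
        blocks: Mutex::new(vec![ToySuballocator { free_start: Cell::new(0) }]),
    };
    pool.blocks.lock().unwrap()[0].free_start.set(64);
    assert_eq!(pool.blocks.lock().unwrap()[0].free_start.get(), 64);
}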
@@ -888,7 +888,7 @@ impl<S> GenericMemoryAllocator<S> {
     // This is a false-positive, we only use this const for static initialization.
     #[allow(clippy::declare_interior_mutable_const)]
     const EMPTY_POOL: Pool<S> = Pool {
-        blocks: RwLock::new(Vec::new()),
+        blocks: Mutex::new(Vec::new()),
         memory_type: ash::vk::MemoryType {
             property_flags: ash::vk::MemoryPropertyFlags::empty(),
             heap_index: 0,
@@ -1068,7 +1068,7 @@ impl<S> GenericMemoryAllocator<S> {
     }
 }

-unsafe impl<S: Suballocator + Send + Sync + 'static> MemoryAllocator for GenericMemoryAllocator<S> {
+unsafe impl<S: Suballocator + Send + 'static> MemoryAllocator for GenericMemoryAllocator<S> {
     fn find_memory_type_index(
         &self,
         memory_type_bits: u32,
@@ -1145,64 +1145,19 @@ unsafe impl<S: Suballocator + Send + Sync + 'static> MemoryAllocator for GenericMemoryAllocator<S> {

         layout = layout.align_to(pool.atom_size).unwrap();

-        let mut blocks = if S::IS_BLOCKING {
-            // If the allocation algorithm needs to block, then there's no point in trying to avoid
-            // locks here either. In that case the best strategy is to take full advantage of it by
-            // always taking an exclusive lock, which lets us sort the blocks by free size. If you
-            // as a user want to avoid locks, simply don't share the allocator between threads. You
-            // can create as many allocators as you wish, but keep in mind that that will waste a
-            // huge amount of memory unless you configure your block sizes properly!
-
-            let mut blocks = pool.blocks.write();
-
-            // TODO: Incremental sorting
-            blocks.sort_by_key(|block| block.free_size());
-            let (Ok(idx) | Err(idx)) =
-                blocks.binary_search_by_key(&size, |block| block.free_size());
-
-            for block in &blocks[idx..] {
-                if let Ok(allocation) =
-                    block.allocate(layout, allocation_type, self.buffer_image_granularity)
-                {
-                    return Ok(allocation);
-                }
-            }
-
-            blocks
-        } else {
-            // If the allocation algorithm is lock-free, then we should avoid taking an exclusive
-            // lock unless it is absolutely neccessary (meaning, only when allocating a new
-            // `DeviceMemory` block and inserting it into a pool). This has the disadvantage that
-            // traversing the pool is O(n), which is not a problem since the number of blocks is
-            // expected to be small. If there are more than 10 blocks in a pool then that's a
-            // configuration error. Also, sorting the blocks before each allocation would be less
-            // efficient because to get the free size of the `PoolAllocator` and `BumpAllocator`
-            // has the same performance as trying to allocate.
-
-            let blocks = pool.blocks.read();
-
-            // Search in reverse order because we always append new blocks at the end.
-            for block in blocks.iter().rev() {
-                if let Ok(allocation) =
-                    block.allocate(layout, allocation_type, self.buffer_image_granularity)
-                {
-                    return Ok(allocation);
-                }
-            }
-
-            let len = blocks.len();
-            drop(blocks);
-            let blocks = pool.blocks.write();
-
-            if blocks.len() > len {
-                // Another thread beat us to it and inserted a fresh block, try to suballocate it.
-                if let Ok(allocation) =
-                    blocks[len].allocate(layout, allocation_type, self.buffer_image_granularity)
-                {
-                    return Ok(allocation);
-                }
-            }
-
-            blocks
-        };
+        let mut blocks = pool.blocks.lock();
+
+        // TODO: Incremental sorting
+        blocks.sort_by_key(|block| block.free_size());
+        let (Ok(idx) | Err(idx)) = blocks.binary_search_by_key(&size, |block| block.free_size());
+
+        for block in &blocks[idx..] {
+            if let Ok(allocation) =
+                block.allocate(layout, allocation_type, self.buffer_image_granularity)
+            {
+                return Ok(allocation);
+            }
+        }

         // For bump allocators, first do a garbage sweep and try to allocate again.
         if S::NEEDS_CLEANUP {
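For illustration only (toy `Block` type, plain `u64` sizes, no alignment handling), this is the shape of the strategy that now runs unconditionally, since the lock-free path is gone: take the lock, sort the blocks by free size, binary-search for the first block that could fit, then try the candidates in order:

use std::sync::Mutex;

// Toy stand-in for a memory block; only what the search needs.
struct Block {
    free: u64,
}

impl Block {
    // Pretend-allocate: succeeds if enough space is left.
    fn allocate(&mut self, size: u64) -> Result<u64, ()> {
        if size <= self.free {
            let offset = 0; // a real block would compute a proper offset here
            self.free -= size;
            Ok(offset)
        } else {
            Err(())
        }
    }
}

fn allocate_from_pool(pool: &Mutex<Vec<Block>>, size: u64) -> Option<u64> {
    let mut blocks = pool.lock().unwrap();

    // Sort ascending by free size so the binary search below lands on the
    // first block that might fit (best-fit-ish strategy).
    blocks.sort_by_key(|block| block.free);
    let (Ok(idx) | Err(idx)) = blocks.binary_search_by_key(&size, |block| block.free);

    // Every block from `idx` on has at least `size` bytes free (modulo
    // alignment, which the real allocator also has to account for).
    for block in &mut blocks[idx..] {
        if let Ok(offset) = block.allocate(size) {
            return Some(offset);
        }
    }

    None
}

fn main() {
    let pool = Mutex::new(vec![Block { free: 128 }, Block { free: 1024 }]);
    assert_eq!(allocate_from_pool(&pool, 256), Some(0));
    assert!(allocate_from_pool(&pool, 4096).is_none());
}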
@@ -1484,33 +1439,30 @@ unsafe impl<S: Suballocator + Send + Sync + 'static> MemoryAllocator for GenericMemoryAllocator<S> {

     unsafe fn deallocate(&self, allocation: MemoryAlloc) {
         if let Some(suballocation) = allocation.suballocation {
+            let memory_type_index = allocation.device_memory.memory_type_index();
+            let pool = self.pools[memory_type_index as usize].blocks.lock();
             let block_ptr = allocation.allocation_handle.0 as *const Block<S>;

             // TODO: Maybe do a similar check for dedicated blocks.
-            #[cfg(debug_assertions)]
-            {
-                let memory_type_index = allocation.device_memory.memory_type_index();
-                let pool = self.pools[memory_type_index as usize].blocks.read();
-                assert!(
-                    pool.iter()
-                        .any(|block| &**block as *const Block<S> == block_ptr),
-                    "attempted to deallocate a memory block that does not belong to this allocator",
-                );
-            }
+            debug_assert!(
+                pool.iter()
+                    .any(|block| &**block as *const Block<S> == block_ptr),
+                "attempted to deallocate a memory block that does not belong to this allocator",
+            );

             // SAFETY: The caller must guarantee that `allocation` refers to one allocated by
             // `self`, therefore `block_ptr` must be the same one we gave out on allocation. We
             // know that this pointer must be valid, because all blocks are boxed and pinned in
             // memory and because a block isn't dropped until the allocator itself is dropped, at
             // which point it would be impossible to call this method. We also know that it must be
-            // valid to create a reference to the block, because we only ever access it via shared
-            // references.
+            // valid to create a reference to the block, because we locked the pool it belongs to.
             let block = &*block_ptr;

             // SAFETY: The caller must guarantee that `allocation` refers to a currently allocated
             // allocation of `self`.
             block.deallocate(suballocation);

+            drop(pool);
         }
     }
 }
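A small sketch (toy types, not vulkano's) of the ownership check that now runs as a `debug_assert!` while the pool is locked: the allocation handle is a raw pointer to a boxed block, so membership in the pool can be verified by pointer identity:

struct Block {
    id: u32,
}

struct Pool {
    // Boxing keeps each block at a stable address, so a raw pointer to it
    // stays valid for as long as the pool owns the box.
    blocks: Vec<Box<Block>>,
}

fn belongs_to_pool(pool: &Pool, block_ptr: *const Block) -> bool {
    pool.blocks
        .iter()
        .any(|block| &**block as *const Block == block_ptr)
}

fn main() {
    let pool = Pool {
        blocks: vec![Box::new(Block { id: 0 }), Box::new(Block { id: 1 })],
    };
    let handle = &*pool.blocks[1] as *const Block;
    assert_eq!(pool.blocks[1].id, 1);

    debug_assert!(
        belongs_to_pool(&pool, handle),
        "attempted to deallocate a memory block that does not belong to this allocator",
    );

    let stray = Box::new(Block { id: 99 });
    assert!(!belongs_to_pool(&pool, &*stray as *const Block));
}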
@@ -18,14 +18,12 @@ use super::{
     align_down, align_up, array_vec::ArrayVec, AllocationHandle, DeviceAlignment, DeviceLayout,
 };
 use crate::{image::ImageTiling, memory::is_aligned, DeviceSize, NonZeroDeviceSize};
-use parking_lot::Mutex;
 use std::{
-    cell::Cell,
+    cell::{Cell, UnsafeCell},
     cmp,
     error::Error,
     fmt::{self, Display},
     ptr,
-    sync::atomic::{AtomicU64, Ordering},
 };

 /// Suballocators are used to divide a *region* into smaller *suballocations*.
@@ -69,14 +67,6 @@ use std::{
 /// [`DeviceMemory`]: crate::memory::DeviceMemory
 /// [pages]: super#pages
 pub unsafe trait Suballocator {
-    /// Whether this allocator needs to block or not.
-    ///
-    /// This is used by the [`GenericMemoryAllocator`] to specialize the allocation strategy to the
-    /// suballocator at compile time.
-    ///
-    /// [`GenericMemoryAllocator`]: super::GenericMemoryAllocator
-    const IS_BLOCKING: bool;
-
     /// Whether the allocator needs [`cleanup`] to be called before memory can be released.
     ///
     /// This is used by the [`GenericMemoryAllocator`] to specialize the allocation strategy to the
@@ -280,13 +270,11 @@ impl Display for SuballocatorError {
 pub struct FreeListAllocator {
     region_offset: DeviceSize,
     // Total memory remaining in the region.
-    free_size: AtomicU64,
-    state: Mutex<FreeListAllocatorState>,
+    free_size: Cell<DeviceSize>,
+    state: UnsafeCell<FreeListAllocatorState>,
 }

 unsafe impl Suballocator for FreeListAllocator {
-    const IS_BLOCKING: bool = true;
-
     const NEEDS_CLEANUP: bool = false;

     /// Creates a new `FreeListAllocator` for the given [region].
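A minimal sketch (hypothetical `ToyFreeList`, not the real `FreeListAllocator`) of what trading `AtomicU64`/`Mutex` for `Cell`/`UnsafeCell` means: `&self` methods can still mutate state, but `UnsafeCell` makes the type `!Sync`, so the compiler rejects unsynchronized sharing across threads instead of every call paying for atomics and a lock:

use std::cell::{Cell, UnsafeCell};

struct ToyFreeList {
    free_size: Cell<u64>,
    // In the patch this holds the node pool and free-list; a Vec stands in here.
    state: UnsafeCell<Vec<u64>>,
}

impl ToyFreeList {
    fn new(region_size: u64) -> Self {
        ToyFreeList {
            free_size: Cell::new(region_size),
            state: UnsafeCell::new(vec![region_size]),
        }
    }

    fn allocate(&self, size: u64) -> Option<()> {
        // SAFETY (single-threaded by construction): `UnsafeCell` makes this type
        // `!Sync`, so no other thread can hold a `&self` concurrently, and this
        // method creates only one exclusive reference at a time.
        let state = unsafe { &mut *self.state.get() };
        let free = state.last_mut()?;
        if *free < size {
            return None;
        }
        *free -= size;
        self.free_size.set(self.free_size.get() - size);
        Some(())
    }

    fn free_size(&self) -> u64 {
        self.free_size.get()
    }
}

fn main() {
    let alloc = ToyFreeList::new(1024);
    alloc.allocate(256).unwrap();
    assert_eq!(alloc.free_size(), 768);
    // Sharing `&ToyFreeList` between threads no longer compiles; to share it,
    // wrap it in a `Mutex` (as the updated tests in this commit do).
}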
@@ -296,7 +284,7 @@ unsafe impl Suballocator for FreeListAllocator {
         // NOTE(Marc): This number was pulled straight out of my a-
         const AVERAGE_ALLOCATION_SIZE: DeviceSize = 64 * 1024;

-        let free_size = AtomicU64::new(region_size);
+        let free_size = Cell::new(region_size);

         let capacity = (region_size / AVERAGE_ALLOCATION_SIZE) as usize;
         let mut nodes = host::PoolAllocator::new(capacity + 64);
@@ -309,7 +297,7 @@ unsafe impl Suballocator for FreeListAllocator {
             ty: SuballocationType::Free,
         });
         free_list.push(root_id);
-        let state = Mutex::new(FreeListAllocatorState { nodes, free_list });
+        let state = UnsafeCell::new(FreeListAllocatorState { nodes, free_list });

         FreeListAllocator {
             region_offset,
@@ -337,7 +325,7 @@ unsafe impl Suballocator for FreeListAllocator {

         let size = layout.size();
         let alignment = layout.alignment();
-        let mut state = self.state.lock();
+        let state = unsafe { &mut *self.state.get() };

         unsafe {
             match state.free_list.last() {
@@ -392,7 +380,7 @@ unsafe impl Suballocator for FreeListAllocator {

                     // This can't overflow because suballocation sizes in the free-list are
                     // constrained by the remaining size of the region.
-                    self.free_size.fetch_sub(size, Ordering::Release);
+                    self.free_size.set(self.free_size.get() - size);

                     return Ok(Suballocation {
                         offset,
@@ -421,14 +409,14 @@ unsafe impl Suballocator for FreeListAllocator {
         // allocation of `self`.
         let node_id = SlotId::new(suballocation.handle.0 as _);

-        let mut state = self.state.lock();
+        let state = unsafe { &mut *self.state.get() };
         let node = state.nodes.get_mut(node_id);

         debug_assert!(node.ty != SuballocationType::Free);

         // Suballocation sizes are constrained by the size of the region, so they can't possibly
         // overflow when added up.
-        self.free_size.fetch_add(node.size, Ordering::Release);
+        self.free_size.set(self.free_size.get() + node.size);

         node.ty = SuballocationType::Free;
         state.coalesce(node_id);
@@ -437,7 +425,7 @@ unsafe impl Suballocator for FreeListAllocator {

     #[inline]
     fn free_size(&self) -> DeviceSize {
-        self.free_size.load(Ordering::Acquire)
+        self.free_size.get()
     }

     #[inline]
@@ -748,8 +736,8 @@ impl FreeListAllocatorState {
 pub struct BuddyAllocator {
     region_offset: DeviceSize,
     // Total memory remaining in the region.
-    free_size: AtomicU64,
-    state: Mutex<BuddyAllocatorState>,
+    free_size: Cell<DeviceSize>,
+    state: UnsafeCell<BuddyAllocatorState>,
 }

 impl BuddyAllocator {
@@ -761,8 +749,6 @@ impl BuddyAllocator {
 }

 unsafe impl Suballocator for BuddyAllocator {
-    const IS_BLOCKING: bool = true;
-
     const NEEDS_CLEANUP: bool = false;

     /// Creates a new `BuddyAllocator` for the given [region].
@@ -783,13 +769,13 @@ unsafe impl Suballocator for BuddyAllocator {

         assert!(max_order < BuddyAllocator::MAX_ORDERS);

-        let free_size = AtomicU64::new(region_size);
+        let free_size = Cell::new(region_size);

         let mut free_list =
             ArrayVec::new(max_order + 1, [EMPTY_FREE_LIST; BuddyAllocator::MAX_ORDERS]);
         // The root node has the lowest offset and highest order, so it's the whole region.
         free_list[max_order].push(region_offset);
-        let state = Mutex::new(BuddyAllocatorState { free_list });
+        let state = UnsafeCell::new(BuddyAllocatorState { free_list });

         BuddyAllocator {
             region_offset,
@@ -840,7 +826,7 @@ unsafe impl Suballocator for BuddyAllocator {
         let size = cmp::max(size, BuddyAllocator::MIN_NODE_SIZE).next_power_of_two();

         let min_order = (size / BuddyAllocator::MIN_NODE_SIZE).trailing_zeros() as usize;
-        let mut state = self.state.lock();
+        let state = unsafe { &mut *self.state.get() };

         // Start searching at the lowest possible order going up.
         for (order, free_list) in state.free_list.iter_mut().enumerate().skip(min_order) {
@@ -875,7 +861,7 @@ unsafe impl Suballocator for BuddyAllocator {

                 // This can't overflow because suballocation sizes in the free-list are
                 // constrained by the remaining size of the region.
-                self.free_size.fetch_sub(size, Ordering::Release);
+                self.free_size.set(self.free_size.get() - size);

                 return Ok(Suballocation {
                     offset,
@@ -900,7 +886,7 @@ unsafe impl Suballocator for BuddyAllocator {
         let order = suballocation.handle.0 as usize;

         let min_order = order;
-        let mut state = self.state.lock();
+        let state = unsafe { &mut *self.state.get() };

         debug_assert!(!state.free_list[order].contains(&offset));

@@ -930,7 +916,7 @@ unsafe impl Suballocator for BuddyAllocator {

                 // The sizes of suballocations allocated by `self` are constrained by that of
                 // its region, so they can't possibly overflow when added up.
-                self.free_size.fetch_add(size, Ordering::Release);
+                self.free_size.set(self.free_size.get() + size);

                 break;
             }
@@ -945,7 +931,7 @@ unsafe impl Suballocator for BuddyAllocator {
     /// [internal fragmentation]: super#internal-fragmentation
     #[inline]
     fn free_size(&self) -> DeviceSize {
-        self.free_size.load(Ordering::Acquire)
+        self.free_size.get()
     }

     #[inline]
@@ -1014,9 +1000,8 @@ struct BuddyAllocatorState {
 pub struct BumpAllocator {
     region_offset: DeviceSize,
     region_size: DeviceSize,
-    // Encodes the previous allocation type in the 2 least signifficant bits and the free start in
-    // the rest.
-    state: AtomicU64,
+    free_start: Cell<DeviceSize>,
+    prev_allocation_type: Cell<AllocationType>,
 }

 impl BumpAllocator {
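The removed comment referred to a bit-packing scheme; as a worked example (recreated here for illustration, mirroring the removed code), the old `state` packed the free start into the upper bits and the allocation type into the two least significant bits, i.e. `state = free_start << 2 | allocation_type`, decoded with `state >> 2` and `state & 0b11`. The replacement simply stores the two values in separate `Cell`s:

// Recreation of the removed encoding, for illustration only.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum AllocationType {
    Unknown = 0,
    Linear = 1,
    NonLinear = 2,
}

fn encode(free_start: u64, ty: AllocationType) -> u64 {
    // Requires free_start <= u64::MAX >> 2, which the removed code guaranteed
    // by asserting `region_size <= DeviceLayout::MAX_SIZE >> 2`.
    free_start << 2 | ty as u64
}

fn decode(state: u64) -> (u64, AllocationType) {
    let ty = match state & 0b11 {
        0 => AllocationType::Unknown,
        1 => AllocationType::Linear,
        2 => AllocationType::NonLinear,
        _ => unreachable!(),
    };
    (state >> 2, ty)
}

fn main() {
    let state = encode(4096, AllocationType::Linear);
    assert_eq!(state, 4096 * 4 + 1);
    assert_eq!(decode(state), (4096, AllocationType::Linear));
}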
@@ -1025,29 +1010,23 @@ impl BumpAllocator {
     /// [region]: Suballocator#regions
     #[inline]
     pub fn reset(&mut self) {
-        *self.state.get_mut() = AllocationType::Unknown as DeviceSize;
+        *self.free_start.get_mut() = 0;
+        *self.prev_allocation_type.get_mut() = AllocationType::Unknown;
     }
 }

 unsafe impl Suballocator for BumpAllocator {
-    const IS_BLOCKING: bool = false;
-
     const NEEDS_CLEANUP: bool = true;

     /// Creates a new `BumpAllocator` for the given [region].
     ///
     /// [region]: Suballocator#regions
     fn new(region_offset: DeviceSize, region_size: DeviceSize) -> Self {
-        // Sanity check: this would lead to UB because of the left-shifting by 2 needed to encode
-        // the free-start into the state.
-        assert!(region_size <= (DeviceLayout::MAX_SIZE >> 2));
-
-        let state = AtomicU64::new(AllocationType::Unknown as DeviceSize);
-
         BumpAllocator {
             region_offset,
             region_size,
-            state,
+            free_start: Cell::new(0),
+            prev_allocation_type: Cell::new(AllocationType::Unknown),
         }
     }

@@ -1058,97 +1037,42 @@ unsafe impl Suballocator for BumpAllocator {
         allocation_type: AllocationType,
         buffer_image_granularity: DeviceAlignment,
     ) -> Result<Suballocation, SuballocatorError> {
-        const SPIN_LIMIT: u32 = 6;
-
-        // NOTE(Marc): The following code is a minimal version `Backoff` taken from
-        // crossbeam_utils v0.8.11, because we didn't want to add a dependency for a couple lines
-        // that are used in one place only.
-        /// Original documentation:
-        /// https://docs.rs/crossbeam-utils/0.8.11/crossbeam_utils/struct.Backoff.html
-        struct Backoff {
-            step: Cell<u32>,
-        }
-
-        impl Backoff {
-            fn new() -> Self {
-                Backoff { step: Cell::new(0) }
-            }
-
-            fn spin(&self) {
-                for _ in 0..1 << self.step.get().min(SPIN_LIMIT) {
-                    core::hint::spin_loop();
-                }
-
-                if self.step.get() <= SPIN_LIMIT {
-                    self.step.set(self.step.get() + 1);
-                }
-            }
-        }
-
         fn has_granularity_conflict(prev_ty: AllocationType, ty: AllocationType) -> bool {
             prev_ty == AllocationType::Unknown || prev_ty != ty
         }

         let size = layout.size();
         let alignment = layout.alignment();
-        let backoff = Backoff::new();
-        let mut state = self.state.load(Ordering::Relaxed);
-
-        loop {
-            let free_start = state >> 2;
-
-            // These can't overflow because offsets are constrained by the size of the root
-            // allocation, which can itself not exceed `DeviceLayout::MAX_SIZE`.
-            let prev_end = self.region_offset + free_start;
-            let mut offset = align_up(prev_end, alignment);
-
-            if buffer_image_granularity != DeviceAlignment::MIN {
-                let prev_alloc_type = match state & 0b11 {
-                    0 => AllocationType::Unknown,
-                    1 => AllocationType::Linear,
-                    2 => AllocationType::NonLinear,
-                    _ => unreachable!(),
-                };
-
-                if prev_end > 0
-                    && are_blocks_on_same_page(0, prev_end, offset, buffer_image_granularity)
-                    && has_granularity_conflict(prev_alloc_type, allocation_type)
-                {
-                    offset = align_up(offset, buffer_image_granularity);
-                }
-            }
-
-            let relative_offset = offset - self.region_offset;
-
-            let free_start = relative_offset + size;
-
-            if free_start > self.region_size {
-                return Err(SuballocatorError::OutOfRegionMemory);
-            }
-
-            // This can't discard any bits because we checked that `region_size` does not exceed
-            // `DeviceLayout::MAX_SIZE >> 2`.
-            let new_state = free_start << 2 | allocation_type as DeviceSize;
-
-            match self.state.compare_exchange_weak(
-                state,
-                new_state,
-                Ordering::Release,
-                Ordering::Relaxed,
-            ) {
-                Ok(_) => {
-                    return Ok(Suballocation {
-                        offset,
-                        size,
-                        handle: AllocationHandle(ptr::null_mut()),
-                    });
-                }
-                Err(new_state) => {
-                    state = new_state;
-                    backoff.spin();
-                }
-            }
-        }
+
+        // These can't overflow because offsets are constrained by the size of the root
+        // allocation, which can itself not exceed `DeviceLayout::MAX_SIZE`.
+        let prev_end = self.region_offset + self.free_start.get();
+        let mut offset = align_up(prev_end, alignment);
+
+        if buffer_image_granularity != DeviceAlignment::MIN
+            && prev_end > 0
+            && are_blocks_on_same_page(0, prev_end, offset, buffer_image_granularity)
+            && has_granularity_conflict(self.prev_allocation_type.get(), allocation_type)
+        {
+            offset = align_up(offset, buffer_image_granularity);
+        }
+
+        let relative_offset = offset - self.region_offset;
+
+        let free_start = relative_offset + size;
+
+        if free_start > self.region_size {
+            return Err(SuballocatorError::OutOfRegionMemory);
+        }
+
+        self.free_start.set(free_start);
+        self.prev_allocation_type.set(allocation_type);
+
+        Ok(Suballocation {
+            offset,
+            size,
+            handle: AllocationHandle(ptr::null_mut()),
+        })
     }

     #[inline]
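A self-contained sketch of the single-threaded bump logic that replaces the CAS loop (simplified: plain `u64`s, a local `align_up`, and no buffer-image granularity handling):

use std::cell::Cell;

fn align_up(value: u64, alignment: u64) -> u64 {
    // `alignment` must be a power of two.
    debug_assert!(alignment.is_power_of_two());
    (value + alignment - 1) & !(alignment - 1)
}

struct ToyBumpAllocator {
    region_offset: u64,
    region_size: u64,
    free_start: Cell<u64>,
}

impl ToyBumpAllocator {
    fn new(region_offset: u64, region_size: u64) -> Self {
        ToyBumpAllocator { region_offset, region_size, free_start: Cell::new(0) }
    }

    // No loop and no compare-exchange: the type is !Sync, so a plain read,
    // check and write is enough.
    fn allocate(&self, size: u64, alignment: u64) -> Option<u64> {
        let prev_end = self.region_offset + self.free_start.get();
        let offset = align_up(prev_end, alignment);

        let free_start = offset - self.region_offset + size;
        if free_start > self.region_size {
            return None; // out of region memory
        }

        self.free_start.set(free_start);
        Some(offset)
    }

    fn free_size(&self) -> u64 {
        self.region_size - self.free_start.get()
    }
}

fn main() {
    let allocator = ToyBumpAllocator::new(0, 1024);
    assert_eq!(allocator.allocate(100, 1), Some(0));
    assert_eq!(allocator.allocate(100, 64), Some(128)); // 100 aligned up to 128
    assert_eq!(allocator.free_size(), 1024 - 228);
}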
@@ -1158,7 +1082,7 @@ unsafe impl Suballocator for BumpAllocator {

     #[inline]
     fn free_size(&self) -> DeviceSize {
-        self.region_size - (self.state.load(Ordering::Acquire) >> 2)
+        self.region_size - self.free_start.get()
     }

     #[inline]
@@ -1303,6 +1227,7 @@ mod host {
 mod tests {
     use super::*;
     use crossbeam_queue::ArrayQueue;
+    use parking_lot::Mutex;
     use std::thread;

     const fn unwrap<T: Copy>(opt: Option<T>) -> T {
@@ -1322,7 +1247,7 @@ mod tests {
         const REGION_SIZE: DeviceSize =
             (ALLOCATION_STEP * (THREADS + 1) * THREADS / 2) * ALLOCATIONS_PER_THREAD;

-        let allocator = FreeListAllocator::new(0, REGION_SIZE);
+        let allocator = Mutex::new(FreeListAllocator::new(0, REGION_SIZE));
         let allocs = ArrayQueue::new((ALLOCATIONS_PER_THREAD * THREADS) as usize);

         // Using threads to randomize allocation order.
@@ -1337,6 +1262,7 @@ mod tests {
                     allocs
                         .push(
                             allocator
+                                .lock()
                                 .allocate(layout, AllocationType::Unknown, DeviceAlignment::MIN)
                                 .unwrap(),
                         )
@@ -1346,6 +1272,8 @@ mod tests {
             }
         });

+        let allocator = allocator.into_inner();
+
         assert!(allocator
             .allocate(DUMMY_LAYOUT, AllocationType::Unknown, DeviceAlignment::MIN)
             .is_err());
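The test changes show the intended usage pattern: the suballocators are no longer `Sync`, so a test that allocates from several threads now wraps the allocator in a `Mutex` and takes it back out with `into_inner()` afterwards. A standalone sketch of the same pattern, using `std::sync::Mutex` here (the test itself uses `parking_lot::Mutex`, whose `lock()` is infallible):

use std::cell::Cell;
use std::sync::Mutex;
use std::thread;

// Stand-in for a !Sync suballocator (interior mutability via Cell).
struct ToyAllocator {
    used: Cell<u64>,
}

impl ToyAllocator {
    fn allocate(&self, size: u64) {
        self.used.set(self.used.get() + size);
    }
}

fn main() {
    // The Mutex supplies the Sync-ness the allocator itself no longer has.
    let allocator = Mutex::new(ToyAllocator { used: Cell::new(0) });

    thread::scope(|scope| {
        for _ in 0..4 {
            let allocator = &allocator;
            scope.spawn(move || {
                for _ in 0..1000 {
                    allocator.lock().unwrap().allocate(1);
                }
            });
        }
    });

    // Once the threads are done, unwrap the Mutex to use the allocator directly.
    let allocator = allocator.into_inner().unwrap();
    assert_eq!(allocator.used.get(), 4 * 1000);
}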
@@ -1709,39 +1637,4 @@ mod tests {
         allocator.reset();
         assert!(allocator.free_size() == REGION_SIZE);
     }
-
-    #[test]
-    fn bump_allocator_syncness() {
-        const THREADS: DeviceSize = 12;
-        const ALLOCATIONS_PER_THREAD: DeviceSize = 100_000;
-        const ALLOCATION_STEP: DeviceSize = 117;
-        const REGION_SIZE: DeviceSize =
-            (ALLOCATION_STEP * (THREADS + 1) * THREADS / 2) * ALLOCATIONS_PER_THREAD;
-
-        let mut allocator = BumpAllocator::new(0, REGION_SIZE);
-
-        thread::scope(|scope| {
-            for i in 1..=THREADS {
-                let allocator = &allocator;
-
-                scope.spawn(move || {
-                    let layout = DeviceLayout::from_size_alignment(i * ALLOCATION_STEP, 1).unwrap();
-
-                    for _ in 0..ALLOCATIONS_PER_THREAD {
-                        allocator
-                            .allocate(layout, AllocationType::Unknown, DeviceAlignment::MIN)
-                            .unwrap();
-                    }
-                });
-            }
-        });
-
-        assert!(allocator
-            .allocate(DUMMY_LAYOUT, AllocationType::Unknown, DeviceAlignment::MIN)
-            .is_err());
-        assert!(allocator.free_size() == 0);
-
-        allocator.reset();
-        assert!(allocator.free_size() == REGION_SIZE);
-    }
 }