Expose GPU allocation reports in wgpu, wgpu-core and wgpu-hal

This commit is contained in:
Nicolas Silva 2024-07-18 17:35:26 +02:00 committed by Teodor Tanasoaia
parent 20973d1cdc
commit bc7622f641
9 changed files with 195 additions and 1 deletion

View File

@ -2458,6 +2458,17 @@ impl Global {
}
}
/// Produce a GPU memory allocator report for the device identified by
/// `device_id`, if the backend supports generating one.
///
/// Returns `None` when the id does not resolve to a live device, or when
/// the backend does not produce allocator reports.
pub fn device_generate_allocator_report<A: HalApi>(
    &self,
    device_id: DeviceId,
) -> Option<wgt::AllocatorReport> {
    let hub = A::hub(self);
    // Treat an invalid/expired id the same as an unsupported backend.
    let device = hub.devices.get(device_id).ok()?;
    device.generate_allocator_report()
}
pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) {
profiling::scope!("Queue::drop");
api_log!("Queue::drop {queue_id:?}");

View File

@ -3598,6 +3598,13 @@ impl<A: HalApi> Device<A> {
.map(|raw| raw.get_internal_counters())
.unwrap_or_default()
}
/// Generate a report of this device's GPU memory allocations, if the
/// backend supports it and the raw device is still alive.
pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
    // `and_then` is the idiomatic flattening of Option<Option<_>>:
    // `None` both when the raw device is gone and when the backend
    // produces no report. Equivalent to the previous
    // `.map(...).unwrap_or_default()` but without the detour through a
    // nested Option.
    self.raw
        .as_ref()
        .and_then(|raw| raw.generate_allocator_report())
}
}
impl<A: HalApi> Device<A> {

View File

@ -1801,4 +1801,41 @@ impl crate::Device for super::Device {
fn get_internal_counters(&self) -> wgt::HalCounters {
self.counters.clone()
}
/// Convert the `gpu_allocator` crate's report into a [`wgt::AllocatorReport`].
///
/// Only compiled when the `windows_rs` feature pulls in the suballocator;
/// returns `None` when this device has no suballocator
/// (`self.mem_allocator` is `None`).
#[cfg(feature = "windows_rs")]
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
    // Generate the upstream report inside a block so the allocator lock
    // is released before the conversion work below.
    let mut upstream = {
        self.mem_allocator
            .as_ref()?
            .lock()
            .allocator
            .generate_report()
    };
    // `iter_mut` + `mem::take` steals each allocation name String instead
    // of cloning it; `upstream.allocations` is discarded afterwards.
    let allocations = upstream
        .allocations
        .iter_mut()
        .map(|alloc| wgt::AllocationReport {
            name: std::mem::take(&mut alloc.name),
            offset: alloc.offset,
            size: alloc.size,
        })
        .collect();
    // Block entries only carry a size and an index range, so a plain
    // clone of the range is cheap here.
    let blocks = upstream
        .blocks
        .iter()
        .map(|block| wgt::MemoryBlockReport {
            size: block.size,
            allocations: block.allocations.clone(),
        })
        .collect();
    Some(wgt::AllocatorReport {
        allocations,
        blocks,
        total_allocated_bytes: upstream.total_allocated_bytes,
        total_reserved_bytes: upstream.total_reserved_bytes,
    })
}
}

View File

@ -894,6 +894,10 @@ pub trait Device: WasmNotSendSync {
);
fn get_internal_counters(&self) -> wgt::HalCounters;
/// Generate a GPU memory allocator report, if supported.
///
/// The default implementation reports nothing; backends that perform
/// sub-allocation override this.
fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
    None
}
}
pub trait Queue: WasmNotSendSync {

View File

@ -1,5 +1,6 @@
#[cfg(feature = "counters")]
use std::sync::atomic::{AtomicIsize, Ordering};
use std::{fmt, ops::Range};
/// An internal counter for debugging purposes
///
@ -128,7 +129,7 @@ pub struct HalCounters {
/// `wgpu-core`'s internal counters.
#[derive(Clone, Default)]
pub struct CoreCounters {
    // TODO: add wgpu-core-level counters here, likely gated behind the
    // "counters" feature like the hal counters.
}
/// All internal counters, exposed for debugging purposes.
@ -139,3 +140,90 @@ pub struct InternalCounters {
/// `wgpu-hal` counters.
pub hal: HalCounters,
}
/// Describes a single live allocation in an [`AllocatorReport`].
#[derive(Clone)]
pub struct AllocationReport {
    /// The name provided to the `allocate()` function.
    pub name: String,
    /// The offset in bytes of the allocation within its memory block.
    pub offset: u64,
    /// The size in bytes of the allocation.
    pub size: u64,
}
/// Describes a memory block in the [`AllocatorReport`].
#[derive(Clone)]
pub struct MemoryBlockReport {
    /// The size in bytes of this memory block.
    pub size: u64,
    /// The range of indices into [`AllocatorReport::allocations`] for the
    /// allocations associated with this memory block.
    pub allocations: Range<usize>,
}
/// A report that can be generated for informational purposes using `Allocator::generate_report()`.
///
/// The `Debug` implementation prints a summary; a format precision such as
/// `{:.5?}` limits the listing to the five largest allocations.
#[derive(Clone)]
pub struct AllocatorReport {
    /// All live allocations, sub-allocated from memory blocks.
    pub allocations: Vec<AllocationReport>,
    /// All memory blocks.
    pub blocks: Vec<MemoryBlockReport>,
    /// Sum of the memory used by all allocations, in bytes.
    pub total_allocated_bytes: u64,
    /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes.
    pub total_reserved_bytes: u64,
}
impl fmt::Debug for AllocationReport {
    /// Formats as `"name": <size>`, using `"--"` for unnamed allocations.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self.name.as_str() {
            "" => "--",
            name => name,
        };
        write!(f, "{label:?}: {}", FmtBytes(self.size))
    }
}
impl fmt::Debug for AllocatorReport {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let mut allocations = self.allocations.clone();
allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size));
let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX);
allocations.truncate(max_num_allocations_to_print);
f.debug_struct("AllocatorReport")
.field(
"summary",
&std::format_args!(
"{} / {}",
FmtBytes(self.total_allocated_bytes),
FmtBytes(self.total_reserved_bytes)
),
)
.field("blocks", &self.blocks.len())
.field("allocations", &self.allocations.len())
.field("largest", &allocations.as_slice())
.finish()
}
}
/// Formats a byte count with two decimals and the largest suffix
/// (B, KB, MB, GB, TB) that keeps the value below 1024 (TB is the cap).
struct FmtBytes(u64);

impl fmt::Display for FmtBytes {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const SUFFIX: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
        let mut value = self.0 as f64;
        let mut unit = 0;
        // Scale down by 1024 until the value fits or we run out of units.
        while value >= 1024.0 && unit < SUFFIX.len() - 1 {
            value /= 1024.0;
            unit += 1;
        }
        write!(f, "{value:.2} {}", SUFFIX[unit])
    }
}

View File

@ -2986,6 +2986,14 @@ impl crate::context::Context for ContextWebGpu {
Default::default()
}
/// The WebGPU backend has no visibility into the browser's GPU memory
/// allocator, so no report can be produced.
fn device_generate_allocator_report(
    &self,
    _device: &Self::DeviceId,
    _device_data: &Self::DeviceData,
) -> Option<wgt::AllocatorReport> {
    None
}
fn pipeline_cache_get_data(
&self,
_: &Self::PipelineCacheId,

View File

@ -2367,6 +2367,14 @@ impl crate::Context for ContextWgpuCore {
wgc::gfx_select!(device => self.0.device_get_internal_counters(*device))
}
/// Forward the allocator-report request to `wgpu-core`, dispatching on
/// the device's backend via `gfx_select!`.
fn device_generate_allocator_report(
    &self,
    device: &Self::DeviceId,
    _device_data: &Self::DeviceData,
) -> Option<wgt::AllocatorReport> {
    wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device))
}
fn pipeline_cache_get_data(
&self,
cache: &Self::PipelineCacheId,

View File

@ -618,6 +618,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
_device_data: &Self::DeviceData,
) -> wgt::InternalCounters;
fn device_generate_allocator_report(
&self,
device: &Self::DeviceId,
_device_data: &Self::DeviceData,
) -> Option<wgt::AllocatorReport>;
fn pipeline_cache_get_data(
&self,
cache: &Self::PipelineCacheId,
@ -1617,6 +1623,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
device_data: &crate::Data,
) -> wgt::InternalCounters;
fn generate_allocator_report(
&self,
device: &ObjectId,
device_data: &crate::Data,
) -> Option<wgt::AllocatorReport>;
fn pipeline_cache_get_data(
&self,
cache: &ObjectId,
@ -3101,6 +3113,16 @@ where
Context::device_get_internal_counters(self, &device, device_data)
}
/// Type-erased entry point: converts the erased device handle and data
/// back into this context's concrete types, then dispatches.
fn generate_allocator_report(
    &self,
    device: &ObjectId,
    device_data: &crate::Data,
) -> Option<wgt::AllocatorReport> {
    Context::device_generate_allocator_report(
        self,
        &<T::DeviceId>::from(*device),
        downcast_ref(device_data),
    )
}
fn pipeline_cache_get_data(
&self,
cache: &ObjectId,

View File

@ -3238,6 +3238,15 @@ impl Device {
DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
}
/// Generate a GPU memory allocation report if the underlying backend
/// supports it.
///
/// Backends that do not support producing these reports return `None`.
/// A backend may support them and still return `None` if it is not
/// performing sub-allocation, for example as a workaround for driver
/// issues.
pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
    DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
}
/// Apply a callback to this `Device`'s underlying backend device.
///
/// If this `Device` is implemented by the backend API given by `A` (Vulkan,