Expose GPU allocation reports in wgpu, wgpu-core and wgpu-hal

2024-11-21 22:33:49 +00:00 · 2024-07-18 17:35:26 +02:00 · 2024-07-18 17:35:26 +02:00 · bc7622f641
commit bc7622f641
parent 20973d1cdc
9 changed files with 195 additions and 1 deletions
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@ -2458,6 +2458,17 @@ impl Global {
        }
    }
    /// Generates an allocator report for the device identified by `device_id`.
    ///
    /// Returns `None` when `device_id` cannot be resolved in the hub for backend `A`,
    /// or when the device itself has no report to offer.
    pub fn device_generate_allocator_report<A: HalApi>(
        &self,
        device_id: DeviceId,
    ) -> Option<wgt::AllocatorReport> {
        // A failed lookup is deliberately folded into `None` rather than surfaced as an error.
        let device = A::hub(self).devices.get(device_id).ok()?;
        device.generate_allocator_report()
    }
    pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) {
        profiling::scope!("Queue::drop");
        api_log!("Queue::drop {queue_id:?}");
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@ -3598,6 +3598,13 @@ impl<A: HalApi> Device<A> {
            .map(|raw| raw.get_internal_counters())
            .unwrap_or_default()
    }
    /// Asks the underlying HAL device for an allocator report.
    ///
    /// Returns `None` when there is no raw device, or when the raw device does not
    /// produce a report.
    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
        let raw = self.raw.as_ref()?;
        raw.generate_allocator_report()
    }
 }
 impl<A: HalApi> Device<A> {
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@ -1801,4 +1801,41 @@ impl crate::Device for super::Device {
    /// Returns a copy of the counters stored on this device.
    fn get_internal_counters(&self) -> wgt::HalCounters {
        Clone::clone(&self.counters)
    }
    /// Builds a `wgt::AllocatorReport` from the report of this device's memory allocator.
    ///
    /// Returns `None` when `mem_allocator` is absent. This override is only compiled
    /// with the `windows_rs` feature.
    #[cfg(feature = "windows_rs")]
    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
        // The lock guard is a temporary of this statement, so it is released before
        // the conversion below starts.
        let mut report = self
            .mem_allocator
            .as_ref()?
            .lock()
            .allocator
            .generate_report();

        // Move each name out of the upstream report instead of cloning it; the
        // upstream entries are not read again afterwards.
        let mut allocations = Vec::with_capacity(report.allocations.len());
        for entry in report.allocations.iter_mut() {
            allocations.push(wgt::AllocationReport {
                name: std::mem::take(&mut entry.name),
                offset: entry.offset,
                size: entry.size,
            });
        }

        let mut blocks = Vec::with_capacity(report.blocks.len());
        for block in report.blocks.iter() {
            blocks.push(wgt::MemoryBlockReport {
                size: block.size,
                allocations: block.allocations.clone(),
            });
        }

        let total_allocated_bytes = report.total_allocated_bytes;
        let total_reserved_bytes = report.total_reserved_bytes;
        Some(wgt::AllocatorReport {
            allocations,
            blocks,
            total_allocated_bytes,
            total_reserved_bytes,
        })
    }
 }
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@ -894,6 +894,10 @@ pub trait Device: WasmNotSendSync {
    );
    /// Returns the `wgpu-hal` internal counters for this device.
    fn get_internal_counters(&self) -> wgt::HalCounters;
    /// Generates a report describing this device's memory allocator, if available.
    ///
    /// The default implementation returns `None`; a backend that can produce a
    /// report overrides this method.
    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
        None
    }
 }
 pub trait Queue: WasmNotSendSync {
--- a/wgpu-types/src/counters.rs
+++ b/wgpu-types/src/counters.rs
@ -1,5 +1,6 @@
 #[cfg(feature = "counters")]
 use std::sync::atomic::{AtomicIsize, Ordering};
 use std::{fmt, ops::Range};
 /// An internal counter for debugging purposes
 ///
@ -128,7 +129,7 @@ pub struct HalCounters {
 /// `wgpu-core`'s internal counters.
 #[derive(Clone, Default)]
 pub struct CoreCounters {
-    // TODO
+    // TODO    #[cfg(features=)]
 }
 /// All internal counters, exposed for debugging purposes.
@ -139,3 +140,90 @@ pub struct InternalCounters {
    /// `wgpu-hal` counters.
    pub hal: HalCounters,
 }
 /// Describes an allocation in the [`AllocatorReport`].
 ///
 /// The `Debug` representation prints only the name and a human-readable size;
 /// the offset is not part of it.
 #[derive(Clone)]
 pub struct AllocationReport {
    /// The name provided to the `allocate()` function.
    ///
    /// May be empty, in which case the `Debug` output prints `"--"` instead.
    pub name: String,
    /// The offset in bytes of the allocation in its memory block.
    pub offset: u64,
    /// The size in bytes of the allocation.
    pub size: u64,
 }
 /// Describes a memory block in the [`AllocatorReport`].
 #[derive(Clone)]
 pub struct MemoryBlockReport {
    /// The size in bytes of this memory block.
    pub size: u64,
    /// The range of allocations in [`AllocatorReport::allocations`] that are associated
    /// to this memory block.
    ///
    /// This is a half-open `start..end` range of positions in that vector.
    pub allocations: Range<usize>,
 }
 /// A report describing the state of a GPU memory allocator, generated for
 /// informational purposes.
 ///
 /// In `wgpu` it is obtained through `Device::generate_allocator_report()`.
 ///
 /// The `Debug` representation is a summary rather than a field dump: it prints the
 /// allocated / reserved totals, the number of blocks and allocations, and the
 /// allocations sorted from largest to smallest. A format precision limits how many
 /// allocations are listed, e.g. `{:.10?}` prints at most the ten largest.
 #[derive(Clone)]
 pub struct AllocatorReport {
    /// All live allocations, sub-allocated from memory blocks.
    pub allocations: Vec<AllocationReport>,
    /// All memory blocks.
    pub blocks: Vec<MemoryBlockReport>,
    /// Sum of the memory used by all allocations, in bytes.
    pub total_allocated_bytes: u64,
    /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes.
    pub total_reserved_bytes: u64,
 }
 impl fmt::Debug for AllocationReport {
    /// Writes the allocation as `"<name>": <size>`, with the size rendered by `FmtBytes`.
    ///
    /// An empty name is shown as `"--"`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let label = match self.name.as_str() {
            "" => "--",
            given => given,
        };
        write!(f, "{label:?}: {}", FmtBytes(self.size))
    }
 }
 impl fmt::Debug for AllocatorReport {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let mut allocations = self.allocations.clone();
        allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size));
        let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX);
        allocations.truncate(max_num_allocations_to_print);
        f.debug_struct("AllocatorReport")
            .field(
                "summary",
                &std::format_args!(
                    "{} / {}",
                    FmtBytes(self.total_allocated_bytes),
                    FmtBytes(self.total_reserved_bytes)
                ),
            )
            .field("blocks", &self.blocks.len())
            .field("allocations", &self.allocations.len())
            .field("largest", &allocations.as_slice())
            .finish()
    }
 }
 /// Wraps a byte count so that it displays with a binary-scaled unit suffix.
 struct FmtBytes(u64);
 impl fmt::Display for FmtBytes {
    /// Writes the value with two decimals and the largest unit (steps of 1024) that
    /// keeps it below 1024, stopping at `TB`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        const UNITS: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
        let mut value = self.0 as f64;
        let mut unit = 0;
        // Scale down until the value fits the current unit or the last unit is reached.
        while value >= 1024.0 && unit + 1 < UNITS.len() {
            value /= 1024.0;
            unit += 1;
        }
        write!(f, "{:.2} {}", value, UNITS[unit])
    }
 }
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@ -2986,6 +2986,14 @@ impl crate::context::Context for ContextWebGpu {
        Default::default()
    }
    /// Always returns `None`: this backend does not produce allocator reports.
    ///
    /// Both arguments are ignored.
    fn device_generate_allocator_report(
        &self,
        _device: &Self::DeviceId,
        _device_data: &Self::DeviceData,
    ) -> Option<wgt::AllocatorReport> {
        None
    }
    fn pipeline_cache_get_data(
        &self,
        _: &Self::PipelineCacheId,
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@ -2367,6 +2367,14 @@ impl crate::Context for ContextWgpuCore {
        wgc::gfx_select!(device => self.0.device_get_internal_counters(*device))
    }
    /// Produces the allocator report for `device` by forwarding to
    /// `device_generate_allocator_report` on `self.0`, invoked through
    /// `wgc::gfx_select!`.
    ///
    /// `_device_data` is not used.
    fn device_generate_allocator_report(
        &self,
        device: &Self::DeviceId,
        _device_data: &Self::DeviceData,
    ) -> Option<wgt::AllocatorReport> {
        // NOTE(review): presumably `gfx_select!` picks the HAL backend from the id;
        // confirm against the macro definition.
        wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device))
    }
    fn pipeline_cache_get_data(
        &self,
        cache: &Self::PipelineCacheId,
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@ -618,6 +618,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
        _device_data: &Self::DeviceData,
    ) -> wgt::InternalCounters;
    /// Generates a memory allocator report for the given device.
    ///
    /// Returns `None` when the implementation has no report to offer.
    fn device_generate_allocator_report(
        &self,
        device: &Self::DeviceId,
        _device_data: &Self::DeviceData,
    ) -> Option<wgt::AllocatorReport>;
    fn pipeline_cache_get_data(
        &self,
        cache: &Self::PipelineCacheId,
@ -1617,6 +1623,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
        device_data: &crate::Data,
    ) -> wgt::InternalCounters;
    /// Type-erased counterpart of `Context::device_generate_allocator_report`.
    ///
    /// Takes the device as an `ObjectId` plus its `crate::Data` and returns the
    /// allocator report, or `None` when none is available.
    fn generate_allocator_report(
        &self,
        device: &ObjectId,
        device_data: &crate::Data,
    ) -> Option<wgt::AllocatorReport>;
    fn pipeline_cache_get_data(
        &self,
        cache: &ObjectId,
@ -3101,6 +3113,16 @@ where
        Context::device_get_internal_counters(self, &device, device_data)
    }
    /// Converts the type-erased id and data back to the concrete `Context` types and
    /// forwards to `Context::device_generate_allocator_report`.
    fn generate_allocator_report(
        &self,
        device: &ObjectId,
        device_data: &crate::Data,
    ) -> Option<wgt::AllocatorReport> {
        let typed_id = <T::DeviceId>::from(*device);
        Context::device_generate_allocator_report(self, &typed_id, downcast_ref(device_data))
    }
    fn pipeline_cache_get_data(
        &self,
        cache: &ObjectId,
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@ -3238,6 +3238,15 @@ impl Device {
        DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
    }
    /// Generate a GPU memory allocation report if the underlying backend supports it.
    ///
    /// Backends that do not support producing these reports return `None`. A backend may
    /// support it and still return `None` if it is not performing sub-allocation,
    /// for example as a workaround for driver issues.
    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
    }
    /// Apply a callback to this `Device`'s underlying backend device.
    ///
    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,