Expose GPU allocation reports in wgpu, wgpu-core and wgpu-hal

2024-11-24 15:54:00 +00:00 · 2024-07-18 17:35:26 +02:00 · 2024-07-18 17:35:26 +02:00 · bc7622f641
commit bc7622f641
parent 20973d1cdc
9 changed files with 195 additions and 1 deletions
--- a/wgpu-core/src/device/global.rs
+++ b/wgpu-core/src/device/global.rs
@ -2458,6 +2458,17 @@ impl Global {
        }
    }

+    pub fn device_generate_allocator_report<A: HalApi>(
+        &self,
+        device_id: DeviceId,
+    ) -> Option<wgt::AllocatorReport> {
+        let hub = A::hub(self);
+        hub.devices
+            .get(device_id)
+            .ok()
+            .and_then(|device| device.generate_allocator_report())
+    }
+
    pub fn queue_drop<A: HalApi>(&self, queue_id: QueueId) {
        profiling::scope!("Queue::drop");
        api_log!("Queue::drop {queue_id:?}");
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@ -3598,6 +3598,13 @@ impl<A: HalApi> Device<A> {
            .map(|raw| raw.get_internal_counters())
            .unwrap_or_default()
    }
+
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        self.raw
+            .as_ref()
+            .map(|raw| raw.generate_allocator_report())
+            .unwrap_or_default()
+    }
 }

 impl<A: HalApi> Device<A> {
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@ -1801,4 +1801,41 @@ impl crate::Device for super::Device {
    fn get_internal_counters(&self) -> wgt::HalCounters {
        self.counters.clone()
    }
+
+    #[cfg(feature = "windows_rs")]
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        let mut upstream = {
+            self.mem_allocator
+                .as_ref()?
+                .lock()
+                .allocator
+                .generate_report()
+        };
+
+        let allocations = upstream
+            .allocations
+            .iter_mut()
+            .map(|alloc| wgt::AllocationReport {
+                name: std::mem::take(&mut alloc.name),
+                offset: alloc.offset,
+                size: alloc.size,
+            })
+            .collect();
+
+        let blocks = upstream
+            .blocks
+            .iter()
+            .map(|block| wgt::MemoryBlockReport {
+                size: block.size,
+                allocations: block.allocations.clone(),
+            })
+            .collect();
+
+        Some(wgt::AllocatorReport {
+            allocations,
+            blocks,
+            total_allocated_bytes: upstream.total_allocated_bytes,
+            total_reserved_bytes: upstream.total_reserved_bytes,
+        })
+    }
 }
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@ -894,6 +894,10 @@ pub trait Device: WasmNotSendSync {
    );

    fn get_internal_counters(&self) -> wgt::HalCounters;
+
+    fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        None
+    }
 }

 pub trait Queue: WasmNotSendSync {
--- a/wgpu-types/src/counters.rs
+++ b/wgpu-types/src/counters.rs
@ -1,5 +1,6 @@
 #[cfg(feature = "counters")]
 use std::sync::atomic::{AtomicIsize, Ordering};
+use std::{fmt, ops::Range};

 /// An internal counter for debugging purposes
 ///
@ -128,7 +129,7 @@ pub struct HalCounters {
 /// `wgpu-core`'s internal counters.
 #[derive(Clone, Default)]
 pub struct CoreCounters {
-    // TODO
+    // TODO    #[cfg(features=)]
 }

 /// All internal counters, exposed for debugging purposes.
@ -139,3 +140,90 @@ pub struct InternalCounters {
    /// `wgpu-hal` counters.
    pub hal: HalCounters,
 }
+
+/// Describes an allocation in the [`AllocatorReport`].
+#[derive(Clone)]
+pub struct AllocationReport {
+    /// The name provided to the `allocate()` function.
+    pub name: String,
+    /// The offset in bytes of the allocation in its memory block.
+    pub offset: u64,
+    /// The size in bytes of the allocation.
+    pub size: u64,
+}
+
+/// Describes a memory block in the [`AllocatorReport`].
+#[derive(Clone)]
+pub struct MemoryBlockReport {
+    /// The size in bytes of this memory block.
+    pub size: u64,
+    /// The range of allocations in [`AllocatorReport::allocations`] that are associated
+    /// with this memory block.
+    pub allocations: Range<usize>,
+}
+
+/// A report that can be generated for informational purposes using `Allocator::generate_report()`.
+#[derive(Clone)]
+pub struct AllocatorReport {
+    /// All live allocations, sub-allocated from memory blocks.
+    pub allocations: Vec<AllocationReport>,
+    /// All memory blocks.
+    pub blocks: Vec<MemoryBlockReport>,
+    /// Sum of the memory used by all allocations, in bytes.
+    pub total_allocated_bytes: u64,
+    /// Sum of the memory reserved by all memory blocks including unallocated regions, in bytes.
+    pub total_reserved_bytes: u64,
+}
+
+impl fmt::Debug for AllocationReport {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let name = if !self.name.is_empty() {
+            self.name.as_str()
+        } else {
+            "--"
+        };
+        write!(f, "{name:?}: {}", FmtBytes(self.size))
+    }
+}
+
+impl fmt::Debug for AllocatorReport {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut allocations = self.allocations.clone();
+        allocations.sort_by_key(|alloc| std::cmp::Reverse(alloc.size));
+
+        let max_num_allocations_to_print = f.precision().unwrap_or(usize::MAX);
+        allocations.truncate(max_num_allocations_to_print);
+
+        f.debug_struct("AllocatorReport")
+            .field(
+                "summary",
+                &std::format_args!(
+                    "{} / {}",
+                    FmtBytes(self.total_allocated_bytes),
+                    FmtBytes(self.total_reserved_bytes)
+                ),
+            )
+            .field("blocks", &self.blocks.len())
+            .field("allocations", &self.allocations.len())
+            .field("largest", &allocations.as_slice())
+            .finish()
+    }
+}
+
+struct FmtBytes(u64);
+
+impl fmt::Display for FmtBytes {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        const SUFFIX: [&str; 5] = ["B", "KB", "MB", "GB", "TB"];
+        let mut idx = 0;
+        let mut amount = self.0 as f64;
+        loop {
+            if amount < 1024.0 || idx == SUFFIX.len() - 1 {
+                return write!(f, "{:.2} {}", amount, SUFFIX[idx]);
+            }
+
+            amount /= 1024.0;
+            idx += 1;
+        }
+    }
+}
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@ -2986,6 +2986,14 @@ impl crate::context::Context for ContextWebGpu {
        Default::default()
    }

+    fn device_generate_allocator_report(
+        &self,
+        _device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport> {
+        None
+    }
+
    fn pipeline_cache_get_data(
        &self,
        _: &Self::PipelineCacheId,
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@ -2367,6 +2367,14 @@ impl crate::Context for ContextWgpuCore {
        wgc::gfx_select!(device => self.0.device_get_internal_counters(*device))
    }

+    fn device_generate_allocator_report(
+        &self,
+        device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport> {
+        wgc::gfx_select!(device => self.0.device_generate_allocator_report(*device))
+    }
+
    fn pipeline_cache_get_data(
        &self,
        cache: &Self::PipelineCacheId,
--- a/wgpu/src/context.rs
+++ b/wgpu/src/context.rs
@ -618,6 +618,12 @@ pub trait Context: Debug + WasmNotSendSync + Sized {
        _device_data: &Self::DeviceData,
    ) -> wgt::InternalCounters;

+    fn device_generate_allocator_report(
+        &self,
+        device: &Self::DeviceId,
+        _device_data: &Self::DeviceData,
+    ) -> Option<wgt::AllocatorReport>;
+
    fn pipeline_cache_get_data(
        &self,
        cache: &Self::PipelineCacheId,
@ -1617,6 +1623,12 @@ pub(crate) trait DynContext: Debug + WasmNotSendSync {
        device_data: &crate::Data,
    ) -> wgt::InternalCounters;

+    fn generate_allocator_report(
+        &self,
+        device: &ObjectId,
+        device_data: &crate::Data,
+    ) -> Option<wgt::AllocatorReport>;
+
    fn pipeline_cache_get_data(
        &self,
        cache: &ObjectId,
@ -3101,6 +3113,16 @@ where
        Context::device_get_internal_counters(self, &device, device_data)
    }

+    fn generate_allocator_report(
+        &self,
+        device: &ObjectId,
+        device_data: &crate::Data,
+    ) -> Option<wgt::AllocatorReport> {
+        let device = <T::DeviceId>::from(*device);
+        let device_data = downcast_ref(device_data);
+        Context::device_generate_allocator_report(self, &device, device_data)
+    }
+
    fn pipeline_cache_get_data(
        &self,
        cache: &ObjectId,
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@ -3238,6 +3238,15 @@ impl Device {
        DynContext::device_get_internal_counters(&*self.context, &self.id, self.data.as_ref())
    }

+    /// Generate a GPU memory allocation report if the underlying backend supports it.
+    ///
+    /// Backends that do not support producing these reports return `None`. A backend may
+    /// support it and still return `None` if it is not performing sub-allocation,
+    /// for example as a workaround for driver issues.
+    pub fn generate_allocator_report(&self) -> Option<wgt::AllocatorReport> {
+        DynContext::generate_allocator_report(&*self.context, &self.id, self.data.as_ref())
+    }
+
    /// Apply a callback to this `Device`'s underlying backend device.
    ///
    /// If this `Device` is implemented by the backend API given by `A` (Vulkan,