diff --git a/CHANGELOG.md b/CHANGELOG.md index 526f6f99c..e580c550d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -151,6 +151,11 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901) - Added `as_hal` for `Buffer` to access wgpu created buffers form wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724) - Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531) +- Added memory allocation hints to `DeviceDescriptor` by @nical in [#5875](https://github.com/gfx-rs/wgpu/pull/5875) + - `MemoryHints::Performance`, the default, favors performance over memory usage and will likely cause large amounts of VRAM to be allocated up-front. This hint is typically good for games. + - `MemoryHints::MemoryUsage` favors memory usage over performance. This hint is typically useful for smaller applications or UI libraries. + - `MemoryHints::Manual` allows the user to specify parameters for the underlying GPU memory allocator. These parameters are subject to change. + - These hints may be ignored by some backends. Currently only the Vulkan and D3D12 backends take them into account. 
#### Naga diff --git a/benches/benches/root.rs b/benches/benches/root.rs index 98563f839..6ef2efabc 100644 --- a/benches/benches/root.rs +++ b/benches/benches/root.rs @@ -44,6 +44,7 @@ impl DeviceState { &wgpu::DeviceDescriptor { required_features: adapter.features(), required_limits: adapter.limits(), + memory_hints: wgpu::MemoryHints::Performance, label: Some("RenderPass Device"), }, None, diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs index d77c60cac..aafb225fb 100644 --- a/deno_webgpu/lib.rs +++ b/deno_webgpu/lib.rs @@ -668,6 +668,7 @@ pub fn op_webgpu_request_device( label: Some(Cow::Owned(label)), required_features: required_features.into(), required_limits: required_limits.unwrap_or_default(), + memory_hints: wgpu_types::MemoryHints::default(), }; let (device, queue, maybe_err) = gfx_select!(adapter => instance.adapter_request_device( diff --git a/examples/src/framework.rs b/examples/src/framework.rs index b384169c7..ff86cc235 100644 --- a/examples/src/framework.rs +++ b/examples/src/framework.rs @@ -319,6 +319,7 @@ impl ExampleContext { label: None, required_features: (optional_features & adapter_features) | required_features, required_limits: needed_limits, + memory_hints: wgpu::MemoryHints::MemoryUsage, }, trace_dir.ok().as_ref().map(std::path::Path::new), ) diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs index cdd6d439d..fb23e1395 100644 --- a/examples/src/hello_compute/mod.rs +++ b/examples/src/hello_compute/mod.rs @@ -50,6 +50,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option> { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs index 9b6675289..d98f1bb8d 100644 --- a/examples/src/hello_synchronization/mod.rs +++ b/examples/src/hello_synchronization/mod.rs @@ -19,6 +19,7 @@ async fn run() { 
label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::Performance, }, None, ) diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs index e4d42674f..41c058350 100644 --- a/examples/src/hello_triangle/mod.rs +++ b/examples/src/hello_triangle/mod.rs @@ -32,6 +32,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) { // Make sure we use the texture resolution limits from the adapter, so we can support images the size of the swapchain. required_limits: wgpu::Limits::downlevel_webgl2_defaults() .using_resolution(adapter.limits()), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs index 7d81dbef7..b568f35d3 100644 --- a/examples/src/hello_windows/mod.rs +++ b/examples/src/hello_windows/mod.rs @@ -75,6 +75,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Arc, wgpu::Color label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/hello_workgroups/mod.rs b/examples/src/hello_workgroups/mod.rs index 0416451da..0184981c0 100644 --- a/examples/src/hello_workgroups/mod.rs +++ b/examples/src/hello_workgroups/mod.rs @@ -32,6 +32,7 @@ async fn run() { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs index caed73674..c0922bc2e 100644 --- a/examples/src/render_to_texture/mod.rs +++ b/examples/src/render_to_texture/mod.rs @@ -21,6 +21,7 @@ async fn run(_path: Option) { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: 
wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs index 72b615251..330b930f6 100644 --- a/examples/src/repeated_compute/mod.rs +++ b/examples/src/repeated_compute/mod.rs @@ -172,6 +172,7 @@ impl WgpuContext { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::Performance, }, None, ) diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs index 04253e818..d6a06d6e2 100644 --- a/examples/src/storage_texture/mod.rs +++ b/examples/src/storage_texture/mod.rs @@ -35,6 +35,7 @@ async fn run(_path: Option) { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs index e396023a0..d712762cf 100644 --- a/examples/src/timestamp_queries/mod.rs +++ b/examples/src/timestamp_queries/mod.rs @@ -216,6 +216,7 @@ async fn run() { label: None, required_features: features, required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs index c53a18972..0adbf4e46 100644 --- a/examples/src/uniform_values/mod.rs +++ b/examples/src/uniform_values/mod.rs @@ -115,6 +115,7 @@ impl WgpuContext { label: None, required_features: wgpu::Features::empty(), required_limits: wgpu::Limits::downlevel_defaults(), + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/player/tests/test.rs b/player/tests/test.rs index a6c7222b6..2aca181c8 100644 --- a/player/tests/test.rs +++ b/player/tests/test.rs @@ -112,6 +112,7 @@ impl Test<'_> { label: None, required_features: self.features, required_limits: wgt::Limits::default(), + memory_hints: 
wgt::MemoryHints::default(), }, None, Some(device_id), diff --git a/tests/src/init.rs b/tests/src/init.rs index f66f08489..3a11b3abe 100644 --- a/tests/src/init.rs +++ b/tests/src/init.rs @@ -104,6 +104,7 @@ pub async fn initialize_device( label: None, required_features: features, required_limits: limits, + memory_hints: wgpu::MemoryHints::MemoryUsage, }, None, ) diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 3cef19aed..8c580588f 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -351,9 +351,11 @@ impl Adapter { } let open = unsafe { - self.raw - .adapter - .open(desc.required_features, &desc.required_limits) + self.raw.adapter.open( + desc.required_features, + &desc.required_limits, + &desc.memory_hints, + ) } .map_err(|err| match err { hal::DeviceError::Lost => RequestDeviceError::DeviceLost, diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index bd09a4e72..d61cec738 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -125,7 +125,11 @@ impl Example { let hal::OpenDevice { device, queue } = unsafe { adapter - .open(wgt::Features::empty(), &wgt::Limits::default()) + .open( + wgt::Features::empty(), + &wgt::Limits::default(), + &wgt::MemoryHints::default(), + ) .unwrap() }; diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs index 675a51869..ceab5b065 100644 --- a/wgpu-hal/examples/raw-gles.rs +++ b/wgpu-hal/examples/raw-gles.rs @@ -124,9 +124,11 @@ fn fill_screen(exposed: &hal::ExposedAdapter, width: u32, height use hal::{Adapter as _, CommandEncoder as _, Device as _, Queue as _}; let od = unsafe { - exposed - .adapter - .open(wgt::Features::empty(), &wgt::Limits::downlevel_defaults()) + exposed.adapter.open( + wgt::Features::empty(), + &wgt::Limits::downlevel_defaults(), + &wgt::MemoryHints::default(), + ) } .unwrap(); diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs 
b/wgpu-hal/examples/ray-traced-triangle/main.rs index f27e3d067..e6481aae6 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -249,8 +249,15 @@ impl Example { .expect("Surface doesn't support presentation"); log::info!("Surface caps: {:#?}", surface_caps); - let hal::OpenDevice { device, queue } = - unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() }; + let hal::OpenDevice { device, queue } = unsafe { + adapter + .open( + features, + &wgt::Limits::default(), + &wgt::MemoryHints::Performance, + ) + .unwrap() + }; let window_size: (u32, u32) = window.inner_size().into(); dbg!(&surface_caps.formats); diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index a81f15fc3..6c8ed1cca 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -503,6 +503,7 @@ impl crate::Adapter for super::Adapter { &self, _features: wgt::Features, limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let queue = { profiling::scope!("ID3D12Device::CreateCommandQueue"); @@ -520,6 +521,7 @@ impl crate::Adapter for super::Adapter { self.device.clone(), queue.clone(), limits, + memory_hints, self.private_caps, &self.library, self.dxc_container.clone(), diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index ceb430a70..eeb60acbf 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -28,12 +28,13 @@ impl super::Device { raw: d3d12::Device, present_queue: d3d12::CommandQueue, limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, private_caps: super::PrivateCapabilities, library: &Arc, dxc_container: Option>, ) -> Result { let mem_allocator = if private_caps.suballocation_supported { - super::suballocation::create_allocator_wrapper(&raw)? + super::suballocation::create_allocator_wrapper(&raw, memory_hints)? 
} else { None }; diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs index 35204a1b9..b7ddbaf0b 100644 --- a/wgpu-hal/src/dx12/suballocation.rs +++ b/wgpu-hal/src/dx12/suballocation.rs @@ -46,13 +46,31 @@ mod placed { pub(crate) fn create_allocator_wrapper( raw: &d3d12::Device, + memory_hints: &wgt::MemoryHints, ) -> Result>, crate::DeviceError> { let device = raw.as_ptr(); + // TODO: the allocator's configuration should take hardware capability into + // account. + let mb = 1024 * 1024; + let allocation_sizes = match memory_hints { + wgt::MemoryHints::Performance => gpu_allocator::AllocationSizes::default(), + wgt::MemoryHints::MemoryUsage => gpu_allocator::AllocationSizes::new(8 * mb, 4 * mb), + wgt::MemoryHints::Manual { + suballocated_device_memory_block_size, + } => { + // TODO: Would it be useful to expose the host size in memory hints + // instead of always using half of the device size? + let device_size = suballocated_device_memory_block_size.start; + let host_size = device_size / 2; + gpu_allocator::AllocationSizes::new(device_size, host_size) + } + }; + match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc { device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(device.as_windows().clone()), debug_settings: Default::default(), - allocation_sizes: gpu_allocator::AllocationSizes::default(), + allocation_sizes, }) { Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))), Err(e) => { @@ -279,6 +297,7 @@ mod committed { #[allow(unused)] pub(crate) fn create_allocator_wrapper( _raw: &d3d12::Device, + _memory_hints: &wgt::MemoryHints, ) -> Result>, crate::DeviceError> { Ok(None) } diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs index 227dce7ee..5d6c42ab8 100644 --- a/wgpu-hal/src/empty.rs +++ b/wgpu-hal/src/empty.rs @@ -92,6 +92,7 @@ impl crate::Adapter for Context { &self, features: wgt::Features, _limits: &wgt::Limits, + _memory_hints: &wgt::MemoryHints, ) -> 
DeviceResult> { Err(crate::DeviceError::Lost) } diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index 933c36dc8..1cda99b33 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -929,6 +929,7 @@ impl crate::Adapter for super::Adapter { &self, features: wgt::Features, _limits: &wgt::Limits, + _memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let gl = &self.shared.context.lock(); unsafe { gl.pixel_store_i32(glow::UNPACK_ALIGNMENT, 1) }; diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index ccc459c10..e63f25ab0 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -562,6 +562,7 @@ pub trait Adapter: WasmNotSendSync { &self, features: wgt::Features, limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, ) -> Result, DeviceError>; /// Return the set of supported capabilities for a texture format. diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index 7f8e789b4..924902517 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -25,6 +25,7 @@ impl crate::Adapter for super::Adapter { &self, features: wgt::Features, _limits: &wgt::Limits, + _memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let queue = self .shared diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index efe32929a..81205c629 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -1583,6 +1583,7 @@ impl super::Adapter { handle_is_owned: bool, enabled_extensions: &[&'static CStr], features: wgt::Features, + memory_hints: &wgt::MemoryHints, family_index: u32, queue_index: u32, ) -> Result, crate::DeviceError> { @@ -1833,7 +1834,54 @@ impl super::Adapter { let mem_allocator = { let limits = self.phd_capabilities.properties.limits; - let config = gpu_alloc::Config::i_am_prototyping(); //TODO + + // Note: the parameters here are not set in stone nor were they picked with + // strong confidence.
+ // `final_free_list_chunk` should be bigger than starting_free_list_chunk if + // we want the behavior of starting with smaller block sizes and using larger + // ones only after we observe that the small ones aren't enough, which I think + // is a good "I don't know what the workload is going to be like" approach. + // + // For reference, `VMA`, and `gpu_allocator` both start with 256 MB blocks + // (then VMA doubles the block size each time it needs a new block). + // At some point it would be good to experiment with real workloads + // + // TODO(#5925): The plan is to switch the Vulkan backend from `gpu_alloc` to + // `gpu_allocator` which has a different (simpler) set of configuration options. + // + // TODO: These parameters should take hardware capabilities into account. + let mb = 1024 * 1024; + let perf_cfg = gpu_alloc::Config { + starting_free_list_chunk: 128 * mb, + final_free_list_chunk: 512 * mb, + minimal_buddy_size: 1, + initial_buddy_dedicated_size: 8 * mb, + dedicated_threshold: 32 * mb, + preferred_dedicated_threshold: mb, + transient_dedicated_threshold: 128 * mb, + }; + let mem_usage_cfg = gpu_alloc::Config { + starting_free_list_chunk: 8 * mb, + final_free_list_chunk: 64 * mb, + minimal_buddy_size: 1, + initial_buddy_dedicated_size: 8 * mb, + dedicated_threshold: 8 * mb, + preferred_dedicated_threshold: mb, + transient_dedicated_threshold: 16 * mb, + }; + let config = match memory_hints { + wgt::MemoryHints::Performance => perf_cfg, + wgt::MemoryHints::MemoryUsage => mem_usage_cfg, + wgt::MemoryHints::Manual { + suballocated_device_memory_block_size, + } => gpu_alloc::Config { + starting_free_list_chunk: suballocated_device_memory_block_size.start, + final_free_list_chunk: suballocated_device_memory_block_size.end, + initial_buddy_dedicated_size: suballocated_device_memory_block_size.start, + ..perf_cfg + }, + }; + let max_memory_allocation_size = if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 { 
maintenance_3.max_memory_allocation_size @@ -1895,6 +1943,7 @@ impl crate::Adapter for super::Adapter { &self, features: wgt::Features, _limits: &wgt::Limits, + memory_hints: &wgt::MemoryHints, ) -> Result, crate::DeviceError> { let enabled_extensions = self.required_device_extensions(features); let mut enabled_phd_features = self.physical_device_features(&enabled_extensions, features); @@ -1928,6 +1977,7 @@ impl crate::Adapter for super::Adapter { true, &enabled_extensions, features, + memory_hints, family_info.queue_family_index, 0, ) diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 04532a4c7..d61f43496 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1769,11 +1769,43 @@ pub struct AdapterInfo { pub backend: Backend, } +/// Hints to the device about the memory allocation strategy. +/// +/// Some backends may ignore these hints. +#[derive(Clone, Debug, Default)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum MemoryHints { + /// Favor performance over memory usage (the default value). + #[default] + Performance, + /// Favor memory usage over performance. + MemoryUsage, + /// Applications that have control over the content that is rendered + /// (typically games) may find an optimal compromise between memory + /// usage and performance by specifying the allocation configuration. + Manual { + /// Defines the range of allowed memory block sizes for sub-allocated + /// resources. + /// + /// The backend may attempt to group multiple resources into fewer + /// device memory blocks (sub-allocation) for performance reasons. + /// The start of the provided range specifies the initial memory + /// block size for sub-allocated resources. After running out of + /// space in existing memory blocks, the backend may choose to + /// progressively increase the block size of subsequent allocations + /// up to a limit specified by the end of the range. + /// + /// This does not limit resource sizes.
If a resource does not fit + /// in the specified range, it will typically be placed in a dedicated + /// memory block. + suballocated_device_memory_block_size: Range, + }, +} + /// Describes a [`Device`](../wgpu/struct.Device.html). /// /// Corresponds to [WebGPU `GPUDeviceDescriptor`]( /// https://gpuweb.github.io/gpuweb/#gpudevicedescriptor). -#[repr(C)] #[derive(Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct DeviceDescriptor { @@ -1791,6 +1823,8 @@ pub struct DeviceDescriptor { /// Exactly the specified limits, and no better or worse, /// will be allowed in validation of API calls on the resulting device. pub required_limits: Limits, + /// Hints for memory allocation strategies. + pub memory_hints: MemoryHints, } impl DeviceDescriptor { @@ -1800,6 +1834,7 @@ impl DeviceDescriptor { label: fun(&self.label), required_features: self.required_features, required_limits: self.required_limits.clone(), + memory_hints: self.memory_hints.clone(), } } } diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index b0d27e3ef..7da27e355 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -53,7 +53,7 @@ pub use wgt::{ DepthStencilState, DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags, Dx12Compiler, DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace, Gles3MinorVersion, ImageDataLayout, ImageSubresourceRange, IndexFormat, InstanceDescriptor, - InstanceFlags, Limits, MaintainResult, MultisampleState, Origin2d, Origin3d, + InstanceFlags, Limits, MaintainResult, MemoryHints, MultisampleState, Origin2d, Origin3d, PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace, PresentMode, PresentationTimestamp, PrimitiveState, PrimitiveTopology, PushConstantRange, QueryType, RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, ShaderLocation, ShaderModel,