From 4c6318c0d2024fb27029d9c0e03999a9542fe13a Mon Sep 17 00:00:00 2001
From: Nicolas Silva <nical@fastmail.com>
Date: Mon, 8 Jul 2024 14:49:44 +0200
Subject: [PATCH] Expose gpu allocation configuration options (#5875)

* Expose gpu allocation configuration options

This commit adds hints to control memory allocation strategies to the configuration options. These hints allow for automatic profiles such as optimizing for performance (the default, makes sense for a game), optimizing for memory usage (typically more useful for a web browser or UI library) and specifying settings manually.

The details of GPU allocation are still in flux. The goal is to switch Vulkan and Metal to gpu_allocator, which is currently used with D3D12. gpu_allocator will also likely receive more configuration options, in particular the ability to start with smaller memory block sizes and progressively grow the block size. So the manual settings already make provision for this upcoming option. Another approach could be to wait and add the manual option after the dust settles.

The reason for providing presets and defining values in the backends is that I am convinced that optimal configurations should take hardware capabilities into consideration. It's a deep rabbit hole, though, so that will be an exercise for later.

* changelog

* Update CHANGELOG.md

Co-authored-by: Andreas Reich <r_andreas2@web.de>

* Add a comment about not entirely knowing what we are doing

---------

Co-authored-by: Andreas Reich <r_andreas2@web.de>
---
 CHANGELOG.md                                  |  5 ++
 benches/benches/root.rs                       |  1 +
 deno_webgpu/lib.rs                            |  1 +
 examples/src/framework.rs                     |  1 +
 examples/src/hello_compute/mod.rs             |  1 +
 examples/src/hello_synchronization/mod.rs     |  1 +
 examples/src/hello_triangle/mod.rs            |  1 +
 examples/src/hello_windows/mod.rs             |  1 +
 examples/src/hello_workgroups/mod.rs          |  1 +
 examples/src/render_to_texture/mod.rs         |  1 +
 examples/src/repeated_compute/mod.rs          |  1 +
 examples/src/storage_texture/mod.rs           |  1 +
 examples/src/timestamp_queries/mod.rs         |  1 +
 examples/src/uniform_values/mod.rs            |  1 +
 player/tests/test.rs                          |  1 +
 tests/src/init.rs                             |  1 +
 wgpu-core/src/instance.rs                     |  8 +--
 wgpu-hal/examples/halmark/main.rs             |  6 ++-
 wgpu-hal/examples/raw-gles.rs                 |  8 +--
 wgpu-hal/examples/ray-traced-triangle/main.rs | 11 +++-
 wgpu-hal/src/dx12/adapter.rs                  |  2 +
 wgpu-hal/src/dx12/device.rs                   |  3 +-
 wgpu-hal/src/dx12/suballocation.rs            | 21 +++++++-
 wgpu-hal/src/empty.rs                         |  1 +
 wgpu-hal/src/gles/adapter.rs                  |  1 +
 wgpu-hal/src/lib.rs                           |  1 +
 wgpu-hal/src/metal/adapter.rs                 |  1 +
 wgpu-hal/src/vulkan/adapter.rs                | 52 ++++++++++++++++++-
 wgpu-types/src/lib.rs                         | 37 ++++++++++++-
 wgpu/src/lib.rs                               |  2 +-
 30 files changed, 160 insertions(+), 14 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 526f6f99c..e580c550d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -151,6 +151,11 @@ By @teoxoy in [#5901](https://github.com/gfx-rs/wgpu/pull/5901)
 
 - Added `as_hal` for `Buffer` to access wgpu created buffers form wgpu-hal. By @JasondeWolff in [#5724](https://github.com/gfx-rs/wgpu/pull/5724)
 - Unconsumed vertex outputs are now always allowed. Removed `StageError::InputNotConsumed`, `Features::SHADER_UNUSED_VERTEX_OUTPUT`, and associated validation. By @Imberflur in [#5531](https://github.com/gfx-rs/wgpu/pull/5531)
+- Added memory allocation hints to `DeviceDescriptor`. By @nical in [#5875](https://github.com/gfx-rs/wgpu/pull/5875)
+    - `MemoryHints::Performance`, the default, favors performance over memory usage and will likely cause large amounts of VRAM to be allocated up-front. This hint is typically good for games.
+    - `MemoryHints::MemoryUsage` favors memory usage over performance. This hint is typically useful for smaller applications or UI libraries.
+    - `MemoryHints::Manual` allows the user to specify parameters for the underlying GPU memory allocator. These parameters are subject to change.
+    - These hints may be ignored by some backends. Currently only the Vulkan and D3D12 backends take them into account.
 
 #### Naga
 
diff --git a/benches/benches/root.rs b/benches/benches/root.rs
index 98563f839..6ef2efabc 100644
--- a/benches/benches/root.rs
+++ b/benches/benches/root.rs
@@ -44,6 +44,7 @@ impl DeviceState {
             &wgpu::DeviceDescriptor {
                 required_features: adapter.features(),
                 required_limits: adapter.limits(),
+                memory_hints: wgpu::MemoryHints::Performance,
                 label: Some("RenderPass Device"),
             },
             None,
diff --git a/deno_webgpu/lib.rs b/deno_webgpu/lib.rs
index d77c60cac..aafb225fb 100644
--- a/deno_webgpu/lib.rs
+++ b/deno_webgpu/lib.rs
@@ -668,6 +668,7 @@ pub fn op_webgpu_request_device(
         label: Some(Cow::Owned(label)),
         required_features: required_features.into(),
         required_limits: required_limits.unwrap_or_default(),
+        memory_hints: wgpu_types::MemoryHints::default(),
     };
 
     let (device, queue, maybe_err) = gfx_select!(adapter => instance.adapter_request_device(
diff --git a/examples/src/framework.rs b/examples/src/framework.rs
index b384169c7..ff86cc235 100644
--- a/examples/src/framework.rs
+++ b/examples/src/framework.rs
@@ -319,6 +319,7 @@ impl ExampleContext {
                     label: None,
                     required_features: (optional_features & adapter_features) | required_features,
                     required_limits: needed_limits,
+                    memory_hints: wgpu::MemoryHints::MemoryUsage,
                 },
                 trace_dir.ok().as_ref().map(std::path::Path::new),
             )
diff --git a/examples/src/hello_compute/mod.rs b/examples/src/hello_compute/mod.rs
index cdd6d439d..fb23e1395 100644
--- a/examples/src/hello_compute/mod.rs
+++ b/examples/src/hello_compute/mod.rs
@@ -50,6 +50,7 @@ async fn execute_gpu(numbers: &[u32]) -> Option<Vec<u32>> {
                 label: None,
                 required_features: wgpu::Features::empty(),
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/hello_synchronization/mod.rs b/examples/src/hello_synchronization/mod.rs
index 9b6675289..d98f1bb8d 100644
--- a/examples/src/hello_synchronization/mod.rs
+++ b/examples/src/hello_synchronization/mod.rs
@@ -19,6 +19,7 @@ async fn run() {
                 label: None,
                 required_features: wgpu::Features::empty(),
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::Performance,
             },
             None,
         )
diff --git a/examples/src/hello_triangle/mod.rs b/examples/src/hello_triangle/mod.rs
index e4d42674f..41c058350 100644
--- a/examples/src/hello_triangle/mod.rs
+++ b/examples/src/hello_triangle/mod.rs
@@ -32,6 +32,7 @@ async fn run(event_loop: EventLoop<()>, window: Window) {
                 // Make sure we use the texture resolution limits from the adapter, so we can support images the size of the swapchain.
                 required_limits: wgpu::Limits::downlevel_webgl2_defaults()
                     .using_resolution(adapter.limits()),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/hello_windows/mod.rs b/examples/src/hello_windows/mod.rs
index 7d81dbef7..b568f35d3 100644
--- a/examples/src/hello_windows/mod.rs
+++ b/examples/src/hello_windows/mod.rs
@@ -75,6 +75,7 @@ async fn run(event_loop: EventLoop<()>, viewports: Vec<(Arc<Window>, wgpu::Color
                 label: None,
                 required_features: wgpu::Features::empty(),
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/hello_workgroups/mod.rs b/examples/src/hello_workgroups/mod.rs
index 0416451da..0184981c0 100644
--- a/examples/src/hello_workgroups/mod.rs
+++ b/examples/src/hello_workgroups/mod.rs
@@ -32,6 +32,7 @@ async fn run() {
                 label: None,
                 required_features: wgpu::Features::empty(),
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/render_to_texture/mod.rs b/examples/src/render_to_texture/mod.rs
index caed73674..c0922bc2e 100644
--- a/examples/src/render_to_texture/mod.rs
+++ b/examples/src/render_to_texture/mod.rs
@@ -21,6 +21,7 @@ async fn run(_path: Option<String>) {
                 label: None,
                 required_features: wgpu::Features::empty(),
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/repeated_compute/mod.rs b/examples/src/repeated_compute/mod.rs
index 72b615251..330b930f6 100644
--- a/examples/src/repeated_compute/mod.rs
+++ b/examples/src/repeated_compute/mod.rs
@@ -172,6 +172,7 @@ impl WgpuContext {
                     label: None,
                     required_features: wgpu::Features::empty(),
                     required_limits: wgpu::Limits::downlevel_defaults(),
+                    memory_hints: wgpu::MemoryHints::Performance,
                 },
                 None,
             )
diff --git a/examples/src/storage_texture/mod.rs b/examples/src/storage_texture/mod.rs
index 04253e818..d6a06d6e2 100644
--- a/examples/src/storage_texture/mod.rs
+++ b/examples/src/storage_texture/mod.rs
@@ -35,6 +35,7 @@ async fn run(_path: Option<String>) {
                 label: None,
                 required_features: wgpu::Features::empty(),
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/timestamp_queries/mod.rs b/examples/src/timestamp_queries/mod.rs
index e396023a0..d712762cf 100644
--- a/examples/src/timestamp_queries/mod.rs
+++ b/examples/src/timestamp_queries/mod.rs
@@ -216,6 +216,7 @@ async fn run() {
                 label: None,
                 required_features: features,
                 required_limits: wgpu::Limits::downlevel_defaults(),
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/examples/src/uniform_values/mod.rs b/examples/src/uniform_values/mod.rs
index c53a18972..0adbf4e46 100644
--- a/examples/src/uniform_values/mod.rs
+++ b/examples/src/uniform_values/mod.rs
@@ -115,6 +115,7 @@ impl WgpuContext {
                     label: None,
                     required_features: wgpu::Features::empty(),
                     required_limits: wgpu::Limits::downlevel_defaults(),
+                    memory_hints: wgpu::MemoryHints::MemoryUsage,
                 },
                 None,
             )
diff --git a/player/tests/test.rs b/player/tests/test.rs
index a6c7222b6..2aca181c8 100644
--- a/player/tests/test.rs
+++ b/player/tests/test.rs
@@ -112,6 +112,7 @@ impl Test<'_> {
                 label: None,
                 required_features: self.features,
                 required_limits: wgt::Limits::default(),
+                memory_hints: wgt::MemoryHints::default(),
             },
             None,
             Some(device_id),
diff --git a/tests/src/init.rs b/tests/src/init.rs
index f66f08489..3a11b3abe 100644
--- a/tests/src/init.rs
+++ b/tests/src/init.rs
@@ -104,6 +104,7 @@ pub async fn initialize_device(
                 label: None,
                 required_features: features,
                 required_limits: limits,
+                memory_hints: wgpu::MemoryHints::MemoryUsage,
             },
             None,
         )
diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs
index 3cef19aed..8c580588f 100644
--- a/wgpu-core/src/instance.rs
+++ b/wgpu-core/src/instance.rs
@@ -351,9 +351,11 @@ impl<A: HalApi> Adapter<A> {
         }
 
         let open = unsafe {
-            self.raw
-                .adapter
-                .open(desc.required_features, &desc.required_limits)
+            self.raw.adapter.open(
+                desc.required_features,
+                &desc.required_limits,
+                &desc.memory_hints,
+            )
         }
         .map_err(|err| match err {
             hal::DeviceError::Lost => RequestDeviceError::DeviceLost,
diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs
index bd09a4e72..d61cec738 100644
--- a/wgpu-hal/examples/halmark/main.rs
+++ b/wgpu-hal/examples/halmark/main.rs
@@ -125,7 +125,11 @@ impl<A: hal::Api> Example<A> {
 
         let hal::OpenDevice { device, queue } = unsafe {
             adapter
-                .open(wgt::Features::empty(), &wgt::Limits::default())
+                .open(
+                    wgt::Features::empty(),
+                    &wgt::Limits::default(),
+                    &wgt::MemoryHints::default(),
+                )
                 .unwrap()
         };
 
diff --git a/wgpu-hal/examples/raw-gles.rs b/wgpu-hal/examples/raw-gles.rs
index 675a51869..ceab5b065 100644
--- a/wgpu-hal/examples/raw-gles.rs
+++ b/wgpu-hal/examples/raw-gles.rs
@@ -124,9 +124,11 @@ fn fill_screen(exposed: &hal::ExposedAdapter<hal::api::Gles>, width: u32, height
     use hal::{Adapter as _, CommandEncoder as _, Device as _, Queue as _};
 
     let od = unsafe {
-        exposed
-            .adapter
-            .open(wgt::Features::empty(), &wgt::Limits::downlevel_defaults())
+        exposed.adapter.open(
+            wgt::Features::empty(),
+            &wgt::Limits::downlevel_defaults(),
+            &wgt::MemoryHints::default(),
+        )
     }
     .unwrap();
 
diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs
index f27e3d067..e6481aae6 100644
--- a/wgpu-hal/examples/ray-traced-triangle/main.rs
+++ b/wgpu-hal/examples/ray-traced-triangle/main.rs
@@ -249,8 +249,15 @@ impl<A: hal::Api> Example<A> {
             .expect("Surface doesn't support presentation");
         log::info!("Surface caps: {:#?}", surface_caps);
 
-        let hal::OpenDevice { device, queue } =
-            unsafe { adapter.open(features, &wgt::Limits::default()).unwrap() };
+        let hal::OpenDevice { device, queue } = unsafe {
+            adapter
+                .open(
+                    features,
+                    &wgt::Limits::default(),
+                    &wgt::MemoryHints::Performance,
+                )
+                .unwrap()
+        };
 
         let window_size: (u32, u32) = window.inner_size().into();
         dbg!(&surface_caps.formats);
diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs
index a81f15fc3..6c8ed1cca 100644
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@@ -503,6 +503,7 @@ impl crate::Adapter for super::Adapter {
         &self,
         _features: wgt::Features,
         limits: &wgt::Limits,
+        memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
         let queue = {
             profiling::scope!("ID3D12Device::CreateCommandQueue");
@@ -520,6 +521,7 @@ impl crate::Adapter for super::Adapter {
             self.device.clone(),
             queue.clone(),
             limits,
+            memory_hints,
             self.private_caps,
             &self.library,
             self.dxc_container.clone(),
diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs
index ceb430a70..eeb60acbf 100644
--- a/wgpu-hal/src/dx12/device.rs
+++ b/wgpu-hal/src/dx12/device.rs
@@ -28,12 +28,13 @@ impl super::Device {
         raw: d3d12::Device,
         present_queue: d3d12::CommandQueue,
         limits: &wgt::Limits,
+        memory_hints: &wgt::MemoryHints,
         private_caps: super::PrivateCapabilities,
         library: &Arc<d3d12::D3D12Lib>,
         dxc_container: Option<Arc<shader_compilation::DxcContainer>>,
     ) -> Result<Self, DeviceError> {
         let mem_allocator = if private_caps.suballocation_supported {
-            super::suballocation::create_allocator_wrapper(&raw)?
+            super::suballocation::create_allocator_wrapper(&raw, memory_hints)?
         } else {
             None
         };
diff --git a/wgpu-hal/src/dx12/suballocation.rs b/wgpu-hal/src/dx12/suballocation.rs
index 35204a1b9..b7ddbaf0b 100644
--- a/wgpu-hal/src/dx12/suballocation.rs
+++ b/wgpu-hal/src/dx12/suballocation.rs
@@ -46,13 +46,31 @@ mod placed {
 
     pub(crate) fn create_allocator_wrapper(
         raw: &d3d12::Device,
+        memory_hints: &wgt::MemoryHints,
     ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> {
         let device = raw.as_ptr();
 
+        // TODO: the allocator's configuration should take hardware capability into
+        // account.
+        let mb = 1024 * 1024;
+        let allocation_sizes = match memory_hints {
+            wgt::MemoryHints::Performance => gpu_allocator::AllocationSizes::default(),
+            wgt::MemoryHints::MemoryUsage => gpu_allocator::AllocationSizes::new(8 * mb, 4 * mb),
+            wgt::MemoryHints::Manual {
+                suballocated_device_memory_block_size,
+            } => {
+                // TODO: Would it be useful to expose the host size in memory hints
+                // instead of always using half of the device size?
+                let device_size = suballocated_device_memory_block_size.start;
+                let host_size = device_size / 2;
+                gpu_allocator::AllocationSizes::new(device_size, host_size)
+            }
+        };
+
         match gpu_allocator::d3d12::Allocator::new(&gpu_allocator::d3d12::AllocatorCreateDesc {
             device: gpu_allocator::d3d12::ID3D12DeviceVersion::Device(device.as_windows().clone()),
             debug_settings: Default::default(),
-            allocation_sizes: gpu_allocator::AllocationSizes::default(),
+            allocation_sizes,
         }) {
             Ok(allocator) => Ok(Some(Mutex::new(GpuAllocatorWrapper { allocator }))),
             Err(e) => {
@@ -279,6 +297,7 @@ mod committed {
     #[allow(unused)]
     pub(crate) fn create_allocator_wrapper(
         _raw: &d3d12::Device,
+        _memory_hints: &wgt::MemoryHints,
     ) -> Result<Option<Mutex<GpuAllocatorWrapper>>, crate::DeviceError> {
         Ok(None)
     }
diff --git a/wgpu-hal/src/empty.rs b/wgpu-hal/src/empty.rs
index 227dce7ee..5d6c42ab8 100644
--- a/wgpu-hal/src/empty.rs
+++ b/wgpu-hal/src/empty.rs
@@ -92,6 +92,7 @@ impl crate::Adapter for Context {
         &self,
         features: wgt::Features,
         _limits: &wgt::Limits,
+        _memory_hints: &wgt::MemoryHints,
     ) -> DeviceResult<crate::OpenDevice<Api>> {
         Err(crate::DeviceError::Lost)
     }
diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs
index 933c36dc8..1cda99b33 100644
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@@ -929,6 +929,7 @@ impl crate::Adapter for super::Adapter {
         &self,
         features: wgt::Features,
         _limits: &wgt::Limits,
+        _memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
         let gl = &self.shared.context.lock();
         unsafe { gl.pixel_store_i32(glow::UNPACK_ALIGNMENT, 1) };
diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs
index ccc459c10..e63f25ab0 100644
--- a/wgpu-hal/src/lib.rs
+++ b/wgpu-hal/src/lib.rs
@@ -562,6 +562,7 @@ pub trait Adapter: WasmNotSendSync {
         &self,
         features: wgt::Features,
         limits: &wgt::Limits,
+        memory_hints: &wgt::MemoryHints,
     ) -> Result<OpenDevice<Self::A>, DeviceError>;
 
     /// Return the set of supported capabilities for a texture format.
diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs
index 7f8e789b4..924902517 100644
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@@ -25,6 +25,7 @@ impl crate::Adapter for super::Adapter {
         &self,
         features: wgt::Features,
         _limits: &wgt::Limits,
+        _memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
         let queue = self
             .shared
diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs
index efe32929a..81205c629 100644
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@@ -1583,6 +1583,7 @@ impl super::Adapter {
         handle_is_owned: bool,
         enabled_extensions: &[&'static CStr],
         features: wgt::Features,
+        memory_hints: &wgt::MemoryHints,
         family_index: u32,
         queue_index: u32,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
@@ -1833,7 +1834,54 @@ impl super::Adapter {
 
         let mem_allocator = {
             let limits = self.phd_capabilities.properties.limits;
-            let config = gpu_alloc::Config::i_am_prototyping(); //TODO
+
+            // Note: the parameters here are not set in stone nor were they picked with
+            // strong confidence.
+            // `final_free_list_chunk` should be bigger than starting_free_list_chunk if
+            // we want the behavior of starting with smaller block sizes and using larger
+            // ones only after we observe that the small ones aren't enough, which I think
+            // is a good "I don't know what the workload is going to be like" approach.
+            //
+            // For reference, `VMA`, and `gpu_allocator` both start with 256 MB blocks
+            // (then VMA doubles the block size each time it needs a new block).
+            // At some point it would be good to experiment with real workloads.
+            //
+            // TODO(#5925): The plan is to switch the Vulkan backend from `gpu_alloc` to
+            // `gpu_allocator` which has a different (simpler) set of configuration options.
+            //
+            // TODO: These parameters should take hardware capabilities into account.
+            let mb = 1024 * 1024;
+            let perf_cfg = gpu_alloc::Config {
+                starting_free_list_chunk: 128 * mb,
+                final_free_list_chunk: 512 * mb,
+                minimal_buddy_size: 1,
+                initial_buddy_dedicated_size: 8 * mb,
+                dedicated_threshold: 32 * mb,
+                preferred_dedicated_threshold: mb,
+                transient_dedicated_threshold: 128 * mb,
+            };
+            let mem_usage_cfg = gpu_alloc::Config {
+                starting_free_list_chunk: 8 * mb,
+                final_free_list_chunk: 64 * mb,
+                minimal_buddy_size: 1,
+                initial_buddy_dedicated_size: 8 * mb,
+                dedicated_threshold: 8 * mb,
+                preferred_dedicated_threshold: mb,
+                transient_dedicated_threshold: 16 * mb,
+            };
+            let config = match memory_hints {
+                wgt::MemoryHints::Performance => perf_cfg,
+                wgt::MemoryHints::MemoryUsage => mem_usage_cfg,
+                wgt::MemoryHints::Manual {
+                    suballocated_device_memory_block_size,
+                } => gpu_alloc::Config {
+                    starting_free_list_chunk: suballocated_device_memory_block_size.start,
+                    final_free_list_chunk: suballocated_device_memory_block_size.end,
+                    initial_buddy_dedicated_size: suballocated_device_memory_block_size.start,
+                    ..perf_cfg
+                },
+            };
+
             let max_memory_allocation_size =
                 if let Some(maintenance_3) = self.phd_capabilities.maintenance_3 {
                     maintenance_3.max_memory_allocation_size
@@ -1895,6 +1943,7 @@ impl crate::Adapter for super::Adapter {
         &self,
         features: wgt::Features,
         _limits: &wgt::Limits,
+        memory_hints: &wgt::MemoryHints,
     ) -> Result<crate::OpenDevice<super::Api>, crate::DeviceError> {
         let enabled_extensions = self.required_device_extensions(features);
         let mut enabled_phd_features = self.physical_device_features(&enabled_extensions, features);
@@ -1928,6 +1977,7 @@ impl crate::Adapter for super::Adapter {
                 true,
                 &enabled_extensions,
                 features,
+                memory_hints,
                 family_info.queue_family_index,
                 0,
             )
diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs
index 04532a4c7..d61f43496 100644
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@@ -1769,11 +1769,43 @@ pub struct AdapterInfo {
     pub backend: Backend,
 }
 
+/// Hints to the device about the memory allocation strategy.
+///
+/// Some backends may ignore these hints.
+#[derive(Clone, Debug, Default)]
+#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
+pub enum MemoryHints {
+    /// Favor performance over memory usage (the default value).
+    #[default]
+    Performance,
+    /// Favor memory usage over performance.
+    MemoryUsage,
+    /// Applications that have control over the content that is rendered
+    /// (typically games) may find an optimal compromise between memory
+    /// usage and performance by specifying the allocation configuration.
+    Manual {
+        /// Defines the range of allowed memory block sizes for sub-allocated
+        /// resources.
+        ///
+        /// The backend may attempt to group multiple resources into fewer
+        /// device memory blocks (sub-allocation) for performance reasons.
+        /// The start of the provided range specifies the initial memory
+        /// block size for sub-allocated resources. After running out of
+        /// space in existing memory blocks, the backend may choose to
+        /// progressively increase the block size of subsequent allocations
+        /// up to a limit specified by the end of the range.
+        ///
+        /// This does not limit resource sizes. If a resource does not fit
+        /// in the specified range, it will typically be placed in a dedicated
+        /// memory block.
+        suballocated_device_memory_block_size: Range<u64>,
+    },
+}
+
 /// Describes a [`Device`](../wgpu/struct.Device.html).
 ///
 /// Corresponds to [WebGPU `GPUDeviceDescriptor`](
 /// https://gpuweb.github.io/gpuweb/#gpudevicedescriptor).
-#[repr(C)]
 #[derive(Clone, Debug, Default)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub struct DeviceDescriptor<L> {
@@ -1791,6 +1823,8 @@ pub struct DeviceDescriptor<L> {
     /// Exactly the specified limits, and no better or worse,
     /// will be allowed in validation of API calls on the resulting device.
     pub required_limits: Limits,
+    /// Hints for memory allocation strategies.
+    pub memory_hints: MemoryHints,
 }
 
 impl<L> DeviceDescriptor<L> {
@@ -1800,6 +1834,7 @@ impl<L> DeviceDescriptor<L> {
             label: fun(&self.label),
             required_features: self.required_features,
             required_limits: self.required_limits.clone(),
+            memory_hints: self.memory_hints.clone(),
         }
     }
 }
diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs
index b0d27e3ef..7da27e355 100644
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@@ -53,7 +53,7 @@ pub use wgt::{
     DepthStencilState, DeviceLostReason, DeviceType, DownlevelCapabilities, DownlevelFlags,
     Dx12Compiler, DynamicOffset, Extent3d, Face, Features, FilterMode, FrontFace,
     Gles3MinorVersion, ImageDataLayout, ImageSubresourceRange, IndexFormat, InstanceDescriptor,
-    InstanceFlags, Limits, MaintainResult, MultisampleState, Origin2d, Origin3d,
+    InstanceFlags, Limits, MaintainResult, MemoryHints, MultisampleState, Origin2d, Origin3d,
     PipelineStatisticsTypes, PolygonMode, PowerPreference, PredefinedColorSpace, PresentMode,
     PresentationTimestamp, PrimitiveState, PrimitiveTopology, PushConstantRange, QueryType,
     RenderBundleDepthStencil, SamplerBindingType, SamplerBorderColor, ShaderLocation, ShaderModel,