Add max_color_attachments and max_color_attachment_bytes_per_sample (#5218)

2025-04-16 05:56:37 +00:00 · 2024-02-09 09:39:29 +01:00 · 2024-02-09 09:39:29 +01:00 · 990324fc33
commit 990324fc33
parent 4af531cf69
12 changed files with 189 additions and 5 deletions
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@ -260,6 +260,9 @@ impl RenderBundleEncoder {
            None => (true, true),
        };

+        // TODO: should be device.limits.max_color_attachments
+        let max_color_attachments = hal::MAX_COLOR_ATTACHMENTS;
+
        //TODO: validate that attachment formats are renderable,
        // have expected aspects, support multisampling.
        Ok(Self {
@ -267,11 +270,11 @@ impl RenderBundleEncoder {
            parent_id,
            context: RenderPassContext {
                attachments: AttachmentData {
-                    colors: if desc.color_formats.len() > hal::MAX_COLOR_ATTACHMENTS {
+                    colors: if desc.color_formats.len() > max_color_attachments {
                        return Err(CreateRenderBundleError::ColorAttachment(
                            ColorAttachmentError::TooMany {
                                given: desc.color_formats.len(),
-                                limit: hal::MAX_COLOR_ATTACHMENTS,
+                                limit: max_color_attachments,
                            },
                        ));
                    } else {
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@ -531,6 +531,8 @@ pub enum ColorAttachmentError {
    InvalidFormat(wgt::TextureFormat),
    #[error("The number of color attachments {given} exceeds the limit {limit}")]
    TooMany { given: usize, limit: usize },
+    #[error("The total number of bytes per sample in color attachments {total} exceeds the limit {limit}")]
+    TooManyBytesPerSample { total: u32, limit: u32 },
 }

 /// Error encountered when performing a render pass.
--- a/wgpu-core/src/device/resource.rs
+++ b/wgpu-core/src/device/resource.rs
@ -30,7 +30,9 @@ use crate::{
    snatch::{SnatchGuard, SnatchLock, Snatchable},
    storage::Storage,
    track::{BindGroupStates, TextureSelector, Tracker},
-    validation::{self, check_buffer_usage, check_texture_usage},
+    validation::{
+        self, check_buffer_usage, check_texture_usage, validate_color_attachment_bytes_per_sample,
+    },
    FastHashMap, LabelHelpers as _, SubmissionIndex,
 };

@ -2749,11 +2751,12 @@ impl<A: HalApi> Device<A> {
        let mut shader_binding_sizes = FastHashMap::default();

        let num_attachments = desc.fragment.as_ref().map(|f| f.targets.len()).unwrap_or(0);
-        if num_attachments > hal::MAX_COLOR_ATTACHMENTS {
+        let max_attachments = self.limits.max_color_attachments as usize;
+        if num_attachments > max_attachments {
            return Err(pipeline::CreateRenderPipelineError::ColorAttachment(
                command::ColorAttachmentError::TooMany {
                    given: num_attachments,
-                    limit: hal::MAX_COLOR_ATTACHMENTS,
+                    limit: max_attachments,
                },
            ));
        }
@ -2959,6 +2962,7 @@ impl<A: HalApi> Device<A> {
                            }
                        }
                    }
+
                    break None;
                };
                if let Some(e) = error {
@ -2967,6 +2971,16 @@ impl<A: HalApi> Device<A> {
            }
        }

+        let limit = self.limits.max_color_attachment_bytes_per_sample;
+        let formats = color_targets
+            .iter()
+            .map(|cs| cs.as_ref().map(|cs| cs.format));
+        if let Err(total) = validate_color_attachment_bytes_per_sample(formats, limit) {
+            return Err(pipeline::CreateRenderPipelineError::ColorAttachment(
+                command::ColorAttachmentError::TooManyBytesPerSample { total, limit },
+            ));
+        }
+
        if let Some(ds) = depth_stencil_state {
            let error = loop {
                let format_features = self.describe_format_features(adapter, ds.format)?;
--- a/wgpu-core/src/validation.rs
+++ b/wgpu-core/src/validation.rs
@ -1252,3 +1252,29 @@ impl Interface {
            .map(|ep| ep.dual_source_blending)
    }
 }
+
+// https://gpuweb.github.io/gpuweb/#abstract-opdef-calculating-color-attachment-bytes-per-sample
+pub fn validate_color_attachment_bytes_per_sample(
+    attachment_formats: impl Iterator<Item = Option<wgt::TextureFormat>>,
+    limit: u32,
+) -> Result<(), u32> {
+    let mut total_bytes_per_sample = 0;
+    for format in attachment_formats {
+        let Some(format) = format else { continue; };
+
+        let byte_cost = format.target_pixel_byte_cost().unwrap();
+        let alignment = format.target_component_alignment().unwrap();
+
+        let rem = total_bytes_per_sample % alignment;
+        if rem != 0 {
+            total_bytes_per_sample += alignment - rem;
+        }
+        total_bytes_per_sample += byte_cost;
+    }
+
+    if total_bytes_per_sample > limit {
+        return Err(total_bytes_per_sample);
+    }
+
+    Ok(())
+}
--- a/wgpu-hal/src/dx12/adapter.rs
+++ b/wgpu-hal/src/dx12/adapter.rs
@ -307,6 +307,12 @@ impl super::Adapter {
        downlevel.flags -=
            wgt::DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW;

+        // See https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels#feature-level-support
+        let max_color_attachments = 8;
+        // TODO: determine this programmatically if possible.
+        // https://github.com/gpuweb/gpuweb/issues/2965#issuecomment-1361315447
+        let max_color_attachment_bytes_per_sample = 64;
+
        Some(crate::ExposedAdapter {
            adapter: super::Adapter {
                raw: adapter,
@ -377,6 +383,8 @@ impl super::Adapter {
                        d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
                    min_storage_buffer_offset_alignment: 4,
                    max_inter_stage_shader_components: base.max_inter_stage_shader_components,
+                    max_color_attachments,
+                    max_color_attachment_bytes_per_sample,
                    max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO?
                    max_compute_invocations_per_workgroup:
                        d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP,
--- a/wgpu-hal/src/gles/adapter.rs
+++ b/wgpu-hal/src/gles/adapter.rs
@ -652,6 +652,15 @@ impl super::Adapter {
            0
        };

+        let max_color_attachments = unsafe {
+            gl.get_parameter_i32(glow::MAX_COLOR_ATTACHMENTS)
+                .min(gl.get_parameter_i32(glow::MAX_DRAW_BUFFERS))
+                .min(crate::MAX_COLOR_ATTACHMENTS as i32) as u32
+        };
+
+        // TODO: programmatically determine this.
+        let max_color_attachment_bytes_per_sample = 32;
+
        let limits = wgt::Limits {
            max_texture_dimension_1d: max_texture_size,
            max_texture_dimension_2d: max_texture_size,
@ -722,6 +731,8 @@ impl super::Adapter {
            max_inter_stage_shader_components: unsafe {
                gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS)
            } as u32,
+            max_color_attachments,
+            max_color_attachment_bytes_per_sample,
            max_compute_workgroup_storage_size: if supports_work_group_params {
                (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) } as u32)
            } else {
--- a/wgpu-hal/src/metal/adapter.rs
+++ b/wgpu-hal/src/metal/adapter.rs
@ -731,6 +731,12 @@ impl super::PrivateCapabilities {
            } else {
                4
            },
+            // Per https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf
+            max_color_attachment_bytes_per_sample: if device.supports_family(MTLGPUFamily::Apple4) {
+                64
+            } else {
+                32
+            },
            max_varying_components: if device
                .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1)
            {
@ -940,6 +946,10 @@ impl super::PrivateCapabilities {
                min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
                min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
                max_inter_stage_shader_components: self.max_varying_components,
+                max_color_attachments: (self.max_color_render_targets as u32)
+                    .min(crate::MAX_COLOR_ATTACHMENTS as u32),
+                max_color_attachment_bytes_per_sample: self.max_color_attachment_bytes_per_sample
+                    as u32,
                max_compute_workgroup_storage_size: self.max_total_threadgroup_memory,
                max_compute_invocations_per_workgroup: self.max_threads_per_group,
                max_compute_workgroup_size_x: self.max_threads_per_group,
--- a/wgpu-hal/src/metal/mod.rs
+++ b/wgpu-hal/src/metal/mod.rs
@ -248,6 +248,7 @@ struct PrivateCapabilities {
    max_texture_layers: u64,
    max_fragment_input_components: u64,
    max_color_render_targets: u8,
+    max_color_attachment_bytes_per_sample: u8,
    max_varying_components: u32,
    max_threads_per_group: u32,
    max_total_threadgroup_memory: u32,
--- a/wgpu-hal/src/vulkan/adapter.rs
+++ b/wgpu-hal/src/vulkan/adapter.rs
@ -827,6 +827,11 @@ impl PhysicalDeviceCapabilities {
                u64::MAX
            };

+        // TODO: programmatically determine this, if possible. It's unclear whether we can
+        // as of https://github.com/gpuweb/gpuweb/issues/2965#issuecomment-1361315447.
+        // We could increase the limit when we aren't on a tiled GPU.
+        let max_color_attachment_bytes_per_sample = 32;
+
        wgt::Limits {
            max_texture_dimension_1d: limits.max_image_dimension1_d,
            max_texture_dimension_2d: limits.max_image_dimension2_d,
@ -862,6 +867,10 @@ impl PhysicalDeviceCapabilities {
            max_inter_stage_shader_components: limits
                .max_vertex_output_components
                .min(limits.max_fragment_input_components),
+            max_color_attachments: limits
+                .max_color_attachments
+                .min(crate::MAX_COLOR_ATTACHMENTS as u32),
+            max_color_attachment_bytes_per_sample,
            max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size,
            max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations,
            max_compute_workgroup_size_x: max_compute_workgroup_sizes[0],
--- a/wgpu-info/src/human.rs
+++ b/wgpu-info/src/human.rs
@ -147,6 +147,8 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
        min_uniform_buffer_offset_alignment,
        min_storage_buffer_offset_alignment,
        max_inter_stage_shader_components,
+        max_color_attachments,
+        max_color_attachment_bytes_per_sample,
        max_compute_workgroup_storage_size,
        max_compute_invocations_per_workgroup,
        max_compute_workgroup_size_x,
@ -178,6 +180,8 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize
    writeln!(output, "\t\t             Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?;
    writeln!(output, "\t\t             Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?;
    writeln!(output, "\t\t                Max Inter-Stage Shader Component: {max_inter_stage_shader_components}")?;
+    writeln!(output, "\t\t                           Max Color Attachments: {max_color_attachments}")?;
+    writeln!(output, "\t\t           Max Color Attachment Bytes per sample: {max_color_attachment_bytes_per_sample}")?;
    writeln!(output, "\t\t              Max Compute Workgroup Storage Size: {max_compute_workgroup_storage_size}")?;
    writeln!(output, "\t\t           Max Compute Invocations Per Workgroup: {max_compute_invocations_per_workgroup}")?;
    writeln!(output, "\t\t                    Max Compute Workgroup Size X: {max_compute_workgroup_size_x}")?;
--- a/wgpu-types/src/lib.rs
+++ b/wgpu-types/src/lib.rs
@ -1078,6 +1078,11 @@ pub struct Limits {
    /// inter-stage communication (vertex outputs to fragment inputs). Defaults to 60.
    /// Higher is "better".
    pub max_inter_stage_shader_components: u32,
+    /// The maximum allowed number of color attachments.
+    pub max_color_attachments: u32,
+    /// The maximum number of bytes necessary to hold one sample (pixel or subpixel) of render
+    /// pipeline output data, across all color attachments.
+    pub max_color_attachment_bytes_per_sample: u32,
    /// Maximum number of bytes used for workgroup memory in a compute entry point. Defaults to
    /// 16352. Higher is "better".
    pub max_compute_workgroup_storage_size: u32,
@ -1139,6 +1144,8 @@ impl Default for Limits {
            min_uniform_buffer_offset_alignment: 256,
            min_storage_buffer_offset_alignment: 256,
            max_inter_stage_shader_components: 60,
+            max_color_attachments: 8,
+            max_color_attachment_bytes_per_sample: 32,
            max_compute_workgroup_storage_size: 16384,
            max_compute_invocations_per_workgroup: 256,
            max_compute_workgroup_size_x: 256,
@ -1180,6 +1187,8 @@ impl Limits {
    ///     min_uniform_buffer_offset_alignment: 256,
    ///     min_storage_buffer_offset_alignment: 256,
    ///     max_inter_stage_shader_components: 60,
+    ///     max_color_attachments: 8,
+    ///     max_color_attachment_bytes_per_sample: 32,
    ///     max_compute_workgroup_storage_size: 16352,
    ///     max_compute_invocations_per_workgroup: 256,
    ///     max_compute_workgroup_size_x: 256,
@ -1214,6 +1223,8 @@ impl Limits {
            min_uniform_buffer_offset_alignment: 256,
            min_storage_buffer_offset_alignment: 256,
            max_inter_stage_shader_components: 60,
+            max_color_attachments: 8,
+            max_color_attachment_bytes_per_sample: 32,
            max_compute_workgroup_storage_size: 16352,
            max_compute_invocations_per_workgroup: 256,
            max_compute_workgroup_size_x: 256,
@ -1254,6 +1265,8 @@ impl Limits {
    ///     min_uniform_buffer_offset_alignment: 256,
    ///     min_storage_buffer_offset_alignment: 256,
    ///     max_inter_stage_shader_components: 31,
+    ///     max_color_attachments: 8,
+    ///     max_color_attachment_bytes_per_sample: 32,
    ///     max_compute_workgroup_storage_size: 0, // +
    ///     max_compute_invocations_per_workgroup: 0, // +
    ///     max_compute_workgroup_size_x: 0, // +
@ -3522,6 +3535,87 @@ impl TextureFormat {
        }
    }

+    /// The number of bytes occupied per pixel in a color attachment
+    /// <https://gpuweb.github.io/gpuweb/#render-target-pixel-byte-cost>
+    pub fn target_pixel_byte_cost(&self) -> Option<u32> {
+        match *self {
+            Self::R8Unorm | Self::R8Uint | Self::R8Sint => Some(1),
+            Self::Rg8Unorm
+            | Self::Rg8Uint
+            | Self::Rg8Sint
+            | Self::R16Uint
+            | Self::R16Sint
+            | Self::R16Float => Some(2),
+            Self::Rgba8Uint
+            | Self::Rgba8Sint
+            | Self::Rg16Uint
+            | Self::Rg16Sint
+            | Self::Rg16Float
+            | Self::R32Uint
+            | Self::R32Sint
+            | Self::R32Float => Some(4),
+            Self::Rgba8Unorm
+            | Self::Rgba8UnormSrgb
+            | Self::Bgra8Unorm
+            | Self::Bgra8UnormSrgb
+            | Self::Rgba16Uint
+            | Self::Rgba16Sint
+            | Self::Rgba16Float
+            | Self::Rg32Uint
+            | Self::Rg32Sint
+            | Self::Rg32Float
+            | Self::Rgb10a2Uint
+            | Self::Rgb10a2Unorm
+            | Self::Rg11b10Float => Some(8),
+            Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16),
+            Self::Rgba8Snorm | Self::Rg8Snorm | Self::R8Snorm => None,
+            _ => None,
+        }
+    }
+
+    /// See <https://gpuweb.github.io/gpuweb/#render-target-component-alignment>
+    pub fn target_component_alignment(&self) -> Option<u32> {
+        match self {
+            Self::R8Unorm
+            | Self::R8Snorm
+            | Self::R8Uint
+            | Self::R8Sint
+            | Self::Rg8Unorm
+            | Self::Rg8Snorm
+            | Self::Rg8Uint
+            | Self::Rg8Sint
+            | Self::Rgba8Unorm
+            | Self::Rgba8UnormSrgb
+            | Self::Rgba8Snorm
+            | Self::Rgba8Uint
+            | Self::Rgba8Sint
+            | Self::Bgra8Unorm
+            | Self::Bgra8UnormSrgb => Some(1),
+            Self::R16Uint
+            | Self::R16Sint
+            | Self::R16Float
+            | Self::Rg16Uint
+            | Self::Rg16Sint
+            | Self::Rg16Float
+            | Self::Rgba16Uint
+            | Self::Rgba16Sint
+            | Self::Rgba16Float => Some(2),
+            Self::R32Uint
+            | Self::R32Sint
+            | Self::R32Float
+            | Self::Rg32Uint
+            | Self::Rg32Sint
+            | Self::Rg32Float
+            | Self::Rgba32Uint
+            | Self::Rgba32Sint
+            | Self::Rgba32Float
+            | Self::Rgb10a2Uint
+            | Self::Rgb10a2Unorm
+            | Self::Rg11b10Float => Some(4),
+            _ => None,
+        }
+    }
+
    /// Returns the number of components this format has.
    pub fn components(&self) -> u8 {
        self.components_with_aspect(TextureAspect::All)
--- a/wgpu/src/backend/webgpu.rs
+++ b/wgpu/src/backend/webgpu.rs
@ -724,6 +724,8 @@ fn map_wgt_limits(limits: web_sys::GpuSupportedLimits) -> wgt::Limits {
        min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(),
        min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(),
        max_inter_stage_shader_components: limits.max_inter_stage_shader_components(),
+        max_color_attachments: limits.max_color_attachments(),
+        max_color_attachment_bytes_per_sample: limits.max_color_attachment_bytes_per_sample(),
        max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(),
        max_compute_invocations_per_workgroup: limits.max_compute_invocations_per_workgroup(),
        max_compute_workgroup_size_x: limits.max_compute_workgroup_size_x(),