From 990324fc337a7c9d836a558f442c8a562fad44f9 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Fri, 9 Feb 2024 09:39:29 +0100 Subject: [PATCH] Add max_color_attachments and max_color_attachment_bytes_per_sample (#5218) --- wgpu-core/src/command/bundle.rs | 7 ++- wgpu-core/src/command/render.rs | 2 + wgpu-core/src/device/resource.rs | 20 ++++++- wgpu-core/src/validation.rs | 26 +++++++++ wgpu-hal/src/dx12/adapter.rs | 8 +++ wgpu-hal/src/gles/adapter.rs | 11 ++++ wgpu-hal/src/metal/adapter.rs | 10 ++++ wgpu-hal/src/metal/mod.rs | 1 + wgpu-hal/src/vulkan/adapter.rs | 9 +++ wgpu-info/src/human.rs | 4 ++ wgpu-types/src/lib.rs | 94 ++++++++++++++++++++++++++++++++ wgpu/src/backend/webgpu.rs | 2 + 12 files changed, 189 insertions(+), 5 deletions(-) diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 9d80c62f8..930af6832 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -260,6 +260,9 @@ impl RenderBundleEncoder { None => (true, true), }; + // TODO: should be device.limits.max_color_attachments + let max_color_attachments = hal::MAX_COLOR_ATTACHMENTS; + //TODO: validate that attachment formats are renderable, // have expected aspects, support multisampling. Ok(Self { @@ -267,11 +270,11 @@ impl RenderBundleEncoder { parent_id, context: RenderPassContext { attachments: AttachmentData { - colors: if desc.color_formats.len() > hal::MAX_COLOR_ATTACHMENTS { + colors: if desc.color_formats.len() > max_color_attachments { return Err(CreateRenderBundleError::ColorAttachment( ColorAttachmentError::TooMany { given: desc.color_formats.len(), - limit: hal::MAX_COLOR_ATTACHMENTS, + limit: max_color_attachments, }, )); } else { diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index d3de3e26e..8acde0819 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -531,6 +531,8 @@ pub enum ColorAttachmentError { InvalidFormat(wgt::TextureFormat), #[error("The number of color attachments {given} exceeds the limit {limit}")] TooMany { given: usize, limit: usize }, + #[error("The total number of bytes per sample in color attachments {total} exceeds the limit {limit}")] + TooManyBytesPerSample { total: u32, limit: u32 }, } /// Error encountered when performing a render pass. diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index b2c85a056..6cb522349 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -30,7 +30,9 @@ use crate::{ snatch::{SnatchGuard, SnatchLock, Snatchable}, storage::Storage, track::{BindGroupStates, TextureSelector, Tracker}, - validation::{self, check_buffer_usage, check_texture_usage}, + validation::{ + self, check_buffer_usage, check_texture_usage, validate_color_attachment_bytes_per_sample, + }, FastHashMap, LabelHelpers as _, SubmissionIndex, }; @@ -2749,11 +2751,12 @@ impl Device { let mut shader_binding_sizes = FastHashMap::default(); let num_attachments = desc.fragment.as_ref().map(|f| f.targets.len()).unwrap_or(0); - if num_attachments > hal::MAX_COLOR_ATTACHMENTS { + let max_attachments = self.limits.max_color_attachments as usize; + if num_attachments > max_attachments { return Err(pipeline::CreateRenderPipelineError::ColorAttachment( command::ColorAttachmentError::TooMany { given: num_attachments, - limit: hal::MAX_COLOR_ATTACHMENTS, + limit: max_attachments, }, )); } @@ -2959,6 +2962,7 @@ impl Device { } } } + break None; }; if let Some(e) = error { @@ -2967,6 +2971,16 @@ impl Device { } } + let limit = self.limits.max_color_attachment_bytes_per_sample; + let formats = color_targets + .iter() + .map(|cs| cs.as_ref().map(|cs| cs.format)); + if let Err(total) = validate_color_attachment_bytes_per_sample(formats, limit) { + return Err(pipeline::CreateRenderPipelineError::ColorAttachment( + command::ColorAttachmentError::TooManyBytesPerSample { total, limit }, + )); + } + if let Some(ds) = depth_stencil_state { let error = loop { let format_features = self.describe_format_features(adapter, ds.format)?; diff --git a/wgpu-core/src/validation.rs b/wgpu-core/src/validation.rs index 994a6cd52..00307a71b 100644 --- a/wgpu-core/src/validation.rs +++ b/wgpu-core/src/validation.rs @@ -1252,3 +1252,29 @@ impl Interface { .map(|ep| ep.dual_source_blending) } } + +// https://gpuweb.github.io/gpuweb/#abstract-opdef-calculating-color-attachment-bytes-per-sample +pub fn validate_color_attachment_bytes_per_sample( + attachment_formats: impl Iterator>, + limit: u32, +) -> Result<(), u32> { + let mut total_bytes_per_sample = 0; + for format in attachment_formats { + let Some(format) = format else { continue; }; + + let byte_cost = format.target_pixel_byte_cost().unwrap(); + let alignment = format.target_component_alignment().unwrap(); + + let rem = total_bytes_per_sample % alignment; + if rem != 0 { + total_bytes_per_sample += alignment - rem; + } + total_bytes_per_sample += byte_cost; + } + + if total_bytes_per_sample > limit { + return Err(total_bytes_per_sample); + } + + Ok(()) +} diff --git a/wgpu-hal/src/dx12/adapter.rs b/wgpu-hal/src/dx12/adapter.rs index f6027014d..712c2254b 100644 --- a/wgpu-hal/src/dx12/adapter.rs +++ b/wgpu-hal/src/dx12/adapter.rs @@ -307,6 +307,12 @@ impl super::Adapter { downlevel.flags -= wgt::DownlevelFlags::VERTEX_AND_INSTANCE_INDEX_RESPECTS_RESPECTIVE_FIRST_VALUE_IN_INDIRECT_DRAW; + // See https://learn.microsoft.com/en-us/windows/win32/direct3d12/hardware-feature-levels#feature-level-support + let max_color_attachments = 8; + // TODO: determine this programmatically if possible. + // https://github.com/gpuweb/gpuweb/issues/2965#issuecomment-1361315447 + let max_color_attachment_bytes_per_sample = 64; + Some(crate::ExposedAdapter { adapter: super::Adapter { raw: adapter, @@ -377,6 +383,8 @@ impl super::Adapter { d3d12_ty::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT, min_storage_buffer_offset_alignment: 4, max_inter_stage_shader_components: base.max_inter_stage_shader_components, + max_color_attachments, + max_color_attachment_bytes_per_sample, max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO? max_compute_invocations_per_workgroup: d3d12_ty::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP, diff --git a/wgpu-hal/src/gles/adapter.rs b/wgpu-hal/src/gles/adapter.rs index afa402379..fd8ad7e69 100644 --- a/wgpu-hal/src/gles/adapter.rs +++ b/wgpu-hal/src/gles/adapter.rs @@ -652,6 +652,15 @@ impl super::Adapter { 0 }; + let max_color_attachments = unsafe { + gl.get_parameter_i32(glow::MAX_COLOR_ATTACHMENTS) + .min(gl.get_parameter_i32(glow::MAX_DRAW_BUFFERS)) + .min(crate::MAX_COLOR_ATTACHMENTS as i32) as u32 + }; + + // TODO: programmatically determine this. + let max_color_attachment_bytes_per_sample = 32; + let limits = wgt::Limits { max_texture_dimension_1d: max_texture_size, max_texture_dimension_2d: max_texture_size, @@ -722,6 +731,8 @@ impl super::Adapter { max_inter_stage_shader_components: unsafe { gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS) } as u32, + max_color_attachments, + max_color_attachment_bytes_per_sample, max_compute_workgroup_storage_size: if supports_work_group_params { (unsafe { gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) } as u32) } else { diff --git a/wgpu-hal/src/metal/adapter.rs b/wgpu-hal/src/metal/adapter.rs index a946ce581..f3b27de71 100644 --- a/wgpu-hal/src/metal/adapter.rs +++ b/wgpu-hal/src/metal/adapter.rs @@ -731,6 +731,12 @@ impl super::PrivateCapabilities { } else { 4 }, + // Per https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf + max_color_attachment_bytes_per_sample: if device.supports_family(MTLGPUFamily::Apple4) { + 64 + } else { + 32 + }, max_varying_components: if device .supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1) { @@ -940,6 +946,10 @@ impl super::PrivateCapabilities { min_uniform_buffer_offset_alignment: self.buffer_alignment as u32, min_storage_buffer_offset_alignment: self.buffer_alignment as u32, max_inter_stage_shader_components: self.max_varying_components, + max_color_attachments: (self.max_color_render_targets as u32) + .min(crate::MAX_COLOR_ATTACHMENTS as u32), + max_color_attachment_bytes_per_sample: self.max_color_attachment_bytes_per_sample + as u32, max_compute_workgroup_storage_size: self.max_total_threadgroup_memory, max_compute_invocations_per_workgroup: self.max_threads_per_group, max_compute_workgroup_size_x: self.max_threads_per_group, diff --git a/wgpu-hal/src/metal/mod.rs b/wgpu-hal/src/metal/mod.rs index 298f60faa..62fbf3d49 100644 --- a/wgpu-hal/src/metal/mod.rs +++ b/wgpu-hal/src/metal/mod.rs @@ -248,6 +248,7 @@ struct PrivateCapabilities { max_texture_layers: u64, max_fragment_input_components: u64, max_color_render_targets: u8, + max_color_attachment_bytes_per_sample: u8, max_varying_components: u32, max_threads_per_group: u32, max_total_threadgroup_memory: u32, diff --git a/wgpu-hal/src/vulkan/adapter.rs b/wgpu-hal/src/vulkan/adapter.rs index 85e620d23..2693e7402 100644 --- a/wgpu-hal/src/vulkan/adapter.rs +++ b/wgpu-hal/src/vulkan/adapter.rs @@ -827,6 +827,11 @@ impl PhysicalDeviceCapabilities { u64::MAX }; + // TODO: programmatically determine this, if possible. It's unclear whether we can + // as of https://github.com/gpuweb/gpuweb/issues/2965#issuecomment-1361315447. + // We could increase the limit when we aren't on a tiled GPU. + let max_color_attachment_bytes_per_sample = 32; + wgt::Limits { max_texture_dimension_1d: limits.max_image_dimension1_d, max_texture_dimension_2d: limits.max_image_dimension2_d, @@ -862,6 +867,10 @@ impl PhysicalDeviceCapabilities { max_inter_stage_shader_components: limits .max_vertex_output_components .min(limits.max_fragment_input_components), + max_color_attachments: limits + .max_color_attachments + .min(crate::MAX_COLOR_ATTACHMENTS as u32), + max_color_attachment_bytes_per_sample, max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size, max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations, max_compute_workgroup_size_x: max_compute_workgroup_sizes[0], diff --git a/wgpu-info/src/human.rs b/wgpu-info/src/human.rs index 9cc4c27f8..9bb281352 100644 --- a/wgpu-info/src/human.rs +++ b/wgpu-info/src/human.rs @@ -147,6 +147,8 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize min_uniform_buffer_offset_alignment, min_storage_buffer_offset_alignment, max_inter_stage_shader_components, + max_color_attachments, + max_color_attachment_bytes_per_sample, max_compute_workgroup_storage_size, max_compute_invocations_per_workgroup, max_compute_workgroup_size_x, @@ -178,6 +180,8 @@ fn print_adapter(output: &mut impl io::Write, report: &AdapterReport, idx: usize writeln!(output, "\t\t Min Uniform Buffer Offset Alignment: {min_uniform_buffer_offset_alignment}")?; writeln!(output, "\t\t Min Storage Buffer Offset Alignment: {min_storage_buffer_offset_alignment}")?; writeln!(output, "\t\t Max Inter-Stage Shader Component: {max_inter_stage_shader_components}")?; + writeln!(output, "\t\t Max Color Attachments: {max_color_attachments}")?; + writeln!(output, "\t\t Max Color Attachment Bytes per sample: {max_color_attachment_bytes_per_sample}")?; writeln!(output, "\t\t Max Compute Workgroup Storage Size: {max_compute_workgroup_storage_size}")?; writeln!(output, "\t\t Max Compute Invocations Per Workgroup: {max_compute_invocations_per_workgroup}")?; writeln!(output, "\t\t Max Compute Workgroup Size X: {max_compute_workgroup_size_x}")?; diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index d2d493a7c..003cbb847 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -1078,6 +1078,11 @@ pub struct Limits { /// inter-stage communication (vertex outputs to fragment inputs). Defaults to 60. /// Higher is "better". pub max_inter_stage_shader_components: u32, + /// The maximum allowed number of color attachments. + pub max_color_attachments: u32, + /// The maximum number of bytes necessary to hold one sample (pixel or subpixel) of render + /// pipeline output data, across all color attachments. + pub max_color_attachment_bytes_per_sample: u32, /// Maximum number of bytes used for workgroup memory in a compute entry point. Defaults to /// 16352. Higher is "better". pub max_compute_workgroup_storage_size: u32, @@ -1139,6 +1144,8 @@ impl Default for Limits { min_uniform_buffer_offset_alignment: 256, min_storage_buffer_offset_alignment: 256, max_inter_stage_shader_components: 60, + max_color_attachments: 8, + max_color_attachment_bytes_per_sample: 32, max_compute_workgroup_storage_size: 16384, max_compute_invocations_per_workgroup: 256, max_compute_workgroup_size_x: 256, @@ -1180,6 +1187,8 @@ impl Limits { /// min_uniform_buffer_offset_alignment: 256, /// min_storage_buffer_offset_alignment: 256, /// max_inter_stage_shader_components: 60, + /// max_color_attachments: 8, + /// max_color_attachment_bytes_per_sample: 32, /// max_compute_workgroup_storage_size: 16352, /// max_compute_invocations_per_workgroup: 256, /// max_compute_workgroup_size_x: 256, @@ -1214,6 +1223,8 @@ impl Limits { min_uniform_buffer_offset_alignment: 256, min_storage_buffer_offset_alignment: 256, max_inter_stage_shader_components: 60, + max_color_attachments: 8, + max_color_attachment_bytes_per_sample: 32, max_compute_workgroup_storage_size: 16352, max_compute_invocations_per_workgroup: 256, max_compute_workgroup_size_x: 256, @@ -1254,6 +1265,8 @@ impl Limits { /// min_uniform_buffer_offset_alignment: 256, /// min_storage_buffer_offset_alignment: 256, /// max_inter_stage_shader_components: 31, + /// max_color_attachments: 8, + /// max_color_attachment_bytes_per_sample: 32, /// max_compute_workgroup_storage_size: 0, // + /// max_compute_invocations_per_workgroup: 0, // + /// max_compute_workgroup_size_x: 0, // + @@ -3522,6 +3535,87 @@ impl TextureFormat { } } + /// The number of bytes occupied per pixel in a color attachment + /// + pub fn target_pixel_byte_cost(&self) -> Option { + match *self { + Self::R8Unorm | Self::R8Uint | Self::R8Sint => Some(1), + Self::Rg8Unorm + | Self::Rg8Uint + | Self::Rg8Sint + | Self::R16Uint + | Self::R16Sint + | Self::R16Float => Some(2), + Self::Rgba8Uint + | Self::Rgba8Sint + | Self::Rg16Uint + | Self::Rg16Sint + | Self::Rg16Float + | Self::R32Uint + | Self::R32Sint + | Self::R32Float => Some(4), + Self::Rgba8Unorm + | Self::Rgba8UnormSrgb + | Self::Bgra8Unorm + | Self::Bgra8UnormSrgb + | Self::Rgba16Uint + | Self::Rgba16Sint + | Self::Rgba16Float + | Self::Rg32Uint + | Self::Rg32Sint + | Self::Rg32Float + | Self::Rgb10a2Uint + | Self::Rgb10a2Unorm + | Self::Rg11b10Float => Some(8), + Self::Rgba32Uint | Self::Rgba32Sint | Self::Rgba32Float => Some(16), + Self::Rgba8Snorm | Self::Rg8Snorm | Self::R8Snorm => None, + _ => None, + } + } + + /// See + pub fn target_component_alignment(&self) -> Option { + match self { + Self::R8Unorm + | Self::R8Snorm + | Self::R8Uint + | Self::R8Sint + | Self::Rg8Unorm + | Self::Rg8Snorm + | Self::Rg8Uint + | Self::Rg8Sint + | Self::Rgba8Unorm + | Self::Rgba8UnormSrgb + | Self::Rgba8Snorm + | Self::Rgba8Uint + | Self::Rgba8Sint + | Self::Bgra8Unorm + | Self::Bgra8UnormSrgb => Some(1), + Self::R16Uint + | Self::R16Sint + | Self::R16Float + | Self::Rg16Uint + | Self::Rg16Sint + | Self::Rg16Float + | Self::Rgba16Uint + | Self::Rgba16Sint + | Self::Rgba16Float => Some(2), + Self::R32Uint + | Self::R32Sint + | Self::R32Float + | Self::Rg32Uint + | Self::Rg32Sint + | Self::Rg32Float + | Self::Rgba32Uint + | Self::Rgba32Sint + | Self::Rgba32Float + | Self::Rgb10a2Uint + | Self::Rgb10a2Unorm + | Self::Rg11b10Float => Some(4), + _ => None, + } + } + /// Returns the number of components this format has. pub fn components(&self) -> u8 { self.components_with_aspect(TextureAspect::All) diff --git a/wgpu/src/backend/webgpu.rs b/wgpu/src/backend/webgpu.rs index ce6c658cc..a855f6e47 100644 --- a/wgpu/src/backend/webgpu.rs +++ b/wgpu/src/backend/webgpu.rs @@ -724,6 +724,8 @@ fn map_wgt_limits(limits: web_sys::GpuSupportedLimits) -> wgt::Limits { min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment(), min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment(), max_inter_stage_shader_components: limits.max_inter_stage_shader_components(), + max_color_attachments: limits.max_color_attachments(), + max_color_attachment_bytes_per_sample: limits.max_color_attachment_bytes_per_sample(), max_compute_workgroup_storage_size: limits.max_compute_workgroup_storage_size(), max_compute_invocations_per_workgroup: limits.max_compute_invocations_per_workgroup(), max_compute_workgroup_size_x: limits.max_compute_workgroup_size_x(),