Compute max workgroup size limits (#2133)

* Creation of max compute workgroup size limits
* Implemented for Vulkan and Dx12

* Follow up of gfx-rs/wgpu#1808

* Check if current dispatch operation is under the limit

* Typos and some forgotten fixes

* Port to GLES

* Port to Metal with default limits

* Change Vulkan max_workroup_size_per_dims to pick the min of the reported dimensions

Co-authored-by: Dzmitry Malyshau <kvark@fastmail.com>
Co-authored-by: Dzmitry Malyshau <kvarkus@gmail.com>
This commit is contained in:
Jerónimo Sánchez 2021-11-01 03:32:10 +01:00 committed by GitHub
parent d2d8affb79
commit 939d54f88d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 86 additions and 28 deletions

View File

@ -121,6 +121,10 @@ pub enum DispatchError {
//expected: BindGroupLayoutId,
//provided: Option<(BindGroupLayoutId, BindGroupId)>,
},
#[error(
"each current dispatch group size dimension ({current:?}) must be less or equal to {limit}"
)]
InvalidGroupSize { current: [u32; 3], limit: u32 },
}
/// Error encountered when performing a compute pass.
@ -535,6 +539,22 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
&*texture_guard,
)
.map_pass_err(scope)?;
let groups_size_limit = cmd_buf.limits.max_compute_workgroups_per_dimension;
if groups[0] > groups_size_limit
|| groups[1] > groups_size_limit
|| groups[2] > groups_size_limit
{
return Err(ComputePassErrorInner::Dispatch(
DispatchError::InvalidGroupSize {
current: groups,
limit: groups_size_limit,
},
))
.map_pass_err(scope);
}
unsafe {
raw.dispatch(groups);
}

View File

@ -245,7 +245,13 @@ impl super::Adapter {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment:
d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
min_storage_buffer_offset_alignment: 4, // TODO?
min_storage_buffer_offset_alignment: 4,
max_compute_workgroup_size_x: d3d12::D3D12_CS_THREAD_GROUP_MAX_X,
max_compute_workgroup_size_y: d3d12::D3D12_CS_THREAD_GROUP_MAX_Y,
max_compute_workgroup_size_z: d3d12::D3D12_CS_THREAD_GROUP_MAX_Z,
max_compute_workgroups_per_dimension:
d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
// TODO?
},
alignments: crate::Alignments {
buffer_copy_offset: wgt::BufferSize::new(

View File

@ -328,6 +328,12 @@ impl super::Adapter {
gl.get_parameter_i32(glow::MAX_VERTEX_UNIFORM_BLOCKS)
.min(gl.get_parameter_i32(glow::MAX_FRAGMENT_UNIFORM_BLOCKS)) as u32;
let max_compute_workgroups_per_dimension = gl
.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 0)
.min(gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 1))
.min(gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_COUNT, 2))
as u32;
let limits = wgt::Limits {
max_texture_dimension_1d: max_texture_size,
max_texture_dimension_2d: max_texture_size,
@ -367,6 +373,16 @@ impl super::Adapter {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,
max_compute_workgroup_size_x: gl
.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0)
as u32,
max_compute_workgroup_size_y: gl
.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 1)
as u32,
max_compute_workgroup_size_z: gl
.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 2)
as u32,
max_compute_workgroups_per_dimension,
};
let mut workarounds = super::Workarounds::empty();

View File

@ -946,6 +946,11 @@ impl super::PrivateCapabilities {
max_push_constant_size: 0x1000,
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
//TODO: double-check how these match Metal feature set tables
max_compute_workgroup_size_x: 256,
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,
max_compute_workgroups_per_dimension: 0xFFFF,
},
alignments: crate::Alignments {
buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(),

View File

@ -591,6 +591,11 @@ impl PhysicalDeviceCapabilities {
limits.max_per_stage_descriptor_storage_buffers
};
let max_compute_workgroup_sizes = limits.max_compute_work_group_size;
let max_compute_workgroups_per_dimension = limits.max_compute_work_group_count[0]
.min(limits.max_compute_work_group_count[1])
.min(limits.max_compute_work_group_count[2]);
wgt::Limits {
max_texture_dimension_1d: limits.max_image_dimension1_d,
max_texture_dimension_2d: limits.max_image_dimension2_d,
@ -618,6 +623,10 @@ impl PhysicalDeviceCapabilities {
max_push_constant_size: limits.max_push_constants_size,
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
max_compute_workgroup_size_x: max_compute_workgroup_sizes[0],
max_compute_workgroup_size_y: max_compute_workgroup_sizes[1],
max_compute_workgroup_size_z: max_compute_workgroup_sizes[2],
max_compute_workgroups_per_dimension,
}
}

View File

@ -50,6 +50,10 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
max_push_constant_size,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,
max_compute_workgroup_size_x,
max_compute_workgroup_size_y,
max_compute_workgroup_size_z,
max_compute_workgroups_per_dimension
} = limits;
println!("\t\tMax Texture Dimension 1d: {}", max_texture_dimension_1d);
println!("\t\tMax Texture Dimension 2d: {}", max_texture_dimension_2d);
@ -71,6 +75,10 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
println!("\t\tMax Push Constant Size: {}", max_push_constant_size);
println!("\t\tMin Uniform Buffer Offset Alignment: {}", min_uniform_buffer_offset_alignment);
println!("\t\tMin Storage Buffer Offset Alignment: {}", min_storage_buffer_offset_alignment);
println!("\t\tMax Compute Workgroup Size X: {}", max_compute_workgroup_size_x);
println!("\t\tMax Compute Workgroup Size Y: {}", max_compute_workgroup_size_y);
println!("\t\tMax Compute Workgroup Size Z: {}", max_compute_workgroup_size_z);
println!("\t\tMax Compute Workgroups Per Dimmension: {}", max_compute_workgroups_per_dimension);
println!("\tDownlevel Properties:");
let wgpu::DownlevelCapabilities {
shader_model,

View File

@ -641,6 +641,19 @@ pub struct Limits {
/// when creating a `BindGroup`, or for `set_bind_group` `dynamicOffsets`.
/// Defaults to 256. Lower is "better".
pub min_storage_buffer_offset_alignment: u32,
/// The maximum value of the workgroup_size X dimension for a compute stage `ShaderModule` entry-point.
/// Defaults to 256.
pub max_compute_workgroup_size_x: u32,
/// The maximum value of the workgroup_size Y dimension for a compute stage `ShaderModule` entry-point.
/// Defaults to 256.
pub max_compute_workgroup_size_y: u32,
/// The maximum value of the workgroup_size Z dimension for a compute stage `ShaderModule` entry-point.
/// Defaults to 256.
pub max_compute_workgroup_size_z: u32,
/// The maximum value for each dimension of a `ComputePass::dispatch(x, y, z)` operation.
/// Defaults to 65535.
pub max_compute_workgroups_per_dimension: u32,
}
impl Default for Limits {
@ -666,6 +679,10 @@ impl Default for Limits {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment: 256,
min_storage_buffer_offset_alignment: 256,
max_compute_workgroup_size_x: 256,
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,
max_compute_workgroups_per_dimension: 65535,
}
}
}
@ -694,6 +711,10 @@ impl Limits {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment: 256,
min_storage_buffer_offset_alignment: 256,
max_compute_workgroup_size_x: 256,
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,
max_compute_workgroups_per_dimension: 65535,
}
}

View File

@ -38,33 +38,6 @@ pub struct TestingContext {
pub queue: Queue,
}
// A rather arbitrary set of limits which should be lower than all devices wgpu reasonably expects to run on and provides enough resources for most tests to run.
// Adjust as needed if they are too low/high.
pub fn lowest_reasonable_limits() -> Limits {
Limits {
max_texture_dimension_1d: 1024,
max_texture_dimension_2d: 1024,
max_texture_dimension_3d: 32,
max_texture_array_layers: 32,
max_bind_groups: 2,
max_dynamic_uniform_buffers_per_pipeline_layout: 2,
max_dynamic_storage_buffers_per_pipeline_layout: 2,
max_sampled_textures_per_shader_stage: 2,
max_samplers_per_shader_stage: 2,
max_storage_buffers_per_shader_stage: 2,
max_storage_textures_per_shader_stage: 2,
max_uniform_buffers_per_shader_stage: 2,
max_uniform_buffer_binding_size: 256,
max_storage_buffer_binding_size: 1 << 16,
max_vertex_buffers: 4,
max_vertex_attributes: 4,
max_vertex_buffer_array_stride: 32,
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment: 256,
min_storage_buffer_offset_alignment: 256,
}
}
fn lowest_downlevel_properties() -> DownlevelCapabilities {
DownlevelCapabilities {
flags: wgt::DownlevelFlags::empty(),