Add limits for compute storage and max invocations

This commit is contained in:
Dzmitry Malyshau 2021-12-08 14:36:17 -05:00
parent c1c855bb98
commit a8caa367ee
9 changed files with 149 additions and 19 deletions

View File

@ -64,6 +64,13 @@ fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec<FailedLim
compare!(max_push_constant_size, Less);
compare!(min_uniform_buffer_offset_alignment, Greater);
compare!(min_storage_buffer_offset_alignment, Greater);
compare!(max_inter_stage_shader_components, Less);
compare!(max_compute_workgroup_storage_size, Less);
compare!(max_compute_invocations_per_workgroup, Less);
compare!(max_compute_workgroup_size_x, Less);
compare!(max_compute_workgroup_size_y, Less);
compare!(max_compute_workgroup_size_z, Less);
compare!(max_compute_workgroups_per_dimension, Less);
failed
}

View File

@ -45,6 +45,16 @@ impl fmt::Display for NumericDimension {
}
}
impl NumericDimension {
fn num_components(&self) -> u32 {
match *self {
Self::Scalar => 1,
Self::Vector(size) => size as u32,
Self::Matrix(w, h) => w as u32 * h as u32,
}
}
}
#[derive(Clone, Copy, Debug)]
pub struct NumericType {
dim: NumericDimension,
@ -226,9 +236,15 @@ pub enum StageError {
#[error("shader module is invalid")]
InvalidModule,
#[error(
"shader entry point current workgroup size {current:?} must be less or equal to {limit:?}"
"shader entry point current workgroup size {current:?} must be less or equal to {limit:?} of total {total}"
)]
InvalidComputeEntryPoint { current: [u32; 3], limit: [u32; 3] },
InvalidWorkgroupSize {
current: [u32; 3],
limit: [u32; 3],
total: u32,
},
#[error("shader uses {used} inter-stage components above the limit of {limit}")]
TooManyVaryings { used: u32, limit: u32 },
#[error("unable to find entry point '{0}'")]
MissingEntryPoint(String),
#[error("shader global {0:?} is not available in the layout pipeline layout")]
@ -1083,18 +1099,24 @@ impl Interface {
self.limits.max_compute_workgroup_size_y,
self.limits.max_compute_workgroup_size_z,
];
let total_invocations = entry_point.workgroup_size.iter().product::<u32>();
if entry_point.workgroup_size[0] > max_workgroup_size_limits[0]
if entry_point.workgroup_size.iter().any(|&s| s == 0)
|| total_invocations > self.limits.max_compute_invocations_per_workgroup
|| entry_point.workgroup_size[0] > max_workgroup_size_limits[0]
|| entry_point.workgroup_size[1] > max_workgroup_size_limits[1]
|| entry_point.workgroup_size[2] > max_workgroup_size_limits[2]
{
return Err(StageError::InvalidComputeEntryPoint {
return Err(StageError::InvalidWorkgroupSize {
current: entry_point.workgroup_size,
limit: max_workgroup_size_limits,
total: self.limits.max_compute_invocations_per_workgroup,
});
}
}
let mut inter_stage_components = 0;
// check inputs compatibility
for input in entry_point.inputs.iter() {
match *input {
@ -1104,11 +1126,12 @@ impl Interface {
.get(&location)
.ok_or(InputError::Missing)
.and_then(|provided| {
let compatible = match shader_stage {
let (compatible, num_components) = match shader_stage {
// For vertex attributes, there are defaults filled out
// by the driver if data is not provided.
naga::ShaderStage::Vertex => {
iv.ty.is_compatible_with(&provided.ty)
// vertex inputs don't count towards inter-stage
(iv.ty.is_compatible_with(&provided.ty), 0)
}
naga::ShaderStage::Fragment => {
if iv.interpolation != provided.interpolation {
@ -1121,28 +1144,53 @@ impl Interface {
provided.sampling,
));
}
iv.ty.is_subtype_of(&provided.ty)
(
iv.ty.is_subtype_of(&provided.ty),
iv.ty.dim.num_components(),
)
}
naga::ShaderStage::Compute => false,
naga::ShaderStage::Compute => (false, 0),
};
if compatible {
Ok(())
Ok(num_components)
} else {
Err(InputError::WrongType(provided.ty))
}
});
if let Err(error) = result {
return Err(StageError::Input {
location,
var: iv.clone(),
error,
});
match result {
Ok(num_components) => {
inter_stage_components += num_components;
}
Err(error) => {
return Err(StageError::Input {
location,
var: iv.clone(),
error,
})
}
}
}
Varying::BuiltIn(_) => {}
}
}
if shader_stage == naga::ShaderStage::Vertex {
for output in entry_point.outputs.iter() {
//TODO: count builtins towards the limit?
inter_stage_components += match *output {
Varying::Local { ref iv, .. } => iv.ty.dim.num_components(),
Varying::BuiltIn(_) => 0,
};
}
}
if inter_stage_components > self.limits.max_inter_stage_shader_components {
return Err(StageError::TooManyVaryings {
used: inter_stage_components,
limit: self.limits.max_inter_stage_shader_components,
});
}
let outputs = entry_point
.outputs
.iter()

View File

@ -247,12 +247,15 @@ impl super::Adapter {
min_uniform_buffer_offset_alignment:
d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
min_storage_buffer_offset_alignment: 4,
max_inter_stage_shader_components: base.max_inter_stage_shader_components,
max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO?
max_compute_invocations_per_workgroup:
d3d12::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP,
max_compute_workgroup_size_x: d3d12::D3D12_CS_THREAD_GROUP_MAX_X,
max_compute_workgroup_size_y: d3d12::D3D12_CS_THREAD_GROUP_MAX_Y,
max_compute_workgroup_size_z: d3d12::D3D12_CS_THREAD_GROUP_MAX_Z,
max_compute_workgroups_per_dimension:
d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
// TODO?
},
alignments: crate::Alignments {
buffer_copy_offset: wgt::BufferSize::new(

View File

@ -387,6 +387,18 @@ impl super::Adapter {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,
max_inter_stage_shader_components: gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS)
as u32,
max_compute_workgroup_storage_size: if supports_work_group_params {
gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) as u32
} else {
0
},
max_compute_invocations_per_workgroup: if supports_work_group_params {
gl.get_parameter_i32(glow::MAX_COMPUTE_WORK_GROUP_INVOCATIONS) as u32
} else {
0
},
max_compute_workgroup_size_x: if supports_work_group_params {
gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0) as u32
} else {

View File

@ -787,6 +787,30 @@ impl super::PrivateCapabilities {
} else {
4
},
max_varying_components: if Self::supports_any(
device,
&[
MTLFeatureSet::macOS_GPUFamily1_v1,
MTLFeatureSet::macOS_GPUFamily2_v1,
],
) {
128
} else {
60
},
max_threads_per_group: if Self::supports_any(
device,
&[
MTLFeatureSet::iOS_GPUFamily4_v2,
MTLFeatureSet::iOS_GPUFamily5_v1,
MTLFeatureSet::macOS_GPUFamily1_v1,
MTLFeatureSet::macOS_GPUFamily2_v1,
],
) {
1024
} else {
512
},
max_total_threadgroup_memory: if Self::supports_any(
device,
&[
@ -949,6 +973,9 @@ impl super::PrivateCapabilities {
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
//TODO: double-check how these match Metal feature set tables
max_inter_stage_shader_components: self.max_varying_components,
max_compute_workgroup_storage_size: self.max_total_threadgroup_memory,
max_compute_invocations_per_workgroup: self.max_threads_per_group,
max_compute_workgroup_size_x: 256,
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,

View File

@ -212,6 +212,8 @@ struct PrivateCapabilities {
max_texture_layers: u64,
max_fragment_input_components: u64,
max_color_render_targets: u8,
max_varying_components: u32,
max_threads_per_group: u32,
max_total_threadgroup_memory: u32,
sample_count_mask: u8,
supports_debug_markers: bool,

View File

@ -696,6 +696,11 @@ impl PhysicalDeviceCapabilities {
max_push_constant_size: limits.max_push_constants_size,
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
max_inter_stage_shader_components: limits
.max_vertex_output_components
.min(limits.max_fragment_input_components),
max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size,
max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations,
max_compute_workgroup_size_x: max_compute_workgroup_sizes[0],
max_compute_workgroup_size_y: max_compute_workgroup_sizes[1],
max_compute_workgroup_size_z: max_compute_workgroup_sizes[2],

View File

@ -28,6 +28,7 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
}
}
}
println!("\tLimits:");
let wgpu::Limits {
max_texture_dimension_1d,
@ -50,10 +51,13 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
max_push_constant_size,
min_uniform_buffer_offset_alignment,
min_storage_buffer_offset_alignment,
max_inter_stage_shader_components,
max_compute_workgroup_storage_size,
max_compute_invocations_per_workgroup,
max_compute_workgroup_size_x,
max_compute_workgroup_size_y,
max_compute_workgroup_size_z,
max_compute_workgroups_per_dimension
max_compute_workgroups_per_dimension,
} = limits;
println!("\t\tMax Texture Dimension 1d: {}", max_texture_dimension_1d);
println!("\t\tMax Texture Dimension 2d: {}", max_texture_dimension_2d);
@ -75,10 +79,14 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
println!("\t\tMax Push Constant Size: {}", max_push_constant_size);
println!("\t\tMin Uniform Buffer Offset Alignment: {}", min_uniform_buffer_offset_alignment);
println!("\t\tMin Storage Buffer Offset Alignment: {}", min_storage_buffer_offset_alignment);
println!("\t\tMax Inter-Stage Shader Component: {}", max_inter_stage_shader_components);
println!("\t\tMax Compute Workgroup Storage Size: {}", max_compute_workgroup_storage_size);
println!("\t\tMax Compute Invocations Per Workgroup: {}", max_compute_invocations_per_workgroup);
println!("\t\tMax Compute Workgroup Size X: {}", max_compute_workgroup_size_x);
println!("\t\tMax Compute Workgroup Size Y: {}", max_compute_workgroup_size_y);
println!("\t\tMax Compute Workgroup Size Z: {}", max_compute_workgroup_size_z);
println!("\t\tMax Compute Workgroups Per Dimension: {}", max_compute_workgroups_per_dimension);
println!("\t\tMax Compute Workgroups Per Dimension: {}", max_compute_workgroups_per_dimension);
println!("\tDownlevel Properties:");
let wgpu::DownlevelCapabilities {
shader_model,

View File

@ -657,7 +657,13 @@ pub struct Limits {
/// when creating a `BindGroup`, or for `set_bind_group` `dynamicOffsets`.
/// Defaults to 256. Lower is "better".
pub min_storage_buffer_offset_alignment: u32,
/// Maximum allowed number of components (scalars) of input or output locations for
/// inter-stage communication (vertex outputs to fragment inputs).
pub max_inter_stage_shader_components: u32,
/// Maximum number of bytes used for workgroup memory in a compute entry point.
pub max_compute_workgroup_storage_size: u32,
/// Maximum value of the product of the `workgroup_size` dimensions for a compute entry-point.
pub max_compute_invocations_per_workgroup: u32,
/// The maximum value of the workgroup_size X dimension for a compute stage `ShaderModule` entry-point.
/// Defaults to 256.
pub max_compute_workgroup_size_x: u32,
@ -695,6 +701,9 @@ impl Default for Limits {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment: 256,
min_storage_buffer_offset_alignment: 256,
max_inter_stage_shader_components: 60,
max_compute_workgroup_storage_size: 16352,
max_compute_invocations_per_workgroup: 256,
max_compute_workgroup_size_x: 256,
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,
@ -727,6 +736,9 @@ impl Limits {
max_push_constant_size: 0,
min_uniform_buffer_offset_alignment: 256,
min_storage_buffer_offset_alignment: 256,
max_inter_stage_shader_components: 60,
max_compute_workgroup_storage_size: 16352,
max_compute_invocations_per_workgroup: 256,
max_compute_workgroup_size_x: 256,
max_compute_workgroup_size_y: 256,
max_compute_workgroup_size_z: 64,
@ -743,6 +755,12 @@ impl Limits {
max_dynamic_storage_buffers_per_pipeline_layout: 0,
max_storage_buffer_binding_size: 0,
max_vertex_buffer_array_stride: 255,
max_compute_workgroup_storage_size: 0,
max_compute_invocations_per_workgroup: 0,
max_compute_workgroup_size_x: 0,
max_compute_workgroup_size_y: 0,
max_compute_workgroup_size_z: 0,
max_compute_workgroups_per_dimension: 0,
// Most of the values should be the same as the downlevel defaults
..Self::downlevel_defaults()