mirror of
https://github.com/gfx-rs/wgpu.git
synced 2025-02-16 17:02:32 +00:00
Add limits for compute storage and max invocations
This commit is contained in:
parent
c1c855bb98
commit
a8caa367ee
@ -64,6 +64,13 @@ fn check_limits(requested: &wgt::Limits, allowed: &wgt::Limits) -> Vec<FailedLim
|
||||
compare!(max_push_constant_size, Less);
|
||||
compare!(min_uniform_buffer_offset_alignment, Greater);
|
||||
compare!(min_storage_buffer_offset_alignment, Greater);
|
||||
compare!(max_inter_stage_shader_components, Less);
|
||||
compare!(max_compute_workgroup_storage_size, Less);
|
||||
compare!(max_compute_invocations_per_workgroup, Less);
|
||||
compare!(max_compute_workgroup_size_x, Less);
|
||||
compare!(max_compute_workgroup_size_y, Less);
|
||||
compare!(max_compute_workgroup_size_z, Less);
|
||||
compare!(max_compute_workgroups_per_dimension, Less);
|
||||
failed
|
||||
}
|
||||
|
||||
|
@ -45,6 +45,16 @@ impl fmt::Display for NumericDimension {
|
||||
}
|
||||
}
|
||||
|
||||
impl NumericDimension {
|
||||
fn num_components(&self) -> u32 {
|
||||
match *self {
|
||||
Self::Scalar => 1,
|
||||
Self::Vector(size) => size as u32,
|
||||
Self::Matrix(w, h) => w as u32 * h as u32,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct NumericType {
|
||||
dim: NumericDimension,
|
||||
@ -226,9 +236,15 @@ pub enum StageError {
|
||||
#[error("shader module is invalid")]
|
||||
InvalidModule,
|
||||
#[error(
|
||||
"shader entry point current workgroup size {current:?} must be less or equal to {limit:?}"
|
||||
"shader entry point current workgroup size {current:?} must be less or equal to {limit:?} of total {total}"
|
||||
)]
|
||||
InvalidComputeEntryPoint { current: [u32; 3], limit: [u32; 3] },
|
||||
InvalidWorkgroupSize {
|
||||
current: [u32; 3],
|
||||
limit: [u32; 3],
|
||||
total: u32,
|
||||
},
|
||||
#[error("shader uses {used} inter-stage components above the limit of {limit}")]
|
||||
TooManyVaryings { used: u32, limit: u32 },
|
||||
#[error("unable to find entry point '{0}'")]
|
||||
MissingEntryPoint(String),
|
||||
#[error("shader global {0:?} is not available in the layout pipeline layout")]
|
||||
@ -1083,18 +1099,24 @@ impl Interface {
|
||||
self.limits.max_compute_workgroup_size_y,
|
||||
self.limits.max_compute_workgroup_size_z,
|
||||
];
|
||||
let total_invocations = entry_point.workgroup_size.iter().product::<u32>();
|
||||
|
||||
if entry_point.workgroup_size[0] > max_workgroup_size_limits[0]
|
||||
if entry_point.workgroup_size.iter().any(|&s| s == 0)
|
||||
|| total_invocations > self.limits.max_compute_invocations_per_workgroup
|
||||
|| entry_point.workgroup_size[0] > max_workgroup_size_limits[0]
|
||||
|| entry_point.workgroup_size[1] > max_workgroup_size_limits[1]
|
||||
|| entry_point.workgroup_size[2] > max_workgroup_size_limits[2]
|
||||
{
|
||||
return Err(StageError::InvalidComputeEntryPoint {
|
||||
return Err(StageError::InvalidWorkgroupSize {
|
||||
current: entry_point.workgroup_size,
|
||||
limit: max_workgroup_size_limits,
|
||||
total: self.limits.max_compute_invocations_per_workgroup,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
let mut inter_stage_components = 0;
|
||||
|
||||
// check inputs compatibility
|
||||
for input in entry_point.inputs.iter() {
|
||||
match *input {
|
||||
@ -1104,11 +1126,12 @@ impl Interface {
|
||||
.get(&location)
|
||||
.ok_or(InputError::Missing)
|
||||
.and_then(|provided| {
|
||||
let compatible = match shader_stage {
|
||||
let (compatible, num_components) = match shader_stage {
|
||||
// For vertex attributes, there are defaults filled out
|
||||
// by the driver if data is not provided.
|
||||
naga::ShaderStage::Vertex => {
|
||||
iv.ty.is_compatible_with(&provided.ty)
|
||||
// vertex inputs don't count towards inter-stage
|
||||
(iv.ty.is_compatible_with(&provided.ty), 0)
|
||||
}
|
||||
naga::ShaderStage::Fragment => {
|
||||
if iv.interpolation != provided.interpolation {
|
||||
@ -1121,28 +1144,53 @@ impl Interface {
|
||||
provided.sampling,
|
||||
));
|
||||
}
|
||||
iv.ty.is_subtype_of(&provided.ty)
|
||||
(
|
||||
iv.ty.is_subtype_of(&provided.ty),
|
||||
iv.ty.dim.num_components(),
|
||||
)
|
||||
}
|
||||
naga::ShaderStage::Compute => false,
|
||||
naga::ShaderStage::Compute => (false, 0),
|
||||
};
|
||||
if compatible {
|
||||
Ok(())
|
||||
Ok(num_components)
|
||||
} else {
|
||||
Err(InputError::WrongType(provided.ty))
|
||||
}
|
||||
});
|
||||
if let Err(error) = result {
|
||||
return Err(StageError::Input {
|
||||
location,
|
||||
var: iv.clone(),
|
||||
error,
|
||||
});
|
||||
match result {
|
||||
Ok(num_components) => {
|
||||
inter_stage_components += num_components;
|
||||
}
|
||||
Err(error) => {
|
||||
return Err(StageError::Input {
|
||||
location,
|
||||
var: iv.clone(),
|
||||
error,
|
||||
})
|
||||
}
|
||||
}
|
||||
}
|
||||
Varying::BuiltIn(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
if shader_stage == naga::ShaderStage::Vertex {
|
||||
for output in entry_point.outputs.iter() {
|
||||
//TODO: count builtins towards the limit?
|
||||
inter_stage_components += match *output {
|
||||
Varying::Local { ref iv, .. } => iv.ty.dim.num_components(),
|
||||
Varying::BuiltIn(_) => 0,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
if inter_stage_components > self.limits.max_inter_stage_shader_components {
|
||||
return Err(StageError::TooManyVaryings {
|
||||
used: inter_stage_components,
|
||||
limit: self.limits.max_inter_stage_shader_components,
|
||||
});
|
||||
}
|
||||
|
||||
let outputs = entry_point
|
||||
.outputs
|
||||
.iter()
|
||||
|
@ -247,12 +247,15 @@ impl super::Adapter {
|
||||
min_uniform_buffer_offset_alignment:
|
||||
d3d12::D3D12_CONSTANT_BUFFER_DATA_PLACEMENT_ALIGNMENT,
|
||||
min_storage_buffer_offset_alignment: 4,
|
||||
max_inter_stage_shader_components: base.max_inter_stage_shader_components,
|
||||
max_compute_workgroup_storage_size: base.max_compute_workgroup_storage_size, //TODO?
|
||||
max_compute_invocations_per_workgroup:
|
||||
d3d12::D3D12_CS_4_X_THREAD_GROUP_MAX_THREADS_PER_GROUP,
|
||||
max_compute_workgroup_size_x: d3d12::D3D12_CS_THREAD_GROUP_MAX_X,
|
||||
max_compute_workgroup_size_y: d3d12::D3D12_CS_THREAD_GROUP_MAX_Y,
|
||||
max_compute_workgroup_size_z: d3d12::D3D12_CS_THREAD_GROUP_MAX_Z,
|
||||
max_compute_workgroups_per_dimension:
|
||||
d3d12::D3D12_CS_DISPATCH_MAX_THREAD_GROUPS_PER_DIMENSION,
|
||||
// TODO?
|
||||
},
|
||||
alignments: crate::Alignments {
|
||||
buffer_copy_offset: wgt::BufferSize::new(
|
||||
|
@ -387,6 +387,18 @@ impl super::Adapter {
|
||||
max_push_constant_size: 0,
|
||||
min_uniform_buffer_offset_alignment,
|
||||
min_storage_buffer_offset_alignment,
|
||||
max_inter_stage_shader_components: gl.get_parameter_i32(glow::MAX_VARYING_COMPONENTS)
|
||||
as u32,
|
||||
max_compute_workgroup_storage_size: if supports_work_group_params {
|
||||
gl.get_parameter_i32(glow::MAX_COMPUTE_SHARED_MEMORY_SIZE) as u32
|
||||
} else {
|
||||
0
|
||||
},
|
||||
max_compute_invocations_per_workgroup: if supports_work_group_params {
|
||||
gl.get_parameter_i32(glow::MAX_COMPUTE_WORK_GROUP_INVOCATIONS) as u32
|
||||
} else {
|
||||
0
|
||||
},
|
||||
max_compute_workgroup_size_x: if supports_work_group_params {
|
||||
gl.get_parameter_indexed_i32(glow::MAX_COMPUTE_WORK_GROUP_SIZE, 0) as u32
|
||||
} else {
|
||||
|
@ -787,6 +787,30 @@ impl super::PrivateCapabilities {
|
||||
} else {
|
||||
4
|
||||
},
|
||||
max_varying_components: if Self::supports_any(
|
||||
device,
|
||||
&[
|
||||
MTLFeatureSet::macOS_GPUFamily1_v1,
|
||||
MTLFeatureSet::macOS_GPUFamily2_v1,
|
||||
],
|
||||
) {
|
||||
128
|
||||
} else {
|
||||
60
|
||||
},
|
||||
max_threads_per_group: if Self::supports_any(
|
||||
device,
|
||||
&[
|
||||
MTLFeatureSet::iOS_GPUFamily4_v2,
|
||||
MTLFeatureSet::iOS_GPUFamily5_v1,
|
||||
MTLFeatureSet::macOS_GPUFamily1_v1,
|
||||
MTLFeatureSet::macOS_GPUFamily2_v1,
|
||||
],
|
||||
) {
|
||||
1024
|
||||
} else {
|
||||
512
|
||||
},
|
||||
max_total_threadgroup_memory: if Self::supports_any(
|
||||
device,
|
||||
&[
|
||||
@ -949,6 +973,9 @@ impl super::PrivateCapabilities {
|
||||
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
|
||||
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
|
||||
//TODO: double-check how these match Metal feature set tables
|
||||
max_inter_stage_shader_components: self.max_varying_components,
|
||||
max_compute_workgroup_storage_size: self.max_total_threadgroup_memory,
|
||||
max_compute_invocations_per_workgroup: self.max_threads_per_group,
|
||||
max_compute_workgroup_size_x: 256,
|
||||
max_compute_workgroup_size_y: 256,
|
||||
max_compute_workgroup_size_z: 64,
|
||||
|
@ -212,6 +212,8 @@ struct PrivateCapabilities {
|
||||
max_texture_layers: u64,
|
||||
max_fragment_input_components: u64,
|
||||
max_color_render_targets: u8,
|
||||
max_varying_components: u32,
|
||||
max_threads_per_group: u32,
|
||||
max_total_threadgroup_memory: u32,
|
||||
sample_count_mask: u8,
|
||||
supports_debug_markers: bool,
|
||||
|
@ -696,6 +696,11 @@ impl PhysicalDeviceCapabilities {
|
||||
max_push_constant_size: limits.max_push_constants_size,
|
||||
min_uniform_buffer_offset_alignment: limits.min_uniform_buffer_offset_alignment as u32,
|
||||
min_storage_buffer_offset_alignment: limits.min_storage_buffer_offset_alignment as u32,
|
||||
max_inter_stage_shader_components: limits
|
||||
.max_vertex_output_components
|
||||
.min(limits.max_fragment_input_components),
|
||||
max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size,
|
||||
max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations,
|
||||
max_compute_workgroup_size_x: max_compute_workgroup_sizes[0],
|
||||
max_compute_workgroup_size_y: max_compute_workgroup_sizes[1],
|
||||
max_compute_workgroup_size_z: max_compute_workgroup_sizes[2],
|
||||
|
@ -28,6 +28,7 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
println!("\tLimits:");
|
||||
let wgpu::Limits {
|
||||
max_texture_dimension_1d,
|
||||
@ -50,10 +51,13 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
|
||||
max_push_constant_size,
|
||||
min_uniform_buffer_offset_alignment,
|
||||
min_storage_buffer_offset_alignment,
|
||||
max_inter_stage_shader_components,
|
||||
max_compute_workgroup_storage_size,
|
||||
max_compute_invocations_per_workgroup,
|
||||
max_compute_workgroup_size_x,
|
||||
max_compute_workgroup_size_y,
|
||||
max_compute_workgroup_size_z,
|
||||
max_compute_workgroups_per_dimension
|
||||
max_compute_workgroups_per_dimension,
|
||||
} = limits;
|
||||
println!("\t\tMax Texture Dimension 1d: {}", max_texture_dimension_1d);
|
||||
println!("\t\tMax Texture Dimension 2d: {}", max_texture_dimension_2d);
|
||||
@ -75,10 +79,14 @@ fn print_info_from_adapter(adapter: &wgpu::Adapter, idx: usize) {
|
||||
println!("\t\tMax Push Constant Size: {}", max_push_constant_size);
|
||||
println!("\t\tMin Uniform Buffer Offset Alignment: {}", min_uniform_buffer_offset_alignment);
|
||||
println!("\t\tMin Storage Buffer Offset Alignment: {}", min_storage_buffer_offset_alignment);
|
||||
println!("\t\tMax Inter-Stage Shader Component: {}", max_inter_stage_shader_components);
|
||||
println!("\t\tMax Compute Workgroup Storage Size: {}", max_compute_workgroup_storage_size);
|
||||
println!("\t\tMax Compute Invocations Per Workgroup: {}", max_compute_invocations_per_workgroup);
|
||||
println!("\t\tMax Compute Workgroup Size X: {}", max_compute_workgroup_size_x);
|
||||
println!("\t\tMax Compute Workgroup Size Y: {}", max_compute_workgroup_size_y);
|
||||
println!("\t\tMax Compute Workgroup Size Z: {}", max_compute_workgroup_size_z);
|
||||
println!("\t\tMax Compute Workgroups Per Dimension: {}", max_compute_workgroups_per_dimension);
|
||||
println!("\t\tMax Compute Workgroups Per Dimension: {}", max_compute_workgroups_per_dimension);
|
||||
|
||||
println!("\tDownlevel Properties:");
|
||||
let wgpu::DownlevelCapabilities {
|
||||
shader_model,
|
||||
|
@ -657,7 +657,13 @@ pub struct Limits {
|
||||
/// when creating a `BindGroup`, or for `set_bind_group` `dynamicOffsets`.
|
||||
/// Defaults to 256. Lower is "better".
|
||||
pub min_storage_buffer_offset_alignment: u32,
|
||||
|
||||
/// Maximum allowed number of components (scalars) of input or output locations for
|
||||
/// inter-stage communication (vertex outputs to fragment inputs).
|
||||
pub max_inter_stage_shader_components: u32,
|
||||
/// Maximum number of bytes used for workgroup memory in a compute entry point.
|
||||
pub max_compute_workgroup_storage_size: u32,
|
||||
/// Maximum value of the product of the `workgroup_size` dimensions for a compute entry-point.
|
||||
pub max_compute_invocations_per_workgroup: u32,
|
||||
/// The maximum value of the workgroup_size X dimension for a compute stage `ShaderModule` entry-point.
|
||||
/// Defaults to 256.
|
||||
pub max_compute_workgroup_size_x: u32,
|
||||
@ -695,6 +701,9 @@ impl Default for Limits {
|
||||
max_push_constant_size: 0,
|
||||
min_uniform_buffer_offset_alignment: 256,
|
||||
min_storage_buffer_offset_alignment: 256,
|
||||
max_inter_stage_shader_components: 60,
|
||||
max_compute_workgroup_storage_size: 16352,
|
||||
max_compute_invocations_per_workgroup: 256,
|
||||
max_compute_workgroup_size_x: 256,
|
||||
max_compute_workgroup_size_y: 256,
|
||||
max_compute_workgroup_size_z: 64,
|
||||
@ -727,6 +736,9 @@ impl Limits {
|
||||
max_push_constant_size: 0,
|
||||
min_uniform_buffer_offset_alignment: 256,
|
||||
min_storage_buffer_offset_alignment: 256,
|
||||
max_inter_stage_shader_components: 60,
|
||||
max_compute_workgroup_storage_size: 16352,
|
||||
max_compute_invocations_per_workgroup: 256,
|
||||
max_compute_workgroup_size_x: 256,
|
||||
max_compute_workgroup_size_y: 256,
|
||||
max_compute_workgroup_size_z: 64,
|
||||
@ -743,6 +755,12 @@ impl Limits {
|
||||
max_dynamic_storage_buffers_per_pipeline_layout: 0,
|
||||
max_storage_buffer_binding_size: 0,
|
||||
max_vertex_buffer_array_stride: 255,
|
||||
max_compute_workgroup_storage_size: 0,
|
||||
max_compute_invocations_per_workgroup: 0,
|
||||
max_compute_workgroup_size_x: 0,
|
||||
max_compute_workgroup_size_y: 0,
|
||||
max_compute_workgroup_size_z: 0,
|
||||
max_compute_workgroups_per_dimension: 0,
|
||||
|
||||
// Most of the values should be the same as the downlevel defaults
|
||||
..Self::downlevel_defaults()
|
||||
|
Loading…
Reference in New Issue
Block a user