From c8bcc50ed618e972ce16cfeea7ed2d615715debb Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Thu, 9 Jul 2020 02:11:16 -0400 Subject: [PATCH] Implement PUSH_CONSTANTS feature --- player/src/main.rs | 4 +- wgpu-core/src/binding_model.rs | 112 +++++++++++++++++++++- wgpu-core/src/command/bind.rs | 51 ++++++++++ wgpu-core/src/command/bundle.rs | 156 +++++++++++++++++++++++++++++-- wgpu-core/src/command/compute.rs | 70 ++++++++++++++ wgpu-core/src/command/mod.rs | 27 ++++++ wgpu-core/src/command/render.rs | 89 +++++++++++++++++- wgpu-core/src/device/mod.rs | 101 ++++++++++++++++---- wgpu-core/src/device/trace.rs | 1 + wgpu-core/src/instance.rs | 12 ++- wgpu-core/src/lib.rs | 2 +- wgpu-types/src/lib.rs | 57 ++++++++++- 12 files changed, 646 insertions(+), 36 deletions(-) diff --git a/player/src/main.rs b/player/src/main.rs index a2fa2bd4e..f172587b2 100644 --- a/player/src/main.rs +++ b/player/src/main.rs @@ -223,12 +223,14 @@ impl GlobalExt for wgc::hub::Global { A::CreatePipelineLayout { id, bind_group_layouts, + push_constant_ranges, } => { self.device_maintain_ids::(device); self.device_create_pipeline_layout::( device, - &wgc::binding_model::PipelineLayoutDescriptor { + &wgt::PipelineLayoutDescriptor { bind_group_layouts: &bind_group_layouts, + push_constant_ranges: &push_constant_ranges, }, id, ) diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index d7733dd80..aa7f0fb89 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -3,6 +3,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ use crate::{ + device::SHADER_STAGE_COUNT, id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureViewId}, track::{TrackerSet, DUMMY_SELECTOR}, FastHashMap, LifeGuard, MultiRefCount, RefCount, Stored, MAX_BIND_GROUPS, @@ -229,12 +230,23 @@ pub struct BindGroupLayout { pub(crate) count_validator: BindingTypeMaxCountValidator, } -pub type PipelineLayoutDescriptor<'a> = wgt::PipelineLayoutDescriptor<'a, BindGroupLayoutId>; - #[derive(Clone, Debug)] pub enum PipelineLayoutError { TooManyGroups(usize), TooManyBindings(BindingTypeMaxCountError), + PushConstantRangeTooLarge { index: usize }, + MoreThanOnePushConstantRangePerStage { index: usize }, + MisalignedPushConstantRange { index: usize }, + MissingFeature(wgt::Features), +} + +#[derive(Clone, Debug)] +pub enum PushConstantUploadError { + TooLarge, + PartialRangeMatch, + MissingStages, + UnmatchedStages, + Unaligned, } #[derive(Debug)] @@ -243,6 +255,102 @@ pub struct PipelineLayout { pub(crate) device_id: Stored, pub(crate) life_guard: LifeGuard, pub(crate) bind_group_layout_ids: ArrayVec<[Stored; MAX_BIND_GROUPS]>, + pub(crate) push_constant_ranges: ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT]>, +} + +impl PipelineLayout { + /// Validate push constants match up with expected ranges. + pub(crate) fn validate_push_constant_ranges( + &self, + stages: wgt::ShaderStage, + offset: u32, + end_offset: u32, + ) -> Result<(), PushConstantUploadError> { + // Don't need to validate size against the push constant size limit here, + // as push constant ranges are already validated to be within bounds, + // and we validate that they are within the ranges. + + if offset % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + log::error!( + "Provided push constant offset {} must be aligned to {}", + offset, + wgt::PUSH_CONSTANT_ALIGNMENT + ); + return Err(PushConstantUploadError::Unaligned); + } + + // Push constant validation looks very complicated on the surface, but + // the problem can be range-reduced pretty well. 
+ // // Push constants require (summarized from the vulkan spec): // 1. For each byte in the range and for each shader stage in stageFlags, // there must be a push constant range in the layout that includes that // byte and that stage. // 2. For each byte in the range and for each push constant range that overlaps that byte, // `stages` must include all stages in that push constant range’s `stages`. // // However there are some additional constraints that help us: // 3. Each shader stage is served by at most one push constant range. // i.e. if one range has VERTEX, no other range has VERTEX // // Therefore we can simplify the checks in the following ways: // - Because 3 guarantees that each stage maps to a unique push constant range, // when we check for 1, we can simply check that our entire updated range // is within a push constant range. i.e. our range for a specific stage cannot // intersect more than one push constant range. + let mut used_stages = wgt::ShaderStage::NONE; + for (idx, range) in self.push_constant_ranges.iter().enumerate() { + // contains not intersects due to 2 + if stages.contains(range.stages) { + if !(range.range.start <= offset && end_offset <= range.range.end) { + log::error!( + "Provided push constant with indices {}..{} overruns matching push constant range (index {}) with stage(s) {:?} and indices {}..{}", + offset, + end_offset, + idx, + range.stages, + range.range.start, + range.range.end, + ); + return Err(PushConstantUploadError::TooLarge); + } + used_stages |= range.stages; + } else if stages.intersects(range.stages) { + // Will be caught by used stages check below, but we can do this because of 1 + // and is more helpful to the user. 
+ log::error!( + "Provided push constant is for stage(s) {:?}, stage with a partial match found at index {} with stage(s) {:?}, however push constants must be complete matches.", + stages, + idx, + range.stages, + ); + return Err(PushConstantUploadError::PartialRangeMatch); + } + + // The push constant range intersects range we are uploading + if offset < range.range.end && range.range.start < end_offset { + // But requires stages we don't provide + if !stages.contains(range.stages) { + log::error!( + "Provided push constant is for stage(s) {:?}, but intersects a push constant range (at index {}) with stage(s) {:?}. Push constants must provide the stages for all ranges they intersect.", + stages, + idx, + range.stages, + ); + return Err(PushConstantUploadError::MissingStages); + } + } + } + if used_stages != stages { + log::error!( + "Provided push constant is for stage(s) {:?}, however the pipeline layout has no push constant range for the stage(s) {:?}.", + stages, + stages - used_stages + ); + return Err(PushConstantUploadError::UnmatchedStages); + } + Ok(()) + } } #[repr(C)] diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs index 31a45afe1..c38f67045 100644 --- a/wgpu-core/src/command/bind.rs +++ b/wgpu-core/src/command/bind.rs @@ -4,6 +4,7 @@ use crate::{ binding_model::BindGroup, + device::SHADER_STAGE_COUNT, hub::GfxBackend, id::{BindGroupId, BindGroupLayoutId, PipelineLayoutId}, Stored, MAX_BIND_GROUPS, @@ -213,3 +214,53 @@ impl Binder { .unwrap_or_else(|| self.entries.len()) } } + +struct PushConstantChange { + stages: wgt::ShaderStage, + offset: u32, + enable: bool, +} + +/// Break up possibly overlapping push constant ranges into a set of non-overlapping ranges +/// which contain all the stage flags of the original ranges. This allows us to zero out (or write any value) +/// to every possible value. 
+pub fn compute_nonoverlapping_ranges( + ranges: &[wgt::PushConstantRange], +) -> ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT * 2]> { + if ranges.is_empty() { + return ArrayVec::new(); + } + debug_assert!(ranges.len() <= SHADER_STAGE_COUNT); + + let mut breaks: ArrayVec<[PushConstantChange; SHADER_STAGE_COUNT * 2]> = ArrayVec::new(); + for range in ranges { + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.start, + enable: true, + }); + breaks.push(PushConstantChange { + stages: range.stages, + offset: range.range.end, + enable: false, + }); + } + breaks.sort_unstable_by_key(|change| change.offset); + + let mut output_ranges = ArrayVec::new(); + let mut position = 0_u32; + let mut stages = wgt::ShaderStage::NONE; + + for bk in breaks { + if bk.offset - position > 0 && !stages.is_empty() { + output_ranges.push(wgt::PushConstantRange { + stages, + range: position..bk.offset, + }) + } + position = bk.offset; + stages.set(bk.stages, bk.enable); + } + + output_ranges +} diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index e5aa06391..266681485 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -39,7 +39,7 @@ use crate::{ command::{BasePass, RenderCommand}, conv, - device::{AttachmentData, Label, RenderPassContext, MAX_VERTEX_BUFFERS}, + device::{AttachmentData, Label, RenderPassContext, MAX_VERTEX_BUFFERS, SHADER_STAGE_COUNT}, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Input, Storage, Token}, id, resource::BufferUse, @@ -179,6 +179,41 @@ impl RenderBundle { }; comb.bind_vertex_buffers(slot, iter::once((&buffer.raw, range))); } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id + .expect("Must have a pipeline bound to use push constants")]; + + if let Some(values_offset) = values_offset { + let values_end_offset = (values_offset + size_bytes / 4) 
as usize; + let data_slice = &self.base.push_constant_data + [(values_offset as usize)..values_end_offset]; + + comb.push_graphics_constants( + &pipeline_layout.raw, + conv::map_shader_stage_flags(stages), + offset, + &data_slice, + ) + } else { + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| { + comb.push_graphics_constants( + &pipeline_layout.raw, + conv::map_shader_stage_flags(stages), + clear_offset, + clear_data, + ); + }, + ); + } + } RenderCommand::Draw { vertex_count, instance_count, @@ -372,12 +407,37 @@ impl BindState { } } +#[derive(Debug)] +struct PushConstantState { + ranges: ArrayVec<[wgt::PushConstantRange; SHADER_STAGE_COUNT]>, + is_dirty: bool, +} +impl PushConstantState { + fn new() -> Self { + Self { + ranges: ArrayVec::new(), + is_dirty: false, + } + } + + fn set_push_constants(&mut self, new_ranges: &[wgt::PushConstantRange]) -> bool { + if &*self.ranges != new_ranges { + self.ranges = new_ranges.iter().cloned().collect(); + self.is_dirty = true; + true + } else { + false + } + } +} + #[derive(Debug)] struct State { trackers: TrackerSet, index: IndexState, vertex: ArrayVec<[VertexState; MAX_VERTEX_BUFFERS]>, bind: ArrayVec<[BindState; MAX_BIND_GROUPS]>, + push_constant_ranges: PushConstantState, raw_dynamic_offsets: Vec, flat_dynamic_offsets: Vec, used_bind_groups: usize, @@ -431,6 +491,7 @@ impl State { index_format: wgt::IndexFormat, vertex_strides: &[(wgt::BufferAddress, wgt::InputStepMode)], layout_ids: &[Stored], + push_constant_layouts: &[wgt::PushConstantRange], ) { self.index.set_format(index_format); for (vs, &(stride, step_mode)) in self.vertex.iter_mut().zip(vertex_strides) { @@ -440,20 +501,50 @@ impl State { vs.is_dirty = true; } } + + let push_constants_changed = self + .push_constant_ranges + .set_push_constants(push_constant_layouts); + self.used_bind_groups = layout_ids.len(); - let invalid_from = self - .bind - .iter() - .zip(layout_ids) - .position(|(bs, layout_id)| match bs.bind_group { - 
Some((_, bgl_id)) => bgl_id != layout_id.value, - None => false, - }); + let invalid_from = if push_constants_changed { + Some(0) + } else { + self.bind + .iter() + .zip(layout_ids) + .position(|(bs, layout_id)| match bs.bind_group { + Some((_, bgl_id)) => bgl_id != layout_id.value, + None => false, + }) + }; if let Some(slot) = invalid_from { self.invalidate_group_from(slot); } } + fn flush_push_constants(&mut self) -> Option> { + let is_dirty = self.push_constant_ranges.is_dirty; + + if is_dirty { + let nonoverlapping_ranges = + super::bind::compute_nonoverlapping_ranges(&self.push_constant_ranges.ranges); + + Some( + nonoverlapping_ranges + .into_iter() + .map(|range| RenderCommand::SetPushConstant { + stages: range.stages, + offset: range.range.start, + size_bytes: range.range.end - range.range.start, + values_offset: None, + }), + ) + } else { + None + } + } + fn flush_vertices(&mut self) -> impl Iterator + '_ { self.vertex .iter_mut() @@ -514,12 +605,14 @@ impl Global { .map(|_| VertexState::new()) .collect(), bind: (0..MAX_BIND_GROUPS).map(|_| BindState::new()).collect(), + push_constant_ranges: PushConstantState::new(), raw_dynamic_offsets: Vec::new(), flat_dynamic_offsets: Vec::new(), used_bind_groups: 0, }; let mut commands = Vec::new(); let mut base = bundle_encoder.base.as_ref(); + let mut pipeline_layout_id = None::; for &command in base.commands { match command { @@ -572,13 +665,18 @@ impl Global { //TODO: check read-only depth let layout = &pipeline_layout_guard[pipeline.layout_id.value]; + pipeline_layout_id = Some(pipeline.layout_id.value); state.set_pipeline( pipeline.index_format, &pipeline.vertex_strides, &layout.bind_group_layout_ids, + &layout.push_constant_ranges, ); commands.push(command); + if let Some(iter) = state.flush_push_constants() { + commands.extend(iter) + } } RenderCommand::SetIndexBuffer { buffer_id, @@ -621,6 +719,23 @@ impl Global { }; state.vertex[slot as usize].set_buffer(buffer_id, offset..end); } + 
RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: _, + } => { + let end_offset = offset + size_bytes; + + let pipeline_layout = &pipeline_layout_guard[pipeline_layout_id + .expect("Must have a pipeline bound to use push constants")]; + + pipeline_layout + .validate_push_constant_ranges(stages, offset, end_offset) + .unwrap(); + + commands.push(command); + } RenderCommand::Draw { vertex_count, instance_count, @@ -737,6 +852,7 @@ impl Global { commands, dynamic_offsets: state.flat_dynamic_offsets, string_data: Vec::new(), + push_constant_data: Vec::new(), }, device_id: Stored { value: bundle_encoder.parent_id, @@ -854,6 +970,28 @@ pub mod bundle_ffi { }); } + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_bundle_set_push_constants( + pass: &mut RenderBundleEncoder, + stages: wgt::ShaderStage, + offset: u32, + size_bytes: u32, + data: *const u32, + ) { + span!(_guard, DEBUG, "RenderBundle::set_push_constants"); + let data_slice = slice::from_raw_parts(data, (size_bytes / 4) as usize); + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. 
Don't set 4gb of push constants per RenderBundle.", + ); + pass.base.push_constant_data.extend_from_slice(data_slice); + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + #[no_mangle] pub extern "C" fn wgpu_render_bundle_draw( bundle: &mut RenderBundleEncoder, diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index a06c337ad..9051ac62e 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -36,6 +36,11 @@ pub enum ComputeCommand { bind_group_id: id::BindGroupId, }, SetPipeline(id::ComputePipelineId), + SetPushConstant { + offset: u32, + size_bytes: u32, + values_offset: u32, + }, Dispatch([u32; 3]), DispatchIndirect { buffer_id: id::BufferId, @@ -257,8 +262,53 @@ impl Global { } } } + + // Clear push constant ranges + let non_overlapping = super::bind::compute_nonoverlapping_ranges( + &pipeline_layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.push_compute_constants( + &pipeline_layout.raw, + clear_offset, + clear_data, + ); + }, + ); + } } } + ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset, + } => { + let end_offset_bytes = offset + size_bytes; + let values_end_offset = (values_offset + size_bytes / 4) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout = &pipeline_layout_guard[state + .binder + .pipeline_layout_id + .expect("Must have a pipeline bound to use push constants")]; + + pipeline_layout + .validate_push_constant_ranges( + wgt::ShaderStage::COMPUTE, + offset, + end_offset_bytes, + ) + .unwrap(); + + unsafe { raw.push_compute_constants(&pipeline_layout.raw, offset, data_slice) } + } ComputeCommand::Dispatch(groups) 
=> { assert_eq!( state.pipeline, @@ -365,6 +415,26 @@ pub mod compute_ffi { .push(ComputeCommand::SetPipeline(pipeline_id)); } + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_set_push_constant( + pass: &mut ComputePass, + offset: u32, + size_bytes: u32, + data: *const u32, + ) { + span!(_guard, DEBUG, "RenderPass::set_push_constant"); + let data_slice = slice::from_raw_parts(data, (size_bytes / 4) as usize); + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. Don't set 4gb of push constants per ComputePass.", + ); + pass.base.push_constant_data.extend_from_slice(data_slice); + pass.base.commands.push(ComputeCommand::SetPushConstant { + offset, + size_bytes, + values_offset: value_offset, + }); + } + #[no_mangle] pub extern "C" fn wgpu_compute_pass_dispatch( pass: &mut ComputePass, diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index e8ab91ac0..423b965bd 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -29,6 +29,8 @@ use hal::command::CommandBuffer as _; use std::thread::ThreadId; +const PUSH_CONSTANT_CLEAR_ARRAY: &[u32] = &[0_u32; 64]; + #[derive(Debug)] pub struct CommandBuffer { pub(crate) raw: Vec, @@ -89,6 +91,7 @@ pub struct BasePassRef<'a, C> { pub commands: &'a [C], pub dynamic_offsets: &'a [wgt::DynamicOffset], pub string_data: &'a [u8], + pub push_constant_data: &'a [u32], } #[doc(hidden)] @@ -105,6 +108,7 @@ pub struct BasePass { pub commands: Vec, pub dynamic_offsets: Vec, pub string_data: Vec, + pub push_constant_data: Vec, } impl BasePass { @@ -113,6 +117,7 @@ impl BasePass { commands: Vec::new(), dynamic_offsets: Vec::new(), string_data: Vec::new(), + push_constant_data: Vec::new(), } } @@ -122,6 +127,7 @@ impl BasePass { commands: base.commands.to_vec(), dynamic_offsets: base.dynamic_offsets.to_vec(), string_data: base.string_data.to_vec(), + push_constant_data: base.push_constant_data.to_vec(), } } @@ -130,6 +136,7 @@ impl 
BasePass { commands: &self.commands, dynamic_offsets: &self.dynamic_offsets, string_data: &self.string_data, + push_constant_data: &self.push_constant_data, } } } @@ -215,3 +222,23 @@ impl Global { } } } + +fn push_constant_clear(offset: u32, size_bytes: u32, mut push_fn: PushFn) +where + PushFn: FnMut(u32, &[u32]), +{ + let mut count_words = 0_u32; + let size_words = size_bytes / 4; + while count_words < size_words { + let count_bytes = count_words * 4; + let size_to_write_words = + (size_words - count_words).min(PUSH_CONSTANT_CLEAR_ARRAY.len() as u32); + + push_fn( + offset + count_bytes, + &PUSH_CONSTANT_CLEAR_ARRAY[0..size_to_write_words as usize], + ); + + count_words += size_to_write_words; + } +} diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 94475d295..4110078b6 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -107,6 +107,15 @@ pub enum RenderCommand { depth_max: f32, }, SetScissor(Rect), + SetPushConstant { + stages: wgt::ShaderStage, + offset: u32, + size_bytes: u32, + /// None means there is no data and the data should be an array of zeros. + /// + /// Facilitates clears in renderbundles which explicitly do their clears. 
+ values_offset: Option, + }, Draw { vertex_count: u32, instance_count: u32, @@ -174,12 +183,13 @@ impl fmt::Debug for RenderPass { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { write!( f, - "RenderPass {{ encoder_id: {:?}, color_targets: {:?}, depth_stencil_target: {:?}, data: {:?} commands and {:?} dynamic offsets }}", + "RenderPass {{ encoder_id: {:?}, color_targets: {:?}, depth_stencil_target: {:?}, data: {:?} commands, {:?} dynamic offsets, and {:?} push constant u32s }}", self.parent_id, self.color_targets, self.depth_stencil_target, self.base.commands.len(), - self.base.dynamic_offsets.len() + self.base.dynamic_offsets.len(), + self.base.push_constant_data.len(), ) } } @@ -976,6 +986,27 @@ impl Global { } } } + + // Clear push constant ranges + let non_overlapping = super::bind::compute_nonoverlapping_ranges( + &pipeline_layout.push_constant_ranges, + ); + for range in non_overlapping { + let offset = range.range.start; + let size_bytes = range.range.end - offset; + super::push_constant_clear( + offset, + size_bytes, + |clear_offset, clear_data| unsafe { + raw.push_graphics_constants( + &pipeline_layout.raw, + conv::map_shader_stage_flags(range.stages), + clear_offset, + clear_data, + ); + }, + ); + } } // Rebind index buffer if the index format has changed with the pipeline switch @@ -1112,6 +1143,38 @@ impl Global { ); } } + RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset, + } => { + let values_offset = values_offset + .expect("values_offset of None is only for internal use in renderbundles"); + + let end_offset_bytes = offset + size_bytes; + let values_end_offset = (values_offset + size_bytes / 4) as usize; + let data_slice = + &base.push_constant_data[(values_offset as usize)..values_end_offset]; + + let pipeline_layout = &pipeline_layout_guard[state + .binder + .pipeline_layout_id + .expect("Must have a pipeline bound to use push constants")]; + + pipeline_layout + .validate_push_constant_ranges(stages, 
offset, end_offset_bytes) + .unwrap(); + + unsafe { + raw.push_graphics_constants( + &pipeline_layout.raw, + conv::map_shader_stage_flags(stages), + offset, + data_slice, + ) + } + } RenderCommand::SetScissor(ref rect) => { use std::{convert::TryFrom, i16}; let r = hal::pso::Rect { @@ -1573,6 +1636,28 @@ pub mod render_ffi { .push(RenderCommand::SetScissor(Rect { x, y, w, h })); } + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_set_push_constants( + pass: &mut RenderPass, + stages: wgt::ShaderStage, + offset: u32, + size_bytes: u32, + data: *const u32, + ) { + span!(_guard, DEBUG, "RenderPass::set_push_constants"); + let data_slice = slice::from_raw_parts(data, (size_bytes / 4) as usize); + let value_offset = pass.base.push_constant_data.len().try_into().expect( + "Ran out of push constant space. Don't set 4gb of push constants per RenderPass.", + ); + pass.base.push_constant_data.extend_from_slice(data_slice); + pass.base.commands.push(RenderCommand::SetPushConstant { + stages, + offset, + size_bytes, + values_offset: Some(value_offset), + }); + } + #[no_mangle] pub extern "C" fn wgpu_render_pass_draw( pass: &mut RenderPass, diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 4ea92fbc3..10edc64ef 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -24,8 +24,6 @@ use hal::{ use parking_lot::{Mutex, MutexGuard}; use wgt::{BufferAddress, BufferSize, InputStepMode, TextureDimension, TextureFormat}; -#[cfg(feature = "trace")] -use std::slice; use std::{ collections::hash_map::Entry, ffi, iter, marker::PhantomData, mem, ops::Range, ptr, sync::atomic::Ordering, @@ -58,6 +56,7 @@ pub const MAX_COLOR_TARGETS: usize = 4; pub const MAX_MIP_LEVELS: usize = 16; pub const MAX_VERTEX_BUFFERS: usize = 16; pub const MAX_ANISOTROPY: u8 = 16; +pub const SHADER_STAGE_COUNT: usize = 3; pub fn all_buffer_stages() -> hal::pso::PipelineStage { use hal::pso::PipelineStage as Ps; @@ -1349,7 +1348,7 @@ impl Global { pub fn 
device_create_pipeline_layout( &self, device_id: id::DeviceId, - desc: &binding_model::PipelineLayoutDescriptor, + desc: &wgt::PipelineLayoutDescriptor, id_in: Input, ) -> Result { span!(_guard, INFO, "Device::create_pipeline_layout"); @@ -1359,29 +1358,97 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); let device = &device_guard[device_id]; - let bind_group_layout_ids = desc.bind_group_layouts; - - if bind_group_layout_ids.len() > (device.limits.max_bind_groups as usize) { + if desc.bind_group_layouts.len() > (device.limits.max_bind_groups as usize) { + log::error!( + "Bind group layout count {} exceeds device bind group limit {}", + desc.bind_group_layouts.len(), + device.limits.max_bind_groups + ); return Err(binding_model::PipelineLayoutError::TooManyGroups( - bind_group_layout_ids.len(), + desc.bind_group_layouts.len(), )); } - // TODO: push constants + if !desc.push_constant_ranges.is_empty() + && !device.features.contains(wgt::Features::PUSH_CONSTANTS) + { + return Err(binding_model::PipelineLayoutError::MissingFeature( + wgt::Features::PUSH_CONSTANTS, + )); + } + let mut used_stages = wgt::ShaderStage::empty(); + for (index, pc) in desc.push_constant_ranges.iter().enumerate() { + if pc.stages.intersects(used_stages) { + log::error!( + "Push constant range (index {}) provides for stage(s) {:?} but there exists another range that provides stage(s) {:?}. 
Each stage may only be provided by one range.", + index, + pc.stages, + pc.stages & used_stages, + ); + return Err( + binding_model::PipelineLayoutError::MoreThanOnePushConstantRangePerStage { + index, + }, + ); + } + used_stages |= pc.stages; + + if device.limits.max_push_constant_size < pc.range.end { + log::error!( + "Push constant range (index {}) has range {}..{} which exceeds device push constant size limit 0..{}", + index, + pc.range.start, + pc.range.end, + device.limits.max_push_constant_size + ); + return Err( + binding_model::PipelineLayoutError::PushConstantRangeTooLarge { index }, + ); + } + + if pc.range.start % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + log::error!( + "Push constant range (index {}) start {} must be aligned to {}", + index, + pc.range.start, + wgt::PUSH_CONSTANT_ALIGNMENT + ); + return Err( + binding_model::PipelineLayoutError::MisalignedPushConstantRange { index }, + ); + } + if pc.range.end % wgt::PUSH_CONSTANT_ALIGNMENT != 0 { + log::error!( + "Push constant range (index {}) end {} must be aligned to {}", + index, + pc.range.end, + wgt::PUSH_CONSTANT_ALIGNMENT + ); + return Err( + binding_model::PipelineLayoutError::MisalignedPushConstantRange { index }, + ); + } + } + let mut count_validator = binding_model::BindingTypeMaxCountValidator::default(); let pipeline_layout = { let (bind_group_layout_guard, _) = hub.bind_group_layouts.read(&mut token); - for &id in bind_group_layout_ids { + for &id in desc.bind_group_layouts { let bind_group_layout = &bind_group_layout_guard[id]; count_validator.merge(&bind_group_layout.count_validator); } - let descriptor_set_layouts = bind_group_layout_ids + let descriptor_set_layouts = desc + .bind_group_layouts .iter() .map(|&id| &bind_group_layout_guard[id].raw); + let push_constants = desc + .push_constant_ranges + .iter() + .map(|pc| (conv::map_shader_stage_flags(pc.stages), pc.range.clone())); unsafe { device .raw - .create_pipeline_layout(descriptor_set_layouts, &[]) + 
.create_pipeline_layout(descriptor_set_layouts, push_constants) } .unwrap() }; @@ -1398,7 +1465,7 @@ impl Global { life_guard: LifeGuard::new(), bind_group_layout_ids: { let (bind_group_layout_guard, _) = hub.bind_group_layouts.read(&mut token); - bind_group_layout_ids + desc.bind_group_layouts .iter() .map(|&id| Stored { value: id, @@ -1406,6 +1473,7 @@ impl Global { }) .collect() }, + push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(), }; let id = hub @@ -1415,7 +1483,8 @@ impl Global { match device.trace { Some(ref trace) => trace.lock().add(trace::Action::CreatePipelineLayout { id, - bind_group_layouts: bind_group_layout_ids.to_owned(), + bind_group_layouts: desc.bind_group_layouts.to_owned(), + push_constant_ranges: desc.push_constant_ranges.iter().cloned().collect(), }), None => (), }; @@ -1952,7 +2021,7 @@ impl Global { Some(ref trace) => { let mut trace = trace.lock(); let data = trace.make_binary("spv", unsafe { - slice::from_raw_parts(spv.as_ptr() as *const u8, spv.len() * 4) + std::slice::from_raw_parts(spv.as_ptr() as *const u8, spv.len() * 4) }); trace.add(trace::Action::CreateShaderModule { id, data }); } @@ -2908,7 +2977,7 @@ impl Global { Some(ref trace) => { let mut trace = trace.lock(); let data = trace.make_binary("bin", unsafe { - slice::from_raw_parts(ptr.as_ptr(), buffer.size as usize) + std::slice::from_raw_parts(ptr.as_ptr(), buffer.size as usize) }); trace.add(trace::Action::WriteBuffer { id: buffer_id, @@ -2970,7 +3039,7 @@ impl Global { let mut trace = trace.lock(); let size = sub_range.size_to(buffer.size); let data = trace.make_binary("bin", unsafe { - slice::from_raw_parts(ptr.as_ptr(), size as usize) + std::slice::from_raw_parts(ptr.as_ptr(), size as usize) }); trace.add(trace::Action::WriteBuffer { id: buffer_id, diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 95a3894d7..1865a574f 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -166,6 +166,7 @@ 
pub enum Action { CreatePipelineLayout { id: id::PipelineLayoutId, bind_group_layouts: Vec, + push_constant_ranges: Vec, }, DestroyPipelineLayout(id::PipelineLayoutId), CreateBindGroup { diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index ce859cd48..0dafdbd85 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -25,6 +25,12 @@ use hal::{ }; use std::fmt::Display; +/// Size that is guaranteed to be available in push constants. +/// +/// This is needed because non-vulkan backends might not +/// provide a push-constant size limit. +const MIN_PUSH_CONSTANT_SIZE: u32 = 128; + pub type RequestAdapterOptions = wgt::RequestAdapterOptions; #[derive(Debug)] @@ -118,7 +124,9 @@ impl Adapter { let adapter_features = raw.physical_device.features(); - let mut features = wgt::Features::default() | wgt::Features::MAPPABLE_PRIMARY_BUFFERS; + let mut features = wgt::Features::default() + | wgt::Features::MAPPABLE_PRIMARY_BUFFERS + | wgt::Features::PUSH_CONSTANTS; features.set( wgt::Features::SAMPLED_TEXTURE_BINDING_ARRAY, adapter_features.contains(hal::Features::TEXTURE_DESCRIPTOR_ARRAY), @@ -184,6 +192,8 @@ impl Adapter { .max(default_limits.max_uniform_buffers_per_shader_stage), max_uniform_buffer_binding_size: (adapter_limits.max_uniform_buffer_range as u32) .max(default_limits.max_uniform_buffer_binding_size), + max_push_constant_size: (adapter_limits.max_push_constants_size as u32) + .max(MIN_PUSH_CONSTANT_SIZE), // As an extension, the default is always 0, so define a separate minimum. 
}; Adapter { diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index 6990c946b..1c1818067 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -54,7 +54,7 @@ use atomic::{AtomicUsize, Ordering}; use std::{os::raw::c_char, ptr}; -const MAX_BIND_GROUPS: usize = 8; +pub const MAX_BIND_GROUPS: usize = 8; type SubmissionIndex = usize; type Index = u32; diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index c8711a9d7..eb75f7662 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -10,6 +10,7 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use std::ops::Range; /// Integral type used for buffer offsets. pub type BufferAddress = u64; @@ -26,6 +27,8 @@ pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256; pub const BIND_BUFFER_ALIGNMENT: BufferAddress = 256; /// Buffer to buffer copy offsets and sizes must be aligned to this number. pub const COPY_BUFFER_ALIGNMENT: BufferAddress = 4; +/// Alignment all push constants need +pub const PUSH_CONSTANT_ALIGNMENT: u32 = 4; /// Backends supported by wgpu. #[repr(u8)] @@ -215,15 +218,32 @@ bitflags::bitflags! { /// /// This allows the use of a buffer containing the actual number of draw calls. /// /// Supported platforms: /// - DX12 /// - Vulkan 1.2+ (or VK_KHR_draw_indirect_count) /// /// This is a native only feature. const MULTI_DRAW_INDIRECT_COUNT = 0x0000_0000_0040_0000; - /// Features which are part of the upstream webgpu standard + /// Allows the use of push constants: small, fast bits of memory that can be updated + /// inside a [`RenderPass`]. + /// + /// Allows the user to call [`RenderPass::set_push_constants`], provide a non-empty array + /// to [`PipelineLayoutDescriptor`], and provide a non-zero limit to [`Limits::max_push_constant_size`]. 
+ /// + /// A block of push constants can be declared with `layout(push_constant) uniform Name {..}` in shaders. + /// + /// Supported platforms: + /// - DX12 + /// - Vulkan + /// - Metal + /// - DX11 (emulated with uniforms) + /// - OpenGL (emulated with uniforms) + /// + /// This is a native only feature. + const PUSH_CONSTANTS = 0x0000_0000_0080_0000; + /// Features which are part of the upstream WebGPU standard. const ALL_WEBGPU = 0x0000_0000_0000_FFFF; - /// Features that are only available when targeting native (not web) + /// Features that are only available when targeting native (not web). const ALL_NATIVE = 0xFFFF_FFFF_FFFF_0000; } } @@ -263,6 +283,16 @@ pub struct Limits { pub max_uniform_buffers_per_shader_stage: u32, /// Maximum size in bytes of a binding to a uniform buffer. Defaults to 16384. Higher is "better". pub max_uniform_buffer_binding_size: u32, + /// Amount of storage available for push constants in bytes. Defaults to 0. Higher is "better". + /// Requesting more than 0 during device creation requires [`Features::PUSH_CONSTANTS`] to be enabled. + /// + /// Expect the size to be: + /// - Vulkan: 128-256 bytes + /// - DX12: 256 bytes + /// - Metal: 4096 bytes + /// - DX11 & OpenGL don't natively support push constants, and are emulated with uniforms, + /// so this number is less useful. + pub max_push_constant_size: u32, } impl Default for Limits { @@ -277,6 +307,7 @@ impl Default for Limits { max_storage_textures_per_shader_stage: 4, max_uniform_buffers_per_shader_stage: 12, max_uniform_buffer_binding_size: 16384, + max_push_constant_size: 0, } } } @@ -305,8 +336,7 @@ bitflags::bitflags! { /// /// `ShaderStage::VERTEX | ShaderStage::FRAGMENT` #[repr(transparent)] - #[cfg_attr(feature = "trace", derive(Serialize))] - #[cfg_attr(feature = "replay", derive(Deserialize))] + #[cfg_attr(feature = "serde", derive(Deserialize, Serialize))] pub struct ShaderStage: u32 { /// Binding is not visible from any shader stage const NONE = 0; @@ -1494,6 +1524,25 @@ pub struct PipelineLayoutDescriptor<'a, B> { /// Bind groups that this pipeline uses. 
The first entry will provide all the bindings for /// "set = 0", second entry will provide all the bindings for "set = 1" etc. pub bind_group_layouts: &'a [B], + /// Set of push constant ranges this pipeline uses. Each shader stage that uses push constants + /// must define the range in push constant memory that corresponds to its single `layout(push_constant)` + /// uniform block. + /// + /// If this array is non-empty, the [`Features::PUSH_CONSTANTS`] must be enabled. + pub push_constant_ranges: &'a [PushConstantRange], +} + +/// A range of push constant memory to pass to a shader stage. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "trace", derive(Serialize))] +#[cfg_attr(feature = "replay", derive(Deserialize))] +pub struct PushConstantRange { + /// Stage push constant range is visible from. Each stage can only be served by at most one range. + /// One range can serve multiple stages however. + pub stages: ShaderStage, + /// Range in push constant memory to use for the stage. The end of the range must not exceed [`Limits::max_push_constant_size`]. + /// Start and end must be aligned to 4 bytes ([`PUSH_CONSTANT_ALIGNMENT`]). + pub range: Range, } /// Describes a programmable pipeline stage.