From 7aa57537016c732ca62bbb88bee5674ad29f19f0 Mon Sep 17 00:00:00 2001
From: Dzmitry Malyshau
Date: Wed, 10 Jun 2020 09:32:51 -0400
Subject: [PATCH] Re-architect the bundles using normalized command streams.

This is a major change in how the bundles are implemented. Instead of
transparently injecting them into the pass command stream, we are now
treating bundles as first-class API objects and API-tracing them
accordingly. The bundle contains a normalized command stream that is
very easy to inject into a native command buffer multiple times.

---
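Note: the `fill_compute_commands` / `fill_render_commands` helpers that the
player calls below absorb the offset-slicing loops it used to open-code
(visible in the removed lines of the first hunks). Their actual bodies are
added elsewhere in this patch (wgpu-core/src/command/, not shown in this
excerpt); the following is only a sketch of what the compute variant
presumably does, reconstructed from the loop it replaces:

    impl RawPass {
        // Sketch, not the shipped implementation: re-encode a command stream
        // plus its flat array of dynamic offsets. Every `SetBindGroup` command
        // is followed by its next `num_dynamic_offsets` offsets, taken off the
        // front of `offsets`.
        pub unsafe fn fill_compute_commands(
            &mut self,
            commands: &[ComputeCommand],
            mut offsets: &[wgt::DynamicOffset],
        ) {
            for com in commands {
                self.encode(com);
                if let ComputeCommand::SetBindGroup {
                    num_dynamic_offsets,
                    ..
                } = *com
                {
                    self.encode_slice(&offsets[..num_dynamic_offsets as usize]);
                    offsets = &offsets[num_dynamic_offsets as usize..];
                }
            }
        }
    }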
 player/src/main.rs               |  54 +--
 wgpu-core/src/command/bind.rs    |  11 +-
 wgpu-core/src/command/bundle.rs  | 720 +++++++++++++++++++++++++++----
 wgpu-core/src/command/compute.rs |  21 +-
 wgpu-core/src/command/mod.rs     |  10 +-
 wgpu-core/src/command/render.rs  | 155 ++++---
 wgpu-core/src/device/life.rs     |  49 ++-
 wgpu-core/src/device/mod.rs      |  46 +-
 wgpu-core/src/device/trace.rs    |  48 ++-
 wgpu-core/src/hub.rs             |   3 +-
 wgpu-core/src/pipeline.rs        |   1 -
 wgpu-core/src/track/mod.rs       |  10 +-
 wgpu-types/src/lib.rs            |  16 +-
 13 files changed, 910 insertions(+), 234 deletions(-)

diff --git a/player/src/main.rs b/player/src/main.rs
index 3d360eeba..4a7ba5d2b 100644
--- a/player/src/main.rs
+++ b/player/src/main.rs
@@ -143,19 +143,8 @@ impl GlobalExt for wgc::hub::Global<IdentityPassThroughFactory> {
                 commands,
                 dynamic_offsets,
             } => unsafe {
-                let mut offsets = &dynamic_offsets[..];
                 let mut pass = wgc::command::RawPass::new_compute(encoder);
-                for com in commands {
-                    pass.encode(&com);
-                    if let wgc::command::ComputeCommand::SetBindGroup {
-                        num_dynamic_offsets,
-                        ..
-                    } = com
-                    {
-                        pass.encode_slice(&offsets[..num_dynamic_offsets as usize]);
-                        offsets = &offsets[num_dynamic_offsets as usize..];
-                    }
-                }
+                pass.fill_compute_commands(&commands, &dynamic_offsets);
                 let (data, _) = pass.finish_compute();
                 self.command_encoder_run_compute_pass::<B>(encoder, &data);
             },
@@ -165,7 +154,6 @@ impl GlobalExt for wgc::hub::Global<IdentityPassThroughFactory> {
                 commands,
                 dynamic_offsets,
             } => unsafe {
-                let mut offsets = &dynamic_offsets[..];
                 let mut pass = wgc::command::RawPass::new_render(
                     encoder,
                     &wgc::command::RenderPassDescriptor {
@@ -174,17 +162,7 @@ impl GlobalExt for wgc::hub::Global<IdentityPassThroughFactory> {
                         depth_stencil_attachment: target_depth_stencil.as_ref(),
                     },
                 );
-                for com in commands {
-                    pass.encode(&com);
-                    if let wgc::command::RenderCommand::SetBindGroup {
-                        num_dynamic_offsets,
-                        ..
-                    } = com
-                    {
-                        pass.encode_slice(&offsets[..num_dynamic_offsets as usize]);
-                        offsets = &offsets[num_dynamic_offsets as usize..];
-                    }
-                }
+                pass.fill_render_commands(&commands, &dynamic_offsets);
                 let (data, _) = pass.finish_render();
                 self.command_encoder_run_render_pass::<B>(encoder, &data);
             },
@@ -408,6 +386,34 @@ impl GlobalExt for wgc::hub::Global<IdentityPassThroughFactory> {
             A::DestroyRenderPipeline(id) => {
                 self.render_pipeline_destroy::<B>(id);
             }
+            A::CreateRenderBundle {
+                id,
+                desc,
+                commands,
+                dynamic_offsets,
+            } => {
+                let label = Label::new(&desc.label);
+                let mut bundle_encoder = wgc::command::RenderBundleEncoder::new(
+                    &wgt::RenderBundleEncoderDescriptor {
+                        label: None,
+                        color_formats: &desc.color_formats,
+                        depth_stencil_format: desc.depth_stencil_format,
+                        sample_count: desc.sample_count,
+                    },
+                    device,
+                );
+                bundle_encoder.fill_commands(&commands, &dynamic_offsets);
+                self.render_bundle_encoder_finish::<B>(
+                    bundle_encoder,
+                    &wgt::RenderBundleDescriptor {
+                        label: label.as_ptr(),
+                    },
+                    id,
+                );
+            }
+            A::DestroyRenderBundle(id) => {
+                self.render_bundle_destroy::<B>(id);
+            }
             A::WriteBuffer {
                 id,
                 data,
diff --git a/wgpu-core/src/command/bind.rs b/wgpu-core/src/command/bind.rs
index 1c2d5724e..375f0b78b 100644
--- a/wgpu-core/src/command/bind.rs
+++ b/wgpu-core/src/command/bind.rs
@@ -6,14 +6,13 @@ use crate::{
     binding_model::BindGroup,
     hub::GfxBackend,
     id::{BindGroupId, BindGroupLayoutId, PipelineLayoutId},
-    Stored,
+    Stored, MAX_BIND_GROUPS,
 };

-use smallvec::{smallvec, SmallVec};
+use arrayvec::ArrayVec;
 use std::slice;
 use wgt::DynamicOffset;

-pub const DEFAULT_BIND_GROUPS: usize = 4;
 type BindGroupMask = u8;

 #[derive(Clone, Debug)]
@@ -134,14 +133,16 @@ impl BindGroupEntry {
 #[derive(Debug)]
 pub struct Binder {
     pub(crate) pipeline_layout_id: Option<PipelineLayoutId>, //TODO: strongly `Stored`
-    pub(crate) entries: SmallVec<[BindGroupEntry; DEFAULT_BIND_GROUPS]>,
+    pub(crate) entries: ArrayVec<[BindGroupEntry; MAX_BIND_GROUPS]>,
 }

 impl Binder {
     pub(crate) fn new(max_bind_groups: u32) -> Self {
         Self {
             pipeline_layout_id: None,
-            entries: smallvec![Default::default(); max_bind_groups as usize],
+            entries: (0..max_bind_groups)
+                .map(|_| BindGroupEntry::default())
+                .collect(),
         }
     }

diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs
index 3b27ad4f8..3de385a7c 100644
--- a/wgpu-core/src/command/bundle.rs
+++ b/wgpu-core/src/command/bundle.rs
@@ -2,39 +2,75 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

+/*! Render Bundles
+
+ ## Software implementation
+
+ The path from nothing to using a render bundle consists of 3 phases.
+
+ ### Initial command encoding
+
+ The user creates a `RenderBundleEncoder` and populates it by issuing commands
+ from the `bundle_ffi` module, just like with `RenderPass`, except that the
+ set of available commands is reduced. Everything is written into a `RawPass`.
+
+ ### Bundle baking
+
+ Once the commands are encoded, the user calls `render_bundle_encoder_finish`.
+ This is perhaps the most complex part of the logic. It consumes the
+ commands stored in `RawPass`, while validating everything, tracking the state,
+ and re-recording the commands into a separate `Vec<RenderCommand>`. It
+ doesn't actually execute any commands.
+
+ More importantly, the produced vector of commands is "normalized", which
+ means it can be executed verbatim without any state tracking. More
+ formally, a "normalized" command stream guarantees that any state required
+ by a draw call is set explicitly by one of the commands between the draw
+ call and the last pipeline change.
+
+ ### Execution
+
+ When the bundle is used in an actual render pass, `RenderBundle::execute` is
+ called. It goes through the commands and issues them into the native command
+ buffer. Thanks to the "normalized" property, it doesn't track any bind group
+ invalidations or index format changes.
+!*/
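To make the "normalized" property concrete, here is a self-contained toy
model of the de-duplication that `render_bundle_encoder_finish` performs
(the `Cmd` type and `normalize` function are illustrative stand-ins, not
wgpu-core items): a redundant re-bind is dropped, and any state a draw
depends on is re-emitted after a pipeline change, so replay needs no
tracking.

    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Cmd {
        SetPipeline(u32),
        SetBind(u8, u32),
        Draw,
    }

    fn normalize(input: &[Cmd]) -> Vec<Cmd> {
        let mut out = Vec::new();
        let mut pending = None;
        let mut bound = None;
        for &c in input {
            match c {
                Cmd::SetPipeline(_) => {
                    out.push(c);
                    bound = None; // a pipeline change invalidates bindings
                }
                Cmd::SetBind(slot, id) => pending = Some((slot, id)),
                Cmd::Draw => {
                    // Emit the binding only if this draw actually needs it.
                    if pending.is_some() && pending != bound {
                        let (slot, id) = pending.unwrap();
                        out.push(Cmd::SetBind(slot, id));
                        bound = pending;
                    }
                    out.push(Cmd::Draw);
                }
            }
        }
        out
    }

    fn main() {
        let recorded = [
            Cmd::SetPipeline(1),
            Cmd::SetBind(0, 7),
            Cmd::Draw,
            Cmd::SetBind(0, 7), // redundant: dropped by normalization
            Cmd::Draw,
        ];
        assert_eq!(
            normalize(&recorded),
            vec![Cmd::SetPipeline(1), Cmd::SetBind(0, 7), Cmd::Draw, Cmd::Draw]
        );
    }

The real implementation below does this with the `State` tracker and its
`flush_*` methods rather than a standalone function.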
 use crate::{
-    command::{RawPass, RenderCommand},
+    command::{PhantomSlice, RawPass, RenderCommand},
     conv,
-    device::{Label, RenderPassContext},
-    hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Input, Token},
+    device::{AttachmentData, Label, RenderPassContext, MAX_VERTEX_BUFFERS},
+    hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Input, Storage, Token},
     id,
     resource::BufferUse,
     track::TrackerSet,
-    LifeGuard, RefCount,
+    LifeGuard, RefCount, Stored, MAX_BIND_GROUPS,
 };
 use arrayvec::ArrayVec;
 use peek_poke::{Peek, Poke};
+use std::{borrow::Borrow, iter, marker::PhantomData, ops::Range};

 #[derive(Debug)]
 pub struct RenderBundleEncoder {
-    pub(crate) raw: RawPass,
+    raw: RawPass,
     pub(crate) context: RenderPassContext,
-    pub(crate) sample_count: u8,
 }

 impl RenderBundleEncoder {
     pub fn new(desc: &wgt::RenderBundleEncoderDescriptor, device_id: id::DeviceId) -> Self {
         RenderBundleEncoder {
-            raw: RawPass::from_vec::<RenderCommand>(Vec::with_capacity(1), device_id),
+            raw: RawPass::new::<RenderCommand>(device_id),
             context: RenderPassContext {
-                colors: desc.color_formats.iter().cloned().collect(),
-                resolves: ArrayVec::new(),
-                depth_stencil: desc.depth_stencil_format,
-            },
-            sample_count: {
-                let sc = desc.sample_count;
-                assert!(sc != 0 && sc <= 32 && conv::is_power_of_two(sc));
-                sc as u8
+                attachments: AttachmentData {
+                    colors: desc.color_formats.iter().cloned().collect(),
+                    resolves: ArrayVec::new(),
+                    depth_stencil: desc.depth_stencil_format,
+                },
+                sample_count: {
+                    let sc = desc.sample_count;
+                    assert!(sc != 0 && sc <= 32 && conv::is_power_of_two(sc));
+                    sc as u8
+                },
             },
         }
     }

@@ -43,6 +79,10 @@ impl RenderBundleEncoder {
         self.raw.parent
     }

+    pub fn fill_commands(&mut self, commands: &[RenderCommand], offsets: &[wgt::DynamicOffset]) {
+        unsafe { self.raw.fill_render_commands(commands, offsets) }
+    }
+
     pub fn destroy(mut self) {
         unsafe { self.raw.invalidate() };
     }
 }

 //Note: here, `RenderBundle` is just wrapping a raw stream of render commands.
 // The plan is to back it by an actual Vulkan secondary buffer, D3D12 Bundle,
 // or Metal indirect command buffer.
-//Note: there is no API tracing support for `RenderBundle` yet.
-// It's transparent with regards to the submitted render passes.

 #[derive(Debug)]
 pub struct RenderBundle {
-    pub(crate) device_ref_count: RefCount,
-    pub(crate) raw: RawPass,
-    pub(crate) trackers: TrackerSet,
+    // Normalized command stream. It can be executed verbatim,
+    // without re-binding anything on the pipeline change.
+    commands: Vec<RenderCommand>,
+    dynamic_offsets: Vec<wgt::DynamicOffset>,
+    pub(crate) device_id: Stored<id::DeviceId>,
+    pub(crate) used: TrackerSet,
     pub(crate) context: RenderPassContext,
-    pub(crate) sample_count: u8,
     pub(crate) life_guard: LifeGuard,
 }

 unsafe impl Send for RenderBundle {}
 unsafe impl Sync for RenderBundle {}
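The dynamic offsets live in a side array: each `SetBindGroup` in the
normalized stream records only `num_dynamic_offsets`, and `execute` (below)
walks the flat `dynamic_offsets` vector front to back in lockstep with the
commands. A toy illustration of that walk, with simplified stand-in types:

    struct Bundle {
        commands: Vec<Cmd>,
        dynamic_offsets: Vec<u32>,
    }

    #[derive(Clone, Copy)]
    enum Cmd {
        SetBindGroup { index: u8, num_dynamic_offsets: u8 },
        Draw,
    }

    fn execute(bundle: &Bundle) {
        let mut offsets = bundle.dynamic_offsets.as_slice();
        for com in &bundle.commands {
            match *com {
                Cmd::SetBindGroup { index, num_dynamic_offsets } => {
                    // Take this group's offsets off the front of the flat array.
                    let (now, rest) = offsets.split_at(num_dynamic_offsets as usize);
                    println!("bind group {} with dynamic offsets {:?}", index, now);
                    offsets = rest;
                }
                Cmd::Draw => println!("draw"),
            }
        }
    }

    fn main() {
        execute(&Bundle {
            commands: vec![
                Cmd::SetBindGroup { index: 0, num_dynamic_offsets: 2 },
                Cmd::Draw,
            ],
            dynamic_offsets: vec![256, 512],
        });
    }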
+
+impl RenderBundle {
+    /// Actually encode the contents into a native command buffer.
+    ///
+    /// This is partially duplicating the logic of `command_encoder_run_render_pass`.
+    /// However, the point of this function is to be lighter, since we already had
+    /// a chance to go through the commands in `render_bundle_encoder_finish`.
+    pub(crate) unsafe fn execute<B: GfxBackend>(
+        &self,
+        comb: &mut B::CommandBuffer,
+        pipeline_layout_guard: &Storage<
+            crate::binding_model::PipelineLayout<B>,
+            id::PipelineLayoutId,
+        >,
+        bind_group_guard: &Storage<crate::binding_model::BindGroup<B>, id::BindGroupId>,
+        pipeline_guard: &Storage<crate::pipeline::RenderPipeline<B>, id::RenderPipelineId>,
+        buffer_guard: &Storage<crate::resource::Buffer<B>, id::BufferId>,
+    ) {
+        use hal::command::CommandBuffer as _;
+
+        let mut offsets = self.dynamic_offsets.as_slice();
+        let mut index_type = hal::IndexType::U16;
+        let mut pipeline_layout_id = None::<id::PipelineLayoutId>;
+
+        for command in self.commands.iter() {
+            match *command {
+                RenderCommand::SetBindGroup {
+                    index,
+                    num_dynamic_offsets,
+                    bind_group_id,
+                    phantom_offsets: _,
+                } => {
+                    let bind_group = &bind_group_guard[bind_group_id];
+                    comb.bind_graphics_descriptor_sets(
+                        &pipeline_layout_guard[pipeline_layout_id.unwrap()].raw,
+                        index as usize,
+                        iter::once(bind_group.raw.raw()),
+                        &offsets[..num_dynamic_offsets as usize],
+                    );
+                    offsets = &offsets[num_dynamic_offsets as usize..];
+                }
+                RenderCommand::SetPipeline(pipeline_id) => {
+                    let pipeline = &pipeline_guard[pipeline_id];
+                    comb.bind_graphics_pipeline(&pipeline.raw);
+                    index_type = conv::map_index_format(pipeline.index_format);
+                    pipeline_layout_id = Some(pipeline.layout_id.value);
+                }
+                RenderCommand::SetIndexBuffer {
+                    buffer_id,
+                    offset,
+                    size,
+                } => {
+                    let buffer = &buffer_guard[buffer_id];
+                    let view = hal::buffer::IndexBufferView {
+                        buffer: &buffer.raw,
+                        range: hal::buffer::SubRange {
+                            offset,
+                            size: if size != wgt::BufferSize::WHOLE {
+                                Some(size.0)
+                            } else {
+                                None
+                            },
+                        },
+                        index_type,
+                    };
+
+                    comb.bind_index_buffer(view);
+                }
+                RenderCommand::SetVertexBuffer {
+                    slot,
+                    buffer_id,
+                    offset,
+                    size,
+                } => {
+                    let buffer = &buffer_guard[buffer_id];
+                    let range = hal::buffer::SubRange {
+                        offset,
+                        size: if size != wgt::BufferSize::WHOLE {
+                            Some(size.0)
+                        } else {
+                            None
+                        },
+                    };
+                    comb.bind_vertex_buffers(slot, iter::once((&buffer.raw, range)));
+                }
+                RenderCommand::Draw {
+                    vertex_count,
+                    instance_count,
+                    first_vertex,
+                    first_instance,
+                } => {
+                    comb.draw(
+                        first_vertex..first_vertex + vertex_count,
+                        first_instance..first_instance + instance_count,
+                    );
+                }
+                RenderCommand::DrawIndexed {
+                    index_count,
+                    instance_count,
+                    first_index,
+                    base_vertex,
+                    first_instance,
+                } => {
+                    comb.draw_indexed(
+                        first_index..first_index + index_count,
+                        base_vertex,
+                        first_instance..first_instance + instance_count,
+                    );
+                }
+                RenderCommand::DrawIndirect { buffer_id, offset } => {
+                    let buffer = &buffer_guard[buffer_id];
+                    comb.draw_indirect(&buffer.raw, offset, 1, 0);
+                }
+                RenderCommand::DrawIndexedIndirect { buffer_id, offset } => {
+                    let buffer = &buffer_guard[buffer_id];
+                    comb.draw_indexed_indirect(&buffer.raw, offset, 1, 0);
+                }
+                RenderCommand::ExecuteBundle(_)
+                | RenderCommand::SetBlendColor(_)
+                | RenderCommand::SetStencilReference(_)
+                | RenderCommand::SetViewport { .. }
+                | RenderCommand::SetScissor(_)
+                | RenderCommand::End => unreachable!(),
+            }
+        }
+    }
+}
+
+impl Borrow<RefCount> for RenderBundle {
+    fn borrow(&self) -> &RefCount {
+        self.life_guard.ref_count.as_ref().unwrap()
+    }
+}
+
+#[derive(Debug)]
+struct IndexState {
+    buffer: Option<id::BufferId>,
+    format: wgt::IndexFormat,
+    range: Range<wgt::BufferAddress>,
+    is_dirty: bool,
+}
+
+impl IndexState {
+    fn new() -> Self {
+        IndexState {
+            buffer: None,
+            format: wgt::IndexFormat::default(),
+            range: 0..0,
+            is_dirty: false,
+        }
+    }
+
+    fn limit(&self) -> u32 {
+        assert!(self.buffer.is_some());
+        let bytes_per_index = match self.format {
+            wgt::IndexFormat::Uint16 => 2,
+            wgt::IndexFormat::Uint32 => 4,
+        };
+        ((self.range.end - self.range.start) / bytes_per_index) as u32
+    }
+
+    fn flush(&mut self) -> Option<RenderCommand> {
+        if self.is_dirty {
+            self.is_dirty = false;
+            Some(RenderCommand::SetIndexBuffer {
+                buffer_id: self.buffer.unwrap(),
+                offset: self.range.start,
+                size: wgt::BufferSize(self.range.end - self.range.start),
+            })
+        } else {
+            None
+        }
+    }
+
+    fn set_format(&mut self, format: wgt::IndexFormat) {
+        if self.format != format {
+            self.format = format;
+            self.is_dirty = true;
+        }
+    }
+
+    fn set_buffer(&mut self, id: id::BufferId, range: Range<wgt::BufferAddress>) {
+        self.buffer = Some(id);
+        self.range = range;
+        self.is_dirty = true;
+    }
+}
+
+#[derive(Debug)]
+struct VertexState {
+    buffer: Option<id::BufferId>,
+    range: Range<wgt::BufferAddress>,
+    stride: wgt::BufferAddress,
+    rate: wgt::InputStepMode,
+    is_dirty: bool,
+}
+
+impl VertexState {
+    fn new() -> Self {
+        VertexState {
+            buffer: None,
+            range: 0..0,
+            stride: 0,
+            rate: wgt::InputStepMode::Vertex,
+            is_dirty: false,
+        }
+    }
+
+    fn set_buffer(&mut self, buffer_id: id::BufferId, range: Range<wgt::BufferAddress>) {
+        self.buffer = Some(buffer_id);
+        self.range = range;
+        self.is_dirty = true;
+    }
+
+    fn flush(&mut self, slot: u32) -> Option<RenderCommand> {
+        if self.is_dirty {
+            self.is_dirty = false;
+            Some(RenderCommand::SetVertexBuffer {
+                slot,
+                buffer_id: self.buffer.unwrap(),
+                offset: self.range.start,
+                size: wgt::BufferSize(self.range.end - self.range.start),
+            })
+        } else {
+            None
+        }
+    }
+}
+
+#[derive(Debug)]
+struct BindState {
+    bind_group: Option<(id::BindGroupId, id::BindGroupLayoutId)>,
+    dynamic_offsets: Range<usize>,
+    is_dirty: bool,
+}
+
+impl BindState {
+    fn new() -> Self {
+        BindState {
+            bind_group: None,
+            dynamic_offsets: 0..0,
+            is_dirty: false,
+        }
+    }
+
+    fn set_group(
+        &mut self,
+        bind_group_id: id::BindGroupId,
+        layout_id: id::BindGroupLayoutId,
+        dyn_offset: usize,
+        dyn_count: usize,
+    ) -> bool {
+        match self.bind_group {
+            Some((bg_id, _)) if bg_id == bind_group_id && dyn_count == 0 => false,
+            _ => {
+                self.bind_group = Some((bind_group_id, layout_id));
+                self.dynamic_offsets = dyn_offset..dyn_offset + dyn_count;
+                self.is_dirty = true;
+                true
+            }
+        }
+    }
+}
+
+#[derive(Debug)]
+struct State {
+    trackers: TrackerSet,
+    index: IndexState,
+    vertex: ArrayVec<[VertexState; MAX_VERTEX_BUFFERS]>,
+    bind: ArrayVec<[BindState; MAX_BIND_GROUPS]>,
+    raw_dynamic_offsets: Vec<wgt::DynamicOffset>,
+    flat_dynamic_offsets: Vec<wgt::DynamicOffset>,
+    used_bind_groups: usize,
+}
+
+impl State {
+    fn vertex_limits(&self) -> (u32, u32) {
+        let mut vertex_limit = !0;
+        let mut instance_limit = !0;
+        for vbs in &self.vertex {
+            if vbs.stride == 0 {
+                continue;
+            }
+            let limit = ((vbs.range.end - vbs.range.start) / vbs.stride) as u32;
+            match vbs.rate {
+                wgt::InputStepMode::Vertex => vertex_limit = vertex_limit.min(limit),
+                wgt::InputStepMode::Instance => instance_limit = instance_limit.min(limit),
+            }
+        }
+        (vertex_limit, instance_limit)
+    }
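A worked example of the limit computation above, with made-up numbers: a
per-vertex buffer bound with 64 bytes at stride 16 caps draws at 4 vertices,
and a per-instance buffer with 24 bytes at stride 8 caps them at 3 instances;
the draw calls recorded into the bundle are validated against these minima.

    fn main() {
        // (range.end - range.start) / stride, minimized per step mode:
        let vertex_limit = (64 - 0) / 16; // an InputStepMode::Vertex buffer
        let instance_limit = (24 - 0) / 8; // an InputStepMode::Instance buffer
        assert_eq!((vertex_limit, instance_limit), (4, 3));
    }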
+
+    fn invalidate_group_from(&mut self, slot: usize) {
+        for bind in self.bind[slot..].iter_mut() {
+            if bind.bind_group.is_some() {
+                bind.is_dirty = true;
+            }
+        }
+    }
+
+    fn set_bind_group(
+        &mut self,
+        slot: u8,
+        bind_group_id: id::BindGroupId,
+        layout_id: id::BindGroupLayoutId,
+        offsets: &[wgt::DynamicOffset],
+    ) {
+        if self.bind[slot as usize].set_group(
+            bind_group_id,
+            layout_id,
+            self.raw_dynamic_offsets.len(),
+            offsets.len(),
+        ) {
+            self.invalidate_group_from(slot as usize + 1);
+        }
+        self.raw_dynamic_offsets.extend(offsets);
+    }
+
+    fn set_pipeline(
+        &mut self,
+        index_format: wgt::IndexFormat,
+        vertex_strides: &[(wgt::BufferAddress, wgt::InputStepMode)],
+        layout_ids: &[Stored<id::BindGroupLayoutId>],
+    ) {
+        self.index.set_format(index_format);
+        for (vs, &(stride, step_mode)) in self.vertex.iter_mut().zip(vertex_strides) {
+            if vs.stride != stride || vs.rate != step_mode {
+                vs.stride = stride;
+                vs.rate = step_mode;
+                vs.is_dirty = true;
+            }
+        }
+        self.used_bind_groups = layout_ids.len();
+        let invalid_from = self
+            .bind
+            .iter()
+            .zip(layout_ids)
+            .position(|(bs, layout_id)| match bs.bind_group {
+                Some((_, bgl_id)) => bgl_id != layout_id.value,
+                None => false,
+            });
+        if let Some(slot) = invalid_from {
+            self.invalidate_group_from(slot);
+        }
+    }
+
+    fn flush_vertices(&mut self) -> impl Iterator<Item = RenderCommand> + '_ {
+        self.vertex
+            .iter_mut()
+            .enumerate()
+            .flat_map(|(i, vs)| vs.flush(i as u32))
+    }
+
+    fn flush_binds(&mut self) -> impl Iterator<Item = RenderCommand> + '_ {
+        for bs in self.bind[..self.used_bind_groups].iter() {
+            if bs.is_dirty {
+                self.flat_dynamic_offsets
+                    .extend_from_slice(&self.raw_dynamic_offsets[bs.dynamic_offsets.clone()]);
+            }
+        }
+        self.bind
+            .iter_mut()
+            .take(self.used_bind_groups)
+            .enumerate()
+            .flat_map(|(i, bs)| {
+                if bs.is_dirty {
+                    bs.is_dirty = false;
+                    Some(RenderCommand::SetBindGroup {
+                        index: i as u8,
+                        bind_group_id: bs.bind_group.unwrap().0,
+                        num_dynamic_offsets: (bs.dynamic_offsets.end - bs.dynamic_offsets.start)
+                            as u8,
+                        phantom_offsets: PhantomSlice::default(),
+                    })
+                } else {
+                    None
+                }
+            })
+    }
+}
+
 impl<G: GlobalIdentityHandlerFactory> Global<G> {
     pub fn render_bundle_encoder_finish<B: GfxBackend>(
         &self,
         mut bundle_encoder: RenderBundleEncoder,
         _desc: &wgt::RenderBundleDescriptor