From a22037ced05cc9747b39ff630b09582f2b06f1da Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Sun, 3 Jan 2021 02:33:29 -0500 Subject: [PATCH] Add pipeline statistics and timeline queries --- player/src/lib.rs | 32 +++ wgpu-core/src/command/bundle.rs | 6 + wgpu-core/src/command/compute.rs | 121 ++++++++- wgpu-core/src/command/draw.rs | 11 + wgpu-core/src/command/mod.rs | 10 + wgpu-core/src/command/query.rs | 424 +++++++++++++++++++++++++++++++ wgpu-core/src/command/render.rs | 135 +++++++++- wgpu-core/src/conv.rs | 37 +++ wgpu-core/src/device/life.rs | 31 +++ wgpu-core/src/device/mod.rs | 132 ++++++++++ wgpu-core/src/device/trace.rs | 16 ++ wgpu-core/src/hub.rs | 20 ++ wgpu-core/src/id.rs | 1 + wgpu-core/src/instance.rs | 36 ++- wgpu-core/src/lib.rs | 1 + wgpu-core/src/resource.rs | 36 +++ wgpu-core/src/track/mod.rs | 5 + wgpu-types/src/lib.rs | 111 +++++++- 18 files changed, 1153 insertions(+), 12 deletions(-) create mode 100644 wgpu-core/src/command/query.rs diff --git a/player/src/lib.rs b/player/src/lib.rs index 4ee3cc4b5..91bfe15ce 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -81,6 +81,28 @@ impl GlobalPlay for wgc::hub::Global { trace::Command::CopyTextureToTexture { src, dst, size } => self .command_encoder_copy_texture_to_texture::(encoder, &src, &dst, &size) .unwrap(), + trace::Command::WriteTimestamp { + query_set_id, + query_index, + } => self + .command_encoder_write_timestamp::(encoder, query_set_id, query_index) + .unwrap(), + trace::Command::ResolveQuerySet { + query_set_id, + start_query, + query_count, + destination, + destination_offset, + } => self + .command_encoder_resolve_query_set::( + encoder, + query_set_id, + start_query, + query_count, + destination, + destination_offset, + ) + .unwrap(), trace::Command::RunComputePass { base } => { self.command_encoder_run_compute_pass_impl::(encoder, base.as_ref()) .unwrap(); @@ -267,6 +289,16 @@ impl GlobalPlay for wgc::hub::Global { A::DestroyRenderBundle(id) => { 
self.render_bundle_drop::(id); } + A::CreateQuerySet { id, desc } => { + self.device_maintain_ids::(device).unwrap(); + let (_, error) = self.device_create_query_set::(device, &desc, id); + if let Some(e) = error { + panic!("{:?}", e); + } + } + A::DestroyQuerySet(id) => { + self.query_set_drop::(id); + } A::WriteBuffer { id, data, diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 6481e7bd4..e8ba2b543 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -430,6 +430,9 @@ impl RenderBundleEncoder { RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::WriteTimestamp { .. } + | RenderCommand::BeginPipelineStatisticsQuery { .. } + | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(), RenderCommand::ExecuteBundle(_) | RenderCommand::SetBlendColor(_) | RenderCommand::SetStencilReference(_) @@ -693,6 +696,9 @@ impl RenderBundle { RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::WriteTimestamp { .. } + | RenderCommand::BeginPipelineStatisticsQuery { .. 
} + | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(), RenderCommand::ExecuteBundle(_) | RenderCommand::SetBlendColor(_) | RenderCommand::SetStencilReference(_) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index fb1e8fddb..4450ae2cb 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -6,8 +6,8 @@ use crate::{ binding_model::{BindError, BindGroup, PushConstantUploadError}, command::{ bind::{Binder, LayoutChange}, - BasePass, BasePassRef, CommandBuffer, CommandEncoderError, MapPassErr, PassErrorScope, - StateChange, + end_pipeline_statistics_query, BasePass, BasePassRef, CommandBuffer, CommandEncoderError, + MapPassErr, PassErrorScope, QueryUseError, StateChange, }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, @@ -23,6 +23,7 @@ use hal::command::CommandBuffer as _; use thiserror::Error; use wgt::{BufferAddress, BufferUsage, ShaderStage}; +use crate::track::UseExtendError; use std::{fmt, iter, str}; #[doc(hidden)] @@ -61,6 +62,15 @@ pub enum ComputeCommand { color: u32, len: usize, }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, } #[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] @@ -127,6 +137,8 @@ pub enum ComputePassErrorInner { BindGroupIndexOutOfRange { index: u8, max: u32 }, #[error("compute pipeline {0:?} is invalid")] InvalidPipeline(id::ComputePipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), #[error("indirect buffer {0:?} is invalid or destroyed")] InvalidIndirectBuffer(id::BufferId), #[error(transparent)] @@ -141,6 +153,8 @@ pub enum ComputePassErrorInner { Bind(#[from] BindError), #[error(transparent)] PushConstants(#[from] PushConstantUploadError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), } /// Error 
encountered when performing a compute pass. @@ -260,6 +274,7 @@ impl Global { let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); @@ -272,6 +287,7 @@ impl Global { let mut temp_offsets = Vec::new(); let mut dynamic_offset_count = 0; let mut string_offset = 0; + let mut active_query = None; for command in base.commands { match *command { @@ -525,6 +541,62 @@ impl Global { string_offset += len; unsafe { raw.insert_debug_marker(label, color) } } + ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => { + ComputePassErrorInner::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp(raw, query_set_id, query_index, None) + .map_pass_err(scope)?; + } + ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => { + ComputePassErrorInner::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + raw, + query_set_id, + query_index, + None, + &mut active_query, + ) + .map_pass_err(scope)?; + } + ComputeCommand::EndPipelineStatisticsQuery => { + let scope = 
PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } } } @@ -680,4 +752,49 @@ pub mod compute_ffi { len: bytes.len(), }); } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_write_timestamp( + pass: &mut ComputePass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!(_guard, DEBUG, "ComputePass::write_timestamp"); + + pass.base.commands.push(ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_begin_pipeline_statistics_query( + pass: &mut ComputePass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!( + _guard, + DEBUG, + "ComputePass::begin_pipeline_statistics query" + ); + + pass.base + .commands + .push(ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_end_pipeline_statistics_query( + pass: &mut ComputePass, + ) { + span!(_guard, DEBUG, "ComputePass::end_pipeline_statistics_query"); + + pass.base + .commands + .push(ComputeCommand::EndPipelineStatisticsQuery); + } } diff --git a/wgpu-core/src/command/draw.rs b/wgpu-core/src/command/draw.rs index 87ac116e8..1df50a83c 100644 --- a/wgpu-core/src/command/draw.rs +++ b/wgpu-core/src/command/draw.rs @@ -75,6 +75,8 @@ pub enum RenderCommandError { InvalidDynamicOffsetCount { actual: usize, expected: usize }, #[error("render pipeline {0:?} is invalid")] InvalidPipeline(id::RenderPipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), #[error("Render pipeline is incompatible with render pass")] IncompatiblePipeline(#[from] crate::device::RenderPassCompatibilityError), #[error("pipeline is not compatible with the depth-stencil read-only render pass")] @@ -195,5 +197,14 @@ pub enum RenderCommand { color: u32, len: usize, }, + WriteTimestamp { + query_set_id: id::QuerySetId, 
+ query_index: u32, + }, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, ExecuteBundle(id::RenderBundleId), } diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 6a19f9378..b98ce3959 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -7,6 +7,7 @@ mod bind; mod bundle; mod compute; mod draw; +mod query; mod render; mod transfer; @@ -15,6 +16,7 @@ pub use self::allocator::CommandAllocatorError; pub use self::bundle::*; pub use self::compute::*; pub use self::draw::*; +pub use self::query::*; pub use self::render::*; pub use self::transfer::*; @@ -367,6 +369,14 @@ pub enum PassErrorScope { indirect: bool, pipeline: Option, }, + #[error("While resetting queries after the renderpass was ran")] + QueryReset, + #[error("In a write_timestamp command")] + WriteTimestamp, + #[error("In a begin_pipeline_statistics_query command")] + BeginPipelineStatisticsQuery, + #[error("In a end_pipeline_statistics_query command")] + EndPipelineStatisticsQuery, #[error("In a execute_bundle command")] ExecuteBundle, #[error("In a dispatch command, indirect:{indirect}")] diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs new file mode 100644 index 000000000..fe25f264c --- /dev/null +++ b/wgpu-core/src/command/query.rs @@ -0,0 +1,424 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use hal::command::CommandBuffer as _; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + command::{CommandBuffer, CommandEncoderError}, + device::all_buffer_stages, + hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, + id::{self, Id, TypedId}, + resource::{BufferUse, QuerySet}, + track::UseExtendError, + Epoch, FastHashMap, Index, +}; +use std::{iter, marker::PhantomData}; +use thiserror::Error; +use wgt::BufferAddress; + +#[derive(Debug)] +pub(super) struct QueryResetMap { + map: FastHashMap, Epoch)>, + _phantom: PhantomData, +} +impl QueryResetMap { + pub fn new() -> Self { + Self { + map: FastHashMap::default(), + _phantom: PhantomData, + } + } + + pub fn use_query_set( + &mut self, + id: id::QuerySetId, + query_set: &QuerySet, + query: u32, + ) -> bool { + let (index, epoch, _) = id.unzip(); + let (vec, _) = self + .map + .entry(index) + .or_insert_with(|| (vec![false; query_set.desc.count as usize], epoch)); + + std::mem::replace(&mut vec[query as usize], true) + } + + pub fn reset_queries( + self, + cmd_buf_raw: &mut B::CommandBuffer, + query_set_storage: &Storage, id::QuerySetId>, + backend: wgt::Backend, + ) -> Result<(), id::QuerySetId> { + for (query_set_id, (state, epoch)) in self.map.into_iter() { + let id = Id::zip(query_set_id, epoch, backend); + let query_set = query_set_storage.get(id).map_err(|_| id)?; + + debug_assert_eq!(state.len(), query_set.desc.count as usize); + + // Need to find all "runs" of values which need resets. If the state vector is: + // [false, true, true, false, true], we want to reset [1..3, 4..5]. This minimizes + // the amount of resets needed. 
+ let mut state_iter = state.into_iter().chain(iter::once(false)).enumerate(); + let mut run_start: Option = None; + while let Some((idx, value)) = state_iter.next() { + match (run_start, value) { + // We're inside of a run, do nothing + (Some(..), true) => {} + // We've hit the end of a run, dispatch a reset + (Some(start), false) => { + run_start = None; + unsafe { cmd_buf_raw.reset_query_pool(&query_set.raw, start..idx as u32) }; + } + // We're starting a run + (None, true) => { + run_start = Some(idx as u32); + } + // We're in a run of falses, do nothing. + (None, false) => {} + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SimplifiedQueryType { + Timestamp, + PipelineStatistics, +} +impl From for SimplifiedQueryType { + fn from(q: wgt::QueryType) -> Self { + match q { + wgt::QueryType::Timestamp => SimplifiedQueryType::Timestamp, + wgt::QueryType::PipelineStatistics(..) => SimplifiedQueryType::PipelineStatistics, + } + } +} + +/// Error encountered when dealing with queries +#[derive(Clone, Debug, Error)] +pub enum QueryError { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Error encountered while trying to use queries")] + Use(#[from] QueryUseError), + #[error("Error encountered while trying to resolve a query")] + Resolve(#[from] ResolveError), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error("QuerySet {0:?} is invalid or destroyed")] + InvalidQuerySet(id::QuerySetId), +} + +/// Error encountered while trying to use queries +#[derive(Clone, Debug, Error)] +pub enum QueryUseError { + #[error("Query {query_index} is out of bounds for a query set of size {query_set_size}")] + OutOfBounds { + query_index: u32, + query_set_size: u32, + }, + #[error("Query {query_index} has already been used within the same renderpass. 
Queries must only be used once per renderpass")] + UsedTwiceInsideRenderpass { query_index: u32 }, + #[error("Query {new_query_index} was started while query {active_query_index} was already active. No more than one statistic or occlusion query may be active at once")] + AlreadyStarted { + active_query_index: u32, + new_query_index: u32, + }, + #[error("Query was stopped while there was no active query")] + AlreadyStopped, + #[error("A query of type {query_type:?} was started using a query set of type {set_type:?}")] + IncompatibleType { + set_type: SimplifiedQueryType, + query_type: SimplifiedQueryType, + }, +} + +/// Error encountered while trying to resolve a query. +#[derive(Clone, Debug, Error)] +pub enum ResolveError { + #[error("Queries can only be resolved to buffers that contain the COPY_DST usage")] + MissingBufferUsage, + #[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")] + QueryOverrun { + start_query: u32, + end_query: u32, + query_set_size: u32, + }, + #[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overruning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..{buffer_end_offset}")] + BufferOverrun { + start_query: u32, + end_query: u32, + stride: u32, + buffer_size: BufferAddress, + buffer_start_offset: BufferAddress, + buffer_end_offset: BufferAddress, + }, +} + +impl QuerySet { + fn validate_query( + &self, + query_set_id: id::QuerySetId, + query_type: SimplifiedQueryType, + query_index: u32, + reset_state: Option<&mut QueryResetMap>, + ) -> Result, QueryUseError> { + // We need to defer our resets because we are in a renderpass, add the usage to the reset map. 
+ if let Some(reset) = reset_state { + let used = reset.use_query_set(query_set_id, self, query_index); + if used { + return Err(QueryUseError::UsedTwiceInsideRenderpass { query_index }.into()); + } + } + + let simple_set_type = SimplifiedQueryType::from(self.desc.ty); + if simple_set_type != query_type { + return Err(QueryUseError::IncompatibleType { + query_type, + set_type: simple_set_type, + } + .into()); + } + + if query_index >= self.desc.count { + return Err(QueryUseError::OutOfBounds { + query_index, + query_set_size: self.desc.count, + } + .into()); + } + + let hal_query = hal::query::Query:: { + pool: &self.raw, + id: query_index, + }; + + Ok(hal_query) + } + + pub(super) fn validate_and_write_timestamp( + &self, + cmd_buf_raw: &mut B::CommandBuffer, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let hal_query = self.validate_query( + query_set_id, + SimplifiedQueryType::Timestamp, + query_index, + reset_state, + )?; + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. 
+ if needs_reset { + cmd_buf_raw.reset_query_pool(&self.raw, query_index..(query_index + 1)); + } + cmd_buf_raw.write_timestamp(hal::pso::PipelineStage::BOTTOM_OF_PIPE, hal_query); + } + + Ok(()) + } + + pub(super) fn validate_and_begin_pipeline_statistics_query( + &self, + cmd_buf_raw: &mut B::CommandBuffer, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap>, + active_query: &mut Option<(id::QuerySetId, u32)>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let hal_query = self.validate_query( + query_set_id, + SimplifiedQueryType::PipelineStatistics, + query_index, + reset_state, + )?; + + if let Some((_old_id, old_idx)) = active_query.replace((query_set_id, query_index)) { + return Err(QueryUseError::AlreadyStarted { + active_query_index: old_idx, + new_query_index: query_index, + } + .into()); + } + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. + if needs_reset { + cmd_buf_raw.reset_query_pool(&self.raw, query_index..(query_index + 1)); + } + cmd_buf_raw.begin_query(hal_query, hal::query::ControlFlags::empty()); + } + + Ok(()) + } +} + +pub(super) fn end_pipeline_statistics_query( + cmd_buf_raw: &mut B::CommandBuffer, + storage: &Storage, id::QuerySetId>, + active_query: &mut Option<(id::QuerySetId, u32)>, +) -> Result<(), QueryUseError> { + if let Some((query_set_id, query_index)) = active_query.take() { + // We can unwrap here as the validity was validated when the active query was set + let query_set = storage.get(query_set_id).unwrap(); + + let hal_query = hal::query::Query:: { + pool: &query_set.raw, + id: query_index, + }; + + unsafe { cmd_buf_raw.end_query(hal_query) } + + Ok(()) + } else { + Err(QueryUseError::AlreadyStopped) + } +} + +impl Global { + pub fn command_encoder_write_timestamp( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + query_index: u32, + ) -> Result<(), QueryError> { + 
let hub = B::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, _) = hub.query_sets.read(&mut token); + + let cmd_buf = CommandBuffer::get_encoder_mut(&mut cmd_buf_guard, command_encoder_id)?; + let cmd_buf_raw = cmd_buf.raw.last_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => QueryError::InvalidQuerySet(query_set_id), + _ => unreachable!(), + })?; + + query_set.validate_and_write_timestamp(cmd_buf_raw, query_set_id, query_index, None)?; + + Ok(()) + } + + pub fn command_encoder_resolve_query_set( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: BufferAddress, + ) -> Result<(), QueryError> { + let hub = B::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); + + let cmd_buf = CommandBuffer::get_encoder_mut(&mut cmd_buf_guard, command_encoder_id)?; + let cmd_buf_raw = cmd_buf.raw.last_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::ResolveQuerySet { + query_set_id, + start_query, + query_count, + destination, + destination_offset, + }); + } + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => QueryError::InvalidQuerySet(query_set_id), + _ => unreachable!(), + })?; + + 
let (dst_buffer, dst_pending) = cmd_buf + .trackers + .buffers + .use_replace(&*buffer_guard, destination, (), BufferUse::COPY_DST) + .map_err(QueryError::InvalidBuffer)?; + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_buffer)); + + if !dst_buffer.usage.contains(wgt::BufferUsage::COPY_DST) { + return Err(ResolveError::MissingBufferUsage.into()); + } + + let end_query = start_query + query_count; + if end_query > query_set.desc.count { + return Err(ResolveError::QueryOverrun { + start_query, + end_query, + query_set_size: query_set.desc.count, + } + .into()); + } + + let stride = query_set.elements * wgt::QUERY_SIZE; + let bytes_used = (stride * query_count) as BufferAddress; + + let buffer_start_offset = destination_offset; + let buffer_end_offset = buffer_start_offset + bytes_used; + + if buffer_end_offset > dst_buffer.size { + return Err(ResolveError::BufferOverrun { + start_query, + end_query, + stride, + buffer_size: dst_buffer.size, + buffer_start_offset, + buffer_end_offset, + } + .into()); + } + + unsafe { + cmd_buf_raw.pipeline_barrier( + all_buffer_stages()..hal::pso::PipelineStage::TRANSFER, + hal::memory::Dependencies::empty(), + dst_barrier, + ); + cmd_buf_raw.copy_query_pool_results( + &query_set.raw, + start_query..end_query, + &dst_buffer.raw.as_ref().unwrap().0, + destination_offset, + stride, + hal::query::ResultFlags::WAIT | hal::query::ResultFlags::BITS_64, + ); + } + + Ok(()) + } +} diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index f342cf6b8..6dc742df4 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -6,8 +6,9 @@ use crate::{ binding_model::BindError, command::{ bind::{Binder, LayoutChange}, - BasePass, BasePassRef, CommandBuffer, CommandEncoderError, DrawError, ExecutionError, - MapPassErr, PassErrorScope, RenderCommand, RenderCommandError, StateChange, + end_pipeline_statistics_query, BasePass, BasePassRef, CommandBuffer, CommandEncoderError, + 
DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryResetMap, QueryUseError, + RenderCommand, RenderCommandError, StateChange, }, conv, device::{ @@ -38,6 +39,7 @@ use serde::Deserialize; #[cfg(any(feature = "serial-pass", feature = "trace"))] use serde::Serialize; +use crate::track::UseExtendError; use std::{ borrow::{Borrow, Cow}, collections::hash_map::Entry, @@ -441,6 +443,8 @@ pub enum RenderPassErrorInner { Draw(#[from] DrawError), #[error(transparent)] Bind(#[from] BindError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), } impl From for RenderPassErrorInner { @@ -1019,7 +1023,7 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); - let (cmd_buf_raw, trackers, used_swapchain) = { + let (cmd_buf_raw, trackers, used_swapchain, query_reset_state) = { // read-only lock guard let (cmb_guard, mut token) = hub.command_buffers.read(&mut token); @@ -1039,6 +1043,7 @@ impl Global { let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (pipeline_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, mut token) = hub.textures.read(&mut token); let (view_guard, _) = hub.texture_views.read(&mut token); @@ -1071,6 +1076,8 @@ impl Global { let mut temp_offsets = Vec::new(); let mut dynamic_offset_count = 0; let mut string_offset = 0; + let mut active_query = None; + let mut query_reset_state = QueryResetMap::new(); for command in base.commands { match *command { @@ -1734,6 +1741,71 @@ impl Global { raw.insert_debug_marker(label, color); } } + RenderCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + let query_set = info + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| 
match e { + UseExtendError::InvalidResource => { + RenderCommandError::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp( + &mut raw, + query_set_id, + query_index, + Some(&mut query_reset_state), + ) + .map_pass_err(scope)?; + } + RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set = info + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => { + RenderCommandError::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + &mut raw, + query_set_id, + query_index, + Some(&mut query_reset_state), + &mut active_query, + ) + .map_pass_err(scope)?; + } + RenderCommand::EndPipelineStatisticsQuery => { + let scope = PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query( + &mut raw, + &*query_set_guard, + &mut active_query, + ) + .map_pass_err(scope)?; + } RenderCommand::ExecuteBundle(bundle_id) => { let scope = PassErrorScope::ExecuteBundle; let bundle = info @@ -1778,10 +1850,11 @@ impl Global { } let (trackers, used_swapchain) = info.finish(&*texture_guard).map_pass_err(scope)?; - (raw, trackers, used_swapchain) + (raw, trackers, used_swapchain, query_reset_state) }; let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); let cmd_buf = @@ -1798,15 +1871,26 @@ impl Global { }); } + let last_cmd_buf = cmd_buf.raw.last_mut().unwrap(); + + query_reset_state + .reset_queries( + last_cmd_buf, + &query_set_guard, + cmd_buf.device_id.value.0.backend(), + ) + 
.map_err(RenderCommandError::InvalidQuerySet) + .map_pass_err(PassErrorScope::QueryReset)?; + super::CommandBuffer::insert_barriers( - cmd_buf.raw.last_mut().unwrap(), + last_cmd_buf, &mut cmd_buf.trackers, &trackers, &*buffer_guard, &*texture_guard, ); unsafe { - cmd_buf.raw.last_mut().unwrap().finish(); + last_cmd_buf.finish(); } cmd_buf.raw.push(cmd_buf_raw); @@ -2148,6 +2232,45 @@ pub mod render_ffi { }); } + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_write_timestamp( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!(_guard, DEBUG, "RenderPass::write_timestamp"); + + pass.base.commands.push(RenderCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!(_guard, DEBUG, "RenderPass::begin_pipeline_statistics query"); + + pass.base + .commands + .push(RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { + span!(_guard, DEBUG, "RenderPass::end_pipeline_statistics_query"); + + pass.base + .commands + .push(RenderCommand::EndPipelineStatisticsQuery); + } + #[no_mangle] pub unsafe fn wgpu_render_pass_execute_bundles( pass: &mut RenderPass, diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index 121740737..22ba707ea 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -677,6 +677,43 @@ pub(crate) fn map_texture_state( (access, layout) } +pub fn map_query_type(ty: &wgt::QueryType) -> (hal::query::Type, u32) { + match ty { + wgt::QueryType::PipelineStatistics(pipeline_statistics) => { + let mut ps = hal::query::PipelineStatistic::empty(); + ps.set( + hal::query::PipelineStatistic::VERTEX_SHADER_INVOCATIONS, + pipeline_statistics + 
.contains(wgt::PipelineStatisticsTypes::VERTEX_SHADER_INVOCATIONS), + ); + ps.set( + hal::query::PipelineStatistic::CLIPPING_INVOCATIONS, + pipeline_statistics.contains(wgt::PipelineStatisticsTypes::CLIPPER_INVOCATIONS), + ); + ps.set( + hal::query::PipelineStatistic::CLIPPING_PRIMITIVES, + pipeline_statistics.contains(wgt::PipelineStatisticsTypes::CLIPPER_PRIMITIVES_OUT), + ); + ps.set( + hal::query::PipelineStatistic::FRAGMENT_SHADER_INVOCATIONS, + pipeline_statistics + .contains(wgt::PipelineStatisticsTypes::FRAGMENT_SHADER_INVOCATIONS), + ); + ps.set( + hal::query::PipelineStatistic::COMPUTE_SHADER_INVOCATIONS, + pipeline_statistics + .contains(wgt::PipelineStatisticsTypes::COMPUTE_SHADER_INVOCATIONS), + ); + + ( + hal::query::Type::PipelineStatistics(ps), + pipeline_statistics.bits().count_ones(), + ) + } + wgt::QueryType::Timestamp => (hal::query::Type::Timestamp, 1), + } +} + pub fn map_load_store_ops(channel: &PassChannel) -> hal::pass::AttachmentOps { hal::pass::AttachmentOps { load: match channel.load_op { diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 594ea4eda..3541043a3 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -39,6 +39,7 @@ pub struct SuspectedResources { pub(crate) bind_group_layouts: Vec>, pub(crate) pipeline_layouts: Vec>, pub(crate) render_bundles: Vec>, + pub(crate) query_sets: Vec>, } impl SuspectedResources { @@ -53,6 +54,7 @@ impl SuspectedResources { self.bind_group_layouts.clear(); self.pipeline_layouts.clear(); self.render_bundles.clear(); + self.query_sets.clear(); } pub(crate) fn extend(&mut self, other: &Self) { @@ -70,6 +72,7 @@ impl SuspectedResources { self.pipeline_layouts .extend_from_slice(&other.pipeline_layouts); self.render_bundles.extend_from_slice(&other.render_bundles); + self.query_sets.extend_from_slice(&other.query_sets); } pub(crate) fn add_trackers(&mut self, trackers: &TrackerSet) { @@ -81,6 +84,7 @@ impl SuspectedResources { 
self.compute_pipelines.extend(trackers.compute_pipes.used()); self.render_pipelines.extend(trackers.render_pipes.used()); self.render_bundles.extend(trackers.bundles.used()); + self.query_sets.extend(trackers.query_sets.used()); } } @@ -99,6 +103,7 @@ struct NonReferencedResources { graphics_pipes: Vec, descriptor_set_layouts: Vec, pipeline_layouts: Vec, + query_sets: Vec, } impl NonReferencedResources { @@ -114,6 +119,7 @@ impl NonReferencedResources { graphics_pipes: Vec::new(), descriptor_set_layouts: Vec::new(), pipeline_layouts: Vec::new(), + query_sets: Vec::new(), } } @@ -126,6 +132,7 @@ impl NonReferencedResources { self.desc_sets.extend(other.desc_sets); self.compute_pipes.extend(other.compute_pipes); self.graphics_pipes.extend(other.graphics_pipes); + self.query_sets.extend(other.query_sets); assert!(other.descriptor_set_layouts.is_empty()); assert!(other.pipeline_layouts.is_empty()); } @@ -178,6 +185,9 @@ impl NonReferencedResources { for raw in self.pipeline_layouts.drain(..) { device.destroy_pipeline_layout(raw); } + for raw in self.query_sets.drain(..) { + device.destroy_query_pool(raw); + } } } @@ -604,6 +614,27 @@ impl LifetimeTracker { } } } + + if !self.suspected_resources.query_sets.is_empty() { + let (mut guard, _) = hub.query_sets.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.query_sets.drain(..) 
{ + if trackers.query_sets.remove_abandoned(id) { + // #[cfg(feature = "trace")] + // trace.map(|t| t.lock().add(trace::Action::DestroyComputePipeline(id.0))); + if let Some(res) = hub.query_sets.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.submission_index.load(Ordering::Acquire); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .query_sets + .push(res.raw); + } + } + } + } } pub(crate) fn triage_mapped( diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a43f336ac..827b26be3 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -401,6 +401,7 @@ impl Device { let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, mut token) = hub.textures.read(&mut token); let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); @@ -441,6 +442,11 @@ impl Device { self.temp_suspected.render_pipelines.push(id); } } + for id in trackers.query_sets.used() { + if query_set_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.query_sets.push(id); + } + } } self.lock_life(&mut token) @@ -3673,6 +3679,132 @@ impl Global { .push(id::Valid(render_bundle_id)); } + pub fn device_create_query_set( + &self, + device_id: id::DeviceId, + desc: &wgt::QuerySetDescriptor, + id_in: Input, + ) -> (id::QuerySetId, Option) { + span!(_guard, INFO, "Device::create_query_set"); + + let hub = B::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + 
Err(_) => break DeviceError::Invalid.into(), + }; + + match desc.ty { + wgt::QueryType::Timestamp => { + if !device.features.contains(wgt::Features::TIMESTAMP_QUERY) { + break resource::CreateQuerySetError::MissingFeature( + wgt::Features::TIMESTAMP_QUERY, + ); + } + } + wgt::QueryType::PipelineStatistics(..) => { + if !device + .features + .contains(wgt::Features::PIPELINE_STATISTICS_QUERY) + { + break resource::CreateQuerySetError::MissingFeature( + wgt::Features::PIPELINE_STATISTICS_QUERY, + ); + } + } + } + + if desc.count == 0 { + break resource::CreateQuerySetError::ZeroCount; + } + + if desc.count >= wgt::QUERY_SET_MAX_QUERIES { + break resource::CreateQuerySetError::TooManyQueries { + count: desc.count, + maximum: wgt::QUERY_SET_MAX_QUERIES, + }; + } + + let query_set = { + let (hal_type, elements) = conv::map_query_type(&desc.ty); + + resource::QuerySet { + raw: unsafe { device.raw.create_query_pool(hal_type, desc.count).unwrap() }, + device_id: Stored { + value: id::Valid(device_id), + ref_count: device.life_guard.add_ref(), + }, + life_guard: LifeGuard::new(""), + desc: desc.clone(), + elements, + } + }; + + let ref_count = query_set.life_guard.add_ref(); + + let id = hub + .query_sets + .register_identity(id_in, query_set, &mut token); + #[cfg(feature = "trace")] + match device.trace { + Some(ref trace) => trace.lock().add(trace::Action::CreateQuerySet { + id: id.0, + desc: desc.clone(), + }), + None => (), + }; + + device + .trackers + .lock() + .query_sets + .init(id, ref_count, PhantomData) + .unwrap(); + + return (id.0, None); + }; + + let id = B::hub(self) + .query_sets + .register_error(id_in, "", &mut token); + (id, Some(error)) + } + + pub fn query_set_drop(&self, query_set_id: id::QuerySetId) { + span!(_guard, INFO, "QuerySet::drop"); + + let hub = B::hub(self); + let mut token = Token::root(); + + let device_id = { + let (mut query_set_guard, _) = hub.query_sets.write(&mut token); + let query_set = 
query_set_guard.get_mut(query_set_id).unwrap(); + query_set.life_guard.ref_count.take(); + query_set.device_id.value + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = &device_guard[device_id]; + + #[cfg(feature = "trace")] + match device.trace { + Some(ref trace) => trace + .lock() + .add(trace::Action::DestroyQuerySet(query_set_id)), + None => (), + }; + + device + .lock_life(&mut token) + .suspected_resources + .query_sets + .push(id::Valid(query_set_id)); + } + pub fn device_create_render_pipeline( &self, device_id: id::DeviceId, diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 2b2924ed3..48207aa30 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -91,6 +91,11 @@ pub enum Action<'a> { base: crate::command::BasePass, }, DestroyRenderBundle(id::RenderBundleId), + CreateQuerySet { + id: id::QuerySetId, + desc: wgt::QuerySetDescriptor, + }, + DestroyQuerySet(id::QuerySetId), WriteBuffer { id: id::BufferId, data: FileName, @@ -132,6 +137,17 @@ pub enum Command { dst: crate::command::TextureCopyView, size: wgt::Extent3d, }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + ResolveQuerySet { + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: wgt::BufferAddress, + }, RunComputePass { base: crate::command::BasePass, }, diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 0e95c3b23..9ec417cdd 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -23,6 +23,8 @@ use crate::{ use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use wgt::Backend; +use crate::id::QuerySetId; +use crate::resource::QuerySet; #[cfg(debug_assertions)] use std::cell::Cell; use std::{fmt::Debug, marker::PhantomData, ops, thread}; @@ -264,6 +266,11 @@ impl Access> for BindGroup {} impl Access> for Device {} impl Access> for BindGroup {} impl Access> for ComputePipeline 
{} +impl Access> for Root {} +impl Access> for Device {} +impl Access> for CommandBuffer {} +impl Access> for RenderPipeline {} +impl Access> for ComputePipeline {} impl Access> for Device {} impl Access> for BindGroupLayout {} impl Access> for Root {} @@ -273,6 +280,7 @@ impl Access> for BindGroup {} impl Access> for CommandBuffer {} impl Access> for ComputePipeline {} impl Access> for RenderPipeline {} +impl Access> for QuerySet {} impl Access> for Root {} impl Access> for Device {} impl Access> for Buffer {} @@ -374,6 +382,7 @@ pub trait GlobalIdentityHandlerFactory: + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory + + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory @@ -547,6 +556,7 @@ pub struct Hub { pub render_bundles: Registry, pub render_pipelines: Registry, RenderPipelineId, F>, pub compute_pipelines: Registry, ComputePipelineId, F>, + pub query_sets: Registry, QuerySetId, F>, pub buffers: Registry, BufferId, F>, pub textures: Registry, TextureId, F>, pub texture_views: Registry, TextureViewId, F>, @@ -567,6 +577,7 @@ impl Hub { render_bundles: Registry::new(B::VARIANT, factory), render_pipelines: Registry::new(B::VARIANT, factory), compute_pipelines: Registry::new(B::VARIANT, factory), + query_sets: Registry::new(B::VARIANT, factory), buffers: Registry::new(B::VARIANT, factory), textures: Registry::new(B::VARIANT, factory), texture_views: Registry::new(B::VARIANT, factory), @@ -698,6 +709,15 @@ impl Hub { } } + for element in self.query_sets.data.write().map.drain(..) { + if let Element::Occupied(query_set, _) = element { + let device = &devices[query_set.device_id.value]; + unsafe { + device.raw.destroy_query_pool(query_set.raw); + } + } + } + for element in devices.map.drain(..) 
{ if let Element::Occupied(device, _) = element { device.dispose(); diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs index 7a1201be2..9578a77b5 100644 --- a/wgpu-core/src/id.rs +++ b/wgpu-core/src/id.rs @@ -164,6 +164,7 @@ pub type RenderPassEncoderId = *mut crate::command::RenderPass; pub type ComputePassEncoderId = *mut crate::command::ComputePass; pub type RenderBundleEncoderId = *mut crate::command::RenderBundleEncoder; pub type RenderBundleId = Id; +pub type QuerySetId = Id>; // Swap chain pub type SwapChainId = Id>; diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 79a6983d1..0b2b34b29 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -120,7 +120,7 @@ impl crate::hub::Resource for Surface { pub struct Adapter { pub(crate) raw: hal::adapter::Adapter, features: wgt::Features, - private_features: PrivateFeatures, + pub(crate) private_features: PrivateFeatures, limits: wgt::Limits, life_guard: LifeGuard, } @@ -130,6 +130,7 @@ impl Adapter { span!(_guard, INFO, "Adapter::new"); let adapter_features = raw.physical_device.features(); + let adapter_limits = raw.physical_device.limits(); let mut features = wgt::Features::default() | wgt::Features::MAPPABLE_PRIMARY_BUFFERS @@ -179,6 +180,14 @@ impl Adapter { wgt::Features::NON_FILL_POLYGON_MODE, adapter_features.contains(hal::Features::NON_FILL_POLYGON_MODE), ); + features.set( + wgt::Features::TIMESTAMP_QUERY, + adapter_limits.timestamp_compute_and_graphics, + ); + features.set( + wgt::Features::PIPELINE_STATISTICS_QUERY, + adapter_features.contains(hal::Features::PIPELINE_STATISTICS_QUERY), + ); #[cfg(not(target_os = "ios"))] //TODO: https://github.com/gfx-rs/gfx/issues/3346 features.set(wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER, true); @@ -195,10 +204,9 @@ impl Adapter { .format_properties(Some(hal::format::Format::D24UnormS8Uint)) .optimal_tiling .contains(hal::format::ImageFeature::DEPTH_STENCIL_ATTACHMENT), + timestamp_period: adapter_limits.timestamp_period, 
}; - let adapter_limits = raw.physical_device.limits(); - let default_limits = wgt::Limits::default(); // All these casts to u32 are safe as the underlying vulkan types are u32s. @@ -422,6 +430,11 @@ impl Adapter { hal::Features::NON_FILL_POLYGON_MODE, desc.features.contains(wgt::Features::NON_FILL_POLYGON_MODE), ); + enabled_features.set( + hal::Features::PIPELINE_STATISTICS_QUERY, + desc.features + .contains(wgt::Features::PIPELINE_STATISTICS_QUERY), + ); let family = self .raw @@ -429,6 +442,7 @@ impl Adapter { .iter() .find(|family| family.queue_type().supports_graphics()) .ok_or(RequestDeviceError::NoGraphicsQueue)?; + let mut gpu = unsafe { phd.open(&[(family, &[1.0])], enabled_features) }.map_err(|err| { use hal::device::CreationError::*; @@ -855,6 +869,22 @@ impl Global { .map_err(|_| InvalidAdapter) } + pub fn adapter_get_timestamp_period( + &self, + adapter_id: AdapterId, + ) -> Result { + span!(_guard, INFO, "Adapter::get_timestamp_period"); + + let hub = B::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.private_features.timestamp_period) + .map_err(|_| InvalidAdapter) + } + pub fn adapter_drop(&self, adapter_id: AdapterId) { span!(_guard, INFO, "Adapter::drop"); diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index fc48d7eb4..bd099a0fb 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -222,6 +222,7 @@ struct PrivateFeatures { anisotropic_filtering: bool, texture_d24: bool, texture_d24_s8: bool, + timestamp_period: f32, } #[macro_export] diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 412a29cdb..ebd996736 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -447,6 +447,42 @@ impl Borrow<()> for Sampler { &DUMMY_SELECTOR } } +#[derive(Clone, Debug, Error)] +pub enum CreateQuerySetError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("QuerySets cannot be 
made with zero queries")] + ZeroCount, + #[error("{count} is too many queries for a single QuerySet. QuerySets cannot be made more than {maximum} queries.")] + TooManyQueries { count: u32, maximum: u32 }, + #[error("Feature {0:?} must be enabled")] + MissingFeature(wgt::Features), +} + +#[derive(Debug)] +pub struct QuerySet { + pub(crate) raw: B::QueryPool, + pub(crate) device_id: Stored, + pub(crate) life_guard: LifeGuard, + /// Amount of queries in the query set. + pub(crate) desc: wgt::QuerySetDescriptor, + /// Amount of numbers in each query (i.e. a pipeline statistics query for two attributes will have this number be two) + pub(crate) elements: u32, +} + +impl Resource for QuerySet { + const TYPE: &'static str = "QuerySet"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +impl Borrow<()> for QuerySet { + fn borrow(&self) -> &() { + &DUMMY_SELECTOR + } +} #[derive(Clone, Debug, Error)] pub enum DestroyError { diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 2d2d76359..0a9f9a315 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -528,6 +528,7 @@ pub(crate) struct TrackerSet { pub compute_pipes: ResourceTracker>, pub render_pipes: ResourceTracker>, pub bundles: ResourceTracker>, + pub query_sets: ResourceTracker>, } impl TrackerSet { @@ -542,6 +543,7 @@ impl TrackerSet { compute_pipes: ResourceTracker::new(backend), render_pipes: ResourceTracker::new(backend), bundles: ResourceTracker::new(backend), + query_sets: ResourceTracker::new(backend), } } @@ -555,6 +557,7 @@ impl TrackerSet { self.compute_pipes.clear(); self.render_pipes.clear(); self.bundles.clear(); + self.query_sets.clear(); } /// Try to optimize the tracking representation. 
@@ -567,6 +570,7 @@ impl TrackerSet { self.compute_pipes.optimize(); self.render_pipes.optimize(); self.bundles.optimize(); + self.query_sets.optimize(); } /// Merge all the trackers of another instance by extending @@ -594,6 +598,7 @@ impl TrackerSet { .unwrap(); self.render_pipes.merge_extend(&other.render_pipes).unwrap(); self.bundles.merge_extend(&other.bundles).unwrap(); + self.query_sets.merge_extend(&other.query_sets).unwrap(); Ok(()) } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 1a7a49762..732030383 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -35,6 +35,10 @@ pub const COPY_BUFFER_ALIGNMENT: BufferAddress = 4; pub const VERTEX_STRIDE_ALIGNMENT: BufferAddress = 4; /// Alignment all push constants need pub const PUSH_CONSTANT_ALIGNMENT: u32 = 4; +/// Maximum queries in a query set +pub const QUERY_SET_MAX_QUERIES: u32 = 8192; +/// Size of a single piece of query data. +pub const QUERY_SIZE: u32 = 8; /// Backends supported by wgpu. #[repr(u8)] @@ -170,6 +174,39 @@ bitflags::bitflags! { /// /// This is a web and native feature. const TEXTURE_COMPRESSION_BC = 0x0000_0000_0000_0002; + /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when + /// all work before the query is finished. Call [`CommandEncoder::write_timestamp`], + /// [`RenderPassEncoder::write_timestamp`], or [`ComputePassEncoder::write_timestamp`] to + /// write out a timestamp. + /// + /// They must be resolved using [`CommandEncoder::resolve_query_sets`] into a buffer, + /// then the result must be multiplied by the timestamp period [`Device::get_timestamp_period`] + /// to get the timestamp in nanoseconds. Multiple timestamps can then be diffed to get the + /// time for operations between them to finish. + /// + /// Due to gfx-hal limitations, this is only supported on vulkan for now. + /// + /// Supported Platforms: + /// - Vulkan (works) + /// - DX12 (future) + /// + /// This is a web and native feature. 
+ const TIMESTAMP_QUERY = 0x0000_0000_0000_0004; + /// Enables use of Pipeline Statistics Queries. These queries tell the count of various operations + /// performed between the start and stop call. Call [`RenderPassEncoder::begin_pipeline_statistics_query`] to start + /// a query, then call [`RenderPassEncoder::end_pipeline_statistics_query`] to stop one. + /// + /// They must be resolved using [`CommandEncoder::resolve_query_sets`] into a buffer. + /// The rules on how these resolve into buffers are detailed in the documentation for [`PipelineStatisticsTypes`]. + /// + /// Due to gfx-hal limitations, this is only supported on vulkan for now. + /// + /// Supported Platforms: + /// - Vulkan (works) + /// - DX12 (future) + /// + /// This is a web and native feature. + const PIPELINE_STATISTICS_QUERY = 0x0000_0000_0000_0008; /// Webgpu only allows the MAP_READ and MAP_WRITE buffer usage to be matched with /// COPY_DST and COPY_SRC respectively. This removes this requirement. /// @@ -401,7 +438,7 @@ pub struct Limits { /// - DX12: 256 bytes /// - Metal: 4096 bytes /// - DX11 & OpenGL don't natively support push constants, and are emulated with uniforms, - /// so this number is less useful. + /// so this number is less useful but likely 256. pub max_push_constant_size: u32, } @@ -2512,3 +2549,75 @@ pub enum SamplerBorderColor { OpaqueBlack, OpaqueWhite, } + +/// Describes how to create a QuerySet. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct QuerySetDescriptor { + /// Kind of query that this query set should contain. + pub ty: QueryType, + /// Total count of queries the set contains. Must not be zero. + /// Must not be greater than [`QUERY_SET_MAX_QUERIES`]. + pub count: u32, +} + +/// Type of query contained in a QuerySet. 
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "trace", derive(serde::Serialize))]
+#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
+pub enum QueryType {
+    /// Query returns up to 5 64-bit numbers based on the given flags.
+    ///
+    /// See [`PipelineStatisticsTypes`]'s documentation for more information
+    /// on how they get resolved.
+    ///
+    /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled to use this query type.
+    PipelineStatistics(PipelineStatisticsTypes),
+    /// Query returns a 64-bit number indicating the GPU-timestamp
+    /// where all previous commands have finished executing.
+    ///
+    /// Must be multiplied by [`Device::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    ///
+    /// [`Features::TIMESTAMP_QUERY`] must be enabled to use this query type.
+    Timestamp,
+}
+
+bitflags::bitflags! {
+    /// Flags for which pipeline data should be recorded.
+    ///
+    /// The amount of values written when resolved depends
+    /// on the amount of flags. If 3 flags are enabled, 3
+    /// 64-bit values will be written per-query.
+    ///
+    /// The order they are written is the order they are declared
+    /// in this bitflags. If you enabled `CLIPPER_PRIMITIVES_OUT`
+    /// and `COMPUTE_SHADER_INVOCATIONS`, it would write 16 bytes,
+    /// the first 8 bytes being the primitive out value, the last 8
+    /// bytes being the compute shader invocation count.
+    #[repr(transparent)]
+    #[cfg_attr(feature = "trace", derive(Serialize))]
+    #[cfg_attr(feature = "replay", derive(Deserialize))]
+    pub struct PipelineStatisticsTypes : u8 {
+        /// Amount of times the vertex shader is run. Accounts for
+        /// the vertex cache when doing indexed rendering.
+        const VERTEX_SHADER_INVOCATIONS = 0x01;
+        /// Amount of times the clipper is invoked. This
+        /// is also the amount of triangles output by the vertex shader.
+        const CLIPPER_INVOCATIONS = 0x02;
+        /// Amount of primitives that are not culled by the clipper.
+        /// This is the amount of triangles that are actually on screen
+        /// and will be rasterized and rendered.
+        const CLIPPER_PRIMITIVES_OUT = 0x04;
+        /// Amount of times the fragment shader is run. Accounts for
+        /// fragment shaders running in 2x2 blocks in order to get
+        /// derivatives.
+        const FRAGMENT_SHADER_INVOCATIONS = 0x08;
+        /// Amount of times a compute shader is invoked. This will
+        /// be equivalent to the dispatch count times the workgroup size.
+        const COMPUTE_SHADER_INVOCATIONS = 0x10;
+    }
+}