From a22037ced05cc9747b39ff630b09582f2b06f1da Mon Sep 17 00:00:00 2001 From: Connor Fitzgerald Date: Sun, 3 Jan 2021 02:33:29 -0500 Subject: [PATCH] Add pipeline statistics and timeline queries --- player/src/lib.rs | 32 +++ wgpu-core/src/command/bundle.rs | 6 + wgpu-core/src/command/compute.rs | 121 ++++++++- wgpu-core/src/command/draw.rs | 11 + wgpu-core/src/command/mod.rs | 10 + wgpu-core/src/command/query.rs | 424 +++++++++++++++++++++++++++++++ wgpu-core/src/command/render.rs | 135 +++++++++- wgpu-core/src/conv.rs | 37 +++ wgpu-core/src/device/life.rs | 31 +++ wgpu-core/src/device/mod.rs | 132 ++++++++++ wgpu-core/src/device/trace.rs | 16 ++ wgpu-core/src/hub.rs | 20 ++ wgpu-core/src/id.rs | 1 + wgpu-core/src/instance.rs | 36 ++- wgpu-core/src/lib.rs | 1 + wgpu-core/src/resource.rs | 36 +++ wgpu-core/src/track/mod.rs | 5 + wgpu-types/src/lib.rs | 111 +++++++- 18 files changed, 1153 insertions(+), 12 deletions(-) create mode 100644 wgpu-core/src/command/query.rs diff --git a/player/src/lib.rs b/player/src/lib.rs index 4ee3cc4b5..91bfe15ce 100644 --- a/player/src/lib.rs +++ b/player/src/lib.rs @@ -81,6 +81,28 @@ impl GlobalPlay for wgc::hub::Global { trace::Command::CopyTextureToTexture { src, dst, size } => self .command_encoder_copy_texture_to_texture::(encoder, &src, &dst, &size) .unwrap(), + trace::Command::WriteTimestamp { + query_set_id, + query_index, + } => self + .command_encoder_write_timestamp::(encoder, query_set_id, query_index) + .unwrap(), + trace::Command::ResolveQuerySet { + query_set_id, + start_query, + query_count, + destination, + destination_offset, + } => self + .command_encoder_resolve_query_set::( + encoder, + query_set_id, + start_query, + query_count, + destination, + destination_offset, + ) + .unwrap(), trace::Command::RunComputePass { base } => { self.command_encoder_run_compute_pass_impl::(encoder, base.as_ref()) .unwrap(); @@ -267,6 +289,16 @@ impl GlobalPlay for wgc::hub::Global { A::DestroyRenderBundle(id) => { 
self.render_bundle_drop::(id); } + A::CreateQuerySet { id, desc } => { + self.device_maintain_ids::(device).unwrap(); + let (_, error) = self.device_create_query_set::(device, &desc, id); + if let Some(e) = error { + panic!("{:?}", e); + } + } + A::DestroyQuerySet(id) => { + self.query_set_drop::(id); + } A::WriteBuffer { id, data, diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index 6481e7bd4..e8ba2b543 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -430,6 +430,9 @@ impl RenderBundleEncoder { RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::WriteTimestamp { .. } + | RenderCommand::BeginPipelineStatisticsQuery { .. } + | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(), RenderCommand::ExecuteBundle(_) | RenderCommand::SetBlendColor(_) | RenderCommand::SetStencilReference(_) @@ -693,6 +696,9 @@ impl RenderBundle { RenderCommand::PushDebugGroup { color: _, len: _ } => unimplemented!(), RenderCommand::InsertDebugMarker { color: _, len: _ } => unimplemented!(), RenderCommand::PopDebugGroup => unimplemented!(), + RenderCommand::WriteTimestamp { .. } + | RenderCommand::BeginPipelineStatisticsQuery { .. 
} + | RenderCommand::EndPipelineStatisticsQuery => unimplemented!(), RenderCommand::ExecuteBundle(_) | RenderCommand::SetBlendColor(_) | RenderCommand::SetStencilReference(_) diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index fb1e8fddb..4450ae2cb 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -6,8 +6,8 @@ use crate::{ binding_model::{BindError, BindGroup, PushConstantUploadError}, command::{ bind::{Binder, LayoutChange}, - BasePass, BasePassRef, CommandBuffer, CommandEncoderError, MapPassErr, PassErrorScope, - StateChange, + end_pipeline_statistics_query, BasePass, BasePassRef, CommandBuffer, CommandEncoderError, + MapPassErr, PassErrorScope, QueryUseError, StateChange, }, hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, id, @@ -23,6 +23,7 @@ use hal::command::CommandBuffer as _; use thiserror::Error; use wgt::{BufferAddress, BufferUsage, ShaderStage}; +use crate::track::UseExtendError; use std::{fmt, iter, str}; #[doc(hidden)] @@ -61,6 +62,15 @@ pub enum ComputeCommand { color: u32, len: usize, }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, } #[cfg_attr(feature = "serial-pass", derive(serde::Deserialize, serde::Serialize))] @@ -127,6 +137,8 @@ pub enum ComputePassErrorInner { BindGroupIndexOutOfRange { index: u8, max: u32 }, #[error("compute pipeline {0:?} is invalid")] InvalidPipeline(id::ComputePipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), #[error("indirect buffer {0:?} is invalid or destroyed")] InvalidIndirectBuffer(id::BufferId), #[error(transparent)] @@ -141,6 +153,8 @@ pub enum ComputePassErrorInner { Bind(#[from] BindError), #[error(transparent)] PushConstants(#[from] PushConstantUploadError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), } /// Error 
encountered when performing a compute pass. @@ -260,6 +274,7 @@ impl Global { let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (pipeline_guard, mut token) = hub.compute_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); @@ -272,6 +287,7 @@ impl Global { let mut temp_offsets = Vec::new(); let mut dynamic_offset_count = 0; let mut string_offset = 0; + let mut active_query = None; for command in base.commands { match *command { @@ -525,6 +541,62 @@ impl Global { string_offset += len; unsafe { raw.insert_debug_marker(label, color) } } + ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => { + ComputePassErrorInner::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp(raw, query_set_id, query_index, None) + .map_pass_err(scope)?; + } + ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => { + ComputePassErrorInner::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + raw, + query_set_id, + query_index, + None, + &mut active_query, + ) + .map_pass_err(scope)?; + } + ComputeCommand::EndPipelineStatisticsQuery => { + let scope = 
PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query(raw, &*query_set_guard, &mut active_query) + .map_pass_err(scope)?; + } } } @@ -680,4 +752,49 @@ pub mod compute_ffi { len: bytes.len(), }); } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_write_timestamp( + pass: &mut ComputePass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!(_guard, DEBUG, "ComputePass::write_timestamp"); + + pass.base.commands.push(ComputeCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_begin_pipeline_statistics_query( + pass: &mut ComputePass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!( + _guard, + DEBUG, + "ComputePass::begin_pipeline_statistics query" + ); + + pass.base + .commands + .push(ComputeCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_compute_pass_end_pipeline_statistics_query( + pass: &mut ComputePass, + ) { + span!(_guard, DEBUG, "ComputePass::end_pipeline_statistics_query"); + + pass.base + .commands + .push(ComputeCommand::EndPipelineStatisticsQuery); + } } diff --git a/wgpu-core/src/command/draw.rs b/wgpu-core/src/command/draw.rs index 87ac116e8..1df50a83c 100644 --- a/wgpu-core/src/command/draw.rs +++ b/wgpu-core/src/command/draw.rs @@ -75,6 +75,8 @@ pub enum RenderCommandError { InvalidDynamicOffsetCount { actual: usize, expected: usize }, #[error("render pipeline {0:?} is invalid")] InvalidPipeline(id::RenderPipelineId), + #[error("QuerySet {0:?} is invalid")] + InvalidQuerySet(id::QuerySetId), #[error("Render pipeline is incompatible with render pass")] IncompatiblePipeline(#[from] crate::device::RenderPassCompatibilityError), #[error("pipeline is not compatible with the depth-stencil read-only render pass")] @@ -195,5 +197,14 @@ pub enum RenderCommand { color: u32, len: usize, }, + WriteTimestamp { + query_set_id: id::QuerySetId, 
+ query_index: u32, + }, + BeginPipelineStatisticsQuery { + query_set_id: id::QuerySetId, + query_index: u32, + }, + EndPipelineStatisticsQuery, ExecuteBundle(id::RenderBundleId), } diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 6a19f9378..b98ce3959 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -7,6 +7,7 @@ mod bind; mod bundle; mod compute; mod draw; +mod query; mod render; mod transfer; @@ -15,6 +16,7 @@ pub use self::allocator::CommandAllocatorError; pub use self::bundle::*; pub use self::compute::*; pub use self::draw::*; +pub use self::query::*; pub use self::render::*; pub use self::transfer::*; @@ -367,6 +369,14 @@ pub enum PassErrorScope { indirect: bool, pipeline: Option, }, + #[error("While resetting queries after the renderpass was ran")] + QueryReset, + #[error("In a write_timestamp command")] + WriteTimestamp, + #[error("In a begin_pipeline_statistics_query command")] + BeginPipelineStatisticsQuery, + #[error("In a end_pipeline_statistics_query command")] + EndPipelineStatisticsQuery, #[error("In a execute_bundle command")] ExecuteBundle, #[error("In a dispatch command, indirect:{indirect}")] diff --git a/wgpu-core/src/command/query.rs b/wgpu-core/src/command/query.rs new file mode 100644 index 000000000..fe25f264c --- /dev/null +++ b/wgpu-core/src/command/query.rs @@ -0,0 +1,424 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +use hal::command::CommandBuffer as _; + +#[cfg(feature = "trace")] +use crate::device::trace::Command as TraceCommand; +use crate::{ + command::{CommandBuffer, CommandEncoderError}, + device::all_buffer_stages, + hub::{GfxBackend, Global, GlobalIdentityHandlerFactory, Storage, Token}, + id::{self, Id, TypedId}, + resource::{BufferUse, QuerySet}, + track::UseExtendError, + Epoch, FastHashMap, Index, +}; +use std::{iter, marker::PhantomData}; +use thiserror::Error; +use wgt::BufferAddress; + +#[derive(Debug)] +pub(super) struct QueryResetMap { + map: FastHashMap, Epoch)>, + _phantom: PhantomData, +} +impl QueryResetMap { + pub fn new() -> Self { + Self { + map: FastHashMap::default(), + _phantom: PhantomData, + } + } + + pub fn use_query_set( + &mut self, + id: id::QuerySetId, + query_set: &QuerySet, + query: u32, + ) -> bool { + let (index, epoch, _) = id.unzip(); + let (vec, _) = self + .map + .entry(index) + .or_insert_with(|| (vec![false; query_set.desc.count as usize], epoch)); + + std::mem::replace(&mut vec[query as usize], true) + } + + pub fn reset_queries( + self, + cmd_buf_raw: &mut B::CommandBuffer, + query_set_storage: &Storage, id::QuerySetId>, + backend: wgt::Backend, + ) -> Result<(), id::QuerySetId> { + for (query_set_id, (state, epoch)) in self.map.into_iter() { + let id = Id::zip(query_set_id, epoch, backend); + let query_set = query_set_storage.get(id).map_err(|_| id)?; + + debug_assert_eq!(state.len(), query_set.desc.count as usize); + + // Need to find all "runs" of values which need resets. If the state vector is: + // [false, true, true, false, true], we want to reset [1..3, 4..5]. This minimizes + // the amount of resets needed. 
+ let mut state_iter = state.into_iter().chain(iter::once(false)).enumerate(); + let mut run_start: Option = None; + while let Some((idx, value)) = state_iter.next() { + match (run_start, value) { + // We're inside of a run, do nothing + (Some(..), true) => {} + // We've hit the end of a run, dispatch a reset + (Some(start), false) => { + run_start = None; + unsafe { cmd_buf_raw.reset_query_pool(&query_set.raw, start..idx as u32) }; + } + // We're starting a run + (None, true) => { + run_start = Some(idx as u32); + } + // We're in a run of falses, do nothing. + (None, false) => {} + } + } + } + + Ok(()) + } +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SimplifiedQueryType { + Timestamp, + PipelineStatistics, +} +impl From for SimplifiedQueryType { + fn from(q: wgt::QueryType) -> Self { + match q { + wgt::QueryType::Timestamp => SimplifiedQueryType::Timestamp, + wgt::QueryType::PipelineStatistics(..) => SimplifiedQueryType::PipelineStatistics, + } + } +} + +/// Error encountered when dealing with queries +#[derive(Clone, Debug, Error)] +pub enum QueryError { + #[error(transparent)] + Encoder(#[from] CommandEncoderError), + #[error("Error encountered while trying to use queries")] + Use(#[from] QueryUseError), + #[error("Error encountered while trying to resolve a query")] + Resolve(#[from] ResolveError), + #[error("Buffer {0:?} is invalid or destroyed")] + InvalidBuffer(id::BufferId), + #[error("QuerySet {0:?} is invalid or destroyed")] + InvalidQuerySet(id::QuerySetId), +} + +/// Error encountered while trying to use queries +#[derive(Clone, Debug, Error)] +pub enum QueryUseError { + #[error("Query {query_index} is out of bounds for a query set of size {query_set_size}")] + OutOfBounds { + query_index: u32, + query_set_size: u32, + }, + #[error("Query {query_index} has already been used within the same renderpass. 
Queries must only be used once per renderpass")] + UsedTwiceInsideRenderpass { query_index: u32 }, + #[error("Query {new_query_index} was started while query {active_query_index} was already active. No more than one statistic or occlusion query may be active at once")] + AlreadyStarted { + active_query_index: u32, + new_query_index: u32, + }, + #[error("Query was stopped while there was no active query")] + AlreadyStopped, + #[error("A query of type {query_type:?} was started using a query set of type {set_type:?}")] + IncompatibleType { + set_type: SimplifiedQueryType, + query_type: SimplifiedQueryType, + }, +} + +/// Error encountered while trying to resolve a query. +#[derive(Clone, Debug, Error)] +pub enum ResolveError { + #[error("Queries can only be resolved to buffers that contain the COPY_DST usage")] + MissingBufferUsage, + #[error("Resolving queries {start_query}..{end_query} would overrun the query set of size {query_set_size}")] + QueryOverrun { + start_query: u32, + end_query: u32, + query_set_size: u32, + }, + #[error("Resolving queries {start_query}..{end_query} ({stride} byte queries) will end up overruning the bounds of the destination buffer of size {buffer_size} using offsets {buffer_start_offset}..{buffer_end_offset}")] + BufferOverrun { + start_query: u32, + end_query: u32, + stride: u32, + buffer_size: BufferAddress, + buffer_start_offset: BufferAddress, + buffer_end_offset: BufferAddress, + }, +} + +impl QuerySet { + fn validate_query( + &self, + query_set_id: id::QuerySetId, + query_type: SimplifiedQueryType, + query_index: u32, + reset_state: Option<&mut QueryResetMap>, + ) -> Result, QueryUseError> { + // We need to defer our resets because we are in a renderpass, add the usage to the reset map. 
+ if let Some(reset) = reset_state { + let used = reset.use_query_set(query_set_id, self, query_index); + if used { + return Err(QueryUseError::UsedTwiceInsideRenderpass { query_index }.into()); + } + } + + let simple_set_type = SimplifiedQueryType::from(self.desc.ty); + if simple_set_type != query_type { + return Err(QueryUseError::IncompatibleType { + query_type, + set_type: simple_set_type, + } + .into()); + } + + if query_index >= self.desc.count { + return Err(QueryUseError::OutOfBounds { + query_index, + query_set_size: self.desc.count, + } + .into()); + } + + let hal_query = hal::query::Query:: { + pool: &self.raw, + id: query_index, + }; + + Ok(hal_query) + } + + pub(super) fn validate_and_write_timestamp( + &self, + cmd_buf_raw: &mut B::CommandBuffer, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let hal_query = self.validate_query( + query_set_id, + SimplifiedQueryType::Timestamp, + query_index, + reset_state, + )?; + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. 
+ if needs_reset { + cmd_buf_raw.reset_query_pool(&self.raw, query_index..(query_index + 1)); + } + cmd_buf_raw.write_timestamp(hal::pso::PipelineStage::BOTTOM_OF_PIPE, hal_query); + } + + Ok(()) + } + + pub(super) fn validate_and_begin_pipeline_statistics_query( + &self, + cmd_buf_raw: &mut B::CommandBuffer, + query_set_id: id::QuerySetId, + query_index: u32, + reset_state: Option<&mut QueryResetMap>, + active_query: &mut Option<(id::QuerySetId, u32)>, + ) -> Result<(), QueryUseError> { + let needs_reset = reset_state.is_none(); + let hal_query = self.validate_query( + query_set_id, + SimplifiedQueryType::PipelineStatistics, + query_index, + reset_state, + )?; + + if let Some((_old_id, old_idx)) = active_query.replace((query_set_id, query_index)) { + return Err(QueryUseError::AlreadyStarted { + active_query_index: old_idx, + new_query_index: query_index, + } + .into()); + } + + unsafe { + // If we don't have a reset state tracker which can defer resets, we must reset now. + if needs_reset { + cmd_buf_raw.reset_query_pool(&self.raw, query_index..(query_index + 1)); + } + cmd_buf_raw.begin_query(hal_query, hal::query::ControlFlags::empty()); + } + + Ok(()) + } +} + +pub(super) fn end_pipeline_statistics_query( + cmd_buf_raw: &mut B::CommandBuffer, + storage: &Storage, id::QuerySetId>, + active_query: &mut Option<(id::QuerySetId, u32)>, +) -> Result<(), QueryUseError> { + if let Some((query_set_id, query_index)) = active_query.take() { + // We can unwrap here as the validity was validated when the active query was set + let query_set = storage.get(query_set_id).unwrap(); + + let hal_query = hal::query::Query:: { + pool: &query_set.raw, + id: query_index, + }; + + unsafe { cmd_buf_raw.end_query(hal_query) } + + Ok(()) + } else { + Err(QueryUseError::AlreadyStopped) + } +} + +impl Global { + pub fn command_encoder_write_timestamp( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + query_index: u32, + ) -> Result<(), QueryError> { + 
let hub = B::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, _) = hub.query_sets.read(&mut token); + + let cmd_buf = CommandBuffer::get_encoder_mut(&mut cmd_buf_guard, command_encoder_id)?; + let cmd_buf_raw = cmd_buf.raw.last_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => QueryError::InvalidQuerySet(query_set_id), + _ => unreachable!(), + })?; + + query_set.validate_and_write_timestamp(cmd_buf_raw, query_set_id, query_index, None)?; + + Ok(()) + } + + pub fn command_encoder_resolve_query_set( + &self, + command_encoder_id: id::CommandEncoderId, + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: BufferAddress, + ) -> Result<(), QueryError> { + let hub = B::hub(self); + let mut token = Token::root(); + + let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); + let (buffer_guard, _) = hub.buffers.read(&mut token); + + let cmd_buf = CommandBuffer::get_encoder_mut(&mut cmd_buf_guard, command_encoder_id)?; + let cmd_buf_raw = cmd_buf.raw.last_mut().unwrap(); + + #[cfg(feature = "trace")] + if let Some(ref mut list) = cmd_buf.commands { + list.push(TraceCommand::ResolveQuerySet { + query_set_id, + start_query, + query_count, + destination, + destination_offset, + }); + } + + let query_set = cmd_buf + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => QueryError::InvalidQuerySet(query_set_id), + _ => unreachable!(), + })?; + + 
let (dst_buffer, dst_pending) = cmd_buf + .trackers + .buffers + .use_replace(&*buffer_guard, destination, (), BufferUse::COPY_DST) + .map_err(QueryError::InvalidBuffer)?; + let dst_barrier = dst_pending.map(|pending| pending.into_hal(dst_buffer)); + + if !dst_buffer.usage.contains(wgt::BufferUsage::COPY_DST) { + return Err(ResolveError::MissingBufferUsage.into()); + } + + let end_query = start_query + query_count; + if end_query > query_set.desc.count { + return Err(ResolveError::QueryOverrun { + start_query, + end_query, + query_set_size: query_set.desc.count, + } + .into()); + } + + let stride = query_set.elements * wgt::QUERY_SIZE; + let bytes_used = (stride * query_count) as BufferAddress; + + let buffer_start_offset = destination_offset; + let buffer_end_offset = buffer_start_offset + bytes_used; + + if buffer_end_offset > dst_buffer.size { + return Err(ResolveError::BufferOverrun { + start_query, + end_query, + stride, + buffer_size: dst_buffer.size, + buffer_start_offset, + buffer_end_offset, + } + .into()); + } + + unsafe { + cmd_buf_raw.pipeline_barrier( + all_buffer_stages()..hal::pso::PipelineStage::TRANSFER, + hal::memory::Dependencies::empty(), + dst_barrier, + ); + cmd_buf_raw.copy_query_pool_results( + &query_set.raw, + start_query..end_query, + &dst_buffer.raw.as_ref().unwrap().0, + destination_offset, + stride, + hal::query::ResultFlags::WAIT | hal::query::ResultFlags::BITS_64, + ); + } + + Ok(()) + } +} diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index f342cf6b8..6dc742df4 100644 --- a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -6,8 +6,9 @@ use crate::{ binding_model::BindError, command::{ bind::{Binder, LayoutChange}, - BasePass, BasePassRef, CommandBuffer, CommandEncoderError, DrawError, ExecutionError, - MapPassErr, PassErrorScope, RenderCommand, RenderCommandError, StateChange, + end_pipeline_statistics_query, BasePass, BasePassRef, CommandBuffer, CommandEncoderError, + 
DrawError, ExecutionError, MapPassErr, PassErrorScope, QueryResetMap, QueryUseError, + RenderCommand, RenderCommandError, StateChange, }, conv, device::{ @@ -38,6 +39,7 @@ use serde::Deserialize; #[cfg(any(feature = "serial-pass", feature = "trace"))] use serde::Serialize; +use crate::track::UseExtendError; use std::{ borrow::{Borrow, Cow}, collections::hash_map::Entry, @@ -441,6 +443,8 @@ pub enum RenderPassErrorInner { Draw(#[from] DrawError), #[error(transparent)] Bind(#[from] BindError), + #[error(transparent)] + QueryUse(#[from] QueryUseError), } impl From for RenderPassErrorInner { @@ -1019,7 +1023,7 @@ impl Global { let (device_guard, mut token) = hub.devices.read(&mut token); - let (cmd_buf_raw, trackers, used_swapchain) = { + let (cmd_buf_raw, trackers, used_swapchain, query_reset_state) = { // read-only lock guard let (cmb_guard, mut token) = hub.command_buffers.read(&mut token); @@ -1039,6 +1043,7 @@ impl Global { let (pipeline_layout_guard, mut token) = hub.pipeline_layouts.read(&mut token); let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (pipeline_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, mut token) = hub.textures.read(&mut token); let (view_guard, _) = hub.texture_views.read(&mut token); @@ -1071,6 +1076,8 @@ impl Global { let mut temp_offsets = Vec::new(); let mut dynamic_offset_count = 0; let mut string_offset = 0; + let mut active_query = None; + let mut query_reset_state = QueryResetMap::new(); for command in base.commands { match *command { @@ -1734,6 +1741,71 @@ impl Global { raw.insert_debug_marker(label, color); } } + RenderCommand::WriteTimestamp { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::WriteTimestamp; + + let query_set = info + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| 
match e { + UseExtendError::InvalidResource => { + RenderCommandError::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_write_timestamp( + &mut raw, + query_set_id, + query_index, + Some(&mut query_reset_state), + ) + .map_pass_err(scope)?; + } + RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + } => { + let scope = PassErrorScope::BeginPipelineStatisticsQuery; + + let query_set = info + .trackers + .query_sets + .use_extend(&*query_set_guard, query_set_id, (), ()) + .map_err(|e| match e { + UseExtendError::InvalidResource => { + RenderCommandError::InvalidQuerySet(query_set_id) + } + _ => unreachable!(), + }) + .map_pass_err(scope)?; + + query_set + .validate_and_begin_pipeline_statistics_query( + &mut raw, + query_set_id, + query_index, + Some(&mut query_reset_state), + &mut active_query, + ) + .map_pass_err(scope)?; + } + RenderCommand::EndPipelineStatisticsQuery => { + let scope = PassErrorScope::EndPipelineStatisticsQuery; + + end_pipeline_statistics_query( + &mut raw, + &*query_set_guard, + &mut active_query, + ) + .map_pass_err(scope)?; + } RenderCommand::ExecuteBundle(bundle_id) => { let scope = PassErrorScope::ExecuteBundle; let bundle = info @@ -1778,10 +1850,11 @@ impl Global { } let (trackers, used_swapchain) = info.finish(&*texture_guard).map_pass_err(scope)?; - (raw, trackers, used_swapchain) + (raw, trackers, used_swapchain, query_reset_state) }; let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); let cmd_buf = @@ -1798,15 +1871,26 @@ impl Global { }); } + let last_cmd_buf = cmd_buf.raw.last_mut().unwrap(); + + query_reset_state + .reset_queries( + last_cmd_buf, + &query_set_guard, + cmd_buf.device_id.value.0.backend(), + ) + 
.map_err(RenderCommandError::InvalidQuerySet) + .map_pass_err(PassErrorScope::QueryReset)?; + super::CommandBuffer::insert_barriers( - cmd_buf.raw.last_mut().unwrap(), + last_cmd_buf, &mut cmd_buf.trackers, &trackers, &*buffer_guard, &*texture_guard, ); unsafe { - cmd_buf.raw.last_mut().unwrap().finish(); + last_cmd_buf.finish(); } cmd_buf.raw.push(cmd_buf_raw); @@ -2148,6 +2232,45 @@ pub mod render_ffi { }); } + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_write_timestamp( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!(_guard, DEBUG, "RenderPass::write_timestamp"); + + pass.base.commands.push(RenderCommand::WriteTimestamp { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_begin_pipeline_statistics_query( + pass: &mut RenderPass, + query_set_id: id::QuerySetId, + query_index: u32, + ) { + span!(_guard, DEBUG, "RenderPass::begin_pipeline_statistics query"); + + pass.base + .commands + .push(RenderCommand::BeginPipelineStatisticsQuery { + query_set_id, + query_index, + }); + } + + #[no_mangle] + pub unsafe extern "C" fn wgpu_render_pass_end_pipeline_statistics_query(pass: &mut RenderPass) { + span!(_guard, DEBUG, "RenderPass::end_pipeline_statistics_query"); + + pass.base + .commands + .push(RenderCommand::EndPipelineStatisticsQuery); + } + #[no_mangle] pub unsafe fn wgpu_render_pass_execute_bundles( pass: &mut RenderPass, diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index 121740737..22ba707ea 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -677,6 +677,43 @@ pub(crate) fn map_texture_state( (access, layout) } +pub fn map_query_type(ty: &wgt::QueryType) -> (hal::query::Type, u32) { + match ty { + wgt::QueryType::PipelineStatistics(pipeline_statistics) => { + let mut ps = hal::query::PipelineStatistic::empty(); + ps.set( + hal::query::PipelineStatistic::VERTEX_SHADER_INVOCATIONS, + pipeline_statistics + 
.contains(wgt::PipelineStatisticsTypes::VERTEX_SHADER_INVOCATIONS), + ); + ps.set( + hal::query::PipelineStatistic::CLIPPING_INVOCATIONS, + pipeline_statistics.contains(wgt::PipelineStatisticsTypes::CLIPPER_INVOCATIONS), + ); + ps.set( + hal::query::PipelineStatistic::CLIPPING_PRIMITIVES, + pipeline_statistics.contains(wgt::PipelineStatisticsTypes::CLIPPER_PRIMITIVES_OUT), + ); + ps.set( + hal::query::PipelineStatistic::FRAGMENT_SHADER_INVOCATIONS, + pipeline_statistics + .contains(wgt::PipelineStatisticsTypes::FRAGMENT_SHADER_INVOCATIONS), + ); + ps.set( + hal::query::PipelineStatistic::COMPUTE_SHADER_INVOCATIONS, + pipeline_statistics + .contains(wgt::PipelineStatisticsTypes::COMPUTE_SHADER_INVOCATIONS), + ); + + ( + hal::query::Type::PipelineStatistics(ps), + pipeline_statistics.bits().count_ones(), + ) + } + wgt::QueryType::Timestamp => (hal::query::Type::Timestamp, 1), + } +} + pub fn map_load_store_ops(channel: &PassChannel) -> hal::pass::AttachmentOps { hal::pass::AttachmentOps { load: match channel.load_op { diff --git a/wgpu-core/src/device/life.rs b/wgpu-core/src/device/life.rs index 594ea4eda..3541043a3 100644 --- a/wgpu-core/src/device/life.rs +++ b/wgpu-core/src/device/life.rs @@ -39,6 +39,7 @@ pub struct SuspectedResources { pub(crate) bind_group_layouts: Vec>, pub(crate) pipeline_layouts: Vec>, pub(crate) render_bundles: Vec>, + pub(crate) query_sets: Vec>, } impl SuspectedResources { @@ -53,6 +54,7 @@ impl SuspectedResources { self.bind_group_layouts.clear(); self.pipeline_layouts.clear(); self.render_bundles.clear(); + self.query_sets.clear(); } pub(crate) fn extend(&mut self, other: &Self) { @@ -70,6 +72,7 @@ impl SuspectedResources { self.pipeline_layouts .extend_from_slice(&other.pipeline_layouts); self.render_bundles.extend_from_slice(&other.render_bundles); + self.query_sets.extend_from_slice(&other.query_sets); } pub(crate) fn add_trackers(&mut self, trackers: &TrackerSet) { @@ -81,6 +84,7 @@ impl SuspectedResources { 
self.compute_pipelines.extend(trackers.compute_pipes.used()); self.render_pipelines.extend(trackers.render_pipes.used()); self.render_bundles.extend(trackers.bundles.used()); + self.query_sets.extend(trackers.query_sets.used()); } } @@ -99,6 +103,7 @@ struct NonReferencedResources { graphics_pipes: Vec, descriptor_set_layouts: Vec, pipeline_layouts: Vec, + query_sets: Vec, } impl NonReferencedResources { @@ -114,6 +119,7 @@ impl NonReferencedResources { graphics_pipes: Vec::new(), descriptor_set_layouts: Vec::new(), pipeline_layouts: Vec::new(), + query_sets: Vec::new(), } } @@ -126,6 +132,7 @@ impl NonReferencedResources { self.desc_sets.extend(other.desc_sets); self.compute_pipes.extend(other.compute_pipes); self.graphics_pipes.extend(other.graphics_pipes); + self.query_sets.extend(other.query_sets); assert!(other.descriptor_set_layouts.is_empty()); assert!(other.pipeline_layouts.is_empty()); } @@ -178,6 +185,9 @@ impl NonReferencedResources { for raw in self.pipeline_layouts.drain(..) { device.destroy_pipeline_layout(raw); } + for raw in self.query_sets.drain(..) { + device.destroy_query_pool(raw); + } } } @@ -604,6 +614,27 @@ impl LifetimeTracker { } } } + + if !self.suspected_resources.query_sets.is_empty() { + let (mut guard, _) = hub.query_sets.write(token); + let mut trackers = trackers.lock(); + + for id in self.suspected_resources.query_sets.drain(..) 
{ + if trackers.query_sets.remove_abandoned(id) { + // #[cfg(feature = "trace")] + // trace.map(|t| t.lock().add(trace::Action::DestroyComputePipeline(id.0))); + if let Some(res) = hub.query_sets.unregister_locked(id.0, &mut *guard) { + let submit_index = res.life_guard.submission_index.load(Ordering::Acquire); + self.active + .iter_mut() + .find(|a| a.index == submit_index) + .map_or(&mut self.free_resources, |a| &mut a.last_resources) + .query_sets + .push(res.raw); + } + } + } + } } pub(crate) fn triage_mapped( diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index a43f336ac..827b26be3 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -401,6 +401,7 @@ impl Device { let (bind_group_guard, mut token) = hub.bind_groups.read(&mut token); let (compute_pipe_guard, mut token) = hub.compute_pipelines.read(&mut token); let (render_pipe_guard, mut token) = hub.render_pipelines.read(&mut token); + let (query_set_guard, mut token) = hub.query_sets.read(&mut token); let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, mut token) = hub.textures.read(&mut token); let (texture_view_guard, mut token) = hub.texture_views.read(&mut token); @@ -441,6 +442,11 @@ impl Device { self.temp_suspected.render_pipelines.push(id); } } + for id in trackers.query_sets.used() { + if query_set_guard[id].life_guard.ref_count.is_none() { + self.temp_suspected.query_sets.push(id); + } + } } self.lock_life(&mut token) @@ -3673,6 +3679,132 @@ impl Global { .push(id::Valid(render_bundle_id)); } + pub fn device_create_query_set( + &self, + device_id: id::DeviceId, + desc: &wgt::QuerySetDescriptor, + id_in: Input, + ) -> (id::QuerySetId, Option) { + span!(_guard, INFO, "Device::create_query_set"); + + let hub = B::hub(self); + let mut token = Token::root(); + + let (device_guard, mut token) = hub.devices.read(&mut token); + + let error = loop { + let device = match device_guard.get(device_id) { + Ok(device) => device, + 
Err(_) => break DeviceError::Invalid.into(), + }; + + match desc.ty { + wgt::QueryType::Timestamp => { + if !device.features.contains(wgt::Features::TIMESTAMP_QUERY) { + break resource::CreateQuerySetError::MissingFeature( + wgt::Features::TIMESTAMP_QUERY, + ); + } + } + wgt::QueryType::PipelineStatistics(..) => { + if !device + .features + .contains(wgt::Features::PIPELINE_STATISTICS_QUERY) + { + break resource::CreateQuerySetError::MissingFeature( + wgt::Features::PIPELINE_STATISTICS_QUERY, + ); + } + } + } + + if desc.count == 0 { + break resource::CreateQuerySetError::ZeroCount; + } + + if desc.count >= wgt::QUERY_SET_MAX_QUERIES { + break resource::CreateQuerySetError::TooManyQueries { + count: desc.count, + maximum: wgt::QUERY_SET_MAX_QUERIES, + }; + } + + let query_set = { + let (hal_type, elements) = conv::map_query_type(&desc.ty); + + resource::QuerySet { + raw: unsafe { device.raw.create_query_pool(hal_type, desc.count).unwrap() }, + device_id: Stored { + value: id::Valid(device_id), + ref_count: device.life_guard.add_ref(), + }, + life_guard: LifeGuard::new(""), + desc: desc.clone(), + elements, + } + }; + + let ref_count = query_set.life_guard.add_ref(); + + let id = hub + .query_sets + .register_identity(id_in, query_set, &mut token); + #[cfg(feature = "trace")] + match device.trace { + Some(ref trace) => trace.lock().add(trace::Action::CreateQuerySet { + id: id.0, + desc: desc.clone(), + }), + None => (), + }; + + device + .trackers + .lock() + .query_sets + .init(id, ref_count, PhantomData) + .unwrap(); + + return (id.0, None); + }; + + let id = B::hub(self) + .query_sets + .register_error(id_in, "", &mut token); + (id, Some(error)) + } + + pub fn query_set_drop(&self, query_set_id: id::QuerySetId) { + span!(_guard, INFO, "QuerySet::drop"); + + let hub = B::hub(self); + let mut token = Token::root(); + + let device_id = { + let (mut query_set_guard, _) = hub.query_sets.write(&mut token); + let query_set = 
query_set_guard.get_mut(query_set_id).unwrap(); + query_set.life_guard.ref_count.take(); + query_set.device_id.value + }; + + let (device_guard, mut token) = hub.devices.read(&mut token); + let device = &device_guard[device_id]; + + #[cfg(feature = "trace")] + match device.trace { + Some(ref trace) => trace + .lock() + .add(trace::Action::DestroyQuerySet(query_set_id)), + None => (), + }; + + device + .lock_life(&mut token) + .suspected_resources + .query_sets + .push(id::Valid(query_set_id)); + } + pub fn device_create_render_pipeline( &self, device_id: id::DeviceId, diff --git a/wgpu-core/src/device/trace.rs b/wgpu-core/src/device/trace.rs index 2b2924ed3..48207aa30 100644 --- a/wgpu-core/src/device/trace.rs +++ b/wgpu-core/src/device/trace.rs @@ -91,6 +91,11 @@ pub enum Action<'a> { base: crate::command::BasePass, }, DestroyRenderBundle(id::RenderBundleId), + CreateQuerySet { + id: id::QuerySetId, + desc: wgt::QuerySetDescriptor, + }, + DestroyQuerySet(id::QuerySetId), WriteBuffer { id: id::BufferId, data: FileName, @@ -132,6 +137,17 @@ pub enum Command { dst: crate::command::TextureCopyView, size: wgt::Extent3d, }, + WriteTimestamp { + query_set_id: id::QuerySetId, + query_index: u32, + }, + ResolveQuerySet { + query_set_id: id::QuerySetId, + start_query: u32, + query_count: u32, + destination: id::BufferId, + destination_offset: wgt::BufferAddress, + }, RunComputePass { base: crate::command::BasePass, }, diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 0e95c3b23..9ec417cdd 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -23,6 +23,8 @@ use crate::{ use parking_lot::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard}; use wgt::Backend; +use crate::id::QuerySetId; +use crate::resource::QuerySet; #[cfg(debug_assertions)] use std::cell::Cell; use std::{fmt::Debug, marker::PhantomData, ops, thread}; @@ -264,6 +266,11 @@ impl Access> for BindGroup {} impl Access> for Device {} impl Access> for BindGroup {} impl Access> for ComputePipeline 
{} +impl Access> for Root {} +impl Access> for Device {} +impl Access> for CommandBuffer {} +impl Access> for RenderPipeline {} +impl Access> for ComputePipeline {} impl Access> for Device {} impl Access> for BindGroupLayout {} impl Access> for Root {} @@ -273,6 +280,7 @@ impl Access> for BindGroup {} impl Access> for CommandBuffer {} impl Access> for ComputePipeline {} impl Access> for RenderPipeline {} +impl Access> for QuerySet {} impl Access> for Root {} impl Access> for Device {} impl Access> for Buffer {} @@ -374,6 +382,7 @@ pub trait GlobalIdentityHandlerFactory: + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory + + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory @@ -547,6 +556,7 @@ pub struct Hub { pub render_bundles: Registry, pub render_pipelines: Registry, RenderPipelineId, F>, pub compute_pipelines: Registry, ComputePipelineId, F>, + pub query_sets: Registry, QuerySetId, F>, pub buffers: Registry, BufferId, F>, pub textures: Registry, TextureId, F>, pub texture_views: Registry, TextureViewId, F>, @@ -567,6 +577,7 @@ impl Hub { render_bundles: Registry::new(B::VARIANT, factory), render_pipelines: Registry::new(B::VARIANT, factory), compute_pipelines: Registry::new(B::VARIANT, factory), + query_sets: Registry::new(B::VARIANT, factory), buffers: Registry::new(B::VARIANT, factory), textures: Registry::new(B::VARIANT, factory), texture_views: Registry::new(B::VARIANT, factory), @@ -698,6 +709,15 @@ impl Hub { } } + for element in self.query_sets.data.write().map.drain(..) { + if let Element::Occupied(query_set, _) = element { + let device = &devices[query_set.device_id.value]; + unsafe { + device.raw.destroy_query_pool(query_set.raw); + } + } + } + for element in devices.map.drain(..) 
{ if let Element::Occupied(device, _) = element { device.dispose(); diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs index 7a1201be2..9578a77b5 100644 --- a/wgpu-core/src/id.rs +++ b/wgpu-core/src/id.rs @@ -164,6 +164,7 @@ pub type RenderPassEncoderId = *mut crate::command::RenderPass; pub type ComputePassEncoderId = *mut crate::command::ComputePass; pub type RenderBundleEncoderId = *mut crate::command::RenderBundleEncoder; pub type RenderBundleId = Id; +pub type QuerySetId = Id>; // Swap chain pub type SwapChainId = Id>; diff --git a/wgpu-core/src/instance.rs b/wgpu-core/src/instance.rs index 79a6983d1..0b2b34b29 100644 --- a/wgpu-core/src/instance.rs +++ b/wgpu-core/src/instance.rs @@ -120,7 +120,7 @@ impl crate::hub::Resource for Surface { pub struct Adapter { pub(crate) raw: hal::adapter::Adapter, features: wgt::Features, - private_features: PrivateFeatures, + pub(crate) private_features: PrivateFeatures, limits: wgt::Limits, life_guard: LifeGuard, } @@ -130,6 +130,7 @@ impl Adapter { span!(_guard, INFO, "Adapter::new"); let adapter_features = raw.physical_device.features(); + let adapter_limits = raw.physical_device.limits(); let mut features = wgt::Features::default() | wgt::Features::MAPPABLE_PRIMARY_BUFFERS @@ -179,6 +180,14 @@ impl Adapter { wgt::Features::NON_FILL_POLYGON_MODE, adapter_features.contains(hal::Features::NON_FILL_POLYGON_MODE), ); + features.set( + wgt::Features::TIMESTAMP_QUERY, + adapter_limits.timestamp_compute_and_graphics, + ); + features.set( + wgt::Features::PIPELINE_STATISTICS_QUERY, + adapter_features.contains(hal::Features::PIPELINE_STATISTICS_QUERY), + ); #[cfg(not(target_os = "ios"))] //TODO: https://github.com/gfx-rs/gfx/issues/3346 features.set(wgt::Features::ADDRESS_MODE_CLAMP_TO_BORDER, true); @@ -195,10 +204,9 @@ impl Adapter { .format_properties(Some(hal::format::Format::D24UnormS8Uint)) .optimal_tiling .contains(hal::format::ImageFeature::DEPTH_STENCIL_ATTACHMENT), + timestamp_period: adapter_limits.timestamp_period, 
}; - let adapter_limits = raw.physical_device.limits(); - let default_limits = wgt::Limits::default(); // All these casts to u32 are safe as the underlying vulkan types are u32s. @@ -422,6 +430,11 @@ impl Adapter { hal::Features::NON_FILL_POLYGON_MODE, desc.features.contains(wgt::Features::NON_FILL_POLYGON_MODE), ); + enabled_features.set( + hal::Features::PIPELINE_STATISTICS_QUERY, + desc.features + .contains(wgt::Features::PIPELINE_STATISTICS_QUERY), + ); let family = self .raw @@ -429,6 +442,7 @@ impl Adapter { .iter() .find(|family| family.queue_type().supports_graphics()) .ok_or(RequestDeviceError::NoGraphicsQueue)?; + let mut gpu = unsafe { phd.open(&[(family, &[1.0])], enabled_features) }.map_err(|err| { use hal::device::CreationError::*; @@ -855,6 +869,22 @@ impl Global { .map_err(|_| InvalidAdapter) } + pub fn adapter_get_timestamp_period( + &self, + adapter_id: AdapterId, + ) -> Result { + span!(_guard, INFO, "Adapter::get_timestamp_period"); + + let hub = B::hub(self); + let mut token = Token::root(); + let (adapter_guard, _) = hub.adapters.read(&mut token); + + adapter_guard + .get(adapter_id) + .map(|adapter| adapter.private_features.timestamp_period) + .map_err(|_| InvalidAdapter) + } + pub fn adapter_drop(&self, adapter_id: AdapterId) { span!(_guard, INFO, "Adapter::drop"); diff --git a/wgpu-core/src/lib.rs b/wgpu-core/src/lib.rs index fc48d7eb4..bd099a0fb 100644 --- a/wgpu-core/src/lib.rs +++ b/wgpu-core/src/lib.rs @@ -222,6 +222,7 @@ struct PrivateFeatures { anisotropic_filtering: bool, texture_d24: bool, texture_d24_s8: bool, + timestamp_period: f32, } #[macro_export] diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 412a29cdb..ebd996736 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -447,6 +447,42 @@ impl Borrow<()> for Sampler { &DUMMY_SELECTOR } } +#[derive(Clone, Debug, Error)] +pub enum CreateQuerySetError { + #[error(transparent)] + Device(#[from] DeviceError), + #[error("QuerySets cannot be 
made with zero queries")] + ZeroCount, + #[error("{count} is too many queries for a single QuerySet. QuerySets cannot be made more than {maximum} queries.")] + TooManyQueries { count: u32, maximum: u32 }, + #[error("Feature {0:?} must be enabled")] + MissingFeature(wgt::Features), +} + +#[derive(Debug)] +pub struct QuerySet { + pub(crate) raw: B::QueryPool, + pub(crate) device_id: Stored, + pub(crate) life_guard: LifeGuard, + /// Amount of queries in the query set. + pub(crate) desc: wgt::QuerySetDescriptor, + /// Amount of numbers in each query (i.e. a pipeline statistics query for two attributes will have this number be two) + pub(crate) elements: u32, +} + +impl Resource for QuerySet { + const TYPE: &'static str = "QuerySet"; + + fn life_guard(&self) -> &LifeGuard { + &self.life_guard + } +} + +impl Borrow<()> for QuerySet { + fn borrow(&self) -> &() { + &DUMMY_SELECTOR + } +} #[derive(Clone, Debug, Error)] pub enum DestroyError { diff --git a/wgpu-core/src/track/mod.rs b/wgpu-core/src/track/mod.rs index 2d2d76359..0a9f9a315 100644 --- a/wgpu-core/src/track/mod.rs +++ b/wgpu-core/src/track/mod.rs @@ -528,6 +528,7 @@ pub(crate) struct TrackerSet { pub compute_pipes: ResourceTracker>, pub render_pipes: ResourceTracker>, pub bundles: ResourceTracker>, + pub query_sets: ResourceTracker>, } impl TrackerSet { @@ -542,6 +543,7 @@ impl TrackerSet { compute_pipes: ResourceTracker::new(backend), render_pipes: ResourceTracker::new(backend), bundles: ResourceTracker::new(backend), + query_sets: ResourceTracker::new(backend), } } @@ -555,6 +557,7 @@ impl TrackerSet { self.compute_pipes.clear(); self.render_pipes.clear(); self.bundles.clear(); + self.query_sets.clear(); } /// Try to optimize the tracking representation. 
@@ -567,6 +570,7 @@ impl TrackerSet { self.compute_pipes.optimize(); self.render_pipes.optimize(); self.bundles.optimize(); + self.query_sets.optimize(); } /// Merge all the trackers of another instance by extending @@ -594,6 +598,7 @@ impl TrackerSet { .unwrap(); self.render_pipes.merge_extend(&other.render_pipes).unwrap(); self.bundles.merge_extend(&other.bundles).unwrap(); + self.query_sets.merge_extend(&other.query_sets).unwrap(); Ok(()) } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 1a7a49762..732030383 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -35,6 +35,10 @@ pub const COPY_BUFFER_ALIGNMENT: BufferAddress = 4; pub const VERTEX_STRIDE_ALIGNMENT: BufferAddress = 4; /// Alignment all push constants need pub const PUSH_CONSTANT_ALIGNMENT: u32 = 4; +/// Maximum queries in a query set +pub const QUERY_SET_MAX_QUERIES: u32 = 8192; +/// Size of a single piece of query data. +pub const QUERY_SIZE: u32 = 8; /// Backends supported by wgpu. #[repr(u8)] @@ -170,6 +174,39 @@ bitflags::bitflags! { /// /// This is a web and native feature. const TEXTURE_COMPRESSION_BC = 0x0000_0000_0000_0002; + /// Enables use of Timestamp Queries. These queries tell the current gpu timestamp when + /// all work before the query is finished. Call [`CommandEncoder::write_timestamp`], + /// [`RenderPassEncoder::write_timestamp`], or [`ComputePassEncoder::write_timestamp`] to + /// write out a timestamp. + /// + /// They must be resolved using [`CommandEncoder::resolve_query_sets`] into a buffer, + /// then the result must be multiplied by the timestamp period [`Device::get_timestamp_period`] + /// to get the timestamp in nanoseconds. Multiple timestamps can then be diffed to get the + /// time for operations between them to finish. + /// + /// Due to gfx-hal limitations, this is only supported on vulkan for now. + /// + /// Supported Platforms: + /// - Vulkan (works) + /// - DX12 (future) + /// + /// This is a web and native feature. 
+ const TIMESTAMP_QUERY = 0x0000_0000_0000_0004; + /// Enables use of Pipeline Statistics Queries. These queries tell the count of various operations + /// performed between the start and stop call. Call [`RenderPassEncoder::begin_pipeline_statistics_query`] to start + /// a query, then call [`RenderPassEncoder::end_pipeline_statistics_query`] to stop one. + /// + /// They must be resolved using [`CommandEncoder::resolve_query_sets`] into a buffer. + /// The rules on how these resolve into buffers are detailed in the documentation for [`PipelineStatisticsTypes`]. + /// + /// Due to gfx-hal limitations, this is only supported on vulkan for now. + /// + /// Supported Platforms: + /// - Vulkan (works) + /// - DX12 (future) + /// + /// This is a web and native feature. + const PIPELINE_STATISTICS_QUERY = 0x0000_0000_0000_0008; /// Webgpu only allows the MAP_READ and MAP_WRITE buffer usage to be matched with /// COPY_DST and COPY_SRC respectively. This removes this requirement. /// @@ -401,7 +438,7 @@ pub struct Limits { /// - DX12: 256 bytes /// - Metal: 4096 bytes /// - DX11 & OpenGL don't natively support push constants, and are emulated with uniforms, - /// so this number is less useful. + /// so this number is less useful but likely 256. pub max_push_constant_size: u32, } @@ -2512,3 +2549,75 @@ pub enum SamplerBorderColor { OpaqueBlack, OpaqueWhite, } + +/// Describes how to create a QuerySet. +#[derive(Clone, Debug)] +#[cfg_attr(feature = "trace", derive(serde::Serialize))] +#[cfg_attr(feature = "replay", derive(serde::Deserialize))] +pub struct QuerySetDescriptor { + /// Kind of query that this query set should contain. + pub ty: QueryType, + /// Total count of queries the set contains. Must not be zero. + /// Must not be greater than [`QUERY_SET_MAX_QUERIES`]. + pub count: u32, +} + +/// Type of query contained in a QuerySet. 
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(feature = "trace", derive(serde::Serialize))]
+#[cfg_attr(feature = "replay", derive(serde::Deserialize))]
+pub enum QueryType {
+    /// Query returns up to 5 64-bit numbers based on the given flags.
+    ///
+    /// See [`PipelineStatisticsTypes`]'s documentation for more information
+    /// on how they get resolved.
+    ///
+    /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled to use this query type.
+    PipelineStatistics(PipelineStatisticsTypes),
+    /// Query returns a 64-bit number indicating the GPU-timestamp
+    /// where all previous commands have finished executing.
+    ///
+    /// Must be multiplied by [`Device::get_timestamp_period`] to get
+    /// the value in nanoseconds. Absolute values have no meaning,
+    /// but timestamps can be subtracted to get the time it takes
+    /// for a string of operations to complete.
+    ///
+    /// [`Features::TIMESTAMP_QUERY`] must be enabled to use this query type.
+    Timestamp,
+}
+
+bitflags::bitflags! {
+    /// Flags for which pipeline data should be recorded.
+    ///
+    /// The amount of values written when resolved depends
+    /// on the amount of flags. If 3 flags are enabled, 3
+    /// 64-bit values will be written per-query.
+    ///
+    /// The order they are written is the order they are declared
+    /// in this bitflags. If you enabled `CLIPPER_PRIMITIVES_OUT`
+    /// and `COMPUTE_SHADER_INVOCATIONS`, it would write 16 bytes,
+    /// the first 8 bytes being the primitive out value, the last 8
+    /// bytes being the compute shader invocation count.
+    #[repr(transparent)]
+    #[cfg_attr(feature = "trace", derive(Serialize))]
+    #[cfg_attr(feature = "replay", derive(Deserialize))]
+    pub struct PipelineStatisticsTypes : u8 {
+        /// Amount of times the vertex shader is run. Accounts for
+        /// the vertex cache when doing indexed rendering.
+        const VERTEX_SHADER_INVOCATIONS = 0x01;
+        /// Amount of times the clipper is invoked. This
+        /// is also the amount of triangles output by the vertex shader.
+        const CLIPPER_INVOCATIONS = 0x02;
+        /// Amount of primitives that are not culled by the clipper.
+        /// This is the amount of triangles that are actually on screen
+        /// and will be rasterized and rendered.
+        const CLIPPER_PRIMITIVES_OUT = 0x04;
+        /// Amount of times the fragment shader is run. Accounts for
+        /// fragment shaders running in 2x2 blocks in order to get
+        /// derivatives.
+        const FRAGMENT_SHADER_INVOCATIONS = 0x08;
+        /// Amount of times a compute shader is invoked. This will
+        /// be equivalent to the dispatch count times the workgroup size.
+        const COMPUTE_SHADER_INVOCATIONS = 0x10;
+    }
+}