Remove lifetime dependency of ComputePass to its parent command encoder (#5620)

* lift encoder->computepass lifetime constraint and add now failing test * compute passes now take an arc to their parent command encoder, thus removing compile time dependency to it * Command encoder goes now into locked state while compute pass is open * changelog entry * share most of the code between get_encoder and lock_encoder
2024-11-21 22:33:49 +00:00 · 2024-05-30 00:43:24 +02:00 · 2024-05-30 00:43:24 +02:00 · 588950110a
commit 588950110a
parent 071fb14e15
12 changed files with 490 additions and 88 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -47,7 +47,13 @@ TODO(wumpf): This is still work in progress. Should write a bit more about it. A

 `wgpu::ComputePass` recording methods (e.g. `wgpu::ComputePass:set_render_pipeline`) no longer impose a lifetime constraint passed in resources.

-By @wumpf in [#5569](https://github.com/gfx-rs/wgpu/pull/5569), [#5575](https://github.com/gfx-rs/wgpu/pull/5575).
+Furthermore, `wgpu::ComputePass` no longer has a life time dependency on its parent `wgpu::CommandEncoder`.
+⚠️ As long as a `wgpu::ComputePass` is pending for a given `wgpu::CommandEncoder`, creation of a compute or render pass is an error and invalidates the `wgpu::CommandEncoder`.
+Previously, this was statically enforced by a lifetime constraint.
+TODO(wumpf): There was some discussion on whether to make this life time constraint opt-in or opt-out (entirely on `wgpu` side, no changes to `wgpu-core`).
+Lifting this lifetime dependencies is very useful for library authors, but opens up an easy way for incorrect use.
+
+By @wumpf in [#5569](https://github.com/gfx-rs/wgpu/pull/5569), [#5575](https://github.com/gfx-rs/wgpu/pull/5575), [#5620](https://github.com/gfx-rs/wgpu/pull/5620).

 #### Querying shader compilation errors

--- a/deno_webgpu/command_encoder.rs
+++ b/deno_webgpu/command_encoder.rs
@ -261,15 +261,14 @@ pub fn op_webgpu_command_encoder_begin_compute_pass(
        timestamp_writes: timestamp_writes.as_ref(),
    };

-    let compute_pass = gfx_select!(command_encoder => instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor));
-
+    let (compute_pass, error) = gfx_select!(command_encoder => instance.command_encoder_create_compute_pass_dyn(*command_encoder, &descriptor));
    let rid = state
        .resource_table
        .add(super::compute_pass::WebGpuComputePass(RefCell::new(
            compute_pass,
        )));

-    Ok(WebGpuResult::rid(rid))
+    Ok(WebGpuResult::rid_err(rid, error))
 }

 #[op2]
--- a/tests/tests/compute_pass_resource_ownership.rs
+++ b/tests/tests/compute_pass_resource_ownership.rs
@ -1,9 +1,6 @@
 //! Tests that compute passes take ownership of resources that are associated with.
 //! I.e. once a resource is passed in to a compute pass, it can be dropped.
 //!
-//! TODO: Test doesn't check on timestamp writes & pipeline statistics queries yet.
-//!       (Not important as long as they are lifetime constrained to the command encoder,
-//!       but once we lift this constraint, we should add tests for this as well!)
 //! TODO: Also should test resource ownership for:
 //!       * write_timestamp
 //!       * begin_pipeline_statistics_query
@ -11,7 +8,7 @@
 use std::num::NonZeroU64;

 use wgpu::util::DeviceExt as _;
-use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext};
+use wgpu_test::{gpu_test, valid, GpuTestConfiguration, TestParameters, TestingContext};

 const SHADER_SRC: &str = "
@group(0) @binding(0)
@ -75,6 +72,50 @@ async fn compute_pass_resource_ownership(ctx: TestingContext) {
    assert_eq!(floats, [2.0, 4.0, 6.0, 8.0]);
 }

+#[gpu_test]
+static COMPUTE_PASS_KEEP_ENCODER_ALIVE: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(TestParameters::default().test_features_limits())
+    .run_async(compute_pass_keep_encoder_alive);
+
+async fn compute_pass_keep_encoder_alive(ctx: TestingContext) {
+    let ResourceSetup {
+        gpu_buffer: _,
+        cpu_buffer: _,
+        buffer_size: _,
+        indirect_buffer,
+        bind_group,
+        pipeline,
+    } = resource_setup(&ctx);
+
+    let mut encoder = ctx
+        .device
+        .create_command_encoder(&wgpu::CommandEncoderDescriptor {
+            label: Some("encoder"),
+        });
+
+    let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
+        label: Some("compute_pass"),
+        timestamp_writes: None,
+    });
+
+    // Now drop the encoder - it is kept alive by the compute pass.
+    drop(encoder);
+    ctx.async_poll(wgpu::Maintain::wait())
+        .await
+        .panic_on_timeout();
+
+    // Record some draw commands.
+    cpass.set_pipeline(&pipeline);
+    cpass.set_bind_group(0, &bind_group, &[]);
+    cpass.dispatch_workgroups_indirect(&indirect_buffer, 0);
+
+    // Dropping the pass will still execute the pass, even though there's no way to submit it.
+    // Ideally, this would log an error, but the encoder is not dropped until the compute pass is dropped,
+    // making this a valid operation.
+    // (If instead the encoder was explicitly destroyed or finished, this would be an error.)
+    valid(&ctx.device, || drop(cpass));
+}
+
 // Setup ------------------------------------------------------------

 struct ResourceSetup {
--- a/tests/tests/encoder.rs
+++ b/tests/tests/encoder.rs
@ -1,4 +1,8 @@
-use wgpu_test::{fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters};
+use wgpu::util::DeviceExt;
+use wgpu::CommandEncoder;
+use wgpu_test::{
+    fail, gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext,
+};

 #[gpu_test]
 static DROP_ENCODER: GpuTestConfiguration = GpuTestConfiguration::new().run_sync(|ctx| {
@ -72,3 +76,227 @@ static DROP_ENCODER_AFTER_ERROR: GpuTestConfiguration = GpuTestConfiguration::ne
        // The encoder is still open!
        drop(encoder);
    });
+
+// TODO: This should also apply to render passes once the lifetime bound is lifted.
+#[gpu_test]
+static ENCODER_OPERATIONS_FAIL_WHILE_COMPUTE_PASS_ALIVE: GpuTestConfiguration =
+    GpuTestConfiguration::new()
+        .parameters(TestParameters::default().features(
+            wgpu::Features::CLEAR_TEXTURE
+                | wgpu::Features::TIMESTAMP_QUERY
+                | wgpu::Features::TIMESTAMP_QUERY_INSIDE_ENCODERS,
+        ))
+        .run_sync(encoder_operations_fail_while_compute_pass_alive);
+
+fn encoder_operations_fail_while_compute_pass_alive(ctx: TestingContext) {
+    let buffer_source = ctx
+        .device
+        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: None,
+            contents: &[0u8; 4],
+            usage: wgpu::BufferUsages::COPY_SRC,
+        });
+    let buffer_dest = ctx
+        .device
+        .create_buffer_init(&wgpu::util::BufferInitDescriptor {
+            label: None,
+            contents: &[0u8; 4],
+            usage: wgpu::BufferUsages::COPY_DST,
+        });
+
+    let texture_desc = wgpu::TextureDescriptor {
+        label: None,
+        size: wgpu::Extent3d {
+            width: 1,
+            height: 1,
+            depth_or_array_layers: 1,
+        },
+        mip_level_count: 1,
+        sample_count: 1,
+        dimension: wgpu::TextureDimension::D2,
+        format: wgpu::TextureFormat::Rgba8Unorm,
+        usage: wgpu::TextureUsages::COPY_DST,
+        view_formats: &[],
+    };
+    let texture_dst = ctx.device.create_texture(&texture_desc);
+    let texture_src = ctx.device.create_texture(&wgpu::TextureDescriptor {
+        usage: wgpu::TextureUsages::COPY_SRC,
+        ..texture_desc
+    });
+    let query_set = ctx.device.create_query_set(&wgpu::QuerySetDescriptor {
+        count: 1,
+        ty: wgpu::QueryType::Timestamp,
+        label: None,
+    });
+
+    #[allow(clippy::type_complexity)]
+    let recording_ops: Vec<(_, Box<dyn Fn(&mut CommandEncoder)>)> = vec![
+        (
+            "begin_compute_pass",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
+            }),
+        ),
+        (
+            "begin_render_pass",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.begin_render_pass(&wgpu::RenderPassDescriptor::default());
+            }),
+        ),
+        (
+            "copy_buffer_to_buffer",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.copy_buffer_to_buffer(&buffer_source, 0, &buffer_dest, 0, 4);
+            }),
+        ),
+        (
+            "copy_buffer_to_texture",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.copy_buffer_to_texture(
+                    wgpu::ImageCopyBuffer {
+                        buffer: &buffer_source,
+                        layout: wgpu::ImageDataLayout {
+                            offset: 0,
+                            bytes_per_row: Some(4),
+                            rows_per_image: None,
+                        },
+                    },
+                    texture_dst.as_image_copy(),
+                    texture_dst.size(),
+                );
+            }),
+        ),
+        (
+            "copy_texture_to_buffer",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.copy_texture_to_buffer(
+                    wgpu::ImageCopyTexture {
+                        texture: &texture_src,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    wgpu::ImageCopyBuffer {
+                        buffer: &buffer_dest,
+                        layout: wgpu::ImageDataLayout {
+                            offset: 0,
+                            bytes_per_row: Some(4),
+                            rows_per_image: None,
+                        },
+                    },
+                    texture_dst.size(),
+                );
+            }),
+        ),
+        (
+            "copy_texture_to_texture",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.copy_texture_to_texture(
+                    wgpu::ImageCopyTexture {
+                        texture: &texture_src,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    wgpu::ImageCopyTexture {
+                        texture: &texture_dst,
+                        mip_level: 0,
+                        origin: wgpu::Origin3d::ZERO,
+                        aspect: wgpu::TextureAspect::All,
+                    },
+                    texture_dst.size(),
+                );
+            }),
+        ),
+        (
+            "clear_texture",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.clear_texture(&texture_dst, &wgpu::ImageSubresourceRange::default());
+            }),
+        ),
+        (
+            "clear_buffer",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.clear_buffer(&buffer_dest, 0, None);
+            }),
+        ),
+        (
+            "insert_debug_marker",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.insert_debug_marker("marker");
+            }),
+        ),
+        (
+            "push_debug_group",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.push_debug_group("marker");
+            }),
+        ),
+        (
+            "pop_debug_group",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.pop_debug_group();
+            }),
+        ),
+        (
+            "resolve_query_set",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.resolve_query_set(&query_set, 0..1, &buffer_dest, 0);
+            }),
+        ),
+        (
+            "write_timestamp",
+            Box::new(|encoder: &mut wgpu::CommandEncoder| {
+                encoder.write_timestamp(&query_set, 0);
+            }),
+        ),
+    ];
+
+    for (op_name, op) in recording_ops.iter() {
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        let pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
+
+        ctx.device.push_error_scope(wgpu::ErrorFilter::Validation);
+
+        log::info!("Testing operation {} on a locked command encoder", op_name);
+        fail(
+            &ctx.device,
+            || op(&mut encoder),
+            Some("Command encoder is locked"),
+        );
+
+        // Drop the pass - this also fails now since the encoder is invalid:
+        fail(
+            &ctx.device,
+            || drop(pass),
+            Some("Command encoder is invalid"),
+        );
+        // Also, it's not possible to create a new pass on the encoder:
+        fail(
+            &ctx.device,
+            || encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default()),
+            Some("Command encoder is invalid"),
+        );
+    }
+
+    // Test encoder finishing separately since it consumes the encoder and doesn't fit above pattern.
+    {
+        let mut encoder = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+        let pass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor::default());
+        fail(
+            &ctx.device,
+            || encoder.finish(),
+            Some("Command encoder is locked"),
+        );
+        fail(
+            &ctx.device,
+            || drop(pass),
+            Some("Command encoder is invalid"),
+        );
+    }
+}
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@ -11,7 +11,7 @@ mod buffer;
 mod buffer_copy;
 mod buffer_usages;
 mod clear_texture;
-mod compute_pass_resource_ownership;
+mod compute_pass_ownership;
 mod create_surface_error;
 mod device;
 mod encoder;
--- a/wgpu-core/src/command/clear.rs
+++ b/wgpu-core/src/command/clear.rs
@ -26,8 +26,6 @@ use wgt::{math::align_to, BufferAddress, BufferUsages, ImageSubresourceRange, Te
 pub enum ClearError {
    #[error("To use clear_texture the CLEAR_TEXTURE feature needs to be enabled")]
    MissingClearTextureFeature,
-    #[error("Command encoder {0:?} is invalid")]
-    InvalidCommandEncoder(CommandEncoderId),
    #[error("Device {0:?} is invalid")]
    InvalidDevice(DeviceId),
    #[error("Buffer {0:?} is invalid or destroyed")]
@ -74,6 +72,8 @@ whereas subesource range specified start {subresource_base_array_layer} and coun
    },
    #[error(transparent)]
    Device(#[from] DeviceError),
+    #[error(transparent)]
+    CommandEncoderError(#[from] super::CommandEncoderError),
 }

 impl Global {
@ -89,8 +89,7 @@ impl Global {

        let hub = A::hub(self);

-        let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)
-            .map_err(|_| ClearError::InvalidCommandEncoder(command_encoder_id))?;
+        let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?;
        let mut cmd_buf_data = cmd_buf.data.lock();
        let cmd_buf_data = cmd_buf_data.as_mut().unwrap();

@ -183,8 +182,7 @@ impl Global {

        let hub = A::hub(self);

-        let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)
-            .map_err(|_| ClearError::InvalidCommandEncoder(command_encoder_id))?;
+        let cmd_buf = CommandBuffer::get_encoder(hub, command_encoder_id)?;
        let mut cmd_buf_data = cmd_buf.data.lock();
        let cmd_buf_data = cmd_buf_data.as_mut().unwrap();

--- a/wgpu-core/src/command/compute.rs
+++ b/wgpu-core/src/command/compute.rs
@ -13,7 +13,7 @@ use crate::{
    global::Global,
    hal_api::HalApi,
    hal_label,
-    id::{self, DeviceId},
+    id::{self},
    init_tracker::MemoryInitKind,
    resource::{self, Resource},
    snatch::SnatchGuard,
@ -34,14 +34,20 @@ use wgt::{BufferAddress, DynamicOffset};
 use std::sync::Arc;
 use std::{fmt, mem, str};

+use super::DynComputePass;
+
 pub struct ComputePass<A: HalApi> {
    /// All pass data & records is stored here.
    ///
-    /// If this is `None`, the pass has been ended and can no longer be used.
+    /// If this is `None`, the pass is in the 'ended' state and can no longer be used.
    /// Any attempt to record more commands will result in a validation error.
    base: Option<BasePass<ArcComputeCommand<A>>>,

-    parent_id: id::CommandEncoderId,
+    /// Parent command buffer that this pass records commands into.
+    ///
+    /// If it is none, this pass is invalid and any operation on it will return an error.
+    parent: Option<Arc<CommandBuffer<A>>>,
+
    timestamp_writes: Option<ComputePassTimestampWrites>,

    // Resource binding dedupe state.
@ -50,10 +56,11 @@ pub struct ComputePass<A: HalApi> {
 }

 impl<A: HalApi> ComputePass<A> {
-    fn new(parent_id: id::CommandEncoderId, desc: &ComputePassDescriptor) -> Self {
+    /// If the parent command buffer is invalid, the returned pass will be invalid.
+    fn new(parent: Option<Arc<CommandBuffer<A>>>, desc: &ComputePassDescriptor) -> Self {
        Self {
-            base: Some(BasePass::<ArcComputeCommand<A>>::new(&desc.label)),
-            parent_id,
+            base: Some(BasePass::new(&desc.label)),
+            parent,
            timestamp_writes: desc.timestamp_writes.cloned(),

            current_bind_groups: BindGroupStateChange::new(),
@ -62,8 +69,8 @@ impl<A: HalApi> ComputePass<A> {
    }

    #[inline]
-    pub fn parent_id(&self) -> id::CommandEncoderId {
-        self.parent_id
+    pub fn parent_id(&self) -> Option<id::CommandBufferId> {
+        self.parent.as_ref().map(|cmd_buf| cmd_buf.as_info().id())
    }

    #[inline]
@ -84,7 +91,7 @@ impl<A: HalApi> ComputePass<A> {

 impl<A: HalApi> fmt::Debug for ComputePass<A> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        write!(f, "ComputePass {{ encoder_id: {:?} }}", self.parent_id)
+        write!(f, "ComputePass {{ parent: {:?} }}", self.parent_id())
    }
 }

@ -129,10 +136,12 @@ pub enum ComputePassErrorInner {
    Device(#[from] DeviceError),
    #[error(transparent)]
    Encoder(#[from] CommandEncoderError),
+    #[error("Parent encoder is invalid")]
+    InvalidParentEncoder,
    #[error("Bind group at index {0:?} is invalid")]
    InvalidBindGroup(u32),
    #[error("Device {0:?} is invalid")]
-    InvalidDevice(DeviceId),
+    InvalidDevice(id::DeviceId),
    #[error("Bind group index {index} is greater than the device's requested `max_bind_group` limit {max}")]
    BindGroupIndexOutOfRange { index: u32, max: u32 },
    #[error("Compute pipeline {0:?} is invalid")]
@ -292,31 +301,55 @@ impl<'a, A: HalApi> State<'a, A> {
 // Running the compute pass.

 impl Global {
+    /// Creates a compute pass.
+    ///
+    /// If creation fails, an invalid pass is returned.
+    /// Any operation on an invalid pass will return an error.
+    ///
+    /// If successful, puts the encoder into the [`CommandEncoderStatus::Locked`] state.
    pub fn command_encoder_create_compute_pass<A: HalApi>(
        &self,
-        parent_id: id::CommandEncoderId,
+        encoder_id: id::CommandEncoderId,
        desc: &ComputePassDescriptor,
-    ) -> ComputePass<A> {
-        ComputePass::new(parent_id, desc)
+    ) -> (ComputePass<A>, Option<CommandEncoderError>) {
+        let hub = A::hub(self);
+
+        match CommandBuffer::lock_encoder(hub, encoder_id) {
+            Ok(cmd_buf) => (ComputePass::new(Some(cmd_buf), desc), None),
+            Err(err) => (ComputePass::new(None, desc), Some(err)),
+        }
    }

+    /// Creates a type erased compute pass.
+    ///
+    /// If creation fails, an invalid pass is returned.
+    /// Any operation on an invalid pass will return an error.
    pub fn command_encoder_create_compute_pass_dyn<A: HalApi>(
        &self,
-        parent_id: id::CommandEncoderId,
+        encoder_id: id::CommandEncoderId,
        desc: &ComputePassDescriptor,
-    ) -> Box<dyn super::DynComputePass> {
-        Box::new(ComputePass::<A>::new(parent_id, desc))
+    ) -> (Box<dyn DynComputePass>, Option<CommandEncoderError>) {
+        let (pass, err) = self.command_encoder_create_compute_pass::<A>(encoder_id, desc);
+        (Box::new(pass), err)
    }

    pub fn compute_pass_end<A: HalApi>(
        &self,
        pass: &mut ComputePass<A>,
    ) -> Result<(), ComputePassError> {
-        let base = pass.base.take().ok_or(ComputePassError {
-            scope: PassErrorScope::Pass(pass.parent_id),
-            inner: ComputePassErrorInner::PassEnded,
-        })?;
-        self.compute_pass_end_impl(pass.parent_id, base, pass.timestamp_writes.as_ref())
+        let scope = PassErrorScope::Pass(pass.parent_id());
+        let Some(parent) = pass.parent.as_ref() else {
+            return Err(ComputePassErrorInner::InvalidParentEncoder).map_pass_err(scope);
+        };
+
+        parent.unlock_encoder().map_pass_err(scope)?;
+
+        let base = pass
+            .base
+            .take()
+            .ok_or(ComputePassErrorInner::PassEnded)
+            .map_pass_err(scope)?;
+        self.compute_pass_end_impl(parent, base, pass.timestamp_writes.as_ref())
    }

    #[doc(hidden)]
@ -326,10 +359,14 @@ impl Global {
        base: BasePass<ComputeCommand>,
        timestamp_writes: Option<&ComputePassTimestampWrites>,
    ) -> Result<(), ComputePassError> {
+        let hub = A::hub(self);
+
+        let cmd_buf = CommandBuffer::get_encoder(hub, encoder_id)
+            .map_pass_err(PassErrorScope::PassEncoder(encoder_id))?;
        let commands = ComputeCommand::resolve_compute_command_ids(A::hub(self), &base.commands)?;

        self.compute_pass_end_impl::<A>(
-            encoder_id,
+            &cmd_buf,
            BasePass {
                label: base.label,
                commands,
@ -343,17 +380,15 @@ impl Global {

    fn compute_pass_end_impl<A: HalApi>(
        &self,
-        encoder_id: id::CommandEncoderId,
+        cmd_buf: &CommandBuffer<A>,
        base: BasePass<ArcComputeCommand<A>>,
        timestamp_writes: Option<&ComputePassTimestampWrites>,
    ) -> Result<(), ComputePassError> {
        profiling::scope!("CommandEncoder::run_compute_pass");
-        let pass_scope = PassErrorScope::Pass(encoder_id);
+        let pass_scope = PassErrorScope::Pass(Some(cmd_buf.as_info().id()));

        let hub = A::hub(self);

-        let cmd_buf: Arc<CommandBuffer<A>> =
-            CommandBuffer::get_encoder(hub, encoder_id).map_pass_err(pass_scope)?;
        let device = &cmd_buf.device;
        if !device.is_valid() {
            return Err(ComputePassErrorInner::InvalidDevice(
--- a/wgpu-core/src/command/mod.rs
+++ b/wgpu-core/src/command/mod.rs
@ -25,7 +25,6 @@ use self::memory_init::CommandBufferTextureMemoryActions;
 use crate::device::{Device, DeviceError};
 use crate::error::{ErrorFormatter, PrettyError};
 use crate::hub::Hub;
-use crate::id::CommandBufferId;
 use crate::lock::{rank, Mutex};
 use crate::snatch::SnatchGuard;

@ -51,10 +50,23 @@ pub(crate) enum CommandEncoderStatus {
    /// [`compute_pass_end`] require the encoder to be in this
    /// state.
    ///
+    /// This corresponds to WebGPU's "open" state.
+    /// See <https://www.w3.org/TR/webgpu/#encoder-state-open>
+    ///
    /// [`command_encoder_clear_buffer`]: Global::command_encoder_clear_buffer
    /// [`compute_pass_end`]: Global::compute_pass_end
    Recording,

+    /// Locked by a render or compute pass.
+    ///
+    /// This state is entered when a render/compute pass is created,
+    /// and exited when the pass is ended.
+    ///
+    /// As long as the command encoder is locked, any command building operation on it will fail
+    /// and put the encoder into the [`CommandEncoderStatus::Error`] state.
+    /// See <https://www.w3.org/TR/webgpu/#encoder-state-locked>
+    Locked,
+
    /// Command recording is complete, and the buffer is ready for submission.
    ///
    /// [`Global::command_encoder_finish`] transitions a
@ -410,6 +422,38 @@ impl<A: HalApi> CommandBuffer<A> {
 }

 impl<A: HalApi> CommandBuffer<A> {
+    fn get_encoder_impl(
+        hub: &Hub<A>,
+        id: id::CommandEncoderId,
+        lock_on_acquire: bool,
+    ) -> Result<Arc<Self>, CommandEncoderError> {
+        let storage = hub.command_buffers.read();
+        match storage.get(id.into_command_buffer_id()) {
+            Ok(cmd_buf) => {
+                let mut cmd_buf_data = cmd_buf.data.lock();
+                let cmd_buf_data = cmd_buf_data.as_mut().unwrap();
+                match cmd_buf_data.status {
+                    CommandEncoderStatus::Recording => {
+                        if lock_on_acquire {
+                            cmd_buf_data.status = CommandEncoderStatus::Locked;
+                        }
+                        Ok(cmd_buf.clone())
+                    }
+                    CommandEncoderStatus::Locked => {
+                        // Any operation on a locked encoder is required to put it into the invalid/error state.
+                        // See https://www.w3.org/TR/webgpu/#encoder-state-locked
+                        cmd_buf_data.encoder.discard();
+                        cmd_buf_data.status = CommandEncoderStatus::Error;
+                        Err(CommandEncoderError::Locked)
+                    }
+                    CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording),
+                    CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid),
+                }
+            }
+            Err(_) => Err(CommandEncoderError::Invalid),
+        }
+    }
+
    /// Return the [`CommandBuffer`] for `id`, for recording new commands.
    ///
    /// In `wgpu_core`, the [`CommandBuffer`] type serves both as encoder and
@ -420,14 +464,37 @@ impl<A: HalApi> CommandBuffer<A> {
        hub: &Hub<A>,
        id: id::CommandEncoderId,
    ) -> Result<Arc<Self>, CommandEncoderError> {
-        let storage = hub.command_buffers.read();
-        match storage.get(id.into_command_buffer_id()) {
-            Ok(cmd_buf) => match cmd_buf.data.lock().as_ref().unwrap().status {
-                CommandEncoderStatus::Recording => Ok(cmd_buf.clone()),
-                CommandEncoderStatus::Finished => Err(CommandEncoderError::NotRecording),
-                CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid),
-            },
-            Err(_) => Err(CommandEncoderError::Invalid),
+        let lock_on_acquire = false;
+        Self::get_encoder_impl(hub, id, lock_on_acquire)
+    }
+
+    /// Return the [`CommandBuffer`] for `id` and if successful puts it into the [`CommandEncoderStatus::Locked`] state.
+    ///
+    /// See [`CommandBuffer::get_encoder`].
+    /// Call [`CommandBuffer::unlock_encoder`] to put the [`CommandBuffer`] back into the [`CommandEncoderStatus::Recording`] state.
+    fn lock_encoder(
+        hub: &Hub<A>,
+        id: id::CommandEncoderId,
+    ) -> Result<Arc<Self>, CommandEncoderError> {
+        let lock_on_acquire = true;
+        Self::get_encoder_impl(hub, id, lock_on_acquire)
+    }
+
+    /// Unlocks the [`CommandBuffer`] for `id` and puts it back into the [`CommandEncoderStatus::Recording`] state.
+    ///
+    /// This function is the counterpart to [`CommandBuffer::lock_encoder`].
+    /// It is only valid to call this function if the encoder is in the [`CommandEncoderStatus::Locked`] state.
+    fn unlock_encoder(&self) -> Result<(), CommandEncoderError> {
+        let mut data_lock = self.data.lock();
+        let status = &mut data_lock.as_mut().unwrap().status;
+        match *status {
+            CommandEncoderStatus::Recording => Err(CommandEncoderError::Invalid),
+            CommandEncoderStatus::Locked => {
+                *status = CommandEncoderStatus::Recording;
+                Ok(())
+            }
+            CommandEncoderStatus::Finished => Err(CommandEncoderError::Invalid),
+            CommandEncoderStatus::Error => Err(CommandEncoderError::Invalid),
        }
    }

@ -564,6 +631,8 @@ pub enum CommandEncoderError {
    NotRecording,
    #[error(transparent)]
    Device(#[from] DeviceError),
+    #[error("Command encoder is locked by a previously created render/compute pass. Before recording any new commands, the pass must be ended.")]
+    Locked,
 }

 impl Global {
@ -571,7 +640,7 @@ impl Global {
        &self,
        encoder_id: id::CommandEncoderId,
        _desc: &wgt::CommandBufferDescriptor<Label>,
-    ) -> (CommandBufferId, Option<CommandEncoderError>) {
+    ) -> (id::CommandBufferId, Option<CommandEncoderError>) {
        profiling::scope!("CommandEncoder::finish");

        let hub = A::hub(self);
@ -592,6 +661,11 @@ impl Global {
                            None
                        }
                    }
+                    CommandEncoderStatus::Locked => {
+                        cmd_buf_data.encoder.discard();
+                        cmd_buf_data.status = CommandEncoderStatus::Error;
+                        Some(CommandEncoderError::Locked)
+                    }
                    CommandEncoderStatus::Finished => Some(CommandEncoderError::NotRecording),
                    CommandEncoderStatus::Error => {
                        cmd_buf_data.encoder.discard();
@ -805,7 +879,12 @@ pub enum PassErrorScope {
    #[error("In a bundle parameter")]
    Bundle,
    #[error("In a pass parameter")]
-    Pass(id::CommandEncoderId),
+    // TODO: To be removed in favor of `Pass`.
+    // ComputePass is already operating on command buffer instead,
+    // same should apply to RenderPass in the future.
+    PassEncoder(id::CommandEncoderId),
+    #[error("In a pass parameter")]
+    Pass(Option<id::CommandBufferId>),
    #[error("In a set_bind_group command")]
    SetBindGroup(id::BindGroupId),
    #[error("In a set_pipeline command")]
@ -859,9 +938,12 @@ impl PrettyError for PassErrorScope {
    fn fmt_pretty(&self, fmt: &mut ErrorFormatter) {
        // This error is not in the error chain, only notes are needed
        match *self {
-            Self::Pass(id) => {
+            Self::PassEncoder(id) => {
                fmt.command_buffer_label(&id.into_command_buffer_id());
            }
+            Self::Pass(Some(id)) => {
+                fmt.command_buffer_label(&id);
+            }
            Self::SetBindGroup(id) => {
                fmt.bind_group_label(&id);
            }
--- a/wgpu-core/src/command/render.rs
+++ b/wgpu-core/src/command/render.rs
@ -1341,7 +1341,7 @@ impl Global {
            .contains(wgt::InstanceFlags::DISCARD_HAL_LABELS);
        let label = hal_label(base.label, self.instance.flags);

-        let pass_scope = PassErrorScope::Pass(encoder_id);
+        let pass_scope = PassErrorScope::PassEncoder(encoder_id);

        let hub = A::hub(self);

--- a/wgpu-core/src/registry.rs
+++ b/wgpu-core/src/registry.rs
@ -176,8 +176,14 @@ impl<T: Resource> Registry<T> {
        let guard = self.storage.read();

        let type_name = guard.kind();
-        match guard.get(id) {
-            Ok(res) => {
+
+        // Using `get` over `try_get` is fine for the most part.
+        // However, there's corner cases where it can happen that a resource still holds an Arc
+        // to another resource that was already dropped explicitly from the registry.
+        // That resource is now in an invalid state, likely causing an error that lead
+        // us here, trying to print its label but failing because the id is now vacant.
+        match guard.try_get(id) {
+            Ok(Some(res)) => {
                let label = res.label();
                if label.is_empty() {
                    format!("<{}-{:?}>", type_name, id.unzip())
@ -185,7 +191,7 @@ impl<T: Resource> Registry<T> {
                    label.to_owned()
                }
            }
-            Err(_) => format!(
+            _ => format!(
                "<Invalid-{} label={}>",
                type_name,
                guard.label_for_invalid_id(id)
--- a/wgpu/src/backend/wgpu_core.rs
+++ b/wgpu/src/backend/wgpu_core.rs
@ -1918,13 +1918,25 @@ impl crate::Context for ContextWgpuCore {
                    end_of_pass_write_index: tw.end_of_pass_write_index,
                });

+        let (pass, err) = gfx_select!(encoder => self.0.command_encoder_create_compute_pass_dyn(*encoder, &wgc::command::ComputePassDescriptor {
+            label: desc.label.map(Borrowed),
+            timestamp_writes: timestamp_writes.as_ref(),
+        }));
+
+        if let Some(cause) = err {
+            self.handle_error(
+                &encoder_data.error_sink,
+                cause,
+                LABEL,
+                desc.label,
+                "CommandEncoder::begin_compute_pass",
+            );
+        }
+
        (
            Unused,
            Self::ComputePassData {
-                pass: gfx_select!(encoder => self.0.command_encoder_create_compute_pass_dyn(*encoder, &wgc::command::ComputePassDescriptor {
-                    label: desc.label.map(Borrowed),
-                    timestamp_writes: timestamp_writes.as_ref(),
-                })),
+                pass,
                error_sink: encoder_data.error_sink.clone(),
            },
        )
--- a/wgpu/src/lib.rs
+++ b/wgpu/src/lib.rs
@ -1286,10 +1286,10 @@ pub struct RenderPass<'a> {
 /// Corresponds to [WebGPU `GPUComputePassEncoder`](
 /// https://gpuweb.github.io/gpuweb/#compute-pass-encoder).
 #[derive(Debug)]
-pub struct ComputePass<'a> {
+pub struct ComputePass {
    id: ObjectId,
    data: Box<Data>,
-    parent: &'a mut CommandEncoder,
+    context: Arc<C>,
 }

 /// Encodes a series of GPU operations into a reusable "render bundle".
@ -3876,7 +3876,7 @@ impl CommandEncoder {
    /// Begins recording of a compute pass.
    ///
    /// This function returns a [`ComputePass`] object which records a single compute pass.
-    pub fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<'_>) -> ComputePass<'_> {
+    pub fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor<'_>) -> ComputePass {
        let id = self.id.as_ref().unwrap();
        let (id, data) = DynContext::command_encoder_begin_compute_pass(
            &*self.context,
@ -3887,7 +3887,7 @@ impl CommandEncoder {
        ComputePass {
            id,
            data,
-            parent: self,
+            context: self.context.clone(),
        }
    }

@ -4728,7 +4728,7 @@ impl<'a> Drop for RenderPass<'a> {
    }
 }

-impl<'a> ComputePass<'a> {
+impl ComputePass {
    /// Sets the active bind group for a given bind group index. The bind group layout
    /// in the active pipeline when the `dispatch()` function is called must match the layout of this bind group.
    ///
@ -4742,7 +4742,7 @@ impl<'a> ComputePass<'a> {
        offsets: &[DynamicOffset],
    ) {
        DynContext::compute_pass_set_bind_group(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            index,
@ -4755,7 +4755,7 @@ impl<'a> ComputePass<'a> {
    /// Sets the active compute pipeline.
    pub fn set_pipeline(&mut self, pipeline: &ComputePipeline) {
        DynContext::compute_pass_set_pipeline(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            &pipeline.id,
@ -4766,7 +4766,7 @@ impl<'a> ComputePass<'a> {
    /// Inserts debug marker.
    pub fn insert_debug_marker(&mut self, label: &str) {
        DynContext::compute_pass_insert_debug_marker(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            label,
@ -4776,7 +4776,7 @@ impl<'a> ComputePass<'a> {
    /// Start record commands and group it into debug marker group.
    pub fn push_debug_group(&mut self, label: &str) {
        DynContext::compute_pass_push_debug_group(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            label,
@ -4785,11 +4785,7 @@ impl<'a> ComputePass<'a> {

    /// Stops command recording and creates debug group.
    pub fn pop_debug_group(&mut self) {
-        DynContext::compute_pass_pop_debug_group(
-            &*self.parent.context,
-            &mut self.id,
-            self.data.as_mut(),
-        );
+        DynContext::compute_pass_pop_debug_group(&*self.context, &mut self.id, self.data.as_mut());
    }

    /// Dispatches compute work operations.
@ -4797,7 +4793,7 @@ impl<'a> ComputePass<'a> {
    /// `x`, `y` and `z` denote the number of work groups to dispatch in each dimension.
    pub fn dispatch_workgroups(&mut self, x: u32, y: u32, z: u32) {
        DynContext::compute_pass_dispatch_workgroups(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            x,
@ -4815,7 +4811,7 @@ impl<'a> ComputePass<'a> {
        indirect_offset: BufferAddress,
    ) {
        DynContext::compute_pass_dispatch_workgroups_indirect(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            &indirect_buffer.id,
@ -4826,7 +4822,7 @@ impl<'a> ComputePass<'a> {
 }

 /// [`Features::PUSH_CONSTANTS`] must be enabled on the device in order to call these functions.
-impl<'a> ComputePass<'a> {
+impl ComputePass {
    /// Set push constant data for subsequent dispatch calls.
    ///
    /// Write the bytes in `data` at offset `offset` within push constant
@ -4837,7 +4833,7 @@ impl<'a> ComputePass<'a> {
    /// call will write `data` to bytes `4..12` of push constant storage.
    pub fn set_push_constants(&mut self, offset: u32, data: &[u8]) {
        DynContext::compute_pass_set_push_constants(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            offset,
@ -4847,7 +4843,7 @@ impl<'a> ComputePass<'a> {
 }

 /// [`Features::TIMESTAMP_QUERY_INSIDE_PASSES`] must be enabled on the device in order to call these functions.
-impl<'a> ComputePass<'a> {
+impl ComputePass {
    /// Issue a timestamp command at this point in the queue. The timestamp will be written to the specified query set, at the specified index.
    ///
    /// Must be multiplied by [`Queue::get_timestamp_period`] to get
@ -4856,7 +4852,7 @@ impl<'a> ComputePass<'a> {
    /// for a string of operations to complete.
    pub fn write_timestamp(&mut self, query_set: &QuerySet, query_index: u32) {
        DynContext::compute_pass_write_timestamp(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            &query_set.id,
@ -4867,12 +4863,12 @@ impl<'a> ComputePass<'a> {
 }

 /// [`Features::PIPELINE_STATISTICS_QUERY`] must be enabled on the device in order to call these functions.
-impl<'a> ComputePass<'a> {
+impl ComputePass {
    /// Start a pipeline statistics query on this compute pass. It can be ended with
    /// `end_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
    pub fn begin_pipeline_statistics_query(&mut self, query_set: &QuerySet, query_index: u32) {
        DynContext::compute_pass_begin_pipeline_statistics_query(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
            &query_set.id,
@ -4885,18 +4881,17 @@ impl<'a> ComputePass<'a> {
    /// `begin_pipeline_statistics_query`. Pipeline statistics queries may not be nested.
    pub fn end_pipeline_statistics_query(&mut self) {
        DynContext::compute_pass_end_pipeline_statistics_query(
-            &*self.parent.context,
+            &*self.context,
            &mut self.id,
            self.data.as_mut(),
        );
    }
 }

-impl<'a> Drop for ComputePass<'a> {
+impl Drop for ComputePass {
    fn drop(&mut self) {
        if !thread::panicking() {
-            self.parent
-                .context
+            self.context
                .compute_pass_end(&mut self.id, self.data.as_mut());
        }
    }