diff --git a/player/tests/data/all.ron b/player/tests/data/all.ron index 13b42803c..8d74fa884 100644 --- a/player/tests/data/all.ron +++ b/player/tests/data/all.ron @@ -4,8 +4,11 @@ "bind-group.ron", "buffer-copy.ron", "clear-buffer-texture.ron", - "buffer-zero-init.ron", "pipeline-statistics-query.ron", "quad.ron", + "zero-init-buffer.ron", + "zero-init-texture-binding.ron", + "zero-init-texture-copytobuffer.ron", + "zero-init-texture-rendertarget.ron", ], ) \ No newline at end of file diff --git a/player/tests/data/zero-16k.bin b/player/tests/data/zero-16k.bin new file mode 100644 index 000000000..294f4016d Binary files /dev/null and b/player/tests/data/zero-16k.bin differ diff --git a/player/tests/data/buffer-zero-init-for-binding.wgsl b/player/tests/data/zero-init-buffer-for-binding.wgsl similarity index 100% rename from player/tests/data/buffer-zero-init-for-binding.wgsl rename to player/tests/data/zero-init-buffer-for-binding.wgsl diff --git a/player/tests/data/buffer-zero-init.ron b/player/tests/data/zero-init-buffer.ron similarity index 98% rename from player/tests/data/buffer-zero-init.ron rename to player/tests/data/zero-init-buffer.ron index b415a81c9..a51c5cc4f 100644 --- a/player/tests/data/buffer-zero-init.ron +++ b/player/tests/data/zero-init-buffer.ron @@ -86,7 +86,7 @@ label: None, flags: (bits: 3), ), - data: "buffer-zero-init-for-binding.wgsl", + data: "zero-init-buffer-for-binding.wgsl", ), CreateBuffer(Id(3, 1, Empty), ( label: Some("used in binding"), diff --git a/player/tests/data/zero-init-texture-binding.ron b/player/tests/data/zero-init-texture-binding.ron new file mode 100644 index 000000000..408d5eb20 --- /dev/null +++ b/player/tests/data/zero-init-texture-binding.ron @@ -0,0 +1,213 @@ +( + features: (bits: 0x0), + expectations: [ + ( + name: "Sampled Texture", + buffer: (index: 0, epoch: 1), + offset: 0, + data: File("zero-16k.bin", 16384), + ), + ( + name: "Storage Texture", + buffer: (index: 1, epoch: 1), + offset: 0, + data: 
File("zero-16k.bin", 16384), + ), + // MISSING: Texture binding arrays + // MISSING: Partial views + ], + actions: [ + CreateTexture(Id(0, 1, Empty), ( + label: Some("Sampled Texture"), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + mip_level_count: 1, + sample_count: 1, + dimension: D2, + format: Rgba8Unorm, + usage: ( + bits: 5, // SAMPLED + COPY_SRC + ), + )), + CreateTextureView( + id: Id(0, 1, Empty), + parent_id: Id(0, 1, Empty), + desc: (), + ), + CreateBuffer( + Id(0, 1, Empty), + ( + label: Some("Sampled Texture Buffer"), + size: 16384, + usage: ( + bits: 9, + ), + mapped_at_creation: false, + ), + ), + CreateTexture(Id(1, 1, Empty), ( + label: Some("Storage Texture"), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + mip_level_count: 1, + sample_count: 1, + dimension: D2, + format: Rgba8Unorm, + usage: ( + bits: 9, // STORAGE + COPY_SRC + ), + )), + CreateTextureView( + id: Id(1, 1, Empty), + parent_id: Id(1, 1, Empty), + desc: (), + ), + CreateBuffer( + Id(1, 1, Empty), + ( + label: Some("Storage Texture Buffer"), + size: 16384, + usage: ( + bits: 9, + ), + mapped_at_creation: false, + ), + ), + + + CreateBindGroupLayout(Id(0, 1, Empty), ( + label: None, + entries: [ + ( + binding: 0, + visibility: ( + bits: 4, // COMPUTE + ), + ty: Texture ( + sample_type: Float(filterable: true), + view_dimension: D2, + multisampled: false, + ), + count: None, + ), + ( + binding: 1, + visibility: ( + bits: 4, // COMPUTE + ), + ty: StorageTexture ( + access: WriteOnly, + format: Rgba8Unorm, + view_dimension: D2, + ), + count: None, + ), + ], + )), + CreateBindGroup(Id(0, 1, Empty), ( + label: None, + layout: Id(0, 1, Empty), + entries: [ + ( + binding: 0, + resource: TextureView(Id(0, 1, Empty)), + ), + ( + binding: 1, + resource: TextureView(Id(1, 1, Empty)), + ), + ], + )), + CreatePipelineLayout(Id(0, 1, Empty), ( + label: None, + bind_group_layouts: [ + Id(0, 1, Empty), + ], + push_constant_ranges: [], + )), + 
CreateShaderModule( + id: Id(0, 1, Empty), + desc: ( + label: None, + flags: (bits: 3), + ), + data: "zero-init-texture-binding.wgsl", + ), + CreateComputePipeline( + id: Id(0, 1, Empty), + desc: ( + label: None, + layout: Some(Id(0, 1, Empty)), + stage: ( + module: Id(0, 1, Empty), + entry_point: "main", + ), + ), + ), + + Submit(1, [ + RunComputePass( + base: ( + commands: [ + SetPipeline(Id(0, 1, Empty)), + SetBindGroup( + index: 0, + num_dynamic_offsets: 0, + bind_group_id: Id(0, 1, Empty), + ), + Dispatch((4, 1, 1)), + ], + dynamic_offsets: [], + string_data: [], + push_constant_data: [], + ), + ), + CopyTextureToBuffer( + src: ( + texture: Id(0, 1, Empty), + mip_level: 0, + array_layer: 0, + ), + dst: ( + buffer: Id(0, 1, Empty), + layout: ( + offset: 0, + bytes_per_row: Some(256), + rows_per_image: Some(64), + ), + ), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + ), + CopyTextureToBuffer( + src: ( + texture: Id(1, 1, Empty), + mip_level: 0, + array_layer: 0, + ), + dst: ( + buffer: Id(1, 1, Empty), + layout: ( + offset: 0, + bytes_per_row: Some(256), + rows_per_image: Some(64), + ), + ), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + ), + ]), + ], +) \ No newline at end of file diff --git a/player/tests/data/zero-init-texture-binding.wgsl b/player/tests/data/zero-init-texture-binding.wgsl new file mode 100644 index 000000000..94968b704 --- /dev/null +++ b/player/tests/data/zero-init-texture-binding.wgsl @@ -0,0 +1,6 @@ +[[group(0), binding(0)]] var tex: texture_2d; +[[group(0), binding(1)]] var tex_storage: texture_storage_2d; + +[[stage(compute), workgroup_size(1)]] +fn main([[builtin(global_invocation_id)]] global_id: vec3) { +} diff --git a/player/tests/data/zero-init-texture-copytobuffer.ron b/player/tests/data/zero-init-texture-copytobuffer.ron new file mode 100644 index 000000000..c006e9c09 --- /dev/null +++ b/player/tests/data/zero-init-texture-copytobuffer.ron @@ -0,0 +1,62 @@ +( + features: (bits: 
0x0), + expectations: [ + ( + name: "Copy to Buffer", + buffer: (index: 0, epoch: 1), + offset: 0, + data: File("zero-16k.bin", 16384), + ), + // MISSING: Partial copies + ], + actions: [ + CreateTexture(Id(0, 1, Empty), ( + label: Some("Copy To Buffer Texture"), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + mip_level_count: 1, + sample_count: 1, + dimension: D2, + format: Rgba8Unorm, + usage: ( + bits: 1, // COPY_SRC + ), + )), + CreateBuffer( + Id(0, 1, Empty), + ( + label: Some("Copy to Buffer Buffer"), + size: 16384, + usage: ( + bits: 9, + ), + mapped_at_creation: false, + ), + ), + Submit(1, [ + CopyTextureToBuffer( + src: ( + texture: Id(0, 1, Empty), + mip_level: 0, + array_layer: 0, + ), + dst: ( + buffer: Id(0, 1, Empty), + layout: ( + offset: 0, + bytes_per_row: Some(256), + rows_per_image: Some(64), + ), + ), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + ), + ]), + ], +) \ No newline at end of file diff --git a/player/tests/data/zero-init-texture-rendertarget.ron b/player/tests/data/zero-init-texture-rendertarget.ron new file mode 100644 index 000000000..59ce62b5b --- /dev/null +++ b/player/tests/data/zero-init-texture-rendertarget.ron @@ -0,0 +1,91 @@ +( + features: (bits: 0x0), + expectations: [ + ( + name: "Render Target", + buffer: (index: 0, epoch: 1), + offset: 0, + data: File("zero-16k.bin", 16384), + ), + // MISSING: Partial view. 
+ ], + actions: [ + CreateTexture(Id(0, 1, Empty), ( + label: Some("Render Target Texture"), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + mip_level_count: 1, + sample_count: 1, + dimension: D2, + format: Rgba8Unorm, + usage: ( + bits: 17, // RENDER_ATTACHMENT + COPY_SRC + ), + )), + CreateTextureView( + id: Id(0, 1, Empty), + parent_id: Id(0, 1, Empty), + desc: (), + ), + CreateBuffer( + Id(0, 1, Empty), + ( + label: Some("Render Target Buffer"), + size: 16384, + usage: ( + bits: 9, + ), + mapped_at_creation: false, + ), + ), + + Submit(1, [ + RunRenderPass( + base: ( + commands: [], + dynamic_offsets: [], + string_data: [], + push_constant_data: [], + ), + target_colors: [ + ( + view: Id(0, 1, Empty), + resolve_target: None, + channel: ( + load_op: Load, + store_op: Store, + clear_value: ( + r: 1, g: 1, b: 1, a: 1, + ), + read_only: false, + ), + ), + ], + target_depth_stencil: None, + ), + CopyTextureToBuffer( + src: ( + texture: Id(0, 1, Empty), + mip_level: 0, + array_layer: 0, + ), + dst: ( + buffer: Id(0, 1, Empty), + layout: ( + offset: 0, + bytes_per_row: Some(256), + rows_per_image: Some(64), + ), + ), + size: ( + width: 64, + height: 64, + depth_or_array_layers: 1, + ), + ), + ]), + ], +) \ No newline at end of file diff --git a/wgpu-core/src/binding_model.rs b/wgpu-core/src/binding_model.rs index 457b0b914..de9447f3e 100644 --- a/wgpu-core/src/binding_model.rs +++ b/wgpu-core/src/binding_model.rs @@ -3,7 +3,7 @@ use crate::{ error::{ErrorFormatter, PrettyError}, hub::Resource, id::{BindGroupLayoutId, BufferId, DeviceId, SamplerId, TextureViewId, Valid}, - init_tracker::BufferInitTrackerAction, + init_tracker::{BufferInitTrackerAction, TextureInitTrackerAction}, track::{TrackerSet, UsageConflict, DUMMY_SELECTOR}, validation::{MissingBufferUsageError, MissingTextureUsageError}, FastHashMap, Label, LifeGuard, MultiRefCount, Stored, @@ -716,6 +716,7 @@ pub struct BindGroup { pub(crate) life_guard: LifeGuard, pub(crate) used: 
TrackerSet, pub(crate) used_buffer_ranges: Vec, + pub(crate) used_texture_ranges: Vec, pub(crate) dynamic_binding_info: Vec, } diff --git a/wgpu-core/src/command/bundle.rs b/wgpu-core/src/command/bundle.rs index f6825fb13..01c710eab 100644 --- a/wgpu-core/src/command/bundle.rs +++ b/wgpu-core/src/command/bundle.rs @@ -47,7 +47,7 @@ use crate::{ error::{ErrorFormatter, PrettyError}, hub::{GlobalIdentityHandlerFactory, HalApi, Hub, Resource, Storage, Token}, id, - init_tracker::{BufferInitTrackerAction, MemoryInitKind}, + init_tracker::{BufferInitTrackerAction, MemoryInitKind, TextureInitTrackerAction}, pipeline::PipelineFlags, track::{TrackerSet, UsageConflict}, validation::check_buffer_usage, @@ -180,6 +180,7 @@ impl RenderBundleEncoder { let mut base = self.base.as_ref(); let mut pipeline_layout_id = None::>; let mut buffer_memory_init_actions = Vec::new(); + let mut texture_memory_init_actions = Vec::new(); for &command in base.commands { match command { @@ -233,6 +234,7 @@ impl RenderBundleEncoder { } buffer_memory_init_actions.extend_from_slice(&bind_group.used_buffer_ranges); + texture_memory_init_actions.extend_from_slice(&bind_group.used_texture_ranges); state.set_bind_group(index, bind_group_id, bind_group.layout_id, offsets); state @@ -523,6 +525,7 @@ impl RenderBundleEncoder { }, used: state.trackers, buffer_memory_init_actions, + texture_memory_init_actions, context: self.context, life_guard: LifeGuard::new(desc.label.borrow_or_default()), }) @@ -587,6 +590,7 @@ pub struct RenderBundle { pub(crate) device_id: Stored, pub(crate) used: TrackerSet, pub(super) buffer_memory_init_actions: Vec, + pub(super) texture_memory_init_actions: Vec, pub(super) context: RenderPassContext, pub(crate) life_guard: LifeGuard, } diff --git a/wgpu-core/src/command/clear.rs b/wgpu-core/src/command/clear.rs index e7e6032b7..845122f49 100644 --- a/wgpu-core/src/command/clear.rs +++ b/wgpu-core/src/command/clear.rs @@ -294,13 +294,13 @@ pub(crate) fn 
collect_zero_buffer_copies_for_clear_texture( get_lowest_common_denom(buffer_copy_pitch, format_desc.block_size as u32); for mip_level in mip_range { - let mip_size = texture_desc.mip_level_size(mip_level).unwrap(); + let mut mip_size = texture_desc.mip_level_size(mip_level).unwrap(); + // Round to multiple of block size + mip_size.width = align_to(mip_size.width, format_desc.block_dimensions.0 as u32); + mip_size.height = align_to(mip_size.height, format_desc.block_dimensions.1 as u32); let bytes_per_row = align_to( - // row is at least one block wide, need to round up - (mip_size.width + format_desc.block_dimensions.0 as u32 - 1) - / format_desc.block_dimensions.0 as u32 - * format_desc.block_size as u32, + mip_size.width / format_desc.block_dimensions.0 as u32 * format_desc.block_size as u32, bytes_per_row_alignment, ); diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index c2a3e1f0e..5258b41f4 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -1,9 +1,11 @@ use crate::{ binding_model::{BindError, BindGroup, PushConstantUploadError}, command::{ - bind::Binder, end_pipeline_statistics_query, BasePass, BasePassRef, CommandBuffer, - CommandEncoderError, CommandEncoderStatus, MapPassErr, PassErrorScope, QueryUseError, - StateChange, + bind::Binder, + end_pipeline_statistics_query, + memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, + BasePass, BasePassRef, CommandBuffer, CommandEncoderError, CommandEncoderStatus, + MapPassErr, PassErrorScope, QueryUseError, StateChange, }, device::MissingDownlevelFlags, error::{ErrorFormatter, PrettyError}, @@ -328,6 +330,9 @@ impl Global { raw.begin_compute_pass(&hal_desc); } + // Immediate texture inits required because of prior discards. Need to be inserted before texture reads. 
+ let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + for command in base.commands { match *command { ComputeCommand::SetBindGroup { @@ -371,6 +376,15 @@ impl Global { }, ), ); + + for action in bind_group.used_texture_ranges.iter() { + pending_discard_init_fixups.extend( + cmd_buf + .texture_memory_actions + .register_init_action(action, &texture_guard), + ); + } + let pipeline_layout_id = state.binder.pipeline_layout_id; let entries = state.binder.assign_group( index as usize, @@ -503,6 +517,14 @@ impl Global { pipeline: state.pipeline.last_state, }; + fixup_discarded_surfaces( + pending_discard_init_fixups.drain(..), + raw, + &texture_guard, + &mut cmd_buf.trackers.textures, + device, + ); + state.is_ready().map_pass_err(scope)?; state .flush_states( @@ -670,6 +692,16 @@ impl Global { } cmd_buf.status = CommandEncoderStatus::Recording; + // There can be entries left in pending_discard_init_fixups if a bind group was set, but not used (i.e. no Dispatch occurred) + // However, we already altered the discard/init_action state on this cmd_buf, so we need to apply the promised changes. 
+ fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + raw, + &texture_guard, + &mut cmd_buf.trackers.textures, + device, + ); + Ok(()) } } diff --git a/wgpu-core/src/command/memory_init.rs b/wgpu-core/src/command/memory_init.rs new file mode 100644 index 000000000..cfb049756 --- /dev/null +++ b/wgpu-core/src/command/memory_init.rs @@ -0,0 +1,363 @@ +use std::{collections::hash_map::Entry, ops::Range, vec::Drain}; + +use hal::CommandEncoder; + +use crate::{ + command::collect_zero_buffer_copies_for_clear_texture, + device::Device, + hub::Storage, + id::{self, TextureId}, + init_tracker::*, + resource::{Buffer, Texture}, + track::{ResourceTracker, TextureSelector, TextureState, TrackerSet}, + FastHashMap, +}; + +use super::{BakedCommands, DestroyedBufferError, DestroyedTextureError}; + +/// Surface that was discarded by `StoreOp::Discard` of a preceding renderpass. +/// Any read access to this surface needs to be preceded by a texture initialization. +#[derive(Clone)] +pub(crate) struct TextureSurfaceDiscard { + pub texture: TextureId, + pub mip_level: u32, + pub layer: u32, +} + +pub(crate) type SurfacesInDiscardState = Vec; + +#[derive(Default)] +pub(crate) struct CommandBufferTextureMemoryActions { + // init actions describe the tracker actions that we need to be executed before the command buffer is executed + init_actions: Vec, + // discards describe all the discards that haven't been followed by init again within the command buffer + // i.e. everything in this list resets the texture init state *after* the command buffer execution + discards: Vec, +} + +impl CommandBufferTextureMemoryActions { + pub(crate) fn drain_init_actions(&mut self) -> Drain { + self.init_actions.drain(..) + } + + pub(crate) fn discard(&mut self, discard: TextureSurfaceDiscard) { + self.discards.push(discard); + } + + // Registers a TextureInitTrackerAction. + // Returns previously discarded surface that need to be initialized *immediately* now. 
+ // Only returns a non-empty list if action is MemoryInitKind::NeedsInitializedMemory. + #[must_use] + pub(crate) fn register_init_action( + &mut self, + action: &TextureInitTrackerAction, + texture_guard: &Storage, TextureId>, + ) -> SurfacesInDiscardState { + let mut immediately_necessary_clears = SurfacesInDiscardState::new(); + + // Note that within a command buffer we may stack arbitrary memory init actions on the same texture + // Since we react to them in sequence, they are going to be dropped again at queue submit + // + // We don't need to add MemoryInitKind::NeedsInitializedMemory to init_actions if a surface is part of the discard list. + // But that would mean splitting up the action which is more than we'd win here. + self.init_actions + .extend(match texture_guard.get(action.id) { + Ok(texture) => texture.initialization_status.check_action(action), + Err(_) => return immediately_necessary_clears, // texture no longer exists + }); + + // We expect very few discarded surfaces at any point in time which is why a simple linear search is likely best. + // (i.e. most of the time self.discards is empty!) 
+ let init_actions = &mut self.init_actions; + self.discards.retain(|discarded_surface| { + if discarded_surface.texture == action.id + && action.range.layer_range.contains(&discarded_surface.layer) + && action + .range + .mip_range + .contains(&discarded_surface.mip_level) + { + if let MemoryInitKind::NeedsInitializedMemory = action.kind { + immediately_necessary_clears.push(discarded_surface.clone()); + + // Mark surface as implicitly initialized (this is relevant because it might have been uninitialized prior to discarding + init_actions.push(TextureInitTrackerAction { + id: discarded_surface.texture, + range: TextureInitRange { + mip_range: discarded_surface.mip_level + ..(discarded_surface.mip_level + 1), + layer_range: discarded_surface.layer..(discarded_surface.layer + 1), + }, + kind: MemoryInitKind::ImplicitlyInitialized, + }); + } + false + } else { + true + } + }); + + immediately_necessary_clears + } + + // Shortcut for register_init_action when it is known that the action is an implicit init, not requiring any immediate resource init. + pub(crate) fn register_implicit_init( + &mut self, + id: TextureId, + range: TextureInitRange, + texture_guard: &Storage, TextureId>, + ) { + let must_be_empty = self.register_init_action( + &TextureInitTrackerAction { + id, + range, + kind: MemoryInitKind::ImplicitlyInitialized, + }, + texture_guard, + ); + assert!(must_be_empty.is_empty()); + } +} + +// Utility function that takes discarded surfaces from register_init_action and initializes them on the spot. +// Takes care of barriers as well! 
+pub(crate) fn fixup_discarded_surfaces< + A: hal::Api, + InitIter: Iterator, +>( + inits: InitIter, + encoder: &mut A::CommandEncoder, + texture_guard: &Storage, TextureId>, + texture_tracker: &mut ResourceTracker, + device: &Device, +) { + let mut zero_buffer_copy_regions = Vec::new(); + for init in inits { + let mip_range = init.mip_level..(init.mip_level + 1); + let layer_range = init.layer..(init.layer + 1); + + let (texture, pending) = texture_tracker + .use_replace( + &*texture_guard, + init.texture, + TextureSelector { + levels: mip_range.clone(), + layers: layer_range.clone(), + }, + hal::TextureUses::COPY_DST, + ) + .unwrap(); + + collect_zero_buffer_copies_for_clear_texture( + &texture.desc, + device.alignments.buffer_copy_pitch.get() as u32, + mip_range, + layer_range, + &mut zero_buffer_copy_regions, + ); + + let barriers = pending.map(|pending| pending.into_hal(texture)); + let raw_texture = texture.inner.as_raw().unwrap(); + + unsafe { + // TODO: Should first gather all barriers, do a single transition_textures call, and then send off copy_buffer_to_texture commands. + encoder.transition_textures(barriers); + encoder.copy_buffer_to_texture( + &device.zero_buffer, + raw_texture, + zero_buffer_copy_regions.drain(..), + ); + } + } +} + +impl BakedCommands { + // inserts all buffer initializations that are going to be needed for executing the commands and updates resource init states accordingly + pub(crate) fn initialize_buffer_memory( + &mut self, + device_tracker: &mut TrackerSet, + buffer_guard: &mut Storage, id::BufferId>, + ) -> Result<(), DestroyedBufferError> { + // Gather init ranges for each buffer so we can collapse them. + // It is not possible to do this at an earlier point since previously executed command buffer change the resource init state. + let mut uninitialized_ranges_per_buffer = FastHashMap::default(); + for buffer_use in self.buffer_memory_init_actions.drain(..) 
{ + let buffer = buffer_guard + .get_mut(buffer_use.id) + .map_err(|_| DestroyedBufferError(buffer_use.id))?; + + // align the end to 4 + let end_remainder = buffer_use.range.end % wgt::COPY_BUFFER_ALIGNMENT; + let end = if end_remainder == 0 { + buffer_use.range.end + } else { + buffer_use.range.end + wgt::COPY_BUFFER_ALIGNMENT - end_remainder + }; + let uninitialized_ranges = buffer + .initialization_status + .drain(buffer_use.range.start..end); + + match buffer_use.kind { + MemoryInitKind::ImplicitlyInitialized => {} + MemoryInitKind::NeedsInitializedMemory => { + match uninitialized_ranges_per_buffer.entry(buffer_use.id) { + Entry::Vacant(e) => { + e.insert( + uninitialized_ranges.collect::>>(), + ); + } + Entry::Occupied(mut e) => { + e.get_mut().extend(uninitialized_ranges); + } + } + } + } + } + + for (buffer_id, mut ranges) in uninitialized_ranges_per_buffer { + // Collapse touching ranges. + ranges.sort_by_key(|r| r.start); + for i in (1..ranges.len()).rev() { + assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this! + if ranges[i].start == ranges[i - 1].end { + ranges[i - 1].end = ranges[i].end; + ranges.swap_remove(i); // Ordering not important at this point + } + } + + // Don't do use_replace since the buffer may already no longer have a ref_count. + // However, we *know* that it is currently in use, so the tracker must already know about it. 
+ let transition = device_tracker.buffers.change_replace_tracked( + id::Valid(buffer_id), + (), + hal::BufferUses::COPY_DST, + ); + + let buffer = buffer_guard + .get_mut(buffer_id) + .map_err(|_| DestroyedBufferError(buffer_id))?; + let raw_buf = buffer.raw.as_ref().ok_or(DestroyedBufferError(buffer_id))?; + + unsafe { + self.encoder + .transition_buffers(transition.map(|pending| pending.into_hal(buffer))); + } + + for range in ranges.iter() { + assert!(range.start % wgt::COPY_BUFFER_ALIGNMENT == 0, "Buffer {:?} has an uninitialized range with a start not aligned to 4 (start was {})", raw_buf, range.start); + assert!(range.end % wgt::COPY_BUFFER_ALIGNMENT == 0, "Buffer {:?} has an uninitialized range with an end not aligned to 4 (end was {})", raw_buf, range.end); + + unsafe { + self.encoder.clear_buffer(raw_buf, range.clone()); + } + } + } + Ok(()) + } + + // inserts all texture initializations that are going to be needed for executing the commands and updates resource init states accordingly + // any textures that are left discarded by this command buffer will be marked as uninitialized + pub(crate) fn initialize_texture_memory( + &mut self, + device_tracker: &mut TrackerSet, + texture_guard: &mut Storage, TextureId>, + device: &Device, + ) -> Result<(), DestroyedTextureError> { + let mut ranges: Vec = Vec::new(); + for texture_use in self.texture_memory_actions.drain_init_actions() { + let texture = texture_guard + .get_mut(texture_use.id) + .map_err(|_| DestroyedTextureError(texture_use.id))?; + + let use_range = texture_use.range; + let affected_mip_trackers = texture + .initialization_status + .mips + .iter_mut() + .enumerate() + .skip(use_range.mip_range.start as usize) + .take((use_range.mip_range.end - use_range.mip_range.start) as usize); + + match texture_use.kind { + MemoryInitKind::ImplicitlyInitialized => { + for (_, mip_tracker) in affected_mip_trackers { + mip_tracker.drain(use_range.layer_range.clone()); + } + } + 
MemoryInitKind::NeedsInitializedMemory => { + ranges.clear(); + for (mip_level, mip_tracker) in affected_mip_trackers { + for layer_range in mip_tracker.drain(use_range.layer_range.clone()) { + ranges.push(TextureInitRange { + mip_range: mip_level as u32..(mip_level as u32 + 1), + layer_range, + }) + } + } + + let raw_texture = texture + .inner + .as_raw() + .ok_or(DestroyedTextureError(texture_use.id))?; + + debug_assert!(texture.hal_usage.contains(hal::TextureUses::COPY_DST), + "Every texture needs to have the COPY_DST flag. Otherwise we can't ensure initialized memory!"); + + let mut texture_barriers = Vec::new(); + let mut zero_buffer_copy_regions = Vec::new(); + for range in &ranges { + // Don't do use_replace since the texture may already no longer have a ref_count. + // However, we *know* that it is currently in use, so the tracker must already know about it. + texture_barriers.extend( + device_tracker + .textures + .change_replace_tracked( + id::Valid(texture_use.id), + TextureSelector { + levels: range.mip_range.clone(), + layers: range.layer_range.clone(), + }, + hal::TextureUses::COPY_DST, + ) + .map(|pending| pending.into_hal(texture)), + ); + + collect_zero_buffer_copies_for_clear_texture( + &texture.desc, + device.alignments.buffer_copy_pitch.get() as u32, + range.mip_range.clone(), + range.layer_range.clone(), + &mut zero_buffer_copy_regions, + ); + } + + if !zero_buffer_copy_regions.is_empty() { + unsafe { + // TODO: Could safe on transition_textures calls by bundling barriers from *all* textures. 
+ // (a bbit more tricky because a naive approach would have to borrow same texture several times then) + self.encoder + .transition_textures(texture_barriers.into_iter()); + self.encoder.copy_buffer_to_texture( + &device.zero_buffer, + raw_texture, + zero_buffer_copy_regions.into_iter(), + ); + } + } + } + } + } + + // Now that all buffers/textures have the proper init state for before cmdbuf start, we discard init states for textures it left discarded after its execution. + for surface_discard in self.texture_memory_actions.discards.iter() { + let texture = texture_guard + .get_mut(surface_discard.texture) + .map_err(|_| DestroyedTextureError(surface_discard.texture))?; + texture + .initialization_status + .discard(surface_discard.mip_level, surface_discard.layer); + } + + Ok(()) + } +} diff --git a/wgpu-core/src/command/mod.rs b/wgpu-core/src/command/mod.rs index 6a2fe298a..14b9460a0 100644 --- a/wgpu-core/src/command/mod.rs +++ b/wgpu-core/src/command/mod.rs @@ -3,26 +3,25 @@ mod bundle; mod clear; mod compute; mod draw; +mod memory_init; mod query; mod render; mod transfer; -use std::collections::hash_map::Entry; -use std::ops::Range; - pub use self::bundle::*; +pub(crate) use self::clear::collect_zero_buffer_copies_for_clear_texture; pub use self::compute::*; pub use self::draw::*; +use self::memory_init::CommandBufferTextureMemoryActions; pub use self::query::*; pub use self::render::*; pub use self::transfer::*; use crate::error::{ErrorFormatter, PrettyError}; -use crate::FastHashMap; +use crate::init_tracker::BufferInitTrackerAction; use crate::{ hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, id, - init_tracker::{BufferInitTrackerAction, MemoryInitKind}, resource::{Buffer, Texture}, track::{BufferState, ResourceTracker, TextureState, TrackerSet}, Label, Stored, @@ -72,96 +71,11 @@ pub struct BakedCommands { pub(crate) list: Vec, pub(crate) trackers: TrackerSet, buffer_memory_init_actions: Vec, + texture_memory_actions: 
CommandBufferTextureMemoryActions, } pub(crate) struct DestroyedBufferError(pub id::BufferId); - -impl BakedCommands { - pub(crate) fn initialize_buffer_memory( - &mut self, - device_tracker: &mut TrackerSet, - buffer_guard: &mut Storage, id::BufferId>, - ) -> Result<(), DestroyedBufferError> { - // Gather init ranges for each buffer so we can collapse them. - // It is not possible to do this at an earlier point since previously executed command buffer change the resource init state. - let mut uninitialized_ranges_per_buffer = FastHashMap::default(); - for buffer_use in self.buffer_memory_init_actions.drain(..) { - let buffer = buffer_guard - .get_mut(buffer_use.id) - .map_err(|_| DestroyedBufferError(buffer_use.id))?; - - // align the end to 4 - let end_remainder = buffer_use.range.end % wgt::COPY_BUFFER_ALIGNMENT; - let end = if end_remainder == 0 { - buffer_use.range.end - } else { - buffer_use.range.end + wgt::COPY_BUFFER_ALIGNMENT - end_remainder - }; - let uninitialized_ranges = buffer - .initialization_status - .drain(buffer_use.range.start..end); - - match buffer_use.kind { - MemoryInitKind::ImplicitlyInitialized => { - uninitialized_ranges.for_each(drop); - } - MemoryInitKind::NeedsInitializedMemory => { - match uninitialized_ranges_per_buffer.entry(buffer_use.id) { - Entry::Vacant(e) => { - e.insert( - uninitialized_ranges.collect::>>(), - ); - } - Entry::Occupied(mut e) => { - e.get_mut().extend(uninitialized_ranges); - } - } - } - } - } - - for (buffer_id, mut ranges) in uninitialized_ranges_per_buffer { - // Collapse touching ranges. - ranges.sort_by(|a, b| a.start.cmp(&b.start)); - for i in (1..ranges.len()).rev() { - assert!(ranges[i - 1].end <= ranges[i].start); // The memory init tracker made sure of this! - if ranges[i].start == ranges[i - 1].end { - ranges[i - 1].end = ranges[i].end; - ranges.swap_remove(i); // Ordering not important at this point - } - } - - // Don't do use_replace since the buffer may already no longer have a ref_count. 
- // However, we *know* that it is currently in use, so the tracker must already know about it. - let transition = device_tracker.buffers.change_replace_tracked( - id::Valid(buffer_id), - (), - hal::BufferUses::COPY_DST, - ); - - let buffer = buffer_guard - .get_mut(buffer_id) - .map_err(|_| DestroyedBufferError(buffer_id))?; - let raw_buf = buffer.raw.as_ref().ok_or(DestroyedBufferError(buffer_id))?; - - unsafe { - self.encoder - .transition_buffers(transition.map(|pending| pending.into_hal(buffer))); - } - - for range in ranges.iter() { - assert!(range.start % wgt::COPY_BUFFER_ALIGNMENT == 0, "Buffer {:?} has an uninitialized range with a start not aligned to 4 (start was {})", raw_buf, range.start); - assert!(range.end % wgt::COPY_BUFFER_ALIGNMENT == 0, "Buffer {:?} has an uninitialized range with an end not aligned to 4 (end was {})", raw_buf, range.end); - - unsafe { - self.encoder.clear_buffer(raw_buf, range.clone()); - } - } - } - - Ok(()) - } -} +pub(crate) struct DestroyedTextureError(pub id::TextureId); pub struct CommandBuffer { encoder: CommandEncoder, @@ -169,6 +83,7 @@ pub struct CommandBuffer { pub(crate) device_id: Stored, pub(crate) trackers: TrackerSet, buffer_memory_init_actions: Vec, + texture_memory_actions: CommandBufferTextureMemoryActions, limits: wgt::Limits, support_clear_buffer_texture: bool, #[cfg(feature = "trace")] @@ -196,6 +111,7 @@ impl CommandBuffer { device_id, trackers: TrackerSet::new(A::VARIANT), buffer_memory_init_actions: Default::default(), + texture_memory_actions: Default::default(), limits, support_clear_buffer_texture: features.contains(wgt::Features::CLEAR_COMMANDS), #[cfg(feature = "trace")] @@ -262,6 +178,7 @@ impl CommandBuffer { list: self.encoder.list, trackers: self.trackers, buffer_memory_init_actions: self.buffer_memory_init_actions, + texture_memory_actions: self.texture_memory_actions, } } } diff --git a/wgpu-core/src/command/render.rs b/wgpu-core/src/command/render.rs index 6ac1f42a8..47ed20300 100644 --- 
a/wgpu-core/src/command/render.rs +++ b/wgpu-core/src/command/render.rs @@ -1,10 +1,12 @@ use crate::{ binding_model::BindError, command::{ - bind::Binder, end_pipeline_statistics_query, BasePass, BasePassRef, CommandBuffer, - CommandEncoderError, CommandEncoderStatus, DrawError, ExecutionError, MapPassErr, - PassErrorScope, QueryResetMap, QueryUseError, RenderCommand, RenderCommandError, - StateChange, + bind::Binder, + end_pipeline_statistics_query, + memory_init::{fixup_discarded_surfaces, SurfacesInDiscardState}, + BasePass, BasePassRef, CommandBuffer, CommandEncoderError, CommandEncoderStatus, DrawError, + ExecutionError, MapPassErr, PassErrorScope, QueryResetMap, QueryUseError, RenderCommand, + RenderCommandError, StateChange, }, device::{ AttachmentData, MissingDownlevelFlags, MissingFeatures, RenderPassCompatibilityError, @@ -13,7 +15,7 @@ use crate::{ error::{ErrorFormatter, PrettyError}, hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, id, - init_tracker::MemoryInitKind, + init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, pipeline::PipelineFlags, resource::{Texture, TextureView}, track::{StatefulTrackerSubset, TextureSelector, UsageConflict}, @@ -38,6 +40,8 @@ use serde::Serialize; use crate::track::UseExtendError; use std::{borrow::Cow, fmt, iter, marker::PhantomData, mem, num::NonZeroU32, ops::Range, str}; +use super::{memory_init::TextureSurfaceDiscard, CommandBufferTextureMemoryActions}; + /// Operation to perform to the output attachment at the start of a renderpass. #[repr(C)] #[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)] @@ -74,7 +78,7 @@ pub struct PassChannel { pub load_op: LoadOp, /// Operation to perform to the output attachment at the end of a renderpass. pub store_op: StoreOp, - /// If load_op is [`LoadOp::Clear`], the attachement will be cleared to this color. + /// If load_op is [`LoadOp::Clear`], the attachment will be cleared to this color. 
pub clear_value: V, /// If true, the relevant channel is not changed by a renderpass, and the corresponding attachment /// can be used inside the pass by other read-only usages. @@ -531,19 +535,59 @@ type AttachmentDataVec = ArrayVec; struct RenderPassInfo<'a, A: hal::Api> { context: RenderPassContext, trackers: StatefulTrackerSubset, - render_attachments: AttachmentDataVec>, + render_attachments: AttachmentDataVec>, // All render attachments, including depth/stencil is_ds_read_only: bool, extent: wgt::Extent3d, _phantom: PhantomData, + + pending_discard_init_fixups: SurfacesInDiscardState, + divergent_discarded_depth_stencil_aspect: Option<(wgt::TextureAspect, &'a TextureView)>, } impl<'a, A: HalApi> RenderPassInfo<'a, A> { + fn add_pass_texture_init_actions( + channel: &PassChannel, + texture_memory_actions: &mut CommandBufferTextureMemoryActions, + view: &TextureView, + texture_guard: &Storage, id::TextureId>, + pending_discard_init_fixups: &mut SurfacesInDiscardState, + ) { + if channel.load_op == LoadOp::Load { + pending_discard_init_fixups.extend(texture_memory_actions.register_init_action( + &TextureInitTrackerAction { + id: view.parent_id.value.0, + range: TextureInitRange::from(view.selector.clone()), + // Note that this is needed even if the target is discarded, + kind: MemoryInitKind::NeedsInitializedMemory, + }, + texture_guard, + )); + } else if channel.store_op == StoreOp::Store { + // Clear + Store + texture_memory_actions.register_implicit_init( + view.parent_id.value.0, + TextureInitRange::from(view.selector.clone()), + texture_guard, + ); + } + if channel.store_op == StoreOp::Discard { + // the discard happens at the *end* of a pass + // but recording the discard right away be alright since the texture can't be used during the pass anyways + texture_memory_actions.discard(TextureSurfaceDiscard { + texture: view.parent_id.value.0, + mip_level: view.selector.levels.start, + layer: view.selector.layers.start, + }); + } + } + fn start( label: 
Option<&str>, color_attachments: &[RenderPassColorAttachment], depth_stencil_attachment: Option<&RenderPassDepthStencilAttachment>, cmd_buf: &mut CommandBuffer, view_guard: &'a Storage, id::TextureViewId>, + texture_guard: &'a Storage, id::TextureId>, ) -> Result { profiling::scope!("start", "RenderPassInfo"); @@ -553,6 +597,9 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { let mut is_ds_read_only = false; let mut render_attachments = AttachmentDataVec::::new(); + let mut discarded_surfaces = AttachmentDataVec::new(); + let mut pending_discard_init_fixups = SurfacesInDiscardState::new(); + let mut divergent_discarded_depth_stencil_aspect = None; let mut attachment_type_name = ""; let mut extent = None; @@ -599,6 +646,80 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { )); } + if !ds_aspects.contains(hal::FormatAspects::STENCIL) + || (at.stencil.load_op == at.depth.load_op + && at.stencil.store_op == at.depth.store_op) + { + Self::add_pass_texture_init_actions( + &at.depth, + &mut cmd_buf.texture_memory_actions, + view, + texture_guard, + &mut pending_discard_init_fixups, + ); + } else if !ds_aspects.contains(hal::FormatAspects::DEPTH) { + Self::add_pass_texture_init_actions( + &at.stencil, + &mut cmd_buf.texture_memory_actions, + view, + texture_guard, + &mut pending_discard_init_fixups, + ); + } else { + // This is the only place (anywhere in wgpu) where Stencil & Depth init state can diverge. + // To safe us the overhead of tracking init state of texture aspects everywhere, + // we're going to cheat a little bit in order to keep the init state of both Stencil and Depth aspects in sync. + // The expectation is that we hit this path extremely rarely! + + // Diverging LoadOp, i.e. Load + Clear: + // Record MemoryInitKind::NeedsInitializedMemory for the entire surface, a bit wasteful on unit but no negative effect! + // Rationale: If the loaded channel is uninitialized it needs clearing, the cleared channel doesn't care. 
(If everything is already initialized nothing special happens) + // (possible minor optimization: Clear caused by NeedsInitializedMemory should know that it doesn't need to clear the aspect that was set to C) + let need_init_beforehand = + at.depth.load_op == LoadOp::Load || at.stencil.load_op == LoadOp::Load; + if need_init_beforehand { + pending_discard_init_fixups.extend( + cmd_buf.texture_memory_actions.register_init_action( + &TextureInitTrackerAction { + id: view.parent_id.value.0, + range: TextureInitRange::from(view.selector.clone()), + kind: MemoryInitKind::NeedsInitializedMemory, + }, + texture_guard, + ), + ); + } + + // Diverging Store, i.e. Discard + Store: + // Immediately zero out channel that is set to discard after we're done with the render pass. + // This allows us to set the entire surface to MemoryInitKind::ImplicitlyInitialized (if it isn't already set to NeedsInitializedMemory). + // (possible optimization: Delay and potentially drop this zeroing) + if at.depth.store_op != at.stencil.store_op { + if !need_init_beforehand { + cmd_buf.texture_memory_actions.register_implicit_init( + view.parent_id.value.0, + TextureInitRange::from(view.selector.clone()), + texture_guard, + ); + } + divergent_discarded_depth_stencil_aspect = Some(( + if at.depth.store_op == StoreOp::Discard { + wgt::TextureAspect::DepthOnly + } else { + wgt::TextureAspect::StencilOnly + }, + view, + )); + } else if at.depth.store_op == StoreOp::Discard { + // Both are discarded using the regular path. + discarded_surfaces.push(TextureSurfaceDiscard { + texture: view.parent_id.value.0, + mip_level: view.selector.levels.start, + layer: view.selector.layers.start, + }); + } + } + let usage = if at.is_read_only(ds_aspects)? 
{ is_ds_read_only = true; hal::TextureUses::DEPTH_STENCIL_READ | hal::TextureUses::RESOURCE @@ -636,6 +757,13 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { )); } + Self::add_pass_texture_init_actions( + &at.channel, + &mut cmd_buf.texture_memory_actions, + color_view, + texture_guard, + &mut pending_discard_init_fixups, + ); render_attachments .push(color_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); @@ -659,6 +787,11 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { return Err(RenderPassErrorInner::InvalidResolveTargetSampleCount); } + cmd_buf.texture_memory_actions.register_implicit_init( + resolve_view.parent_id.value.0, + TextureInitRange::from(resolve_view.selector.clone()), + texture_guard, + ); render_attachments .push(resolve_view.to_render_attachment(hal::TextureUses::COLOR_TARGET)); @@ -719,6 +852,8 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { is_ds_read_only, extent, _phantom: PhantomData, + pending_discard_init_fixups, + divergent_discarded_depth_stencil_aspect, }) } @@ -726,7 +861,7 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { mut self, raw: &mut A::CommandEncoder, texture_guard: &Storage, id::TextureId>, - ) -> Result { + ) -> Result<(StatefulTrackerSubset, SurfacesInDiscardState), RenderPassErrorInner> { profiling::scope!("finish", "RenderPassInfo"); unsafe { raw.end_render_pass(); @@ -751,7 +886,44 @@ impl<'a, A: HalApi> RenderPassInfo<'a, A> { .map_err(UsageConflict::from)?; } - Ok(self.trackers) + // If either only stencil or depth was discarded, we put in a special clear pass to keep the init status of the aspects in sync. + // We do this so we don't need to track init state for depth/stencil aspects individually. + // Note that we don't go the usual route of "brute force" initializing the texture when need arises here, + // since this path is actually something a user may genuinely want (where as the other cases are more seen along the lines as gracefully handling a user error). 
+ if let Some((aspect, view)) = self.divergent_discarded_depth_stencil_aspect { + let (depth_ops, stencil_ops) = if aspect == wgt::TextureAspect::DepthOnly { + ( + hal::AttachmentOps::STORE, // clear depth + hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil + ) + } else { + ( + hal::AttachmentOps::LOAD | hal::AttachmentOps::STORE, // unchanged stencil + hal::AttachmentOps::STORE, // clear depth + ) + }; + let desc = hal::RenderPassDescriptor { + label: Some("Zero init discarded depth/stencil aspect"), + extent: view.extent, + sample_count: view.samples, + color_attachments: &[], + depth_stencil_attachment: Some(hal::DepthStencilAttachment { + target: hal::Attachment { + view: &view.raw, + usage: hal::TextureUses::DEPTH_STENCIL_WRITE, + }, + depth_ops, + stencil_ops, + clear_value: (0.0, 0), + }), + }; + unsafe { + raw.begin_render_pass(&desc); + raw.end_render_pass(); + } + } + + Ok((self.trackers, self.pending_discard_init_fixups)) } } @@ -786,7 +958,7 @@ impl Global { let mut token = Token::root(); let (device_guard, mut token) = hub.devices.read(&mut token); - let (pass_raw, trackers, query_reset_state) = { + let (pass_raw, trackers, query_reset_state, pending_discard_init_fixups) = { let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); let cmd_buf = @@ -830,6 +1002,7 @@ impl Global { depth_stencil_attachment, cmd_buf, &*view_guard, + &*texture_guard, ) .map_pass_err(scope)?; @@ -900,6 +1073,13 @@ impl Global { } }), ); + for action in bind_group.used_texture_ranges.iter() { + info.pending_discard_init_fixups.extend( + cmd_buf + .texture_memory_actions + .register_init_action(action, &texture_guard), + ); + } let pipeline_layout_id = state.binder.pipeline_layout_id; let entries = state.binder.assign_group( @@ -1630,6 +1810,13 @@ impl Global { Err(_) => None, }), ); + for action in bundle.texture_memory_init_actions.iter() { + info.pending_discard_init_fixups.extend( + cmd_buf + .texture_memory_actions + 
.register_init_action(action, &texture_guard), + ); + } unsafe { bundle.execute( @@ -1667,7 +1854,8 @@ impl Global { } log::trace!("Merging {:?} with the render pass", encoder_id); - let trackers = info.finish(raw, &*texture_guard).map_pass_err(scope)?; + let (trackers, pending_discard_init_fixups) = + info.finish(raw, &*texture_guard).map_pass_err(scope)?; let raw_cmd_buf = unsafe { raw.end_encoding() @@ -1675,7 +1863,12 @@ impl Global { .map_pass_err(scope)? }; cmd_buf.status = CommandEncoderStatus::Recording; - (raw_cmd_buf, trackers, query_reset_state) + ( + raw_cmd_buf, + trackers, + query_reset_state, + pending_discard_init_fixups, + ) }; let (mut cmb_guard, mut token) = hub.command_buffers.write(&mut token); @@ -1687,6 +1880,15 @@ impl Global { CommandBuffer::get_encoder_mut(&mut *cmb_guard, encoder_id).map_pass_err(scope)?; { let transit = cmd_buf.encoder.open(); + + fixup_discarded_surfaces( + pending_discard_init_fixups.into_iter(), + transit, + &texture_guard, + &mut cmd_buf.trackers.textures, + &device_guard[cmd_buf.device_id.value], + ); + query_reset_state .reset_queries( transit, diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index f1d859b9a..98e6ed853 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -1,12 +1,16 @@ #[cfg(feature = "trace")] use crate::device::trace::Command as TraceCommand; use crate::{ - command::{CommandBuffer, CommandEncoderError}, + command::{ + collect_zero_buffer_copies_for_clear_texture, memory_init::fixup_discarded_surfaces, + CommandBuffer, CommandEncoderError, + }, conv, + device::Device, error::{ErrorFormatter, PrettyError}, hub::{Global, GlobalIdentityHandlerFactory, HalApi, Storage, Token}, id::{BufferId, CommandEncoderId, TextureId}, - init_tracker::MemoryInitKind, + init_tracker::{MemoryInitKind, TextureInitRange, TextureInitTrackerAction}, resource::{Texture, TextureErrorDimension}, track::TextureSelector, }; @@ -350,6 +354,62 @@ pub(crate) 
fn validate_texture_copy_range( Ok((copy_extent, array_layer_count)) } +fn get_copy_dst_texture_init_requirement( + texture: &Texture, + copy_texture: &wgt::ImageCopyTexture, + copy_size: &Extent3d, +) -> TextureInitTrackerAction { + // Attention: If we don't write full texture subresources, we need to a full clear first since we don't track subrects. + let dst_init_kind = if copy_size.width == texture.desc.size.width + && copy_size.height == texture.desc.size.height + { + MemoryInitKind::ImplicitlyInitialized + } else { + MemoryInitKind::NeedsInitializedMemory + }; + TextureInitTrackerAction { + id: copy_texture.texture, + range: TextureInitRange { + mip_range: copy_texture.mip_level..copy_texture.mip_level + 1, + layer_range: copy_texture.origin.z + ..(copy_texture.origin.z + copy_size.depth_or_array_layers), + }, + kind: dst_init_kind, + } +} + +fn handle_src_texture_init( + cmd_buf: &mut CommandBuffer, + device: &Device, + source: &ImageCopyTexture, + src_base: &hal::TextureCopyBase, + copy_size: &Extent3d, + texture_guard: &Storage, TextureId>, +) { + let immediate_src_init = cmd_buf.texture_memory_actions.register_init_action( + &TextureInitTrackerAction { + id: source.texture, + range: TextureInitRange { + mip_range: src_base.mip_level..src_base.mip_level + 1, + layer_range: src_base.origin.z + ..(src_base.origin.z + copy_size.depth_or_array_layers), + }, + kind: MemoryInitKind::NeedsInitializedMemory, + }, + texture_guard, + ); + if !immediate_src_init.is_empty() { + let cmd_buf_raw = cmd_buf.encoder.open(); + fixup_discarded_surfaces( + immediate_src_init.into_iter(), + cmd_buf_raw, + texture_guard, + &mut cmd_buf.trackers.textures, + device, + ); + } +} + impl Global { pub fn command_encoder_copy_buffer_to_buffer( &self, @@ -493,11 +553,14 @@ impl Global { let hub = A::hub(self); let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); let 
cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); + let device = &device_guard[cmd_buf.device_id.value]; + #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { list.push(TraceCommand::CopyBufferToTexture { @@ -567,6 +630,12 @@ impl Global { true, )?; + if !conv::is_valid_copy_dst_texture_format(dst_texture.desc.format) { + return Err( + TransferError::CopyToForbiddenTextureFormat(dst_texture.desc.format).into(), + ); + } + cmd_buf .buffer_memory_init_actions .extend(src_buffer.initialization_status.create_action( @@ -574,10 +643,17 @@ impl Global { source.layout.offset..(source.layout.offset + required_buffer_bytes_in_copy), MemoryInitKind::NeedsInitializedMemory, )); - - if !conv::is_valid_copy_dst_texture_format(dst_texture.desc.format) { - return Err( - TransferError::CopyToForbiddenTextureFormat(dst_texture.desc.format).into(), + let mut dst_zero_buffer_copy_regions = Vec::new(); + for immediate_init in cmd_buf.texture_memory_actions.register_init_action( + &get_copy_dst_texture_init_requirement(dst_texture, destination, copy_size), + &texture_guard, + ) { + collect_zero_buffer_copies_for_clear_texture( + &dst_texture.desc, + device.alignments.buffer_copy_pitch.get() as u32, + immediate_init.mip_level..(immediate_init.mip_level + 1), + immediate_init.layer..(immediate_init.layer + 1), + &mut dst_zero_buffer_copy_regions, ); } @@ -594,8 +670,16 @@ impl Global { }); let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { - cmd_buf_raw.transition_buffers(src_barriers); cmd_buf_raw.transition_textures(dst_barriers); + // potential dst buffer init (for previously discarded dst_texture + partial copy) + if !dst_zero_buffer_copy_regions.is_empty() { + cmd_buf_raw.copy_buffer_to_texture( + &device.zero_buffer, + dst_raw, + dst_zero_buffer_copy_regions.into_iter(), + ); + } + 
cmd_buf_raw.transition_buffers(src_barriers); cmd_buf_raw.copy_buffer_to_texture(src_raw, dst_raw, regions); } Ok(()) @@ -613,11 +697,14 @@ impl Global { let hub = A::hub(self); let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; let (buffer_guard, mut token) = hub.buffers.read(&mut token); let (texture_guard, _) = hub.textures.read(&mut token); + let device = &device_guard[cmd_buf.device_id.value]; + #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { list.push(TraceCommand::CopyTextureToBuffer { @@ -635,6 +722,16 @@ impl Global { let (src_range, src_base, _) = extract_texture_selector(source, copy_size, &*texture_guard)?; + // Handle src texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. 
+ handle_src_texture_init( + cmd_buf, + device, + source, + &src_base, + copy_size, + &texture_guard, + ); + let (src_texture, src_pending) = cmd_buf .trackers .textures @@ -740,11 +837,14 @@ impl Global { let hub = A::hub(self); let mut token = Token::root(); + let (device_guard, mut token) = hub.devices.read(&mut token); let (mut cmd_buf_guard, mut token) = hub.command_buffers.write(&mut token); let cmd_buf = CommandBuffer::get_encoder_mut(&mut *cmd_buf_guard, command_encoder_id)?; let (_, mut token) = hub.buffers.read(&mut token); // skip token let (texture_guard, _) = hub.textures.read(&mut token); + let device = &device_guard[cmd_buf.device_id.value]; + #[cfg(feature = "trace")] if let Some(ref mut list) = cmd_buf.commands { list.push(TraceCommand::CopyTextureToTexture { @@ -767,6 +867,16 @@ impl Global { return Err(TransferError::MismatchedAspects.into()); } + // Handle src texture init *before* dealing with barrier transitions so we have an easier time inserting "immediate-inits" that may be required by prior discards in rare cases. 
+ handle_src_texture_init( + cmd_buf, + device, + source, + &src_tex_base, + copy_size, + &texture_guard, + ); + let (src_texture, src_pending) = cmd_buf .trackers .textures @@ -820,6 +930,20 @@ impl Global { copy_size, )?; + let mut dst_zero_buffer_copy_regions = Vec::new(); + for immediate_init in cmd_buf.texture_memory_actions.register_init_action( + &get_copy_dst_texture_init_requirement(dst_texture, destination, copy_size), + &texture_guard, + ) { + collect_zero_buffer_copies_for_clear_texture( + &dst_texture.desc, + device.alignments.buffer_copy_pitch.get() as u32, + immediate_init.mip_level..(immediate_init.mip_level + 1), + immediate_init.layer..(immediate_init.layer + 1), + &mut dst_zero_buffer_copy_regions, + ); + } + let hal_copy_size = hal::CopyExtent { width: src_copy_size.width.min(dst_copy_size.width), height: src_copy_size.height.min(dst_copy_size.height), @@ -839,6 +963,16 @@ impl Global { let cmd_buf_raw = cmd_buf.encoder.open(); unsafe { cmd_buf_raw.transition_textures(barriers.into_iter()); + + // potential dst buffer init (for previously discarded dst_texture + partial copy) + if !dst_zero_buffer_copy_regions.is_empty() { + cmd_buf_raw.copy_buffer_to_texture( + &device.zero_buffer, + dst_raw, + dst_zero_buffer_copy_regions.into_iter(), + ); + } + cmd_buf_raw.copy_texture_to_texture( src_raw, hal::TextureUses::COPY_SRC, diff --git a/wgpu-core/src/conv.rs b/wgpu-core/src/conv.rs index b6632b590..fa1753d02 100644 --- a/wgpu-core/src/conv.rs +++ b/wgpu-core/src/conv.rs @@ -65,15 +65,13 @@ pub fn map_texture_usage( usage: wgt::TextureUsages, aspect: hal::FormatAspects, ) -> hal::TextureUses { - let mut u = hal::TextureUses::empty(); + // Enforce COPY_DST, otherwise we wouldn't be able to initialize the texture. 
+ let mut u = hal::TextureUses::COPY_DST; + u.set( hal::TextureUses::COPY_SRC, usage.contains(wgt::TextureUsages::COPY_SRC), ); - u.set( - hal::TextureUses::COPY_DST, - usage.contains(wgt::TextureUsages::COPY_DST), - ); u.set( hal::TextureUses::RESOURCE, usage.contains(wgt::TextureUsages::TEXTURE_BINDING), diff --git a/wgpu-core/src/device/mod.rs b/wgpu-core/src/device/mod.rs index 374615421..3783f480a 100644 --- a/wgpu-core/src/device/mod.rs +++ b/wgpu-core/src/device/mod.rs @@ -3,7 +3,10 @@ use crate::{ device::life::WaitIdleError, hub::{Global, GlobalIdentityHandlerFactory, HalApi, Hub, Input, InvalidId, Storage, Token}, id, - init_tracker::{BufferInitTracker, BufferInitTrackerAction, MemoryInitKind}, + init_tracker::{ + BufferInitTracker, BufferInitTrackerAction, MemoryInitKind, TextureInitRange, + TextureInitTracker, TextureInitTrackerAction, + }, instance, pipeline, present, resource, track::{BufferState, TextureSelector, TextureState, TrackerSet, UsageConflict}, validation::{self, check_buffer_usage, check_texture_usage}, @@ -604,6 +607,10 @@ impl Device { desc: desc.map_label(|_| ()), hal_usage, format_features, + initialization_status: TextureInitTracker::new( + desc.mip_level_count, + desc.size.depth_or_array_layers, + ), full_range: TextureSelector { levels: 0..desc.mip_level_count, layers: 0..desc.array_layer_count(), @@ -619,6 +626,7 @@ impl Device { desc: &resource::TextureDescriptor, ) -> Result, resource::CreateTextureError> { let hal_usage = conv::map_texture_usage(desc.usage, desc.format.into()); + let hal_desc = hal::TextureDescriptor { label: desc.label.borrow_option(), size: desc.size, @@ -1388,6 +1396,43 @@ impl Device { }) } + fn create_texture_binding( + view: &resource::TextureView, + texture_guard: &parking_lot::lock_api::RwLockReadGuard< + parking_lot::RawRwLock, + Storage, id::Id>>, + >, + internal_use: hal::TextureUses, + pub_usage: wgt::TextureUsages, + used: &mut TrackerSet, + used_texture_ranges: &mut Vec, + ) -> Result<(), 
binding_model::CreateBindGroupError> { + // Careful here: the texture may no longer have its own ref count, + // if it was deleted by the user. + let parent_id = view.parent_id.value; + let texture = &texture_guard[parent_id]; + used.textures + .change_extend( + parent_id, + &view.parent_id.ref_count, + view.selector.clone(), + internal_use, + ) + .map_err(UsageConflict::from)?; + check_texture_usage(texture.desc.usage, pub_usage)?; + + used_texture_ranges.push(TextureInitTrackerAction { + id: parent_id.0, + range: TextureInitRange { + mip_range: view.desc.range.mip_range(&texture.desc), + layer_range: view.desc.range.layer_range(&texture.desc), + }, + kind: MemoryInitKind::NeedsInitializedMemory, + }); + + Ok(()) + } + fn create_bind_group( &self, self_id: id::DeviceId, @@ -1419,6 +1464,7 @@ impl Device { let (sampler_guard, _) = hub.samplers.read(&mut token); let mut used_buffer_ranges = Vec::new(); + let mut used_texture_ranges = Vec::new(); let mut hal_entries = Vec::with_capacity(desc.entries.len()); let mut hal_buffers = Vec::new(); let mut hal_samplers = Vec::new(); @@ -1519,20 +1565,14 @@ impl Device { view, "SampledTexture, ReadonlyStorageTexture or WriteonlyStorageTexture", )?; - - // Careful here: the texture may no longer have its own ref count, - // if it was deleted by the user. 
- used.textures - .change_extend( - view.parent_id.value, - &view.parent_id.ref_count, - view.selector.clone(), - internal_use, - ) - .map_err(UsageConflict::from)?; - let texture = &texture_guard[view.parent_id.value]; - check_texture_usage(texture.desc.usage, pub_usage)?; - + Self::create_texture_binding( + view, + &texture_guard, + internal_use, + pub_usage, + &mut used, + &mut used_texture_ranges, + )?; let res_index = hal_textures.len(); hal_textures.push(hal::TextureBinding { view: &view.raw, @@ -1550,24 +1590,17 @@ impl Device { .views .use_extend(&*texture_view_guard, id, (), ()) .map_err(|_| Error::InvalidTextureView(id))?; - let (pub_usage, internal_use) = Self::texture_use_parameters( - binding, decl, view, - "SampledTextureArray, ReadonlyStorageTextureArray or WriteonlyStorageTextureArray" + let (pub_usage, internal_use) = + Self::texture_use_parameters(binding, decl, view, + "SampledTextureArray, ReadonlyStorageTextureArray or WriteonlyStorageTextureArray")?; + Self::create_texture_binding( + view, + &texture_guard, + internal_use, + pub_usage, + &mut used, + &mut used_texture_ranges, )?; - - // Careful here: the texture may no longer have its own ref count, - // if it was deleted by the user. 
- used.textures - .change_extend( - view.parent_id.value, - &view.parent_id.ref_count, - view.selector.clone(), - internal_use, - ) - .map_err(UsageConflict::from)?; - let texture = &texture_guard[view.parent_id.value]; - check_texture_usage(texture.desc.usage, pub_usage)?; - hal_textures.push(hal::TextureBinding { view: &view.raw, usage: internal_use, @@ -1619,6 +1652,7 @@ impl Device { life_guard: LifeGuard::new(desc.label.borrow_or_default()), used, used_buffer_ranges, + used_texture_ranges, dynamic_binding_info, }) } @@ -2847,8 +2881,8 @@ impl Global { // Zero initialize memory and then mark both staging and buffer as initialized // (it's guaranteed that this is the case by the time the buffer is usable) unsafe { ptr::write_bytes(mapping.ptr.as_ptr(), 0, buffer.size as usize) }; - buffer.initialization_status.clear(0..buffer.size); - stage.initialization_status.clear(0..buffer.size); + buffer.initialization_status.drain(0..buffer.size); + stage.initialization_status.drain(0..buffer.size); buffer.map_state = resource::BufferMapState::Init { ptr: mapping.ptr, diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index ecfc72f28..11ab48e36 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -333,7 +333,7 @@ impl Global { let dst = buffer_guard.get_mut(buffer_id).unwrap(); dst.initialization_status - .clear(buffer_offset..(buffer_offset + data_size)); + .drain(buffer_offset..(buffer_offset + data_size)); } Ok(()) @@ -428,10 +428,6 @@ impl Global { hal::TextureUses::COPY_DST, ) .unwrap(); - let dst_raw = dst - .inner - .as_raw() - .ok_or(TransferError::InvalidTexture(destination.texture))?; if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { return Err( @@ -514,9 +510,63 @@ impl Global { let encoder = device.pending_writes.activate(); unsafe { - encoder.transition_buffers(iter::once(barrier)); encoder.transition_textures(transition.map(|pending| pending.into_hal(dst))); - 
encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); + encoder.transition_buffers(iter::once(barrier)); + } + + // If the copy does not fully cover the layers, we need to initialize to zero *first* as we don't keep track of partial texture layer inits. + // Strictly speaking we only need to clear the areas of a layer untouched, but this would get increasingly messy. + + let init_layer_range = + destination.origin.z..destination.origin.z + size.depth_or_array_layers; + if dst.initialization_status.mips[destination.mip_level as usize] + .check(init_layer_range.clone()) + .is_some() + { + // For clear we need write access to the texture! + drop(texture_guard); + let (mut texture_guard, _) = hub.textures.write(&mut token); + let dst = texture_guard.get_mut(destination.texture).unwrap(); + let dst_raw = dst + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + let layers_to_initialize = dst.initialization_status.mips + [destination.mip_level as usize] + .drain(init_layer_range); + + let mut zero_buffer_copy_regions = Vec::new(); + if size.width != dst.desc.size.width || size.height != dst.desc.size.height { + for layer in layers_to_initialize { + crate::command::collect_zero_buffer_copies_for_clear_texture( + &dst.desc, + device.alignments.buffer_copy_pitch.get() as u32, + destination.mip_level..(destination.mip_level + 1), + layer, + &mut zero_buffer_copy_regions, + ); + } + } + unsafe { + if !zero_buffer_copy_regions.is_empty() { + encoder.copy_buffer_to_texture( + &device.zero_buffer, + dst_raw, + zero_buffer_copy_regions.into_iter(), + ); + } + encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); + } + } else { + let dst_raw = dst + .inner + .as_raw() + .ok_or(TransferError::InvalidTexture(destination.texture))?; + + unsafe { + encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); + } } device.pending_writes.consume(stage); @@ -734,6 +784,9 @@ impl Global { baked .initialize_buffer_memory(&mut 
*trackers, &mut *buffer_guard) .map_err(|err| QueueSubmitError::DestroyedBuffer(err.0))?; + baked + .initialize_texture_memory(&mut *trackers, &mut *texture_guard, device) + .map_err(|err| QueueSubmitError::DestroyedTexture(err.0))?; //Note: stateless trackers are not merged: // device already knows these resources exist. CommandBuffer::insert_barriers( diff --git a/wgpu-core/src/init_tracker/mod.rs b/wgpu-core/src/init_tracker/mod.rs index ccc655628..ac25ea7e2 100644 --- a/wgpu-core/src/init_tracker/mod.rs +++ b/wgpu-core/src/init_tracker/mod.rs @@ -17,10 +17,10 @@ use smallvec::SmallVec; use std::{fmt, iter, ops::Range}; mod buffer; -//mod texture; +mod texture; pub(crate) use buffer::{BufferInitTracker, BufferInitTrackerAction}; -//pub(crate) use texture::{TextureInitRange, TextureInitTracker, TextureInitTrackerAction}; +pub(crate) use texture::{TextureInitRange, TextureInitTracker, TextureInitTrackerAction}; #[derive(Debug, Clone, Copy)] pub(crate) enum MemoryInitKind { @@ -109,6 +109,17 @@ where } } +impl<'a, Idx> Drop for InitTrackerDrain<'a, Idx> +where + Idx: fmt::Debug + Ord + Copy, +{ + fn drop(&mut self) { + if self.next_index <= self.first_index { + for _ in self {} + } + } +} + impl InitTracker where Idx: fmt::Debug + Ord + Copy + Default, @@ -150,7 +161,6 @@ where } // Drains uninitialized ranges in a query range. - #[must_use] pub(crate) fn drain(&mut self, drain_range: Range) -> InitTrackerDrain { let index = self .uninitialized_ranges @@ -162,11 +172,6 @@ where next_index: index, } } - - // Clears uninitialized ranges in a query range. 
- pub(crate) fn clear(&mut self, range: Range) { - self.drain(range).for_each(drop); - } } impl InitTracker { @@ -218,9 +223,9 @@ mod test { } #[test] - fn check_for_cleared_tracker() { + fn check_for_drained_tracker() { let mut tracker = Tracker::new(10); - tracker.clear(0..10); + tracker.drain(0..10); assert_eq!(tracker.check(0..10), None); assert_eq!(tracker.check(0..3), None); assert_eq!(tracker.check(3..4), None); @@ -231,9 +236,9 @@ mod test { fn check_for_partially_filled_tracker() { let mut tracker = Tracker::new(25); // Two regions of uninitialized memory - tracker.clear(0..5); - tracker.clear(10..15); - tracker.clear(20..25); + tracker.drain(0..5); + tracker.drain(10..15); + tracker.drain(20..25); assert_eq!(tracker.check(0..25), Some(5..25)); // entire range @@ -247,17 +252,17 @@ mod test { } #[test] - fn clear_already_cleared() { + fn drain_already_drained() { let mut tracker = Tracker::new(30); - tracker.clear(10..20); + tracker.drain(10..20); // Overlapping with non-cleared - tracker.clear(5..15); // Left overlap - tracker.clear(15..25); // Right overlap - tracker.clear(0..30); // Inner overlap + tracker.drain(5..15); // Left overlap + tracker.drain(15..25); // Right overlap + tracker.drain(0..30); // Inner overlap // Clear fully cleared - tracker.clear(0..30); + tracker.drain(0..30); assert_eq!(tracker.check(0..30), None); } @@ -303,7 +308,7 @@ mod test { #[test] fn discard_adds_range_on_cleared() { let mut tracker = Tracker::new(10); - tracker.clear(0..10); + tracker.drain(0..10); tracker.discard(0); tracker.discard(5); tracker.discard(9); @@ -327,7 +332,7 @@ mod test { #[test] fn discard_extends_ranges() { let mut tracker = Tracker::new(10); - tracker.clear(3..7); + tracker.drain(3..7); tracker.discard(2); tracker.discard(7); assert_eq!(tracker.uninitialized_ranges.len(), 2); @@ -338,7 +343,7 @@ mod test { #[test] fn discard_merges_ranges() { let mut tracker = Tracker::new(10); - tracker.clear(3..4); + tracker.drain(3..4); tracker.discard(3); 
assert_eq!(tracker.uninitialized_ranges.len(), 1); assert_eq!(tracker.uninitialized_ranges[0], 0..10); diff --git a/wgpu-core/src/init_tracker/texture.rs b/wgpu-core/src/init_tracker/texture.rs new file mode 100644 index 000000000..e8603a992 --- /dev/null +++ b/wgpu-core/src/init_tracker/texture.rs @@ -0,0 +1,87 @@ +use super::{InitTracker, MemoryInitKind}; +use crate::{id::TextureId, track::TextureSelector}; +use arrayvec::ArrayVec; +use std::ops::Range; + +#[derive(Debug, Clone)] +pub(crate) struct TextureInitRange { + pub(crate) mip_range: Range, + pub(crate) layer_range: Range, +} + +impl From for TextureInitRange { + fn from(selector: TextureSelector) -> Self { + TextureInitRange { + mip_range: selector.levels, + layer_range: selector.layers, + } + } +} + +#[derive(Debug, Clone)] +pub(crate) struct TextureInitTrackerAction { + pub(crate) id: TextureId, + pub(crate) range: TextureInitRange, + pub(crate) kind: MemoryInitKind, +} + +pub(crate) type TextureLayerInitTracker = InitTracker; + +#[derive(Debug)] +pub(crate) struct TextureInitTracker { + pub mips: ArrayVec, +} + +impl TextureInitTracker { + pub(crate) fn new(mip_level_count: u32, depth_or_array_layers: u32) -> Self { + TextureInitTracker { + mips: std::iter::repeat(TextureLayerInitTracker::new(depth_or_array_layers)) + .take(mip_level_count as usize) + .collect(), + } + } + + pub(crate) fn check_action( + &self, + action: &TextureInitTrackerAction, + ) -> Option { + let mut mip_range_start = std::usize::MAX; + let mut mip_range_end = std::usize::MIN; + let mut layer_range_start = std::u32::MAX; + let mut layer_range_end = std::u32::MIN; + + for (i, mip_tracker) in self + .mips + .iter() + .enumerate() + .take(action.range.mip_range.end as usize) + .skip(action.range.mip_range.start as usize) + { + if let Some(uninitialized_layer_range) = + mip_tracker.check(action.range.layer_range.clone()) + { + mip_range_start = mip_range_start.min(i); + mip_range_end = i + 1; + layer_range_start = 
layer_range_start.min(uninitialized_layer_range.start); + layer_range_end = layer_range_end.max(uninitialized_layer_range.end); + }; + } + + if mip_range_start < mip_range_end && layer_range_start < layer_range_end { + Some(TextureInitTrackerAction { + id: action.id, + range: TextureInitRange { + mip_range: mip_range_start as u32..mip_range_end as u32, + layer_range: layer_range_start..layer_range_end, + }, + kind: action.kind, + }) + } else { + None + } + } + + pub(crate) fn discard(&mut self, mip_level: u32, layer: u32) { + self.mips[mip_level as usize].discard(layer); + } +} diff --git a/wgpu-core/src/present.rs b/wgpu-core/src/present.rs index 05e93f249..56304dae2 100644 --- a/wgpu-core/src/present.rs +++ b/wgpu-core/src/present.rs @@ -16,6 +16,7 @@ use crate::{ device::DeviceError, hub::{Global, GlobalIdentityHandlerFactory, HalApi, Input, Token}, id::{DeviceId, SurfaceId, TextureId, Valid}, + init_tracker::TextureInitTracker, resource, track::TextureSelector, LifeGuard, Stored, @@ -151,6 +152,7 @@ impl Global { flags: wgt::TextureFormatFeatureFlags::empty(), filterable: false, }, + initialization_status: TextureInitTracker::new(1, 1), full_range: TextureSelector { layers: 0..1, levels: 0..1, diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index 9f990b995..ec5c7ac81 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -2,7 +2,7 @@ use crate::{ device::{DeviceError, HostMap, MissingFeatures}, hub::{Global, GlobalIdentityHandlerFactory, HalApi, Resource, Token}, id::{DeviceId, SurfaceId, TextureId, Valid}, - init_tracker::BufferInitTracker, + init_tracker::{BufferInitTracker, TextureInitTracker}, track::{TextureSelector, DUMMY_SELECTOR}, validation::MissingBufferUsageError, Label, LifeGuard, RefCount, Stored, @@ -185,6 +185,7 @@ pub struct Texture { pub(crate) desc: wgt::TextureDescriptor<()>, pub(crate) hal_usage: hal::TextureUses, pub(crate) format_features: wgt::TextureFormatFeatures, + pub(crate) initialization_status: 
TextureInitTracker, pub(crate) full_range: TextureSelector, pub(crate) life_guard: LifeGuard, } diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index 4941a1c2a..35466b7d0 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -2750,6 +2750,7 @@ impl Extent3d { } /// Calculates the extent at a given mip level. + /// Does *not* account for memory size being a multiple of block size. pub fn mip_level_size(&self, level: u32, is_3d_texture: bool) -> Extent3d { Extent3d { width: u32::max(1, self.width >> level), @@ -3335,6 +3336,30 @@ pub struct ImageSubresourceRange { pub array_layer_count: Option<NonZeroU32>, } +impl ImageSubresourceRange { + /// Returns the mip level range that a subresource range describes for a specific texture. + pub fn mip_range<L>(&self, texture_desc: &TextureDescriptor<L>) -> Range<u32> { + self.base_mip_level..match self.mip_level_count { + Some(mip_level_count) => self.base_mip_level + mip_level_count.get(), + None => texture_desc.mip_level_count, + } + } + + /// Returns the layer range that a subresource range describes for a specific texture.
+ pub fn layer_range(&self, texture_desc: &TextureDescriptor) -> Range { + self.base_array_layer..match self.array_layer_count { + Some(array_layer_count) => self.base_array_layer + array_layer_count.get(), + None => { + if texture_desc.dimension == TextureDimension::D3 { + self.base_array_layer + 1 + } else { + texture_desc.size.depth_or_array_layers + } + } + } + } +} + /// Color variation to use when sampler addressing mode is [`AddressMode::ClampToBorder`] #[repr(C)] #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] diff --git a/wgpu/tests/root.rs b/wgpu/tests/root.rs index a55212ad4..95bf28bf3 100644 --- a/wgpu/tests/root.rs +++ b/wgpu/tests/root.rs @@ -6,3 +6,4 @@ mod device; mod example_wgsl; mod instance; mod vertex_indices; +mod zero_init_texture_after_discard; diff --git a/wgpu/tests/zero_init_texture_after_discard.rs b/wgpu/tests/zero_init_texture_after_discard.rs new file mode 100644 index 000000000..74c29340e --- /dev/null +++ b/wgpu/tests/zero_init_texture_after_discard.rs @@ -0,0 +1,290 @@ +use std::num::NonZeroU32; + +use crate::common::{initialize_test, TestParameters}; + +// Checks if discarding a color target resets its init state, causing a zero read of this texture when copied in after submit of the encoder. +#[test] +fn discarding_color_target_resets_texture_init_state_check_visible_on_copy_after_submit() { + initialize_test(TestParameters::default(), |ctx| { + let (texture, readback_buffer) = + create_white_texture_and_readback_buffer(&ctx, wgpu::TextureFormat::Rgba8UnormSrgb); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Color Discard"), + color_attachments: &[wgpu::RenderPassColorAttachment { + view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Load, + store: false, // discard! 
+ }, + }], + depth_stencil_attachment: None, + }); + ctx.queue.submit([encoder.finish()]); + } + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + copy_texture_to_buffer(&mut encoder, &texture, &readback_buffer); + ctx.queue.submit([encoder.finish()]); + } + assert_buffer_is_zero(&readback_buffer, &ctx.device); + }); +} + +// Checks if discarding a color target resets its init state, causing a zero read of this texture when copied in the same encoder to a buffer. +#[test] +fn discarding_color_target_resets_texture_init_state_check_visible_on_copy_in_same_encoder() { + initialize_test(TestParameters::default(), |ctx| { + let (texture, readback_buffer) = + create_white_texture_and_readback_buffer(&ctx, wgpu::TextureFormat::Rgba8UnormSrgb); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Color Discard"), + color_attachments: &[wgpu::RenderPassColorAttachment { + view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Load, + store: false, // discard! 
+ }, + }], + depth_stencil_attachment: None, + }); + copy_texture_to_buffer(&mut encoder, &texture, &readback_buffer); + ctx.queue.submit([encoder.finish()]); + } + assert_buffer_is_zero(&readback_buffer, &ctx.device); + }); +} + +#[test] +fn discarding_depth_target_resets_texture_init_state_check_visible_on_copy_in_same_encoder() { + initialize_test(TestParameters::default(), |ctx| { + for format in [ + wgpu::TextureFormat::Depth32Float, + //wgpu::TextureFormat::Depth24Plus, // Can't copy to or from buffer + //wgpu::TextureFormat::Depth24PlusStencil8, // Can only copy stencil aspect to/from buffer + ] { + let (texture, readback_buffer) = create_white_texture_and_readback_buffer(&ctx, format); + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Depth Discard"), + color_attachments: &[], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Load, + store: false, // discard! + }), + stencil_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Load, + store: false, // discard! + }), + }), + }); + copy_texture_to_buffer(&mut encoder, &texture, &readback_buffer); + ctx.queue.submit([encoder.finish()]); + } + assert_buffer_is_zero(&readback_buffer, &ctx.device); + } + }); +} + +#[test] +fn discarding_either_depth_or_stencil_aspect() { + initialize_test(TestParameters::default(), |ctx| { + let (texture, _) = create_white_texture_and_readback_buffer( + &ctx, + wgpu::TextureFormat::Depth24PlusStencil8, + ); + // TODO: How do we test this other than "doesn't crash"? 
We can't copy the texture to/from buffers, so we would need to do a copy in a shader + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Depth Discard, Stencil Load"), + color_attachments: &[], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Load, + store: false, // discard! + }), + stencil_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(0), + store: true, + }), + }), + }); + ctx.queue.submit([encoder.finish()]); + } + { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Depth Load, Stencil Discard"), + color_attachments: &[], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(0.0), + store: true, + }), + stencil_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Load, + store: false, // discard! + }), + }), + }); + ctx.queue.submit([encoder.finish()]); + } + }); +} + +const TEXTURE_SIZE: wgpu::Extent3d = wgpu::Extent3d { + width: 64, + height: 64, + depth_or_array_layers: 1, +}; +const BYTES_PER_PIXEL: u32 = 4; +const BUFFER_COPY_LAYOUT: wgpu::ImageDataLayout = wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: NonZeroU32::new(TEXTURE_SIZE.width * BYTES_PER_PIXEL), + rows_per_image: None, +}; + +fn create_white_texture_and_readback_buffer( + ctx: &crate::common::TestingContext, + format: wgpu::TextureFormat, +) -> (wgpu::Texture, wgpu::Buffer) { + let format_desc = format.describe(); + + // Size for tests is chosen so that we don't need to care about buffer alignments. 
+ assert_eq!(format_desc.block_dimensions, (1, 1)); + assert_eq!(format_desc.block_size as u32, BYTES_PER_PIXEL); + assert_eq!( + (TEXTURE_SIZE.width * format_desc.block_size as u32) % wgpu::COPY_BYTES_PER_ROW_ALIGNMENT, + 0 + ); + let buffer_size = TEXTURE_SIZE.width * TEXTURE_SIZE.height * BYTES_PER_PIXEL; + + let texture = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: Some("RenderTarget"), + size: TEXTURE_SIZE, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format, + usage: if format == wgpu::TextureFormat::Depth24PlusStencil8 { + // not allowed to have copy usages! + wgpu::TextureUsages::RENDER_ATTACHMENT + } else { + wgpu::TextureUsages::COPY_DST + | wgpu::TextureUsages::COPY_SRC + | wgpu::TextureUsages::RENDER_ATTACHMENT + }, + }); + + // Clear using a write_texture operation. We could also clear using a render_pass clear. + // However, when making this test intentionally fail (by breaking wgpu impl), it shows that at least on the tested Vulkan driver, + // the later following discard pass in the test (i.e. internally vk::AttachmentStoreOp::DONT_CARE) will yield different depending on the operation we take here: + // * clearing white -> discard will cause it to become black! + // * clearing red -> discard will keep it red + // * write_texture -> discard will keep buffer + // This behavior is curious, but does not violate any spec - it is wgpu's job to pass this test no matter what a render target discard does. + + // ... but that said, for depth/stencil textures we need to do a clear. 
+ if format_desc.sample_type == wgpu::TextureSampleType::Depth { + let mut encoder = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: Some("Depth/Stencil setup"), + color_attachments: &[], + depth_stencil_attachment: Some(wgpu::RenderPassDepthStencilAttachment { + view: &texture.create_view(&wgpu::TextureViewDescriptor::default()), + depth_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(1.0), + store: true, + }), + stencil_ops: Some(wgpu::Operations { + load: wgpu::LoadOp::Clear(0xFFFFFFFF), + store: true, + }), + }), + }); + ctx.queue.submit([encoder.finish()]); + } else { + let data = vec![255; buffer_size as usize]; + ctx.queue.write_texture( + wgpu::ImageCopyTexture { + texture: &texture, + mip_level: 0, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + aspect: wgpu::TextureAspect::All, + }, + &data, + BUFFER_COPY_LAYOUT, + TEXTURE_SIZE, + ); + } + + ( + texture, + ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: Some("Texture Readback"), + size: buffer_size as u64, + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }), + ) +} + +fn copy_texture_to_buffer( + encoder: &mut wgpu::CommandEncoder, + texture: &wgpu::Texture, + read_back: &wgpu::Buffer, +) { + encoder.copy_texture_to_buffer( + wgpu::ImageCopyTexture { + texture, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + wgpu::ImageCopyBuffer { + buffer: read_back, + layout: BUFFER_COPY_LAYOUT, + }, + TEXTURE_SIZE, + ); +} + +fn assert_buffer_is_zero(readback_buffer: &wgpu::Buffer, device: &wgpu::Device) { + { + let buffer_slice = readback_buffer.slice(..); + let _ = buffer_slice.map_async(wgpu::MapMode::Read); + device.poll(wgpu::Maintain::Wait); + let buffer_view = buffer_slice.get_mapped_range(); + + assert!( + buffer_view.iter().all(|b| *b == 0), + "texture was not fully cleared" + ); + } + 
readback_buffer.unmap(); +}