Reintroduce clear_texture Metal/Vulkan/DX12 (#1905)

* enable CLEAR_COMMANDS feature for all adapters

* replaced fill_buffer with clear_buffer (minus variable value)

* Improve clear buffer/texture test
and remove unused clear-buffer.ron test

* renamed clear_image to clear_texture

* skeleton for new wgpu-hal clear_texture

* clear_texture implementation for vulkan

* clear_texture now restricts usage

* clear_texture implementation for dx12

* Implemented clear_texture for Metal backend

* Clean up GLES clear_buffer and leave note on how to implement clear_texture in the future

* fix linux compilation & formatting issues

* comment & namespace fixes

* Extent3d now has a simple function for calculating the size of a mip level

Fix incorrect use of texture.size in clear_texture for metal/dx12
Fix incorrect mip/layer ranges in clear_texture for metal/dx12
This commit is contained in:
Wumpf 2021-09-07 03:14:40 +02:00 committed by GitHub
parent 938c069608
commit df2a686c29
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
29 changed files with 385 additions and 115 deletions

View File

@ -80,11 +80,11 @@ impl GlobalPlay for wgc::hub::Global<IdentityPassThroughFactory> {
trace::Command::ClearBuffer { dst, offset, size } => self
.command_encoder_clear_buffer::<A>(encoder, dst, offset, size)
.unwrap(),
trace::Command::ClearImage {
trace::Command::ClearTexture {
dst,
subresource_range,
} => self
.command_encoder_clear_image::<A>(encoder, dst, &subresource_range)
.command_encoder_clear_texture::<A>(encoder, dst, &subresource_range)
.unwrap(),
trace::Command::WriteTimestamp {
query_set_id,

View File

@ -3,7 +3,7 @@
tests: [
"bind-group.ron",
"buffer-copy.ron",
"clear-buffer-image.ron",
"clear-buffer-texture.ron",
"buffer-zero-init.ron",
"pipeline-statistics-query.ron",
"quad.ron",

View File

@ -1,36 +0,0 @@
(
features: (bits: 0x0000_0004_0000_0000),
expectations: [
(
name: "basic",
buffer: (index: 0, epoch: 1),
offset: 0,
data: Raw([
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
]),
)
],
actions: [
CreateBuffer(
Id(0, 1, Empty),
(
label: None,
size: 16,
usage: (
bits: 41,
),
mapped_at_creation: false,
),
),
Submit(1, [
ClearBuffer(
dst: Id(0, 1, Empty),
offset: 4,
size: Some(8),
)
]),
],
)

View File

@ -5,17 +5,17 @@
name: "Quad",
buffer: (index: 0, epoch: 1),
offset: 0,
data: File("clear-image.bin", 16384),
data: File("clear-texture.bin", 16384),
),
(
name: "buffer clear",
buffer: (index: 1, epoch: 1),
offset: 0,
data: Raw([
0x00, 0x00, 0x80, 0xBF,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x80, 0x3F,
]),
)
],
@ -35,6 +35,25 @@
bits: 27,
),
)),
// First fill the texture to ensure it wasn't just zero initialized or "happened" to be zero.
WriteTexture(
to: (
texture: Id(0, 1, Empty),
mip_level: 0,
array_layer: 0,
),
data: "quad.bin",
layout: (
offset: 0,
bytes_per_row: Some(256),
rows_per_image: None,
),
size: (
width: 64,
height: 64,
depth_or_array_layers: 1,
),
),
CreateBuffer(
Id(0, 1, Empty),
(
@ -46,6 +65,7 @@
mapped_at_creation: false,
),
),
CreateBuffer(
Id(1, 1, Empty),
(
@ -57,8 +77,18 @@
mapped_at_creation: false,
),
),
// Make sure there is something in the buffer, otherwise it might be just zero init!
WriteBuffer(
id: Id(1, 1, Empty),
data: "data1.bin",
range: (
start: 0,
end: 16,
),
queued: true,
),
Submit(1, [
ClearImage(
ClearTexture(
dst: Id(0, 1, Empty),
subresource_range: ImageSubresourceRange(
aspect: All,
@ -88,6 +118,7 @@
depth_or_array_layers: 1,
),
),
// Partial clear to prove that only the requested range is zeroed
ClearBuffer(
dst: Id(1, 1, Empty),
offset: 4,

View File

@ -44,6 +44,10 @@ pub enum ClearError {
texture_format: wgt::TextureFormat,
subresource_range_aspects: TextureAspect,
},
#[error("Depth/Stencil formats are not supported for clearing")]
DepthStencilFormatNotSupported,
#[error("Multisampled textures are not supported for clearing")]
MultisampledTextureUnsupported,
#[error("image subresource level range is outside of the texture's level range. texture range is {texture_level_range:?}, \
whereas subesource range specified start {subresource_base_mip_level} and count {subresource_mip_level_count:?}")]
InvalidTextureLevelRange {
@ -68,7 +72,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
offset: BufferAddress,
size: Option<BufferSize>,
) -> Result<(), ClearError> {
profiling::scope!("CommandEncoder::fill_buffer");
profiling::scope!("CommandEncoder::clear_buffer");
let hub = A::hub(self);
let mut token = Token::root();
@ -82,7 +86,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
list.push(TraceCommand::ClearBuffer { dst, offset, size });
}
if !cmd_buf.support_fill_buffer_texture {
if !cmd_buf.support_clear_buffer_texture {
return Err(ClearError::MissingClearCommandsFeature);
}
@ -122,7 +126,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
None => dst_buffer.size,
};
if offset == end {
log::trace!("Ignoring fill_buffer of size 0");
log::trace!("Ignoring clear_buffer of size 0");
return Ok(());
}
@ -139,18 +143,18 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let cmd_buf_raw = cmd_buf.encoder.open();
unsafe {
cmd_buf_raw.transition_buffers(dst_barrier);
cmd_buf_raw.fill_buffer(dst_raw, offset..end, 0);
cmd_buf_raw.clear_buffer(dst_raw, offset..end);
}
Ok(())
}
pub fn command_encoder_clear_image<A: HalApi>(
pub fn command_encoder_clear_texture<A: HalApi>(
&self,
command_encoder_id: CommandEncoderId,
dst: TextureId,
subresource_range: &ImageSubresourceRange,
) -> Result<(), ClearError> {
profiling::scope!("CommandEncoder::clear_image");
profiling::scope!("CommandEncoder::clear_texture");
let hub = A::hub(self);
let mut token = Token::root();
@ -162,13 +166,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
#[cfg(feature = "trace")]
if let Some(ref mut list) = cmd_buf.commands {
list.push(TraceCommand::ClearImage {
list.push(TraceCommand::ClearTexture {
dst,
subresource_range: subresource_range.clone(),
});
}
if !cmd_buf.support_fill_buffer_texture {
if !cmd_buf.support_clear_buffer_texture {
return Err(ClearError::MissingClearCommandsFeature);
}
@ -185,6 +189,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
subresource_range_aspects: subresource_range.aspect,
});
};
// Check if texture is supported for clearing
if dst_texture.desc.format.describe().sample_type == wgt::TextureSampleType::Depth {
return Err(ClearError::DepthStencilFormatNotSupported);
}
if dst_texture.desc.sample_count > 1 {
return Err(ClearError::MultisampledTextureUnsupported);
}
// Check if subresource level range is valid
let subresource_level_end = match subresource_range.mip_level_count {
Some(count) => subresource_range.base_mip_level + count.get(),
@ -228,7 +241,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
hal::TextureUses::COPY_DST,
)
.map_err(ClearError::InvalidTexture)?;
let _dst_raw = dst_texture
let dst_raw = dst_texture
.inner
.as_raw()
.ok_or(ClearError::InvalidTexture(dst))?;
@ -241,23 +254,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
let cmd_buf_raw = cmd_buf.encoder.open();
unsafe {
cmd_buf_raw.transition_textures(dst_barrier);
/*TODO: image clears
cmd_buf_raw.clear_image(
dst_raw,
hal::image::Layout::TransferDstOptimal,
hal::command::ClearValue {
color: hal::command::ClearColor {
float32: conv::map_color_f32(&wgt::Color::TRANSPARENT),
},
},
std::iter::once(hal::image::SubresourceRange {
aspects,
level_start: subresource_range.base_mip_level as u8,
level_count: subresource_range.mip_level_count.map(|c| c.get() as u8),
layer_start: subresource_range.base_array_layer as u16,
layer_count: subresource_range.array_layer_count.map(|c| c.get() as u16),
}),
);*/
cmd_buf_raw.clear_texture(dst_raw, subresource_range);
}
Ok(())
}

View File

@ -144,7 +144,7 @@ impl<A: hal::Api> BakedCommands<A> {
assert!(range.end % 4 == 0, "Buffer {:?} has an uninitialized range with an end not aligned to 4 (end was {})", raw_buf, range.end);
unsafe {
self.encoder.fill_buffer(raw_buf, range.clone(), 0);
self.encoder.clear_buffer(raw_buf, range.clone());
}
}
}
@ -160,7 +160,7 @@ pub struct CommandBuffer<A: hal::Api> {
pub(crate) trackers: TrackerSet,
buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
limits: wgt::Limits,
support_fill_buffer_texture: bool,
support_clear_buffer_texture: bool,
#[cfg(feature = "trace")]
pub(crate) commands: Option<Vec<crate::device::trace::Command>>,
}
@ -187,7 +187,7 @@ impl<A: HalApi> CommandBuffer<A> {
trackers: TrackerSet::new(A::VARIANT),
buffer_memory_init_actions: Default::default(),
limits,
support_fill_buffer_texture: features.contains(wgt::Features::CLEAR_COMMANDS),
support_clear_buffer_texture: features.contains(wgt::Features::CLEAR_COMMANDS),
#[cfg(feature = "trace")]
commands: if enable_tracing {
Some(Vec::new())

View File

@ -162,7 +162,7 @@ fn map_buffer<A: hal::Api>(
// Zero out uninitialized parts of the mapping. (Spec dictates all resources behave as if they were initialized with zero)
//
// If this is a read mapping, ideally we would use a `fill_buffer` command before reading the data from GPU (i.e. `invalidate_range`).
// If this is a read mapping, ideally we would use a `clear_buffer` command before reading the data from GPU (i.e. `invalidate_range`).
// However, this would require us to kick off and wait for a command buffer or piggy back on an existing one (the later is likely the only worthwhile option).
// As reading uninitialized memory isn't a particular important path to support,
// we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region.
@ -498,7 +498,7 @@ impl<A: HalApi> Device<A> {
}
} else {
// We are required to zero out (initialize) all memory.
// This is done on demand using fill_buffer which requires write transfer usage!
// This is done on demand using clear_buffer which requires write transfer usage!
usage |= hal::BufferUses::COPY_DST;
}

View File

@ -151,7 +151,7 @@ pub enum Command {
offset: wgt::BufferAddress,
size: Option<wgt::BufferSize>,
},
ClearImage {
ClearTexture {
dst: id::TextureId,
subresource_range: wgt::ImageSubresourceRange,
},

View File

@ -178,7 +178,8 @@ impl super::Adapter {
| wgt::Features::POLYGON_MODE_POINT
| wgt::Features::VERTEX_WRITABLE_STORAGE
| wgt::Features::TIMESTAMP_QUERY
| wgt::Features::TEXTURE_COMPRESSION_BC;
| wgt::Features::TEXTURE_COMPRESSION_BC
| wgt::Features::CLEAR_COMMANDS;
//TODO: in order to expose this, we need to run a compute shader
// that extract the necessary statistics out of the D3D12 result.
// Alternatively, we could allocate a buffer for the query set,

View File

@ -363,8 +363,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
}
}
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
assert_eq!(value, 0, "Only zero is supported!");
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
let list = self.list.unwrap();
let mut offset = range.start;
while offset < range.end {
@ -380,6 +379,100 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
}
}
/// Clears the selected mip levels and array layers of `texture` to zero.
///
/// The clear is done with `CopyTextureRegion` calls that read from the shared zero buffer.
/// Each subresource is split into chunks of rows that fit into that buffer.
/// Depth/stencil and multisampled textures are not supported.
///
/// # Panics
///
/// Panics if `self.list` is `None`, or if the zero buffer is too small to hold
/// a single (pitch-aligned) row of the texture.
unsafe fn clear_texture(
    &mut self,
    texture: &super::Texture,
    subresource_range: &wgt::ImageSubresourceRange,
) {
    // Note that CopyTextureRegion for depth/stencil or multisample resources would require full subresource copies.
    // Meaning we'd need a much larger pre-zeroed buffer
    // (but instead we just define clear_texture to not support these)

    let list = self.list.unwrap();
    let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION {
        pResource: self.shared.zero_buffer.as_mut_ptr(),
        Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
        u: mem::zeroed(),
    };
    let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION {
        pResource: texture.resource.as_mut_ptr(),
        Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
        u: mem::zeroed(),
    };

    let raw_format = conv::map_texture_format(texture.format);
    let format_desc = texture.format.describe();
    let block_width = format_desc.block_dimensions.0 as u32;
    let block_height = format_desc.block_dimensions.1 as u32;
    let is_3d = texture.dimension == wgt::TextureDimension::D3;

    let mip_range = subresource_range.base_mip_level..match subresource_range.mip_level_count {
        Some(c) => subresource_range.base_mip_level + c.get(),
        None => texture.mip_level_count,
    };
    let array_range = subresource_range.base_array_layer
        ..match subresource_range.array_layer_count {
            Some(c) => subresource_range.base_array_layer + c.get(),
            None => texture.array_layer_count(),
        };

    for mip_level in mip_range {
        let mip_size = texture.size.mip_level_size(mip_level, is_3d);
        let depth = if is_3d {
            mip_size.depth_or_array_layers
        } else {
            1
        };

        // Small mips of block-compressed formats can be narrower/shorter than a single block.
        // Round the copied extent up to whole blocks: a truncating division would otherwise
        // yield zero bytes per row (and a division by zero below), and the placed footprint
        // has to cover whole blocks anyway.
        // For uncompressed formats (1x1 blocks) this leaves the size unchanged.
        let copy_width = (mip_size.width + block_width - 1) / block_width * block_width;
        let copy_height = (mip_size.height + block_height - 1) / block_height * block_height;

        let bytes_per_row = copy_width / block_width * format_desc.block_size as u32;
        // round up to a multiple of d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT
        let bytes_per_row = (bytes_per_row + d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)
            / d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT
            * d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;

        let max_rows_per_copy = super::ZERO_BUFFER_SIZE as u32 / bytes_per_row;
        // round down to a multiple of rows needed by the texture format
        let max_rows_per_copy = max_rows_per_copy / block_height * block_height;
        assert!(max_rows_per_copy > 0, "Zero buffer size is too small to fill a single row of a texture with dimension {:?}, size {:?} and format {:?}", texture.dimension, texture.size, texture.format);

        for array_layer in array_range.clone() {
            // We excluded depth/stencil, so plane should be always zero
            *dst_location.u.SubresourceIndex_mut() =
                texture.calc_subresource(mip_level, array_layer, 0);

            // 3D textures are quickly massive in memory size, so we don't bother trying to do more than one layer at once.
            for z in 0..depth {
                // May need multiple copies for each subresource!
                // We assume that we never need to split a row. Back of the envelope calculation tells us
                // a 512kb buffer is enough for the most extreme known case:
                // max_texture_width * max_pixel_size = 32768 * 16 = 512kb
                let mut num_rows_left = copy_height;
                while num_rows_left > 0 {
                    // Both `copy_height` and `max_rows_per_copy` are multiples of the block height,
                    // so every chunk (including the last one) covers whole block rows.
                    let num_rows = num_rows_left.min(max_rows_per_copy);

                    *src_location.u.PlacedFootprint_mut() =
                        d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT {
                            Offset: 0,
                            Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT {
                                Format: raw_format,
                                Width: copy_width,
                                Height: num_rows,
                                Depth: 1,
                                RowPitch: bytes_per_row,
                            },
                        };

                    list.CopyTextureRegion(
                        &dst_location,
                        0,
                        copy_height - num_rows_left,
                        z,
                        &src_location,
                        std::ptr::null(),
                    );
                    num_rows_left -= num_rows;
                }
            }
        }
    }
}
unsafe fn copy_buffer_to_buffer<T>(
&mut self,
src: &super::Buffer,

View File

@ -120,7 +120,7 @@ impl<T> HResult<T> for (T, i32) {
// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries.
const MAX_ROOT_ELEMENTS: usize = 64;
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10;
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 512 << 10;
pub struct Instance {
factory: native::Factory4,

View File

@ -249,7 +249,14 @@ impl crate::CommandEncoder<Api> for Encoder {
{
}
unsafe fn fill_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange, value: u8) {}
unsafe fn clear_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange) {}
/// Texture clear for this backend: the body is empty, so calling it does nothing.
unsafe fn clear_texture(
&mut self,
texture: &Resource,
subresource_range: &wgt::ImageSubresourceRange,
) {
}
unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {}

View File

@ -257,7 +257,8 @@ impl super::Adapter {
let mut features = wgt::Features::empty()
| wgt::Features::TEXTURE_COMPRESSION_ETC2
| wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES;
| wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
| wgt::Features::CLEAR_COMMANDS;
features.set(
wgt::Features::DEPTH_CLAMPING,
extensions.contains("GL_EXT_depth_clamp"),

View File

@ -237,12 +237,24 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
}
}
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
self.cmd_buffer.commands.push(C::FillBuffer {
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
self.cmd_buffer.commands.push(C::ClearBuffer {
dst: buffer.raw,
dst_target: buffer.target,
range,
value,
});
}
/// Records a `ClearTexture` command for `texture` into this encoder's command buffer.
///
/// Nothing is executed here; the native texture handle, its bind target and a copy
/// of `subresource_range` are stored in the recorded command.
unsafe fn clear_texture(
    &mut self,
    texture: &super::Texture,
    subresource_range: &wgt::ImageSubresourceRange,
) {
    let (raw_texture, bind_target) = texture.inner.as_native();
    let command = C::ClearTexture {
        dst: raw_texture,
        dst_target: bind_target,
        subresource_range: subresource_range.clone(),
    };
    self.cmd_buffer.commands.push(command);
}

View File

@ -188,7 +188,7 @@ pub struct Queue {
shader_clear_program: glow::Program,
/// The uniform location of the color uniform in the shader clear program
shader_clear_program_color_uniform_location: glow::UniformLocation,
/// Keep a reasonably large buffer filled with zeroes, so that we can implement `FillBuffer` of
/// Keep a reasonably large buffer filled with zeroes, so that we can implement `ClearBuffer` of
/// zeroes by copying from it.
zero_buffer: glow::Buffer,
temp_query_results: Vec<u64>,
@ -527,11 +527,15 @@ enum Command {
indirect_buf: glow::Buffer,
indirect_offset: wgt::BufferAddress,
},
FillBuffer {
ClearBuffer {
dst: glow::Buffer,
dst_target: BindTarget,
range: crate::MemoryRange,
value: u8,
},
ClearTexture {
dst: glow::Texture,
dst_target: BindTarget,
subresource_range: wgt::ImageSubresourceRange,
},
CopyBufferToBuffer {
src: glow::Buffer,

View File

@ -190,13 +190,11 @@ impl super::Queue {
gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf));
gl.dispatch_compute_indirect(indirect_offset as i32);
}
C::FillBuffer {
C::ClearBuffer {
dst,
dst_target,
ref range,
value,
} => {
assert_eq!(value, 0); // other values require `wgt::Features::CLEAR_COMMANDS`.
gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer));
gl.bind_buffer(dst_target, Some(dst));
let mut dst_offset = range.start;
@ -212,6 +210,20 @@ impl super::Queue {
dst_offset += size;
}
}
C::ClearTexture {
dst: _,
dst_target: _,
subresource_range: _,
} => {
// Should use EXT_clear_texture when possible.
// https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_clear_texture.txt
// But support is not very widespread. Need to fallback to do zero_buffer copies
// TODO: Need to invoke calls into CopyBufferToTexture using zero_buffer.
// To do that determine how many rows zero_buffer can fill and then chunk the texture up
// (do *not* repeat the exact logic of CopyBufferToTexture, it's way too much!)
//unimplemented!("texture clearing for GLES is not implemented yet");
}
C::CopyBufferToBuffer {
src,
src_target,

View File

@ -344,9 +344,14 @@ pub trait CommandEncoder<A: Api>: Send + Sync {
// copy operations
/// This is valid to call with `value == 0`.
/// Otherwise `wgt::Features::CLEAR_COMMANDS` is required.
unsafe fn fill_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange, value: u8);
unsafe fn clear_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange);
// Does not support depth/stencil or multisampled textures
unsafe fn clear_texture(
&mut self,
texture: &A::Texture,
subresource_range: &wgt::ImageSubresourceRange,
);
unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &A::Buffer, dst: &A::Buffer, regions: T)
where

View File

@ -862,7 +862,8 @@ impl super::PrivateCapabilities {
| F::MAPPABLE_PRIMARY_BUFFERS
| F::VERTEX_WRITABLE_STORAGE
| F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
| F::POLYGON_MODE_LINE;
| F::POLYGON_MODE_LINE
| F::CLEAR_COMMANDS;
features.set(
F::TEXTURE_BINDING_ARRAY

View File

@ -126,9 +126,84 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
{
}
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
let encoder = self.enter_blit();
encoder.fill_buffer(&buffer.raw, conv::map_range(&range), value);
encoder.fill_buffer(&buffer.raw, conv::map_range(&range), 0);
}
/// Clears the selected mip levels and array layers of `texture` to zero.
///
/// The clear is done with blit copies that read from the shared zero buffer.
/// Each subresource is split into chunks of rows that fit into that buffer.
///
/// # Panics
///
/// Panics if the zero buffer is too small to hold a single row of the texture.
unsafe fn clear_texture(
    &mut self,
    texture: &super::Texture,
    subresource_range: &wgt::ImageSubresourceRange,
) {
    let shared = self.shared.clone();
    let encoder = self.enter_blit();

    let format_desc = texture.format.describe();
    let block_width = format_desc.block_dimensions.0 as u64;
    let block_height = format_desc.block_dimensions.1 as u64;
    let is_3d = texture.raw_type == mtl::MTLTextureType::D3;

    let mip_range = subresource_range.base_mip_level..match subresource_range.mip_level_count {
        Some(c) => subresource_range.base_mip_level + c.get(),
        None => texture.mip_levels,
    };
    let array_range = subresource_range.base_array_layer
        ..match subresource_range.array_layer_count {
            Some(c) => subresource_range.base_array_layer + c.get(),
            None => texture.array_layers,
        };

    for mip_level in mip_range {
        let mip_size = texture.size.mip_level_size(mip_level, is_3d);
        let depth = if is_3d {
            mip_size.depth_or_array_layers as u64
        } else {
            1
        };

        // Count partially covered blocks as whole blocks. A truncating division would give
        // zero bytes per row for compressed mips narrower than one block, and with that a
        // division by zero below.
        let blocks_per_row = (mip_size.width as u64 + block_width - 1) / block_width;
        // Note that Metal requires this only to be a multiple of the pixel size, not some other constant like in other APIs.
        let bytes_per_row = blocks_per_row * format_desc.block_size as u64;

        let max_rows_per_copy = super::ZERO_BUFFER_SIZE / bytes_per_row;
        // round down to a multiple of rows needed by the texture format
        let max_rows_per_copy = max_rows_per_copy / block_height * block_height;
        assert!(max_rows_per_copy > 0, "Zero buffer size is too small to fill a single row of a texture of type {:?}, size {:?} and format {:?}",
                texture.raw_type, texture.size, texture.format);

        for array_layer in array_range.clone() {
            // 3D textures are quickly massive in memory size, so we don't bother trying to do more than one layer at once.
            for z in 0..depth {
                // May need multiple copies for each subresource! We assume that we never need to split a row.
                let mut num_rows_left = mip_size.height as u64;
                while num_rows_left > 0 {
                    let num_rows = num_rows_left.min(max_rows_per_copy);

                    let source_size = mtl::MTLSize {
                        width: mip_size.width as u64,
                        height: num_rows,
                        depth: 1,
                    };
                    let destination_origin = mtl::MTLOrigin {
                        x: 0,
                        y: mip_size.height as u64 - num_rows_left,
                        z,
                    };
                    encoder.copy_from_buffer_to_texture(
                        &shared.zero_buffer,
                        0,
                        bytes_per_row,
                        bytes_per_row * num_rows,
                        source_size,
                        &texture.raw,
                        array_layer as u64,
                        mip_level as u64,
                        destination_origin,
                        mtl::MTLBlitOption::empty(),
                    );

                    num_rows_left -= num_rows;
                }
            }
        }
    }
}
unsafe fn copy_buffer_to_buffer<T>(

View File

@ -278,10 +278,12 @@ impl crate::Device<super::Api> for super::Device {
Ok(super::Texture {
raw,
format: desc.format,
raw_format: mtl_format,
raw_type: mtl_type,
mip_levels: desc.mip_level_count,
array_layers,
size: desc.size,
})
}

View File

@ -237,11 +237,17 @@ struct Settings {
retain_command_buffer_references: bool,
}
// Using max copyable texture row
// https://developer.apple.com/documentation/metal/mtlblitcommandencoder/1400752-copyfrombuffer?language=objc
// "The value must be less than or equal to 32767 multiplied by the destination textures pixel size."
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 32767 * 16; // 512kb
struct AdapterShared {
device: Mutex<mtl::Device>,
disabilities: PrivateDisabilities,
private_caps: PrivateCapabilities,
settings: Settings,
zero_buffer: mtl::Buffer,
}
unsafe impl Send for AdapterShared {}
@ -252,11 +258,20 @@ impl AdapterShared {
let private_caps = PrivateCapabilities::new(&device);
log::debug!("{:#?}", private_caps);
// buffers created this way are zero initialized
// see https://developer.apple.com/documentation/metal/mtldevice/1433375-newbufferwithlength?language=objc
let zero_buffer = device.new_buffer(
ZERO_BUFFER_SIZE,
mtl::MTLResourceOptions::CPUCacheModeWriteCombined
| mtl::MTLResourceOptions::StorageModePrivate,
);
Self {
disabilities: PrivateDisabilities::new(&device),
private_caps: PrivateCapabilities::new(&device),
device: Mutex::new(device),
settings: Settings::default(),
zero_buffer,
}
}
}
@ -280,6 +295,7 @@ pub struct Device {
pub struct Surface {
view: Option<NonNull<objc::runtime::Object>>,
render_layer: Mutex<mtl::MetalLayer>,
swapchain_format: wgt::TextureFormat,
raw_swapchain_format: mtl::MTLPixelFormat,
main_thread_id: thread::ThreadId,
// Useful for UI-intensive applications that are sensitive to
@ -404,10 +420,12 @@ impl Buffer {
#[derive(Debug)]
pub struct Texture {
raw: mtl::Texture,
format: wgt::TextureFormat,
raw_format: mtl::MTLPixelFormat,
raw_type: mtl::MTLTextureType,
array_layers: u32,
mip_levels: u32,
size: wgt::Extent3d,
}
unsafe impl Send for Texture {}

View File

@ -60,6 +60,7 @@ impl super::Surface {
Self {
view,
render_layer: Mutex::new(layer),
swapchain_format: wgt::TextureFormat::Bgra8UnormSrgb, // no value invalid, pick something not too far-fetched
raw_swapchain_format: mtl::MTLPixelFormat::Invalid,
main_thread_id: thread::current().id(),
present_with_transaction: false,
@ -209,6 +210,7 @@ impl crate::Surface<super::Api> for super::Surface {
log::info!("build swapchain {:?}", config);
let caps = &device.shared.private_caps;
self.swapchain_format = config.format;
self.raw_swapchain_format = caps.map_format(config.format);
let render_layer = self.render_layer.lock();
@ -271,10 +273,12 @@ impl crate::Surface<super::Api> for super::Surface {
let suf_texture = super::SurfaceTexture {
texture: super::Texture {
raw: texture,
format: self.swapchain_format,
raw_format: self.raw_swapchain_format,
raw_type: mtl::MTLTextureType::D2,
array_layers: 1,
mip_levels: 1,
size: self.dimensions(),
},
drawable,
present_with_transaction: self.present_with_transaction,

View File

@ -242,7 +242,8 @@ impl PhysicalDeviceFeatures {
| F::ADDRESS_MODE_CLAMP_TO_BORDER
| F::TIMESTAMP_QUERY
| F::PIPELINE_STATISTICS_QUERY
| F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES;
| F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
| F::CLEAR_COMMANDS;
let mut dl_flags = Df::all();
dl_flags.set(Df::CUBE_ARRAY_TEXTURES, self.core.image_cube_array != 0);

View File

@ -182,16 +182,48 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
}
}
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
self.device.raw.cmd_fill_buffer(
self.active,
buffer.raw,
range.start,
range.end - range.start,
(value as u32) * 0x01010101,
0,
);
}
/// Records a clear of `subresource_range` of `texture` to all-zero color values.
///
/// Only color clears are issued here; depth/stencil formats are excluded from `clear_texture`.
unsafe fn clear_texture(
    &mut self,
    texture: &super::Texture,
    subresource_range: &wgt::ImageSubresourceRange,
) {
    let zero_color = vk::ClearColorValue {
        float32: [0.0, 0.0, 0.0, 0.0],
    };
    let ranges = [conv::map_subresource_range(
        subresource_range,
        texture.aspects,
    )];

    self.device.raw.cmd_clear_color_image(
        self.active,
        texture.raw,
        DST_IMAGE_LAYOUT,
        &zero_color,
        &ranges,
    );

    // The Vulkan api could easily support depth/stencil formats for clearing as well.
    // But in other APIs this is more challenging which is why clear_texture excludes support for these formats.
    // self.device.raw.cmd_clear_depth_stencil_image(
    //     self.active,
    //     texture.raw,
    //     DST_IMAGE_LAYOUT,
    //     &vk::ClearDepthStencilValue {
    //         depth: 0.0,
    //         stencil: 0,
    //     },
    //     &[range],
    // );
}
unsafe fn copy_buffer_to_buffer<T>(
&mut self,
src: &super::Buffer,

View File

@ -495,7 +495,7 @@ bitflags::bitflags! {
///
/// This is a native-only feature.
const VERTEX_WRITABLE_STORAGE = 1 << 35;
/// Enables clear to zero for buffers & images.
/// Enables clear to zero for buffers & textures.
///
/// Supported platforms:
/// - All
@ -2353,7 +2353,7 @@ bitflags::bitflags! {
/// operation.
const COPY_SRC = 1 << 2;
/// Allow a buffer to be the destination buffer for a [`CommandEncoder::copy_buffer_to_buffer`], [`CommandEncoder::copy_texture_to_buffer`],
/// [`CommandEncoder::fill_buffer`] or [`Queue::write_buffer`] operation.
/// [`CommandEncoder::clear_buffer`] or [`Queue::write_buffer`] operation.
const COPY_DST = 1 << 3;
/// Allow a buffer to be the index buffer in a draw operation.
const INDEX = 1 << 4;
@ -2692,6 +2692,18 @@ impl Extent3d {
let max_dim = self.width.max(self.height.max(self.depth_or_array_layers));
32 - max_dim.leading_zeros()
}
/// Calculates the extent at a given mip level.
///
/// Width and height are halved (rounding down) once per level and never drop below 1.
/// `depth_or_array_layers` is only reduced when `is_3d_texture` is `true`;
/// otherwise it is returned unchanged.
///
/// Levels of 32 and above resolve to the minimum extent of 1 per reduced dimension
/// instead of overflowing the shift.
pub fn mip_level_size(&self, level: u32, is_3d_texture: bool) -> Extent3d {
    // `checked_shr` returns `None` once `level` reaches the bit width of `u32`;
    // a plain `>>` would panic in debug builds and wrap the shift amount in release builds.
    let shrink = |dimension: u32| dimension.checked_shr(level).unwrap_or(0).max(1);
    Extent3d {
        width: shrink(self.width),
        height: shrink(self.height),
        depth_or_array_layers: if is_3d_texture {
            shrink(self.depth_or_array_layers)
        } else {
            self.depth_or_array_layers
        },
    }
}
}
/// Describes a [`Texture`].
@ -2765,14 +2777,10 @@ impl<L> TextureDescriptor<L> {
return None;
}
Some(Extent3d {
width: u32::max(1, self.size.width >> level),
height: u32::max(1, self.size.height >> level),
depth_or_array_layers: match self.dimension {
TextureDimension::D1 | TextureDimension::D2 => self.size.depth_or_array_layers,
TextureDimension::D3 => u32::max(1, self.size.depth_or_array_layers >> level),
},
})
Some(
self.size
.mip_level_size(level, self.dimension == TextureDimension::D3),
)
}
/// Returns the number of array layers.

View File

@ -1948,19 +1948,19 @@ impl crate::Context for Context {
id
}
fn command_encoder_clear_image(
fn command_encoder_clear_texture(
&self,
encoder: &Self::CommandEncoderId,
texture: &crate::Texture,
subresource_range: &wgt::ImageSubresourceRange,
) {
let global = &self.0;
if let Err(cause) = wgc::gfx_select!(encoder.id => global.command_encoder_clear_image(
if let Err(cause) = wgc::gfx_select!(encoder.id => global.command_encoder_clear_texture(
encoder.id,
texture.id.id,
subresource_range
)) {
self.handle_error_nolabel(&encoder.error_sink, cause, "CommandEncoder::clear_image");
self.handle_error_nolabel(&encoder.error_sink, cause, "CommandEncoder::clear_texture");
}
}

View File

@ -1911,7 +1911,7 @@ impl crate::Context for Context {
})
}
fn command_encoder_clear_image(
fn command_encoder_clear_texture(
&self,
_encoder: &Self::CommandEncoderId,
_texture: &crate::Texture,

View File

@ -412,7 +412,7 @@ trait Context: Debug + Send + Sized + Sync {
);
fn command_encoder_finish(&self, encoder: Self::CommandEncoderId) -> Self::CommandBufferId;
fn command_encoder_clear_image(
fn command_encoder_clear_texture(
&self,
encoder: &Self::CommandEncoderId,
texture: &Texture,
@ -2260,13 +2260,15 @@ impl CommandEncoder {
/// Clears texture to zero.
///
/// Where possible it may be significantly more efficient to perform clears via render passes!
///
/// # Panics
///
/// - `CLEAR_COMMANDS` extension not enabled
/// - Texture does not have `COPY_DST` usage.
/// - Range is out of bounds
pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
Context::command_encoder_clear_image(
Context::command_encoder_clear_texture(
&*self.context,
self.id.as_ref().unwrap(),
texture,