mirror of
https://github.com/gfx-rs/wgpu.git
synced 2025-02-16 17:02:32 +00:00
Reintroduce clear_texture Metal/Vulkan/DX12 (#1905)
* enable CLEAR_COMMANDS feature for all adapters
* replaced fill_buffer with clear_buffer (minus variable value)
* Improve clear buffer/texture test and remove unused clear-buffer.ron test
* renamed clear_image to clear_texture
* skeleton for new wgpu-hal clear_texture
* clear_texture implementation for vulkan
* clear_texture now restricts usage
* clear_texture implementation for dx12
* Implemented clear_texture for Metal backend
* Clean up GLES clear_buffer and leave note on how to implement clear_texture in the future
* fix linux compilation & formatting issues
* comment & namespace fixes
* Extent3d now has a simple function for calculating the size of a mip level
* Fix incorrect use of texture.size in clear_texture for metal/dx12
* Fix incorrect mip/layer ranges in clear_texture for metal/dx12
This commit is contained in:
parent
938c069608
commit
df2a686c29
@ -80,11 +80,11 @@ impl GlobalPlay for wgc::hub::Global<IdentityPassThroughFactory> {
|
||||
trace::Command::ClearBuffer { dst, offset, size } => self
|
||||
.command_encoder_clear_buffer::<A>(encoder, dst, offset, size)
|
||||
.unwrap(),
|
||||
trace::Command::ClearImage {
|
||||
trace::Command::ClearTexture {
|
||||
dst,
|
||||
subresource_range,
|
||||
} => self
|
||||
.command_encoder_clear_image::<A>(encoder, dst, &subresource_range)
|
||||
.command_encoder_clear_texture::<A>(encoder, dst, &subresource_range)
|
||||
.unwrap(),
|
||||
trace::Command::WriteTimestamp {
|
||||
query_set_id,
|
||||
|
@ -3,7 +3,7 @@
|
||||
tests: [
|
||||
"bind-group.ron",
|
||||
"buffer-copy.ron",
|
||||
"clear-buffer-image.ron",
|
||||
"clear-buffer-texture.ron",
|
||||
"buffer-zero-init.ron",
|
||||
"pipeline-statistics-query.ron",
|
||||
"quad.ron",
|
||||
|
@ -1,36 +0,0 @@
|
||||
(
|
||||
features: (bits: 0x0000_0004_0000_0000),
|
||||
expectations: [
|
||||
(
|
||||
name: "basic",
|
||||
buffer: (index: 0, epoch: 1),
|
||||
offset: 0,
|
||||
data: Raw([
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
]),
|
||||
)
|
||||
],
|
||||
actions: [
|
||||
CreateBuffer(
|
||||
Id(0, 1, Empty),
|
||||
(
|
||||
label: None,
|
||||
size: 16,
|
||||
usage: (
|
||||
bits: 41,
|
||||
),
|
||||
mapped_at_creation: false,
|
||||
),
|
||||
),
|
||||
Submit(1, [
|
||||
ClearBuffer(
|
||||
dst: Id(0, 1, Empty),
|
||||
offset: 4,
|
||||
size: Some(8),
|
||||
)
|
||||
]),
|
||||
],
|
||||
)
|
@ -5,17 +5,17 @@
|
||||
name: "Quad",
|
||||
buffer: (index: 0, epoch: 1),
|
||||
offset: 0,
|
||||
data: File("clear-image.bin", 16384),
|
||||
data: File("clear-texture.bin", 16384),
|
||||
),
|
||||
(
|
||||
name: "buffer clear",
|
||||
buffer: (index: 1, epoch: 1),
|
||||
offset: 0,
|
||||
data: Raw([
|
||||
0x00, 0x00, 0x80, 0xBF,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x00, 0x00,
|
||||
0x00, 0x00, 0x80, 0x3F,
|
||||
]),
|
||||
)
|
||||
],
|
||||
@ -35,6 +35,25 @@
|
||||
bits: 27,
|
||||
),
|
||||
)),
|
||||
// First fill the texture to ensure it wasn't just zero initialized or "happened" to be zero.
|
||||
WriteTexture(
|
||||
to: (
|
||||
texture: Id(0, 1, Empty),
|
||||
mip_level: 0,
|
||||
array_layer: 0,
|
||||
),
|
||||
data: "quad.bin",
|
||||
layout: (
|
||||
offset: 0,
|
||||
bytes_per_row: Some(256),
|
||||
rows_per_image: None,
|
||||
),
|
||||
size: (
|
||||
width: 64,
|
||||
height: 64,
|
||||
depth_or_array_layers: 1,
|
||||
),
|
||||
),
|
||||
CreateBuffer(
|
||||
Id(0, 1, Empty),
|
||||
(
|
||||
@ -46,6 +65,7 @@
|
||||
mapped_at_creation: false,
|
||||
),
|
||||
),
|
||||
|
||||
CreateBuffer(
|
||||
Id(1, 1, Empty),
|
||||
(
|
||||
@ -57,8 +77,18 @@
|
||||
mapped_at_creation: false,
|
||||
),
|
||||
),
|
||||
// Make sure there is something in the buffer, otherwise it might be just zero init!
|
||||
WriteBuffer(
|
||||
id: Id(1, 1, Empty),
|
||||
data: "data1.bin",
|
||||
range: (
|
||||
start: 0,
|
||||
end: 16,
|
||||
),
|
||||
queued: true,
|
||||
),
|
||||
Submit(1, [
|
||||
ClearImage(
|
||||
ClearTexture(
|
||||
dst: Id(0, 1, Empty),
|
||||
subresource_range: ImageSubresourceRange(
|
||||
aspect: All,
|
||||
@ -88,6 +118,7 @@
|
||||
depth_or_array_layers: 1,
|
||||
),
|
||||
),
|
||||
// Partial clear to prove that bytes outside the cleared range are left untouched
|
||||
ClearBuffer(
|
||||
dst: Id(1, 1, Empty),
|
||||
offset: 4,
|
@ -44,6 +44,10 @@ pub enum ClearError {
|
||||
texture_format: wgt::TextureFormat,
|
||||
subresource_range_aspects: TextureAspect,
|
||||
},
|
||||
#[error("Depth/Stencil formats are not supported for clearing")]
|
||||
DepthStencilFormatNotSupported,
|
||||
#[error("Multisampled textures are not supported for clearing")]
|
||||
MultisampledTextureUnsupported,
|
||||
#[error("image subresource level range is outside of the texture's level range. texture range is {texture_level_range:?}, \
|
||||
whereas subesource range specified start {subresource_base_mip_level} and count {subresource_mip_level_count:?}")]
|
||||
InvalidTextureLevelRange {
|
||||
@ -68,7 +72,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
offset: BufferAddress,
|
||||
size: Option<BufferSize>,
|
||||
) -> Result<(), ClearError> {
|
||||
profiling::scope!("CommandEncoder::fill_buffer");
|
||||
profiling::scope!("CommandEncoder::clear_buffer");
|
||||
|
||||
let hub = A::hub(self);
|
||||
let mut token = Token::root();
|
||||
@ -82,7 +86,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
list.push(TraceCommand::ClearBuffer { dst, offset, size });
|
||||
}
|
||||
|
||||
if !cmd_buf.support_fill_buffer_texture {
|
||||
if !cmd_buf.support_clear_buffer_texture {
|
||||
return Err(ClearError::MissingClearCommandsFeature);
|
||||
}
|
||||
|
||||
@ -122,7 +126,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
None => dst_buffer.size,
|
||||
};
|
||||
if offset == end {
|
||||
log::trace!("Ignoring fill_buffer of size 0");
|
||||
log::trace!("Ignoring clear_buffer of size 0");
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@ -139,18 +143,18 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
let cmd_buf_raw = cmd_buf.encoder.open();
|
||||
unsafe {
|
||||
cmd_buf_raw.transition_buffers(dst_barrier);
|
||||
cmd_buf_raw.fill_buffer(dst_raw, offset..end, 0);
|
||||
cmd_buf_raw.clear_buffer(dst_raw, offset..end);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn command_encoder_clear_image<A: HalApi>(
|
||||
pub fn command_encoder_clear_texture<A: HalApi>(
|
||||
&self,
|
||||
command_encoder_id: CommandEncoderId,
|
||||
dst: TextureId,
|
||||
subresource_range: &ImageSubresourceRange,
|
||||
) -> Result<(), ClearError> {
|
||||
profiling::scope!("CommandEncoder::clear_image");
|
||||
profiling::scope!("CommandEncoder::clear_texture");
|
||||
|
||||
let hub = A::hub(self);
|
||||
let mut token = Token::root();
|
||||
@ -162,13 +166,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
|
||||
#[cfg(feature = "trace")]
|
||||
if let Some(ref mut list) = cmd_buf.commands {
|
||||
list.push(TraceCommand::ClearImage {
|
||||
list.push(TraceCommand::ClearTexture {
|
||||
dst,
|
||||
subresource_range: subresource_range.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
if !cmd_buf.support_fill_buffer_texture {
|
||||
if !cmd_buf.support_clear_buffer_texture {
|
||||
return Err(ClearError::MissingClearCommandsFeature);
|
||||
}
|
||||
|
||||
@ -185,6 +189,15 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
subresource_range_aspects: subresource_range.aspect,
|
||||
});
|
||||
};
|
||||
|
||||
// Check if texture is supported for clearing
|
||||
if dst_texture.desc.format.describe().sample_type == wgt::TextureSampleType::Depth {
|
||||
return Err(ClearError::DepthStencilFormatNotSupported);
|
||||
}
|
||||
if dst_texture.desc.sample_count > 1 {
|
||||
return Err(ClearError::MultisampledTextureUnsupported);
|
||||
}
|
||||
|
||||
// Check if subresource level range is valid
|
||||
let subresource_level_end = match subresource_range.mip_level_count {
|
||||
Some(count) => subresource_range.base_mip_level + count.get(),
|
||||
@ -228,7 +241,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
hal::TextureUses::COPY_DST,
|
||||
)
|
||||
.map_err(ClearError::InvalidTexture)?;
|
||||
let _dst_raw = dst_texture
|
||||
let dst_raw = dst_texture
|
||||
.inner
|
||||
.as_raw()
|
||||
.ok_or(ClearError::InvalidTexture(dst))?;
|
||||
@ -241,23 +254,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
|
||||
let cmd_buf_raw = cmd_buf.encoder.open();
|
||||
unsafe {
|
||||
cmd_buf_raw.transition_textures(dst_barrier);
|
||||
/*TODO: image clears
|
||||
cmd_buf_raw.clear_image(
|
||||
dst_raw,
|
||||
hal::image::Layout::TransferDstOptimal,
|
||||
hal::command::ClearValue {
|
||||
color: hal::command::ClearColor {
|
||||
float32: conv::map_color_f32(&wgt::Color::TRANSPARENT),
|
||||
},
|
||||
},
|
||||
std::iter::once(hal::image::SubresourceRange {
|
||||
aspects,
|
||||
level_start: subresource_range.base_mip_level as u8,
|
||||
level_count: subresource_range.mip_level_count.map(|c| c.get() as u8),
|
||||
layer_start: subresource_range.base_array_layer as u16,
|
||||
layer_count: subresource_range.array_layer_count.map(|c| c.get() as u16),
|
||||
}),
|
||||
);*/
|
||||
cmd_buf_raw.clear_texture(dst_raw, subresource_range);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
@ -144,7 +144,7 @@ impl<A: hal::Api> BakedCommands<A> {
|
||||
assert!(range.end % 4 == 0, "Buffer {:?} has an uninitialized range with an end not aligned to 4 (end was {})", raw_buf, range.end);
|
||||
|
||||
unsafe {
|
||||
self.encoder.fill_buffer(raw_buf, range.clone(), 0);
|
||||
self.encoder.clear_buffer(raw_buf, range.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -160,7 +160,7 @@ pub struct CommandBuffer<A: hal::Api> {
|
||||
pub(crate) trackers: TrackerSet,
|
||||
buffer_memory_init_actions: Vec<BufferInitTrackerAction>,
|
||||
limits: wgt::Limits,
|
||||
support_fill_buffer_texture: bool,
|
||||
support_clear_buffer_texture: bool,
|
||||
#[cfg(feature = "trace")]
|
||||
pub(crate) commands: Option<Vec<crate::device::trace::Command>>,
|
||||
}
|
||||
@ -187,7 +187,7 @@ impl<A: HalApi> CommandBuffer<A> {
|
||||
trackers: TrackerSet::new(A::VARIANT),
|
||||
buffer_memory_init_actions: Default::default(),
|
||||
limits,
|
||||
support_fill_buffer_texture: features.contains(wgt::Features::CLEAR_COMMANDS),
|
||||
support_clear_buffer_texture: features.contains(wgt::Features::CLEAR_COMMANDS),
|
||||
#[cfg(feature = "trace")]
|
||||
commands: if enable_tracing {
|
||||
Some(Vec::new())
|
||||
|
@ -162,7 +162,7 @@ fn map_buffer<A: hal::Api>(
|
||||
|
||||
// Zero out uninitialized parts of the mapping. (Spec dictates all resources behave as if they were initialized with zero)
|
||||
//
|
||||
// If this is a read mapping, ideally we would use a `fill_buffer` command before reading the data from GPU (i.e. `invalidate_range`).
|
||||
// If this is a read mapping, ideally we would use a `clear_buffer` command before reading the data from GPU (i.e. `invalidate_range`).
|
||||
// However, this would require us to kick off and wait for a command buffer or piggyback on an existing one (the latter is likely the only worthwhile option).
|
||||
// As reading uninitialized memory isn't a particularly important path to support,
|
||||
// we instead just initialize the memory here and make sure it is GPU visible, so this happens at max only once for every buffer region.
|
||||
@ -498,7 +498,7 @@ impl<A: HalApi> Device<A> {
|
||||
}
|
||||
} else {
|
||||
// We are required to zero out (initialize) all memory.
|
||||
// This is done on demand using fill_buffer which requires write transfer usage!
|
||||
// This is done on demand using clear_buffer which requires write transfer usage!
|
||||
usage |= hal::BufferUses::COPY_DST;
|
||||
}
|
||||
|
||||
|
@ -151,7 +151,7 @@ pub enum Command {
|
||||
offset: wgt::BufferAddress,
|
||||
size: Option<wgt::BufferSize>,
|
||||
},
|
||||
ClearImage {
|
||||
ClearTexture {
|
||||
dst: id::TextureId,
|
||||
subresource_range: wgt::ImageSubresourceRange,
|
||||
},
|
||||
|
@ -178,7 +178,8 @@ impl super::Adapter {
|
||||
| wgt::Features::POLYGON_MODE_POINT
|
||||
| wgt::Features::VERTEX_WRITABLE_STORAGE
|
||||
| wgt::Features::TIMESTAMP_QUERY
|
||||
| wgt::Features::TEXTURE_COMPRESSION_BC;
|
||||
| wgt::Features::TEXTURE_COMPRESSION_BC
|
||||
| wgt::Features::CLEAR_COMMANDS;
|
||||
//TODO: in order to expose this, we need to run a compute shader
|
||||
// that extract the necessary statistics out of the D3D12 result.
|
||||
// Alternatively, we could allocate a buffer for the query set,
|
||||
|
@ -363,8 +363,7 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
|
||||
assert_eq!(value, 0, "Only zero is supported!");
|
||||
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
|
||||
let list = self.list.unwrap();
|
||||
let mut offset = range.start;
|
||||
while offset < range.end {
|
||||
@ -380,6 +379,100 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn clear_texture(
|
||||
&mut self,
|
||||
texture: &super::Texture,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
) {
|
||||
// Note that CopyTextureRegion for depth/stencil or multisample resources would require full subresource copies.
|
||||
// Meaning we'd need a much larger pre-zeroed buffer
|
||||
// (but instead we just define clear_texture to not support these)
|
||||
|
||||
let list = self.list.unwrap();
|
||||
let mut src_location = d3d12::D3D12_TEXTURE_COPY_LOCATION {
|
||||
pResource: self.shared.zero_buffer.as_mut_ptr(),
|
||||
Type: d3d12::D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT,
|
||||
u: mem::zeroed(),
|
||||
};
|
||||
let mut dst_location = d3d12::D3D12_TEXTURE_COPY_LOCATION {
|
||||
pResource: texture.resource.as_mut_ptr(),
|
||||
Type: d3d12::D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX,
|
||||
u: mem::zeroed(),
|
||||
};
|
||||
let raw_format = conv::map_texture_format(texture.format);
|
||||
let format_desc = texture.format.describe();
|
||||
|
||||
let mip_range = subresource_range.base_mip_level..match subresource_range.mip_level_count {
|
||||
Some(c) => subresource_range.base_mip_level + c.get(),
|
||||
None => texture.mip_level_count,
|
||||
};
|
||||
let array_range = subresource_range.base_array_layer
|
||||
..match subresource_range.array_layer_count {
|
||||
Some(c) => subresource_range.base_array_layer + c.get(),
|
||||
None => texture.array_layer_count(),
|
||||
};
|
||||
for mip_level in mip_range {
|
||||
let mip_size = texture
|
||||
.size
|
||||
.mip_level_size(mip_level, texture.dimension == wgt::TextureDimension::D3);
|
||||
let depth = if texture.dimension == wgt::TextureDimension::D3 {
|
||||
mip_size.depth_or_array_layers
|
||||
} else {
|
||||
1
|
||||
};
|
||||
let bytes_per_row = mip_size.width / format_desc.block_dimensions.0 as u32
|
||||
* format_desc.block_size as u32;
|
||||
// round up to a multiple of d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT
|
||||
let bytes_per_row = (bytes_per_row + d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT - 1)
|
||||
/ d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT
|
||||
* d3d12::D3D12_TEXTURE_DATA_PITCH_ALIGNMENT;
|
||||
|
||||
let max_rows_per_copy = super::ZERO_BUFFER_SIZE as u32 / bytes_per_row;
|
||||
// round down to a multiple of rows needed by the texture format
|
||||
let max_rows_per_copy = max_rows_per_copy / format_desc.block_dimensions.1 as u32
|
||||
* format_desc.block_dimensions.1 as u32;
|
||||
assert!(max_rows_per_copy > 0, "Zero buffer size is too small to fill a single row of a texture with dimension {:?}, size {:?} and format {:?}", texture.dimension, texture.size, texture.format);
|
||||
|
||||
for array_layer in array_range.clone() {
|
||||
// We excluded depth/stencil, so plane should be always zero
|
||||
*dst_location.u.SubresourceIndex_mut() =
|
||||
texture.calc_subresource(mip_level, array_layer, 0);
|
||||
// 3D textures quickly become massive in memory size, so we don't bother trying to do more than one layer at once.
|
||||
for z in 0..depth {
|
||||
// May need multiple copies for each subresource!
|
||||
// We assume that we never need to split a row. A back-of-the-envelope calculation tells us that a 512 KiB buffer is enough for the most extreme known case:
|
||||
// max_texture_width * max_pixel_size = 32768 * 16 = 512kb
|
||||
let mut num_rows_left = mip_size.height;
|
||||
while num_rows_left > 0 {
|
||||
let num_rows = num_rows_left.min(max_rows_per_copy);
|
||||
|
||||
*src_location.u.PlacedFootprint_mut() =
|
||||
d3d12::D3D12_PLACED_SUBRESOURCE_FOOTPRINT {
|
||||
Offset: 0,
|
||||
Footprint: d3d12::D3D12_SUBRESOURCE_FOOTPRINT {
|
||||
Format: raw_format,
|
||||
Width: mip_size.width,
|
||||
Height: num_rows,
|
||||
Depth: 1,
|
||||
RowPitch: bytes_per_row,
|
||||
},
|
||||
};
|
||||
|
||||
list.CopyTextureRegion(
|
||||
&dst_location,
|
||||
0,
|
||||
mip_size.height - num_rows_left,
|
||||
z,
|
||||
&src_location,
|
||||
std::ptr::null(),
|
||||
);
|
||||
num_rows_left -= num_rows;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer_to_buffer<T>(
|
||||
&mut self,
|
||||
src: &super::Buffer,
|
||||
|
@ -120,7 +120,7 @@ impl<T> HResult<T> for (T, i32) {
|
||||
|
||||
// Limited by D3D12's root signature size of 64. Each element takes 1 or 2 entries.
|
||||
const MAX_ROOT_ELEMENTS: usize = 64;
|
||||
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 256 << 10;
|
||||
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 512 << 10;
|
||||
|
||||
pub struct Instance {
|
||||
factory: native::Factory4,
|
||||
|
@ -249,7 +249,14 @@ impl crate::CommandEncoder<Api> for Encoder {
|
||||
{
|
||||
}
|
||||
|
||||
unsafe fn fill_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange, value: u8) {}
|
||||
unsafe fn clear_buffer(&mut self, buffer: &Resource, range: crate::MemoryRange) {}
|
||||
|
||||
unsafe fn clear_texture(
|
||||
&mut self,
|
||||
texture: &Resource,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
) {
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &Resource, dst: &Resource, regions: T) {}
|
||||
|
||||
|
@ -257,7 +257,8 @@ impl super::Adapter {
|
||||
|
||||
let mut features = wgt::Features::empty()
|
||||
| wgt::Features::TEXTURE_COMPRESSION_ETC2
|
||||
| wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES;
|
||||
| wgt::Features::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
|
||||
| wgt::Features::CLEAR_COMMANDS;
|
||||
features.set(
|
||||
wgt::Features::DEPTH_CLAMPING,
|
||||
extensions.contains("GL_EXT_depth_clamp"),
|
||||
|
@ -237,12 +237,24 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
|
||||
self.cmd_buffer.commands.push(C::FillBuffer {
|
||||
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
|
||||
self.cmd_buffer.commands.push(C::ClearBuffer {
|
||||
dst: buffer.raw,
|
||||
dst_target: buffer.target,
|
||||
range,
|
||||
value,
|
||||
});
|
||||
}
|
||||
|
||||
unsafe fn clear_texture(
|
||||
&mut self,
|
||||
texture: &super::Texture,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
) {
|
||||
let (dst, dst_target) = texture.inner.as_native();
|
||||
self.cmd_buffer.commands.push(C::ClearTexture {
|
||||
dst,
|
||||
dst_target,
|
||||
subresource_range: subresource_range.clone(),
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -188,7 +188,7 @@ pub struct Queue {
|
||||
shader_clear_program: glow::Program,
|
||||
/// The uniform location of the color uniform in the shader clear program
|
||||
shader_clear_program_color_uniform_location: glow::UniformLocation,
|
||||
/// Keep a reasonably large buffer filled with zeroes, so that we can implement `FillBuffer` of
|
||||
/// Keep a reasonably large buffer filled with zeroes, so that we can implement `ClearBuffer` of
|
||||
/// zeroes by copying from it.
|
||||
zero_buffer: glow::Buffer,
|
||||
temp_query_results: Vec<u64>,
|
||||
@ -527,11 +527,15 @@ enum Command {
|
||||
indirect_buf: glow::Buffer,
|
||||
indirect_offset: wgt::BufferAddress,
|
||||
},
|
||||
FillBuffer {
|
||||
ClearBuffer {
|
||||
dst: glow::Buffer,
|
||||
dst_target: BindTarget,
|
||||
range: crate::MemoryRange,
|
||||
value: u8,
|
||||
},
|
||||
ClearTexture {
|
||||
dst: glow::Texture,
|
||||
dst_target: BindTarget,
|
||||
subresource_range: wgt::ImageSubresourceRange,
|
||||
},
|
||||
CopyBufferToBuffer {
|
||||
src: glow::Buffer,
|
||||
|
@ -190,13 +190,11 @@ impl super::Queue {
|
||||
gl.bind_buffer(glow::DRAW_INDIRECT_BUFFER, Some(indirect_buf));
|
||||
gl.dispatch_compute_indirect(indirect_offset as i32);
|
||||
}
|
||||
C::FillBuffer {
|
||||
C::ClearBuffer {
|
||||
dst,
|
||||
dst_target,
|
||||
ref range,
|
||||
value,
|
||||
} => {
|
||||
assert_eq!(value, 0); // other values require `wgt::Features::CLEAR_COMMANDS`.
|
||||
gl.bind_buffer(glow::COPY_READ_BUFFER, Some(self.zero_buffer));
|
||||
gl.bind_buffer(dst_target, Some(dst));
|
||||
let mut dst_offset = range.start;
|
||||
@ -212,6 +210,20 @@ impl super::Queue {
|
||||
dst_offset += size;
|
||||
}
|
||||
}
|
||||
C::ClearTexture {
|
||||
dst: _,
|
||||
dst_target: _,
|
||||
subresource_range: _,
|
||||
} => {
|
||||
// Should use EXT_clear_texture when possible.
|
||||
// https://www.khronos.org/registry/OpenGL/extensions/EXT/EXT_clear_texture.txt
|
||||
// But support is not very widespread, so we need to fall back to zero_buffer copies.
|
||||
|
||||
// TODO: Need to invoke calls into CopyBufferToTexture using zero_buffer.
|
||||
// To do that determine how many rows zero_buffer can fill and then chunk the texture up
|
||||
// (do *not* repeat the exact logic of CopyBufferToTexture, it's way too much!)
|
||||
//unimplemented!("texture clearing for GLES is not implemented yet");
|
||||
}
|
||||
C::CopyBufferToBuffer {
|
||||
src,
|
||||
src_target,
|
||||
|
@ -344,9 +344,14 @@ pub trait CommandEncoder<A: Api>: Send + Sync {
|
||||
|
||||
// copy operations
|
||||
|
||||
/// This is valid to call with `value == 0`.
|
||||
/// Otherwise `wgt::Features::CLEAR_COMMANDS` is required.
|
||||
unsafe fn fill_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange, value: u8);
|
||||
unsafe fn clear_buffer(&mut self, buffer: &A::Buffer, range: MemoryRange);
|
||||
|
||||
// Does not support depth/stencil or multisampled textures
|
||||
unsafe fn clear_texture(
|
||||
&mut self,
|
||||
texture: &A::Texture,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
);
|
||||
|
||||
unsafe fn copy_buffer_to_buffer<T>(&mut self, src: &A::Buffer, dst: &A::Buffer, regions: T)
|
||||
where
|
||||
|
@ -862,7 +862,8 @@ impl super::PrivateCapabilities {
|
||||
| F::MAPPABLE_PRIMARY_BUFFERS
|
||||
| F::VERTEX_WRITABLE_STORAGE
|
||||
| F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
|
||||
| F::POLYGON_MODE_LINE;
|
||||
| F::POLYGON_MODE_LINE
|
||||
| F::CLEAR_COMMANDS;
|
||||
|
||||
features.set(
|
||||
F::TEXTURE_BINDING_ARRAY
|
||||
|
@ -126,9 +126,84 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
|
||||
{
|
||||
}
|
||||
|
||||
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
|
||||
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
|
||||
let encoder = self.enter_blit();
|
||||
encoder.fill_buffer(&buffer.raw, conv::map_range(&range), value);
|
||||
encoder.fill_buffer(&buffer.raw, conv::map_range(&range), 0);
|
||||
}
|
||||
|
||||
unsafe fn clear_texture(
|
||||
&mut self,
|
||||
texture: &super::Texture,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
) {
|
||||
let shared = self.shared.clone();
|
||||
let encoder = self.enter_blit();
|
||||
|
||||
let format_desc = texture.format.describe();
|
||||
|
||||
let mip_range = subresource_range.base_mip_level..match subresource_range.mip_level_count {
|
||||
Some(c) => subresource_range.base_mip_level + c.get(),
|
||||
None => texture.mip_levels,
|
||||
};
|
||||
let array_range = subresource_range.base_array_layer
|
||||
..match subresource_range.array_layer_count {
|
||||
Some(c) => subresource_range.base_array_layer + c.get(),
|
||||
None => texture.array_layers,
|
||||
};
|
||||
|
||||
for mip_level in mip_range {
|
||||
// Note that Metal requires this only to be a multiple of the pixel size, not some other constant like in other APIs.
|
||||
let mip_size = texture
|
||||
.size
|
||||
.mip_level_size(mip_level, texture.raw_type == mtl::MTLTextureType::D3);
|
||||
let depth = if texture.raw_type == mtl::MTLTextureType::D3 {
|
||||
mip_size.depth_or_array_layers as u64
|
||||
} else {
|
||||
1
|
||||
};
|
||||
let bytes_per_row = mip_size.width as u64 / format_desc.block_dimensions.0 as u64
|
||||
* format_desc.block_size as u64;
|
||||
let max_rows_per_copy = super::ZERO_BUFFER_SIZE / bytes_per_row;
|
||||
// round down to a multiple of rows needed by the texture format
|
||||
let max_rows_per_copy = max_rows_per_copy / format_desc.block_dimensions.1 as u64
|
||||
* format_desc.block_dimensions.1 as u64;
|
||||
assert!(max_rows_per_copy > 0, "Zero buffer size is too small to fill a single row of a texture of type {:?}, size {:?} and format {:?}",
|
||||
texture.raw_type, texture.size, texture.format);
|
||||
|
||||
for array_layer in array_range.clone() {
|
||||
// 3D textures quickly become massive in memory size, so we don't bother trying to do more than one layer at once.
|
||||
for z in 0..depth {
|
||||
// May need multiple copies for each subresource! We assume that we never need to split a row.
|
||||
let mut num_rows_left = mip_size.height as u64;
|
||||
while num_rows_left > 0 {
|
||||
let num_rows = num_rows_left.min(max_rows_per_copy);
|
||||
let source_size = mtl::MTLSize {
|
||||
width: mip_size.width as u64,
|
||||
height: num_rows,
|
||||
depth: 1,
|
||||
};
|
||||
let destination_origion = mtl::MTLOrigin {
|
||||
x: 0,
|
||||
y: mip_size.height as u64 - num_rows_left,
|
||||
z,
|
||||
};
|
||||
encoder.copy_from_buffer_to_texture(
|
||||
&shared.zero_buffer,
|
||||
0,
|
||||
bytes_per_row,
|
||||
bytes_per_row * num_rows,
|
||||
source_size,
|
||||
&texture.raw,
|
||||
array_layer as u64,
|
||||
mip_level as u64,
|
||||
destination_origion,
|
||||
mtl::MTLBlitOption::empty(),
|
||||
);
|
||||
num_rows_left -= num_rows;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer_to_buffer<T>(
|
||||
|
@ -278,10 +278,12 @@ impl crate::Device<super::Api> for super::Device {
|
||||
|
||||
Ok(super::Texture {
|
||||
raw,
|
||||
format: desc.format,
|
||||
raw_format: mtl_format,
|
||||
raw_type: mtl_type,
|
||||
mip_levels: desc.mip_level_count,
|
||||
array_layers,
|
||||
size: desc.size,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -237,11 +237,17 @@ struct Settings {
|
||||
retain_command_buffer_references: bool,
|
||||
}
|
||||
|
||||
// Using max copyable texture row
|
||||
// https://developer.apple.com/documentation/metal/mtlblitcommandencoder/1400752-copyfrombuffer?language=objc
|
||||
// "The value must be less than or equal to 32767 multiplied by the destination texture’s pixel size."
|
||||
const ZERO_BUFFER_SIZE: wgt::BufferAddress = 32767 * 16; // 512kb
|
||||
|
||||
struct AdapterShared {
|
||||
device: Mutex<mtl::Device>,
|
||||
disabilities: PrivateDisabilities,
|
||||
private_caps: PrivateCapabilities,
|
||||
settings: Settings,
|
||||
zero_buffer: mtl::Buffer,
|
||||
}
|
||||
|
||||
unsafe impl Send for AdapterShared {}
|
||||
@ -252,11 +258,20 @@ impl AdapterShared {
|
||||
let private_caps = PrivateCapabilities::new(&device);
|
||||
log::debug!("{:#?}", private_caps);
|
||||
|
||||
// buffers created this way are zero initialized
|
||||
// see https://developer.apple.com/documentation/metal/mtldevice/1433375-newbufferwithlength?language=objc
|
||||
let zero_buffer = device.new_buffer(
|
||||
ZERO_BUFFER_SIZE,
|
||||
mtl::MTLResourceOptions::CPUCacheModeWriteCombined
|
||||
| mtl::MTLResourceOptions::StorageModePrivate,
|
||||
);
|
||||
|
||||
Self {
|
||||
disabilities: PrivateDisabilities::new(&device),
|
||||
private_caps: PrivateCapabilities::new(&device),
|
||||
device: Mutex::new(device),
|
||||
settings: Settings::default(),
|
||||
zero_buffer,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -280,6 +295,7 @@ pub struct Device {
|
||||
pub struct Surface {
|
||||
view: Option<NonNull<objc::runtime::Object>>,
|
||||
render_layer: Mutex<mtl::MetalLayer>,
|
||||
swapchain_format: wgt::TextureFormat,
|
||||
raw_swapchain_format: mtl::MTLPixelFormat,
|
||||
main_thread_id: thread::ThreadId,
|
||||
// Useful for UI-intensive applications that are sensitive to
|
||||
@ -404,10 +420,12 @@ impl Buffer {
|
||||
#[derive(Debug)]
|
||||
pub struct Texture {
|
||||
raw: mtl::Texture,
|
||||
format: wgt::TextureFormat,
|
||||
raw_format: mtl::MTLPixelFormat,
|
||||
raw_type: mtl::MTLTextureType,
|
||||
array_layers: u32,
|
||||
mip_levels: u32,
|
||||
size: wgt::Extent3d,
|
||||
}
|
||||
|
||||
unsafe impl Send for Texture {}
|
||||
|
@ -60,6 +60,7 @@ impl super::Surface {
|
||||
Self {
|
||||
view,
|
||||
render_layer: Mutex::new(layer),
|
||||
swapchain_format: wgt::TextureFormat::Bgra8UnormSrgb, // no value invalid, pick something not too far-fetched
|
||||
raw_swapchain_format: mtl::MTLPixelFormat::Invalid,
|
||||
main_thread_id: thread::current().id(),
|
||||
present_with_transaction: false,
|
||||
@ -209,6 +210,7 @@ impl crate::Surface<super::Api> for super::Surface {
|
||||
log::info!("build swapchain {:?}", config);
|
||||
|
||||
let caps = &device.shared.private_caps;
|
||||
self.swapchain_format = config.format;
|
||||
self.raw_swapchain_format = caps.map_format(config.format);
|
||||
|
||||
let render_layer = self.render_layer.lock();
|
||||
@ -271,10 +273,12 @@ impl crate::Surface<super::Api> for super::Surface {
|
||||
let suf_texture = super::SurfaceTexture {
|
||||
texture: super::Texture {
|
||||
raw: texture,
|
||||
format: self.swapchain_format,
|
||||
raw_format: self.raw_swapchain_format,
|
||||
raw_type: mtl::MTLTextureType::D2,
|
||||
array_layers: 1,
|
||||
mip_levels: 1,
|
||||
size: self.dimensions(),
|
||||
},
|
||||
drawable,
|
||||
present_with_transaction: self.present_with_transaction,
|
||||
|
@ -242,7 +242,8 @@ impl PhysicalDeviceFeatures {
|
||||
| F::ADDRESS_MODE_CLAMP_TO_BORDER
|
||||
| F::TIMESTAMP_QUERY
|
||||
| F::PIPELINE_STATISTICS_QUERY
|
||||
| F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES;
|
||||
| F::TEXTURE_ADAPTER_SPECIFIC_FORMAT_FEATURES
|
||||
| F::CLEAR_COMMANDS;
|
||||
let mut dl_flags = Df::all();
|
||||
|
||||
dl_flags.set(Df::CUBE_ARRAY_TEXTURES, self.core.image_cube_array != 0);
|
||||
|
@ -182,16 +182,48 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
|
||||
}
|
||||
}
|
||||
|
||||
unsafe fn fill_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange, value: u8) {
|
||||
unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
|
||||
self.device.raw.cmd_fill_buffer(
|
||||
self.active,
|
||||
buffer.raw,
|
||||
range.start,
|
||||
range.end - range.start,
|
||||
(value as u32) * 0x01010101,
|
||||
0,
|
||||
);
|
||||
}
|
||||
|
||||
unsafe fn clear_texture(
|
||||
&mut self,
|
||||
texture: &super::Texture,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
) {
|
||||
self.device.raw.cmd_clear_color_image(
|
||||
self.active,
|
||||
texture.raw,
|
||||
DST_IMAGE_LAYOUT,
|
||||
&vk::ClearColorValue {
|
||||
float32: [0.0, 0.0, 0.0, 0.0],
|
||||
},
|
||||
&[conv::map_subresource_range(
|
||||
subresource_range,
|
||||
texture.aspects,
|
||||
)],
|
||||
);
|
||||
|
||||
// The Vulkan API could easily support depth/stencil formats for clearing as well.
|
||||
// But in other APIs this is more challenging, which is why clear_texture excludes support for these formats.
|
||||
// self.device.raw.cmd_clear_depth_stencil_image(
|
||||
// self.active,
|
||||
// texture.raw,
|
||||
// DST_IMAGE_LAYOUT,
|
||||
// &vk::ClearDepthStencilValue {
|
||||
// depth: 0.0,
|
||||
// stencil: 0,
|
||||
// },
|
||||
// &[range],
|
||||
// );
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer_to_buffer<T>(
|
||||
&mut self,
|
||||
src: &super::Buffer,
|
||||
|
@ -495,7 +495,7 @@ bitflags::bitflags! {
|
||||
///
|
||||
/// This is a native-only feature.
|
||||
const VERTEX_WRITABLE_STORAGE = 1 << 35;
|
||||
/// Enables clear to zero for buffers & images.
|
||||
/// Enables clear to zero for buffers & textures.
|
||||
///
|
||||
/// Supported platforms:
|
||||
/// - All
|
||||
@ -2353,7 +2353,7 @@ bitflags::bitflags! {
|
||||
/// operation.
|
||||
const COPY_SRC = 1 << 2;
|
||||
/// Allow a buffer to be the destination buffer for a [`CommandEncoder::copy_buffer_to_buffer`], [`CommandEncoder::copy_texture_to_buffer`],
|
||||
/// [`CommandEncoder::fill_buffer`] or [`Queue::write_buffer`] operation.
|
||||
/// [`CommandEncoder::clear_buffer`] or [`Queue::write_buffer`] operation.
|
||||
const COPY_DST = 1 << 3;
|
||||
/// Allow a buffer to be the index buffer in a draw operation.
|
||||
const INDEX = 1 << 4;
|
||||
@ -2692,6 +2692,18 @@ impl Extent3d {
|
||||
let max_dim = self.width.max(self.height.max(self.depth_or_array_layers));
|
||||
32 - max_dim.leading_zeros()
|
||||
}
|
||||
|
||||
/// Calculates the extent at a given mip level.
|
||||
pub fn mip_level_size(&self, level: u32, is_3d_texture: bool) -> Extent3d {
|
||||
Extent3d {
|
||||
width: u32::max(1, self.width >> level),
|
||||
height: u32::max(1, self.height >> level),
|
||||
depth_or_array_layers: match is_3d_texture {
|
||||
false => self.depth_or_array_layers,
|
||||
true => u32::max(1, self.depth_or_array_layers >> level),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Describes a [`Texture`].
|
||||
@ -2765,14 +2777,10 @@ impl<L> TextureDescriptor<L> {
|
||||
return None;
|
||||
}
|
||||
|
||||
Some(Extent3d {
|
||||
width: u32::max(1, self.size.width >> level),
|
||||
height: u32::max(1, self.size.height >> level),
|
||||
depth_or_array_layers: match self.dimension {
|
||||
TextureDimension::D1 | TextureDimension::D2 => self.size.depth_or_array_layers,
|
||||
TextureDimension::D3 => u32::max(1, self.size.depth_or_array_layers >> level),
|
||||
},
|
||||
})
|
||||
Some(
|
||||
self.size
|
||||
.mip_level_size(level, self.dimension == TextureDimension::D3),
|
||||
)
|
||||
}
|
||||
|
||||
/// Returns the number of array layers.
|
||||
|
@ -1948,19 +1948,19 @@ impl crate::Context for Context {
|
||||
id
|
||||
}
|
||||
|
||||
fn command_encoder_clear_image(
|
||||
fn command_encoder_clear_texture(
|
||||
&self,
|
||||
encoder: &Self::CommandEncoderId,
|
||||
texture: &crate::Texture,
|
||||
subresource_range: &wgt::ImageSubresourceRange,
|
||||
) {
|
||||
let global = &self.0;
|
||||
if let Err(cause) = wgc::gfx_select!(encoder.id => global.command_encoder_clear_image(
|
||||
if let Err(cause) = wgc::gfx_select!(encoder.id => global.command_encoder_clear_texture(
|
||||
encoder.id,
|
||||
texture.id.id,
|
||||
subresource_range
|
||||
)) {
|
||||
self.handle_error_nolabel(&encoder.error_sink, cause, "CommandEncoder::clear_image");
|
||||
self.handle_error_nolabel(&encoder.error_sink, cause, "CommandEncoder::clear_texture");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1911,7 +1911,7 @@ impl crate::Context for Context {
|
||||
})
|
||||
}
|
||||
|
||||
fn command_encoder_clear_image(
|
||||
fn command_encoder_clear_texture(
|
||||
&self,
|
||||
_encoder: &Self::CommandEncoderId,
|
||||
_texture: &crate::Texture,
|
||||
|
@ -412,7 +412,7 @@ trait Context: Debug + Send + Sized + Sync {
|
||||
);
|
||||
fn command_encoder_finish(&self, encoder: Self::CommandEncoderId) -> Self::CommandBufferId;
|
||||
|
||||
fn command_encoder_clear_image(
|
||||
fn command_encoder_clear_texture(
|
||||
&self,
|
||||
encoder: &Self::CommandEncoderId,
|
||||
texture: &Texture,
|
||||
@ -2260,13 +2260,15 @@ impl CommandEncoder {
|
||||
|
||||
/// Clears texture to zero.
|
||||
///
|
||||
/// Where possible it may be significantly more efficient to perform clears via render passes!
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// - `CLEAR_COMMANDS` extension not enabled
|
||||
/// - Texture does not have `COPY_DST` usage.
|
||||
/// - Range is out of bounds
|
||||
pub fn clear_texture(&mut self, texture: &Texture, subresource_range: &ImageSubresourceRange) {
|
||||
Context::command_encoder_clear_image(
|
||||
Context::command_encoder_clear_texture(
|
||||
&*self.context,
|
||||
self.id.as_ref().unwrap(),
|
||||
texture,
|
||||
|
Loading…
Reference in New Issue
Block a user