bytes_per_row & rows_per_image are now optional

Fixes #988
Andreas Reich 2021-03-20 11:53:14 +01:00
parent c831c5512a
commit 50043875e5
4 changed files with 91 additions and 53 deletions
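For orientation, a minimal sketch of how a call site changes with this commit; the `wgt` alias for the `wgpu-types` crate and the concrete 256/64 strides are taken from the diff below, the rest is illustrative:

```rust
use std::num::NonZeroU32;
use wgpu_types as wgt; // the `wgt` alias used throughout this diff

fn main() {
    // Both stride fields are now Option<NonZeroU32>. NonZeroU32::new already
    // returns an Option, so spelling them out stays a one-liner, while `None`
    // means "derive the stride from the copy size", which is only allowed for
    // copies of a single row / a single image.
    let layout = wgt::TextureDataLayout {
        offset: 0,
        bytes_per_row: NonZeroU32::new(256), // was `bytes_per_row: 256`
        rows_per_image: NonZeroU32::new(64), // was `rows_per_image: 64`
    };
    assert_eq!(layout.bytes_per_row.map(NonZeroU32::get), Some(256));
}
```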


@@ -123,8 +123,8 @@
             buffer: Id(0, 1, Empty),
             layout: (
                 offset: 0,
-                bytes_per_row: 256,
-                rows_per_image: 64,
+                bytes_per_row: Some(256),
+                rows_per_image: Some(64),
             ),
         ),
         size: (


@@ -76,6 +76,10 @@ pub enum TransferError {
     UnalignedBytesPerRow,
     #[error("number of rows per image is not a multiple of block height")]
     UnalignedRowsPerImage,
+    #[error("number of bytes per row needs to be specified since more than one row is copied")]
+    UnspecifiedBytesPerRow,
+    #[error("number of rows per image needs to be specified since more than one image is copied")]
+    UnspecifiedRowsPerImage,
     #[error("number of bytes per row is less than the number of bytes in a complete row")]
     InvalidBytesPerRow,
     #[error("image is 1D and the copy height and depth are not both set to 1")]
@@ -148,7 +152,7 @@ pub(crate) fn texture_copy_view_to_hal<B: hal::Backend>(
     ))
 }
 
-/// Function copied with minor modifications from webgpu standard https://gpuweb.github.io/gpuweb/#valid-texture-copy-range
+/// Function copied with some modifications from webgpu standard <https://gpuweb.github.io/gpuweb/#copy-between-buffer-texture>
 /// If successful, returns number of buffer bytes required for this copy.
 pub(crate) fn validate_linear_texture_data(
     layout: &wgt::TextureDataLayout,
@@ -157,6 +161,7 @@ pub(crate) fn validate_linear_texture_data(
     buffer_side: CopySide,
     bytes_per_block: BufferAddress,
     copy_size: &Extent3d,
+    need_copy_aligned_rows: bool,
 ) -> Result<BufferAddress, TransferError> {
     // Convert all inputs to BufferAddress (u64) to prevent overflow issues
     let copy_width = copy_size.width as BufferAddress;
@@ -164,14 +169,32 @@ pub(crate) fn validate_linear_texture_data(
     let copy_depth = copy_size.depth_or_array_layers as BufferAddress;
 
     let offset = layout.offset;
-    let rows_per_image = layout.rows_per_image as BufferAddress;
-    let bytes_per_row = layout.bytes_per_row as BufferAddress;
 
     let (block_width, block_height) = format.describe().block_dimensions;
     let block_width = block_width as BufferAddress;
     let block_height = block_height as BufferAddress;
     let block_size = bytes_per_block;
 
+    let width_in_blocks = copy_width / block_width;
+    let height_in_blocks = copy_height / block_height;
+
+    let bytes_per_row = if let Some(bytes_per_row) = layout.bytes_per_row {
+        bytes_per_row.get() as BufferAddress
+    } else {
+        if copy_depth > 1 || block_height > 1 {
+            return Err(TransferError::UnspecifiedBytesPerRow);
+        }
+        bytes_per_block * width_in_blocks
+    };
+    let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
+        rows_per_image.get() as BufferAddress
+    } else {
+        if copy_depth > 1 {
+            return Err(TransferError::UnspecifiedRowsPerImage);
+        }
+        copy_height
+    };
+
     if copy_width % block_width != 0 {
         return Err(TransferError::UnalignedCopyWidth);
     }
@@ -182,23 +205,28 @@ pub(crate) fn validate_linear_texture_data(
         return Err(TransferError::UnalignedRowsPerImage);
     }
 
-    let bytes_in_a_complete_row = block_size * copy_width / block_width;
+    if need_copy_aligned_rows {
+        let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress;
+
+        if bytes_per_row_alignment % bytes_per_block != 0 {
+            return Err(TransferError::UnalignedBytesPerRow);
+        }
+        if bytes_per_row % bytes_per_row_alignment != 0 {
+            return Err(TransferError::UnalignedBytesPerRow);
+        }
+    }
+
+    let bytes_in_last_row = block_size * width_in_blocks;
 
     let required_bytes_in_copy = if copy_width == 0 || copy_height == 0 || copy_depth == 0 {
         0
     } else {
-        let actual_rows_per_image = if rows_per_image == 0 {
-            copy_height
-        } else {
-            rows_per_image
-        };
-        let texel_block_rows_per_image = actual_rows_per_image / block_height;
+        let texel_block_rows_per_image = rows_per_image / block_height;
         let bytes_per_image = bytes_per_row * texel_block_rows_per_image;
-        let bytes_in_last_slice =
-            bytes_per_row * (copy_height / block_height - 1) + bytes_in_a_complete_row;
+        let bytes_in_last_slice = bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row;
         bytes_per_image * (copy_depth - 1) + bytes_in_last_slice
     };
 
-    if rows_per_image != 0 && rows_per_image < copy_height {
+    if rows_per_image < copy_height {
         return Err(TransferError::InvalidRowsPerImage);
     }
     if offset + required_bytes_in_copy > buffer_size {
@@ -212,12 +240,9 @@ pub(crate) fn validate_linear_texture_data(
     if offset % block_size != 0 {
         return Err(TransferError::UnalignedBufferOffset(offset));
     }
-    if copy_height > 1 && bytes_per_row < bytes_in_a_complete_row {
+    if copy_height > 1 && bytes_per_row < bytes_in_last_row {
         return Err(TransferError::InvalidBytesPerRow);
     }
-    if copy_depth > 1 && rows_per_image == 0 {
-        return Err(TransferError::InvalidRowsPerImage);
-    }
 
     Ok(required_bytes_in_copy)
 }
@@ -507,18 +532,10 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         }
         let dst_barriers = dst_pending.map(|pending| pending.into_hal(dst_texture));
 
-        let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT;
         let bytes_per_block = conv::map_texture_format(dst_texture.format, cmd_buf.private_features)
             .surface_desc()
             .bits as u32
             / BITS_PER_BYTE;
-        let src_bytes_per_row = source.layout.bytes_per_row;
-        if bytes_per_row_alignment % bytes_per_block != 0 {
-            return Err(TransferError::UnalignedBytesPerRow.into());
-        }
-        if src_bytes_per_row % bytes_per_row_alignment != 0 {
-            return Err(TransferError::UnalignedBytesPerRow.into());
-        }
         validate_texture_copy_range(
             destination,
             dst_texture.format,
@@ -533,6 +550,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             CopySide::Source,
             bytes_per_block as BufferAddress,
             copy_size,
+            true,
         )?;
 
         cmd_buf.buffer_memory_init_actions.extend(
@@ -562,11 +580,20 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             depth_or_array_layers: copy_size.depth_or_array_layers,
         };
 
-        let buffer_width = (source.layout.bytes_per_row / bytes_per_block) * block_width as u32;
+        let buffer_width = if let Some(bytes_per_row) = source.layout.bytes_per_row {
+            (bytes_per_row.get() / bytes_per_block) * block_width as u32
+        } else {
+            image_extent.width
+        };
+        let buffer_height = if let Some(rows_per_image) = source.layout.rows_per_image {
+            rows_per_image.get()
+        } else {
+            0
+        };
         let region = hal::command::BufferImageCopy {
             buffer_offset: source.layout.offset,
             buffer_width,
-            buffer_height: source.layout.rows_per_image,
+            buffer_height,
             image_layers: dst_layers,
             image_offset: dst_offset,
             image_extent: conv::map_extent(&image_extent, dst_texture.dimension),
@@ -655,18 +682,10 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         }
         let dst_barrier = dst_barriers.map(|pending| pending.into_hal(dst_buffer));
 
-        let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT;
         let bytes_per_block = conv::map_texture_format(src_texture.format, cmd_buf.private_features)
             .surface_desc()
             .bits as u32
             / BITS_PER_BYTE;
-        let dst_bytes_per_row = destination.layout.bytes_per_row;
-        if bytes_per_row_alignment % bytes_per_block != 0 {
-            return Err(TransferError::UnalignedBytesPerRow.into());
-        }
-        if dst_bytes_per_row % bytes_per_row_alignment != 0 {
-            return Err(TransferError::UnalignedBytesPerRow.into());
-        }
         validate_texture_copy_range(
             source,
             src_texture.format,
@@ -681,6 +700,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             CopySide::Destination,
             bytes_per_block as BufferAddress,
             copy_size,
+            true,
         )?;
 
         let (block_width, _) = src_texture.format.describe().block_dimensions;
@@ -713,12 +733,20 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             depth_or_array_layers: copy_size.depth_or_array_layers,
         };
 
-        let buffer_width =
-            (destination.layout.bytes_per_row / bytes_per_block) * block_width as u32;
+        let buffer_width = if let Some(bytes_per_row) = destination.layout.bytes_per_row {
+            (bytes_per_row.get() / bytes_per_block) * block_width as u32
+        } else {
+            image_extent.width
+        };
+        let buffer_height = if let Some(rows_per_image) = destination.layout.rows_per_image {
+            rows_per_image.get()
+        } else {
+            0
+        };
         let region = hal::command::BufferImageCopy {
             buffer_offset: destination.layout.offset,
             buffer_width,
-            buffer_height: destination.layout.rows_per_image,
+            buffer_height,
             image_layers: src_layers,
             image_offset: src_offset,
             image_extent: conv::map_extent(&image_extent, src_texture.dimension),
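In words, the rule introduced in validate_linear_texture_data above is: bytes_per_row may be omitted only when the copy stays within one image and the format's block height is 1 (it then defaults to one tightly packed row of blocks), and rows_per_image may be omitted only when a single image is copied (it then defaults to the copy height). Below is a standalone sketch of that defaulting rule with plain integers instead of the wgpu-core types; the function and the local error enum are illustrative and only mirror the new TransferError variants:

```rust
#[derive(Debug, PartialEq)]
enum LayoutError {
    UnspecifiedBytesPerRow,
    UnspecifiedRowsPerImage,
}

fn resolve_strides(
    bytes_per_row: Option<u64>,
    rows_per_image: Option<u64>,
    bytes_per_block: u64,
    block_height: u64,
    width_in_blocks: u64,
    copy_height: u64,
    copy_depth: u64,
) -> Result<(u64, u64), LayoutError> {
    // bytes_per_row may only be omitted for a single image of a format whose
    // block height is 1; it then defaults to one tightly packed row of blocks.
    let bytes_per_row = match bytes_per_row {
        Some(b) => b,
        None if copy_depth > 1 || block_height > 1 => {
            return Err(LayoutError::UnspecifiedBytesPerRow)
        }
        None => bytes_per_block * width_in_blocks,
    };
    // rows_per_image may only be omitted when a single image is copied;
    // it then defaults to the copied height.
    let rows_per_image = match rows_per_image {
        Some(r) => r,
        None if copy_depth > 1 => return Err(LayoutError::UnspecifiedRowsPerImage),
        None => copy_height,
    };
    Ok((bytes_per_row, rows_per_image))
}

fn main() {
    // A single 256x1 RGBA8 row: both strides may be omitted.
    assert_eq!(resolve_strides(None, None, 4, 1, 256, 1, 1), Ok((1024, 1)));
    // Two array layers: rows_per_image must now be given explicitly.
    assert_eq!(
        resolve_strides(Some(1024), None, 4, 1, 256, 64, 2),
        Err(LayoutError::UnspecifiedRowsPerImage)
    );
}
```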


@@ -340,6 +340,7 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
             CopySide::Source,
             bytes_per_block as wgt::BufferAddress,
             size,
+            false,
         )?;
 
         let (block_width, block_height) = texture_format.describe().block_dimensions;
@@ -352,8 +353,13 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let width_blocks = size.width / block_width;
         let height_blocks = size.height / block_width;
 
-        let texel_rows_per_image = data_layout.rows_per_image;
-        let block_rows_per_image = data_layout.rows_per_image / block_height;
+        let texel_rows_per_image = if let Some(rows_per_image) = data_layout.rows_per_image {
+            rows_per_image.get()
+        } else {
+            // doesn't really matter because we need this only if we copy more than one layer, and then we validate for this being not None
+            size.height
+        };
+        let block_rows_per_image = texel_rows_per_image / block_height;
 
         let bytes_per_row_alignment = get_lowest_common_denom(
             device.hal_limits.optimal_buffer_copy_pitch_alignment as u32,
@@ -397,21 +403,25 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
         let ptr = stage.memory.map(&device.raw, 0, stage_size)?;
         unsafe {
+            let bytes_per_row = if let Some(bytes_per_row) = data_layout.bytes_per_row {
+                bytes_per_row.get()
+            } else {
+                width_blocks * bytes_per_block
+            };
+
             //TODO: https://github.com/zakarumych/gpu-alloc/issues/13
-            if stage_bytes_per_row == data_layout.bytes_per_row {
+            if stage_bytes_per_row == bytes_per_row {
                 // Fast path if the data is already being aligned optimally.
                 ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), stage_size as usize);
             } else {
                 // Copy row by row into the optimal alignment.
-                let copy_bytes_per_row =
-                    stage_bytes_per_row.min(data_layout.bytes_per_row) as usize;
+                let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
                 for layer in 0..size.depth_or_array_layers {
                     let rows_offset = layer * block_rows_per_image;
                     for row in 0..height_blocks {
                         ptr::copy_nonoverlapping(
-                            data.as_ptr().offset(
-                                (rows_offset + row) as isize * data_layout.bytes_per_row as isize,
-                            ),
+                            data.as_ptr()
+                                .offset((rows_offset + row) as isize * bytes_per_row as isize),
                             ptr.as_ptr().offset(
                                 (rows_offset + row) as isize * stage_bytes_per_row as isize,
                             ),
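The write_texture path above now resolves bytes_per_row once and then repacks each row from the caller's stride into the staging buffer's aligned stride. A safe, self-contained sketch of that repacking (slices instead of the raw pointers used in the real code; the helper name is made up for illustration):

```rust
/// Copy `height_blocks` rows per image, for `layers` images, from `data`
/// (stride `src_bytes_per_row`) into `stage` (stride `stage_bytes_per_row`).
fn repack_rows(
    data: &[u8],
    stage: &mut [u8],
    src_bytes_per_row: usize,
    stage_bytes_per_row: usize,
    block_rows_per_image: usize,
    height_blocks: usize,
    layers: usize,
) {
    let copy_bytes_per_row = src_bytes_per_row.min(stage_bytes_per_row);
    for layer in 0..layers {
        let rows_offset = layer * block_rows_per_image;
        for row in 0..height_blocks {
            let src_start = (rows_offset + row) * src_bytes_per_row;
            let dst_start = (rows_offset + row) * stage_bytes_per_row;
            stage[dst_start..dst_start + copy_bytes_per_row]
                .copy_from_slice(&data[src_start..src_start + copy_bytes_per_row]);
        }
    }
}

fn main() {
    // Two 4-byte rows, repacked into a 256-byte-aligned staging stride.
    let data = [1u8, 2, 3, 4, 5, 6, 7, 8];
    let mut stage = vec![0u8; 2 * 256];
    repack_rows(&data, &mut stage, 4, 256, 2, 2, 1);
    assert_eq!(&stage[256..260], &[5, 6, 7, 8]);
}
```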


@@ -2507,18 +2507,18 @@ pub struct TextureDataLayout {
     /// For non-compressed textures, this is 1.
     pub offset: BufferAddress,
     /// Bytes per "row" of the image. This represents one row of pixels in the x direction. Compressed
-    /// textures include multiple rows of pixels in each "row". May be 0 for 1D texture copies.
+    /// textures include multiple rows of pixels in each "row".
+    /// Required if there are multiple rows (i.e. height or depth is more than one pixel or pixel block for compressed textures)
     ///
     /// Must be a multiple of 256 for [`CommandEncoder::copy_buffer_to_texture`] and [`CommandEncoder::copy_texture_to_buffer`].
     /// [`Queue::write_texture`] does not have this requirement.
     ///
     /// Must be a multiple of the texture block size. For non-compressed textures, this is 1.
-    pub bytes_per_row: u32,
+    pub bytes_per_row: Option<NonZeroU32>,
     /// Rows that make up a single "image". Each "image" is one layer in the z direction of a 3D image. May be larger
     /// than `copy_size.y`.
-    ///
-    /// May be 0 for 2D texture copies.
-    pub rows_per_image: u32,
+    /// Required if there are multiple images (i.e. the depth is more than one)
+    pub rows_per_image: Option<NonZeroU32>,
 }
 
 /// Specific type of a buffer binding.
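The doc comment above keeps the rule that bytes_per_row must be a multiple of 256 (wgt::COPY_BYTES_PER_ROW_ALIGNMENT) for the command-encoder copies, while Queue::write_texture is exempt. A small, hypothetical helper showing how a caller might compute such a padded stride for the new Option<NonZeroU32> field:

```rust
use std::num::NonZeroU32;
use wgpu_types as wgt;

// Round a tightly packed row size up to the alignment required by
// copy_buffer_to_texture / copy_texture_to_buffer (write_texture does not need this).
fn padded_bytes_per_row(unpadded: u32) -> Option<NonZeroU32> {
    let align = wgt::COPY_BYTES_PER_ROW_ALIGNMENT; // 256
    NonZeroU32::new(((unpadded + align - 1) / align) * align)
}

fn main() {
    // A 100-pixel-wide RGBA8 row is 400 bytes unpadded, 512 bytes padded.
    assert_eq!(padded_bytes_per_row(100 * 4).map(NonZeroU32::get), Some(512));
}
```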