From 50043875e5959bb33e41e81d28670b68d70382dc Mon Sep 17 00:00:00 2001 From: Andreas Reich Date: Sat, 20 Mar 2021 11:53:14 +0100 Subject: [PATCH] rows_per_image & rows_per_image are now optional Fixes #988 --- player/tests/data/quad.ron | 4 +- wgpu-core/src/command/transfer.rs | 104 +++++++++++++++++++----------- wgpu-core/src/device/queue.rs | 26 +++++--- wgpu-types/src/lib.rs | 10 +-- 4 files changed, 91 insertions(+), 53 deletions(-) diff --git a/player/tests/data/quad.ron b/player/tests/data/quad.ron index c6f690a8c..69e9e4735 100644 --- a/player/tests/data/quad.ron +++ b/player/tests/data/quad.ron @@ -123,8 +123,8 @@ buffer: Id(0, 1, Empty), layout: ( offset: 0, - bytes_per_row: 256, - rows_per_image: 64, + bytes_per_row: Some(256), + rows_per_image: Some(64), ), ), size: ( diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index b437acce9..e589fce35 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -76,6 +76,10 @@ pub enum TransferError { UnalignedBytesPerRow, #[error("number of rows per image is not a multiple of block height")] UnalignedRowsPerImage, + #[error("number of bytes per row needs to be specified since more than one row is copied")] + UnspecifiedBytesPerRow, + #[error("number of rows per image needs to be specified since more than one image is copied")] + UnspecifiedRowsPerImage, #[error("number of bytes per row is less than the number of bytes in a complete row")] InvalidBytesPerRow, #[error("image is 1D and the copy height and depth are not both set to 1")] @@ -148,7 +152,7 @@ pub(crate) fn texture_copy_view_to_hal( )) } -/// Function copied with minor modifications from webgpu standard https://gpuweb.github.io/gpuweb/#valid-texture-copy-range +/// Function copied with some modifications from webgpu standard /// If successful, returns number of buffer bytes required for this copy. 
pub(crate) fn validate_linear_texture_data( layout: &wgt::TextureDataLayout, @@ -157,6 +161,7 @@ pub(crate) fn validate_linear_texture_data( buffer_side: CopySide, bytes_per_block: BufferAddress, copy_size: &Extent3d, + need_copy_aligned_rows: bool, ) -> Result { // Convert all inputs to BufferAddress (u64) to prevent overflow issues let copy_width = copy_size.width as BufferAddress; @@ -164,14 +169,32 @@ pub(crate) fn validate_linear_texture_data( let copy_depth = copy_size.depth_or_array_layers as BufferAddress; let offset = layout.offset; - let rows_per_image = layout.rows_per_image as BufferAddress; - let bytes_per_row = layout.bytes_per_row as BufferAddress; let (block_width, block_height) = format.describe().block_dimensions; let block_width = block_width as BufferAddress; let block_height = block_height as BufferAddress; let block_size = bytes_per_block; + let width_in_blocks = copy_width / block_width; + let height_in_blocks = copy_height / block_height; + + let bytes_per_row = if let Some(bytes_per_row) = layout.bytes_per_row { + bytes_per_row.get() as BufferAddress + } else { + if copy_depth > 1 || block_height > 1 { + return Err(TransferError::UnspecifiedBytesPerRow); + } + bytes_per_block * width_in_blocks + }; + let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { + rows_per_image.get() as BufferAddress + } else { + if copy_depth > 1 { + return Err(TransferError::UnspecifiedRowsPerImage); + } + copy_height + }; + if copy_width % block_width != 0 { return Err(TransferError::UnalignedCopyWidth); } @@ -182,23 +205,28 @@ pub(crate) fn validate_linear_texture_data( return Err(TransferError::UnalignedRowsPerImage); } - let bytes_in_a_complete_row = block_size * copy_width / block_width; + if need_copy_aligned_rows { + let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT as BufferAddress; + + if bytes_per_row_alignment % bytes_per_block != 0 { + return Err(TransferError::UnalignedBytesPerRow); + } + if bytes_per_row % 
bytes_per_row_alignment != 0 { + return Err(TransferError::UnalignedBytesPerRow); + } + } + + let bytes_in_last_row = block_size * width_in_blocks; let required_bytes_in_copy = if copy_width == 0 || copy_height == 0 || copy_depth == 0 { 0 } else { - let actual_rows_per_image = if rows_per_image == 0 { - copy_height - } else { - rows_per_image - }; - let texel_block_rows_per_image = actual_rows_per_image / block_height; + let texel_block_rows_per_image = rows_per_image / block_height; let bytes_per_image = bytes_per_row * texel_block_rows_per_image; - let bytes_in_last_slice = - bytes_per_row * (copy_height / block_height - 1) + bytes_in_a_complete_row; + let bytes_in_last_slice = bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; bytes_per_image * (copy_depth - 1) + bytes_in_last_slice }; - if rows_per_image != 0 && rows_per_image < copy_height { + if rows_per_image < copy_height { return Err(TransferError::InvalidRowsPerImage); } if offset + required_bytes_in_copy > buffer_size { @@ -212,12 +240,9 @@ pub(crate) fn validate_linear_texture_data( if offset % block_size != 0 { return Err(TransferError::UnalignedBufferOffset(offset)); } - if copy_height > 1 && bytes_per_row < bytes_in_a_complete_row { + if copy_height > 1 && bytes_per_row < bytes_in_last_row { return Err(TransferError::InvalidBytesPerRow); } - if copy_depth > 1 && rows_per_image == 0 { - return Err(TransferError::InvalidRowsPerImage); - } Ok(required_bytes_in_copy) } @@ -507,18 +532,10 @@ impl Global { } let dst_barriers = dst_pending.map(|pending| pending.into_hal(dst_texture)); - let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT; let bytes_per_block = conv::map_texture_format(dst_texture.format, cmd_buf.private_features) .surface_desc() .bits as u32 / BITS_PER_BYTE; - let src_bytes_per_row = source.layout.bytes_per_row; - if bytes_per_row_alignment % bytes_per_block != 0 { - return Err(TransferError::UnalignedBytesPerRow.into()); - } - if src_bytes_per_row % 
bytes_per_row_alignment != 0 { - return Err(TransferError::UnalignedBytesPerRow.into()); - } validate_texture_copy_range( destination, dst_texture.format, @@ -533,6 +550,7 @@ impl Global { CopySide::Source, bytes_per_block as BufferAddress, copy_size, + true, )?; cmd_buf.buffer_memory_init_actions.extend( @@ -562,11 +580,20 @@ impl Global { depth_or_array_layers: copy_size.depth_or_array_layers, }; - let buffer_width = (source.layout.bytes_per_row / bytes_per_block) * block_width as u32; + let buffer_width = if let Some(bytes_per_row) = source.layout.bytes_per_row { + (bytes_per_row.get() / bytes_per_block) * block_width as u32 + } else { + image_extent.width + }; + let buffer_height = if let Some(rows_per_image) = source.layout.rows_per_image { + rows_per_image.get() + } else { + 0 + }; let region = hal::command::BufferImageCopy { buffer_offset: source.layout.offset, buffer_width, - buffer_height: source.layout.rows_per_image, + buffer_height, image_layers: dst_layers, image_offset: dst_offset, image_extent: conv::map_extent(&image_extent, dst_texture.dimension), @@ -655,18 +682,10 @@ impl Global { } let dst_barrier = dst_barriers.map(|pending| pending.into_hal(dst_buffer)); - let bytes_per_row_alignment = wgt::COPY_BYTES_PER_ROW_ALIGNMENT; let bytes_per_block = conv::map_texture_format(src_texture.format, cmd_buf.private_features) .surface_desc() .bits as u32 / BITS_PER_BYTE; - let dst_bytes_per_row = destination.layout.bytes_per_row; - if bytes_per_row_alignment % bytes_per_block != 0 { - return Err(TransferError::UnalignedBytesPerRow.into()); - } - if dst_bytes_per_row % bytes_per_row_alignment != 0 { - return Err(TransferError::UnalignedBytesPerRow.into()); - } validate_texture_copy_range( source, src_texture.format, @@ -681,6 +700,7 @@ impl Global { CopySide::Destination, bytes_per_block as BufferAddress, copy_size, + true, )?; let (block_width, _) = src_texture.format.describe().block_dimensions; @@ -713,12 +733,20 @@ impl Global { depth_or_array_layers: 
copy_size.depth_or_array_layers, }; - let buffer_width = - (destination.layout.bytes_per_row / bytes_per_block) * block_width as u32; + let buffer_width = if let Some(bytes_per_row) = destination.layout.bytes_per_row { + (bytes_per_row.get() / bytes_per_block) * block_width as u32 + } else { + image_extent.width + }; + let buffer_height = if let Some(rows_per_image) = destination.layout.rows_per_image { + rows_per_image.get() + } else { + 0 + }; let region = hal::command::BufferImageCopy { buffer_offset: destination.layout.offset, buffer_width, - buffer_height: destination.layout.rows_per_image, + buffer_height, image_layers: src_layers, image_offset: src_offset, image_extent: conv::map_extent(&image_extent, src_texture.dimension), diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index b8d038473..1ed433898 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -340,6 +340,7 @@ impl Global { CopySide::Source, bytes_per_block as wgt::BufferAddress, size, + false, )?; let (block_width, block_height) = texture_format.describe().block_dimensions; @@ -352,8 +353,13 @@ impl Global { let width_blocks = size.width / block_width; let height_blocks = size.height / block_width; - let texel_rows_per_image = data_layout.rows_per_image; - let block_rows_per_image = data_layout.rows_per_image / block_height; + let texel_rows_per_image = if let Some(rows_per_image) = data_layout.rows_per_image { + rows_per_image.get() + } else { + // doesn't really matter because we need this only if we copy more than one layer, and then we validate for this being not None + size.height + }; + let block_rows_per_image = texel_rows_per_image / block_height; let bytes_per_row_alignment = get_lowest_common_denom( device.hal_limits.optimal_buffer_copy_pitch_alignment as u32, @@ -397,21 +403,25 @@ impl Global { let ptr = stage.memory.map(&device.raw, 0, stage_size)?; unsafe { + let bytes_per_row = if let Some(bytes_per_row) = data_layout.bytes_per_row 
{ + bytes_per_row.get() + } else { + width_blocks * bytes_per_block + }; + //TODO: https://github.com/zakarumych/gpu-alloc/issues/13 - if stage_bytes_per_row == data_layout.bytes_per_row { + if stage_bytes_per_row == bytes_per_row { // Fast path if the data isalready being aligned optimally. ptr::copy_nonoverlapping(data.as_ptr(), ptr.as_ptr(), stage_size as usize); } else { // Copy row by row into the optimal alignment. - let copy_bytes_per_row = - stage_bytes_per_row.min(data_layout.bytes_per_row) as usize; + let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; for layer in 0..size.depth_or_array_layers { let rows_offset = layer * block_rows_per_image; for row in 0..height_blocks { ptr::copy_nonoverlapping( - data.as_ptr().offset( - (rows_offset + row) as isize * data_layout.bytes_per_row as isize, - ), + data.as_ptr() + .offset((rows_offset + row) as isize * bytes_per_row as isize), ptr.as_ptr().offset( (rows_offset + row) as isize * stage_bytes_per_row as isize, ), diff --git a/wgpu-types/src/lib.rs b/wgpu-types/src/lib.rs index ca745f795..fd231571a 100644 --- a/wgpu-types/src/lib.rs +++ b/wgpu-types/src/lib.rs @@ -2507,18 +2507,18 @@ pub struct TextureDataLayout { /// For non-compressed textures, this is 1. pub offset: BufferAddress, /// Bytes per "row" of the image. This represents one row of pixels in the x direction. Compressed - /// textures include multiple rows of pixels in each "row". May be 0 for 1D texture copies. + /// textures include multiple rows of pixels in each "row". + /// Required if there are multiple rows (i.e. height or depth is more than one pixel or pixel block for compressed textures) /// /// Must be a multiple of 256 for [`CommandEncoder::copy_buffer_to_texture`] and [`CommandEncoder::copy_texture_to_buffer`]. /// [`Queue::write_texture`] does not have this requirement. /// /// Must be a multiple of the texture block size. For non-compressed textures, this is 1. 
- pub bytes_per_row: u32, + pub bytes_per_row: Option<NonZeroU32>, /// Rows that make up a single "image". Each "image" is one layer in the z direction of a 3D image. May be larger /// than `copy_size.y`. - /// - /// May be 0 for 2D texture copies. - pub rows_per_image: u32, + /// Required if there are multiple images (i.e. the depth is more than one) + pub rows_per_image: Option<NonZeroU32>, } /// Specific type of a buffer binding.