diff --git a/compiler/rustc_codegen_llvm/src/intrinsic.rs b/compiler/rustc_codegen_llvm/src/intrinsic.rs index b5b0086f740..e02c61cd296 100644 --- a/compiler/rustc_codegen_llvm/src/intrinsic.rs +++ b/compiler/rustc_codegen_llvm/src/intrinsic.rs @@ -1121,8 +1121,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>( if name == sym::simd_select_bitmask { let (len, _) = require_simd!(arg_tys[1], SimdArgument); - let expected_int_bits = (len.max(8) - 1).next_power_of_two(); - let expected_bytes = len / 8 + ((len % 8 > 0) as u64); + let expected_int_bits = len.max(8).next_power_of_two(); + let expected_bytes = len.div_ceil(8); let mask_ty = arg_tys[0]; let mask = match mask_ty.kind() { @@ -1379,17 +1379,16 @@ fn generic_simd_intrinsic<'ll, 'tcx>( } if name == sym::simd_bitmask { - // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a - // vector mask and returns the most significant bit (MSB) of each lane in the form - // of either: + // The `fn simd_bitmask(vector) -> unsigned integer` intrinsic takes a vector mask and + // returns one bit for each lane (which must all be `0` or `!0`) in the form of either: // * an unsigned integer // * an array of `u8` // If the vector has less than 8 lanes, a u8 is returned with zeroed trailing bits. // // The bit order of the result depends on the byte endianness, LSB-first for little // endian and MSB-first for big endian. - let expected_int_bits = in_len.max(8); - let expected_bytes = expected_int_bits / 8 + ((expected_int_bits % 8 > 0) as u64); + let expected_int_bits = in_len.max(8).next_power_of_two(); + let expected_bytes = in_len.div_ceil(8); // Integer vector : let (i_xn, in_elem_bitwidth) = match in_elem.kind() { @@ -1409,7 +1408,8 @@ fn generic_simd_intrinsic<'ll, 'tcx>( }), }; - // Shift the MSB to the right by "in_elem_bitwidth - 1" into the first bit position. + // LLVM doesn't always know the inputs are `0` or `!0`, so we shift here so it optimizes to + // `pmovmskb` and similar on x86. 
let shift_indices = vec![ bx.cx.const_int(bx.type_ix(in_elem_bitwidth), (in_elem_bitwidth - 1) as _); diff --git a/library/core/src/intrinsics/simd.rs b/library/core/src/intrinsics/simd.rs index b892e111588..30734c020b3 100644 --- a/library/core/src/intrinsics/simd.rs +++ b/library/core/src/intrinsics/simd.rs @@ -460,7 +460,7 @@ extern "rust-intrinsic" { /// `T` must be an integer vector. /// /// `U` must be either the smallest unsigned integer with at least as many bits as the length - /// of `T`, or the smallest array of `u8` with as many bits as the length of `T`. + /// of `T`, or the smallest array of `u8` with at least as many bits as the length of `T`. /// /// Each element is truncated to a single bit and packed into the result. /// @@ -472,12 +472,19 @@ extern "rust-intrinsic" { /// * On little endian, the least significant bit corresponds to the first vector element. /// * On big endian, the least significant bit corresponds to the last vector element. /// - /// For example, `[!0, 0, !0, !0]` packs to `0b1101` on little endian and `0b1011` on big - /// endian. + /// For example, `[!0, 0, !0, !0]` packs to + /// - `0b1101u8` or `[0b1101]` on little endian, and + /// - `0b1011u8` or `[0b1011]` on big endian. /// - /// To consider a larger example, `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs - /// to `[0b00000001, 0b01000011]` or `0b0100001100000001` on little endian, and `[0b10000000, - /// 0b11000010]` or `0b1000000011000010` on big endian. + /// To consider a larger example, + /// `[!0, 0, 0, 0, 0, 0, 0, 0, !0, !0, 0, 0, 0, 0, !0, 0]` packs to + /// - `0b0100001100000001u16` or `[0b00000001, 0b01000011]` on little endian, and + /// - `0b1000000011000010u16` or `[0b10000000, 0b11000010]` on big endian. 
+ /// + /// And finally, a non-power-of-2 example with multiple bytes: + /// `[!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]` packs to + /// - `0b0101001011u16` or `[0b01001011, 0b01]` on little endian, and + /// - `0b1101001010u16` or `[0b11, 0b01001010]` on big endian. /// /// # Safety /// `x` must contain only `0` and `!0`. diff --git a/tests/ui/simd/simd-bitmask-notpow2.rs b/tests/ui/simd/simd-bitmask-notpow2.rs new file mode 100644 index 00000000000..ff43206a3fd --- /dev/null +++ b/tests/ui/simd/simd-bitmask-notpow2.rs @@ -0,0 +1,90 @@ +//@run-pass +// SEGFAULTS on LLVM 17. This should be merged into `simd-bitmask` once we require LLVM 18. +//@ min-llvm-version: 18 +// FIXME: broken codegen on big-endian (https://github.com/rust-lang/rust/issues/127205) +//@ ignore-endian-big +#![feature(repr_simd, intrinsics)] + +extern "rust-intrinsic" { + fn simd_bitmask<T, U>(v: T) -> U; + fn simd_select_bitmask<T, U>(m: T, a: U, b: U) -> U; +} + +fn main() { + // Non-power-of-2 multi-byte mask. + #[repr(simd, packed)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone, Debug, PartialEq)] + struct i32x10([i32; 10]); + impl i32x10 { + fn splat(x: i32) -> Self { + Self([x; 10]) + } + } + unsafe { + let mask = i32x10([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0]); + let mask_bits = if cfg!(target_endian = "little") { 0b0101001011 } else { 0b1101001010 }; + let mask_bytes = + if cfg!(target_endian = "little") { [0b01001011, 0b01] } else { [0b11, 0b01001010] }; + + let bitmask1: u16 = simd_bitmask(mask); + let bitmask2: [u8; 2] = simd_bitmask(mask); + assert_eq!(bitmask1, mask_bits); + assert_eq!(bitmask2, mask_bytes); + + let selected1 = simd_select_bitmask::<u16, _>( + mask_bits, + i32x10::splat(!0), // yes + i32x10::splat(0), // no + ); + let selected2 = simd_select_bitmask::<[u8; 2], _>( + mask_bytes, + i32x10::splat(!0), // yes + i32x10::splat(0), // no + ); + assert_eq!(selected1, mask); + assert_eq!(selected2, mask); + } + + // Test for a mask where the next multiple of 8 is not a power of two. 
+ #[repr(simd, packed)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone, Debug, PartialEq)] + struct i32x20([i32; 20]); + impl i32x20 { + fn splat(x: i32) -> Self { + Self([x; 20]) + } + } + unsafe { + let mask = i32x20([!0, !0, 0, !0, 0, 0, !0, 0, !0, 0, 0, 0, 0, !0, !0, !0, !0, !0, !0, !0]); + let mask_bits = if cfg!(target_endian = "little") { + 0b11111110000101001011 + } else { + 0b11010010100001111111 + }; + let mask_bytes = if cfg!(target_endian = "little") { + [0b01001011, 0b11100001, 0b1111] + } else { + [0b1101, 0b00101000, 0b01111111] + }; + + let bitmask1: u32 = simd_bitmask(mask); + let bitmask2: [u8; 3] = simd_bitmask(mask); + assert_eq!(bitmask1, mask_bits); + assert_eq!(bitmask2, mask_bytes); + + let selected1 = simd_select_bitmask::<u32, _>( + mask_bits, + i32x20::splat(!0), // yes + i32x20::splat(0), // no + ); + let selected2 = simd_select_bitmask::<[u8; 3], _>( + mask_bytes, + i32x20::splat(!0), // yes + i32x20::splat(0), // no + ); + assert_eq!(selected1, mask); + assert_eq!(selected2, mask); + } +} diff --git a/tests/ui/simd/simd-bitmask.rs b/tests/ui/simd/simd-bitmask.rs index 4a7c3bc7750..82f73fca951 100644 --- a/tests/ui/simd/simd-bitmask.rs +++ b/tests/ui/simd/simd-bitmask.rs @@ -1,5 +1,4 @@ //@run-pass -//@ignore-endian-big behavior of simd_select_bitmask is endian-specific #![feature(repr_simd, intrinsics)] extern "rust-intrinsic" { @@ -17,36 +16,58 @@ fn main() { let i: u8 = simd_bitmask(v); let a: [u8; 1] = simd_bitmask(v); - assert_eq!(i, 0b0101); - assert_eq!(a, [0b0101]); + if cfg!(target_endian = "little") { + assert_eq!(i, 0b0101); + assert_eq!(a, [0b0101]); + } else { + assert_eq!(i, 0b1010); + assert_eq!(a, [0b1010]); + } let v = Simd::([0, 0, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, -1, 0]); let i: u16 = simd_bitmask(v); let a: [u8; 2] = simd_bitmask(v); - assert_eq!(i, 0b0101000000001100); - assert_eq!(a, [0b1100, 0b01010000]); + if cfg!(target_endian = "little") { + assert_eq!(i, 0b0101000000001100); + assert_eq!(a, [0b00001100, 
0b01010000]); + } else { + assert_eq!(i, 0b0011000000001010); + assert_eq!(a, [0b00110000, 0b00001010]); + } } unsafe { - let a = Simd::([0, 1, 2, 3, 4, 5, 6, 7]); - let b = Simd::([8, 9, 10, 11, 12, 13, 14, 15]); - let e = [0, 9, 2, 11, 12, 13, 14, 15]; + let a = Simd::([0, 1, 2, 3]); + let b = Simd::([8, 9, 10, 11]); + let e = [0, 9, 2, 11]; - let r = simd_select_bitmask(0b0101u8, a, b); + let mask = if cfg!(target_endian = "little") { 0b0101u8 } else { 0b1010u8 }; + let r = simd_select_bitmask(mask, a, b); assert_eq!(r.0, e); - let r = simd_select_bitmask([0b0101u8], a, b); + let mask = if cfg!(target_endian = "little") { [0b0101u8] } else { [0b1010u8] }; + let r = simd_select_bitmask(mask, a, b); assert_eq!(r.0, e); let a = Simd::([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]); let b = Simd::([16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]); let e = [16, 17, 2, 3, 20, 21, 22, 23, 24, 25, 26, 27, 12, 29, 14, 31]; - let r = simd_select_bitmask(0b0101000000001100u16, a, b); + let mask = if cfg!(target_endian = "little") { + 0b0101000000001100u16 + } else { + 0b0011000000001010u16 + }; + let r = simd_select_bitmask(mask, a, b); assert_eq!(r.0, e); - let r = simd_select_bitmask([0b1100u8, 0b01010000u8], a, b); + let mask = if cfg!(target_endian = "little") { + [0b00001100u8, 0b01010000u8] + } else { + [0b00110000u8, 0b00001010u8] + }; + let r = simd_select_bitmask(mask, a, b); assert_eq!(r.0, e); } }