mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-02 10:04:23 +00:00
Make some `usize`-typed mask definitions agnostic to the size of `usize`
Some masks were defined as

```rust
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
```

where it was assumed that `usize` is never wider than 64 bits, which is currently true. To make those constants valid on a hypothetical 128-bit target, they have been redefined in a `usize`-width-agnostic way:

```rust
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; size_of::<usize>()]);
```

There are already some cases where Rust anticipates the possibility of supporting 128-bit targets, such as not implementing `From<usize>` for `u64`.
This commit is contained in:
parent
e7575f9670
commit
93ae6f80e3
@ -77,6 +77,6 @@ fn is_ascii_align_to_unrolled(bytes: &[u8]) -> bool {
|
||||
|
||||
#[inline]
|
||||
fn contains_nonascii(v: usize) -> bool {
|
||||
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
|
||||
const NONASCII_MASK: usize = usize::from_ne_bytes([0x80; core::mem::size_of::<usize>()]);
|
||||
(NONASCII_MASK & v) != 0
|
||||
}
|
||||
|
@ -890,6 +890,27 @@ impl usize {
|
||||
widening_impl! { usize, u128, 64, unsigned }
|
||||
}
|
||||
|
||||
impl usize {
|
||||
/// Returns a `usize` in which every byte is equal to `x`.
///
/// The byte array has length `size_of::<usize>()`, so the pattern fills the
/// whole word regardless of how wide `usize` is on the target.
|
||||
#[inline]
|
||||
pub(crate) const fn repeat_u8(x: u8) -> usize {
|
||||
usize::from_ne_bytes([x; mem::size_of::<usize>()])
|
||||
}
|
||||
|
||||
/// Returns a `usize` in which every 16-bit chunk (byte pair) is equal to `x`.
///
/// The value is built by repeatedly shifting the accumulator left by 16 bits
/// and OR-ing in `x`, once for every two bytes of `usize`.
|
||||
#[inline]
|
||||
pub(crate) const fn repeat_u16(x: u16) -> usize {
|
||||
let mut r = 0usize;
|
||||
// `i` counts the bytes filled so far; each iteration adds two.
|
||||
let mut i = 0;
|
||||
while i < mem::size_of::<usize>() {
|
||||
// Use `wrapping_shl` to make it work on targets with 16-bit `usize`:
// there the shift amount equals the bit width, and `wrapping_shl` masks
// the amount instead of overflowing the shift.
|
||||
r = r.wrapping_shl(16) | (x as usize);
|
||||
i += 2;
|
||||
}
|
||||
r
|
||||
}
|
||||
}
|
||||
|
||||
/// A classification of floating point numbers.
|
||||
///
|
||||
/// This `enum` is used as the return type for [`f32::classify`] and [`f64::classify`]. See
|
||||
|
@ -235,7 +235,7 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
|
||||
/// from `../str/mod.rs`, which does something similar for utf8 validation.
|
||||
#[inline]
|
||||
fn contains_nonascii(v: usize) -> bool {
|
||||
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
|
||||
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
|
||||
(NONASCII_MASK & v) != 0
|
||||
}
|
||||
|
||||
|
@ -4,12 +4,8 @@
|
||||
use crate::cmp;
|
||||
use crate::mem;
|
||||
|
||||
const LO_U64: u64 = 0x0101010101010101;
|
||||
const HI_U64: u64 = 0x8080808080808080;
|
||||
|
||||
// Use truncation.
|
||||
const LO_USIZE: usize = LO_U64 as usize;
|
||||
const HI_USIZE: usize = HI_U64 as usize;
|
||||
const LO_USIZE: usize = usize::repeat_u8(0x01);
|
||||
const HI_USIZE: usize = usize::repeat_u8(0x80);
|
||||
const USIZE_BYTES: usize = mem::size_of::<usize>();
|
||||
|
||||
/// Returns `true` if `x` contains any zero byte.
|
||||
|
@ -112,7 +112,7 @@ fn do_count_chars(s: &str) -> usize {
|
||||
// true)
|
||||
#[inline]
|
||||
fn contains_non_continuation_byte(w: usize) -> usize {
|
||||
const LSB: usize = 0x0101_0101_0101_0101u64 as usize;
|
||||
const LSB: usize = usize::repeat_u8(0x01);
|
||||
((!w >> 7) | (w >> 6)) & LSB
|
||||
}
|
||||
|
||||
@ -120,8 +120,8 @@ fn contains_non_continuation_byte(w: usize) -> usize {
|
||||
// more efficient.
|
||||
#[inline]
|
||||
fn sum_bytes_in_usize(values: usize) -> usize {
|
||||
const LSB_SHORTS: usize = 0x0001_0001_0001_0001_u64 as usize;
|
||||
const SKIP_BYTES: usize = 0x00ff_00ff_00ff_00ff_u64 as usize;
|
||||
const LSB_SHORTS: usize = usize::repeat_u16(0x0001);
|
||||
const SKIP_BYTES: usize = usize::repeat_u16(0x00ff);
|
||||
|
||||
let pair_sum: usize = (values & SKIP_BYTES) + ((values >> 8) & SKIP_BYTES);
|
||||
pair_sum.wrapping_mul(LSB_SHORTS) >> ((USIZE_SIZE - 2) * 8)
|
||||
|
@ -112,8 +112,7 @@ where
|
||||
Some(ch)
|
||||
}
|
||||
|
||||
// use truncation to fit u64 into usize
|
||||
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
|
||||
const NONASCII_MASK: usize = usize::repeat_u8(0x80);
|
||||
|
||||
/// Returns `true` if any byte in the word `x` is nonascii (>= 128).
|
||||
#[inline]
|
||||
|
Loading…
Reference in New Issue
Block a user