mirror of
https://github.com/rust-lang/rust.git
synced 2025-04-15 05:26:47 +00:00
Merge 9109550e4c
into 65fa0ab924
This commit is contained in:
commit
fd172f7d14
@ -178,8 +178,8 @@ def main():
|
||||
else:
|
||||
normal0.append((a, b - a))
|
||||
|
||||
singletons0u, singletons0l = compress_singletons(singletons0)
|
||||
singletons1u, singletons1l = compress_singletons(singletons1)
|
||||
SINGLETONS0_UPPER, SINGLETONS0_LOWER = compress_singletons(singletons0)
|
||||
SINGLETONS1_UPPER, SINGLETONS1_LOWER = compress_singletons(singletons1)
|
||||
normal0 = compress_normal(normal0)
|
||||
normal1 = compress_normal(normal1)
|
||||
|
||||
@ -187,21 +187,35 @@ def main():
|
||||
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
|
||||
// do not edit directly!
|
||||
|
||||
fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
|
||||
let xupper = (x >> 8) as u8;
|
||||
let mut lowerstart = 0;
|
||||
for &(upper, lowercount) in singletonuppers {
|
||||
let lowerend = lowerstart + lowercount as usize;
|
||||
if xupper == upper {
|
||||
for &lower in &singletonlowers[lowerstart..lowerend] {
|
||||
if lower == x as u8 {
|
||||
/// # Safety
|
||||
///
|
||||
/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
|
||||
/// equal to the length of `singletons_lower`.
|
||||
/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
|
||||
/// endian, with the highest bit set and the length contained in the remaining 15 bits.
|
||||
unsafe fn check(
|
||||
x: u16,
|
||||
singletons_upper: &[(u8, u8)],
|
||||
singletons_lower: &[u8],
|
||||
normal: &[u8],
|
||||
) -> bool {
|
||||
let [x_upper, x_lower] = x.to_be_bytes();
|
||||
let mut lower_start = 0;
|
||||
for &(upper, lower_count) in singletons_upper {
|
||||
let lower_end = lower_start + lower_count as usize;
|
||||
if upper == x_upper {
|
||||
// SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
|
||||
// is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
|
||||
// less than `singletons_lower.len()`.
|
||||
for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
|
||||
if lower == x_lower {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if xupper < upper {
|
||||
} else if x_upper < upper {
|
||||
break;
|
||||
}
|
||||
lowerstart = lowerend;
|
||||
lower_start = lower_end;
|
||||
}
|
||||
|
||||
let mut x = x as i32;
|
||||
@ -209,9 +223,14 @@ fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &
|
||||
let mut current = true;
|
||||
while let Some(v) = normal.next() {
|
||||
let len = if v & 0x80 != 0 {
|
||||
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
|
||||
let upper = v & 0x7f;
|
||||
// SAFETY: The encoding of `normal` is guaranteed by the caller such that
|
||||
// if the length is greater than 0x7f, it consists of two bytes, so there
|
||||
// must be a next byte.
|
||||
let lower = unsafe { normal.next().unwrap_unchecked() };
|
||||
i32::from(u16::from_be_bytes([upper, lower]))
|
||||
} else {
|
||||
v as i32
|
||||
i32::from(v)
|
||||
};
|
||||
x -= len;
|
||||
if x < 0 {
|
||||
@ -226,30 +245,56 @@ pub(crate) fn is_printable(x: char) -> bool {
|
||||
let x = x as u32;
|
||||
let lower = x as u16;
|
||||
|
||||
if x < 32 {
|
||||
// ASCII fast path
|
||||
false
|
||||
} else if x < 127 {
|
||||
// ASCII fast path
|
||||
true
|
||||
} else if x < 0x10000 {
|
||||
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
|
||||
} else if x < 0x20000 {
|
||||
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
|
||||
} else {\
|
||||
match x {
|
||||
..32 => false, // ASCII fast path
|
||||
..127 => true, // ASCII fast path
|
||||
..0x10000 => {
|
||||
const {
|
||||
let mut lower_count_total = 0;
|
||||
let mut i = 0;
|
||||
while i < SINGLETONS0_UPPER.len() {
|
||||
lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
|
||||
i += 1;
|
||||
}
|
||||
assert!(lower_count_total == SINGLETONS0_LOWER.len());
|
||||
}
|
||||
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
|
||||
// to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
|
||||
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
|
||||
// the length contained in the remaining 15 bits.
|
||||
unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
|
||||
}
|
||||
..0x20000 => {
|
||||
const {
|
||||
let mut lower_count_total = 0;
|
||||
let mut i = 0;
|
||||
while i < SINGLETONS1_UPPER.len() {
|
||||
lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
|
||||
i += 1;
|
||||
}
|
||||
assert!(lower_count_total == SINGLETONS1_LOWER.len());
|
||||
}
|
||||
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
|
||||
// to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
|
||||
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
|
||||
// the length contained in the remaining 15 bits.
|
||||
unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
|
||||
}\
|
||||
""")
|
||||
for a, b in extra:
|
||||
print(" if 0x{:x} <= x && x < 0x{:x} {{".format(a, a + b))
|
||||
print(" return false;")
|
||||
print(" }")
|
||||
print(" 0x{:x}..0x{:x} => false,".format(a, a + b))
|
||||
print("""\
|
||||
true
|
||||
_ => true,
|
||||
}
|
||||
}\
|
||||
""")
|
||||
print()
|
||||
print_singletons(singletons0u, singletons0l, "SINGLETONS0U", "SINGLETONS0L")
|
||||
print_singletons(singletons1u, singletons1l, "SINGLETONS1U", "SINGLETONS1L")
|
||||
print_singletons(
|
||||
SINGLETONS0_UPPER, SINGLETONS0_LOWER, "SINGLETONS0_UPPER", "SINGLETONS0_LOWER"
|
||||
)
|
||||
print_singletons(
|
||||
SINGLETONS1_UPPER, SINGLETONS1_LOWER, "SINGLETONS1_UPPER", "SINGLETONS1_LOWER"
|
||||
)
|
||||
print_normal(normal0, "NORMAL0")
|
||||
print_normal(normal1, "NORMAL1")
|
||||
|
||||
|
@ -1,21 +1,35 @@
|
||||
// NOTE: The following code was generated by "library/core/src/unicode/printable.py",
|
||||
// do not edit directly!
|
||||
|
||||
fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &[u8]) -> bool {
|
||||
let xupper = (x >> 8) as u8;
|
||||
let mut lowerstart = 0;
|
||||
for &(upper, lowercount) in singletonuppers {
|
||||
let lowerend = lowerstart + lowercount as usize;
|
||||
if xupper == upper {
|
||||
for &lower in &singletonlowers[lowerstart..lowerend] {
|
||||
if lower == x as u8 {
|
||||
/// # Safety
|
||||
///
|
||||
/// - The sum of all lengths (i.e. the second field of each pair) in `singletons_upper` must be
|
||||
/// equal to the length of `singletons_lower`.
|
||||
/// - `normal` must be encoded such that lengths greater than `0x7f` consist of two bytes in big
|
||||
/// endian, with the highest bit set and the length contained in the remaining 15 bits.
|
||||
unsafe fn check(
|
||||
x: u16,
|
||||
singletons_upper: &[(u8, u8)],
|
||||
singletons_lower: &[u8],
|
||||
normal: &[u8],
|
||||
) -> bool {
|
||||
let [x_upper, x_lower] = x.to_be_bytes();
|
||||
let mut lower_start = 0;
|
||||
for &(upper, lower_count) in singletons_upper {
|
||||
let lower_end = lower_start + lower_count as usize;
|
||||
if upper == x_upper {
|
||||
// SAFETY: The caller ensures that the sum of all lengths in `singletons_upper`
|
||||
// is equal to the length of `singletons_lower`, so `lower_end` is guaranteed to be
|
||||
// less than `singletons_lower.len()`.
|
||||
for &lower in unsafe { singletons_lower.get_unchecked(lower_start..lower_end) } {
|
||||
if lower == x_lower {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if xupper < upper {
|
||||
} else if x_upper < upper {
|
||||
break;
|
||||
}
|
||||
lowerstart = lowerend;
|
||||
lower_start = lower_end;
|
||||
}
|
||||
|
||||
let mut x = x as i32;
|
||||
@ -23,9 +37,14 @@ fn check(x: u16, singletonuppers: &[(u8, u8)], singletonlowers: &[u8], normal: &
|
||||
let mut current = true;
|
||||
while let Some(v) = normal.next() {
|
||||
let len = if v & 0x80 != 0 {
|
||||
((v & 0x7f) as i32) << 8 | normal.next().unwrap() as i32
|
||||
let upper = v & 0x7f;
|
||||
// SAFETY: The encoding of `normal` is guaranteed by the caller such that
|
||||
// if the length is greater than 0x7f, it consists of two bytes, so there
|
||||
// must be a next byte.
|
||||
let lower = unsafe { normal.next().unwrap_unchecked() };
|
||||
i32::from(u16::from_be_bytes([upper, lower]))
|
||||
} else {
|
||||
v as i32
|
||||
i32::from(v)
|
||||
};
|
||||
x -= len;
|
||||
if x < 0 {
|
||||
@ -40,53 +59,57 @@ pub(crate) fn is_printable(x: char) -> bool {
|
||||
let x = x as u32;
|
||||
let lower = x as u16;
|
||||
|
||||
if x < 32 {
|
||||
// ASCII fast path
|
||||
false
|
||||
} else if x < 127 {
|
||||
// ASCII fast path
|
||||
true
|
||||
} else if x < 0x10000 {
|
||||
check(lower, SINGLETONS0U, SINGLETONS0L, NORMAL0)
|
||||
} else if x < 0x20000 {
|
||||
check(lower, SINGLETONS1U, SINGLETONS1L, NORMAL1)
|
||||
} else {
|
||||
if 0x2a6e0 <= x && x < 0x2a700 {
|
||||
return false;
|
||||
match x {
|
||||
..32 => false, // ASCII fast path
|
||||
..127 => true, // ASCII fast path
|
||||
..0x10000 => {
|
||||
const {
|
||||
let mut lower_count_total = 0;
|
||||
let mut i = 0;
|
||||
while i < SINGLETONS0_UPPER.len() {
|
||||
lower_count_total += SINGLETONS0_UPPER[i].1 as usize;
|
||||
i += 1;
|
||||
}
|
||||
assert!(lower_count_total == SINGLETONS0_LOWER.len());
|
||||
}
|
||||
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS0_UPPER` is equal
|
||||
// to the length of `SINGLETONS0_LOWER`, and `NORMAL0` is encoded such that lengths
|
||||
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
|
||||
// the length contained in the remaining 15 bits.
|
||||
unsafe { check(lower, SINGLETONS0_UPPER, SINGLETONS0_LOWER, NORMAL0) }
|
||||
}
|
||||
if 0x2b73a <= x && x < 0x2b740 {
|
||||
return false;
|
||||
..0x20000 => {
|
||||
const {
|
||||
let mut lower_count_total = 0;
|
||||
let mut i = 0;
|
||||
while i < SINGLETONS1_UPPER.len() {
|
||||
lower_count_total += SINGLETONS1_UPPER[i].1 as usize;
|
||||
i += 1;
|
||||
}
|
||||
assert!(lower_count_total == SINGLETONS1_LOWER.len());
|
||||
}
|
||||
// SAFETY: We just asserted that the sum of all lengths in `SINGLETONS1_UPPER` is equal
|
||||
// to the length of `SINGLETONS1_LOWER`, and `NORMAL1` is encoded such that lengths
|
||||
// greater than `0x7f` consist of two bytes in big endian, with the highest bit set and
|
||||
// the length contained in the remaining 15 bits.
|
||||
unsafe { check(lower, SINGLETONS1_UPPER, SINGLETONS1_LOWER, NORMAL1) }
|
||||
}
|
||||
if 0x2b81e <= x && x < 0x2b820 {
|
||||
return false;
|
||||
}
|
||||
if 0x2cea2 <= x && x < 0x2ceb0 {
|
||||
return false;
|
||||
}
|
||||
if 0x2ebe1 <= x && x < 0x2ebf0 {
|
||||
return false;
|
||||
}
|
||||
if 0x2ee5e <= x && x < 0x2f800 {
|
||||
return false;
|
||||
}
|
||||
if 0x2fa1e <= x && x < 0x30000 {
|
||||
return false;
|
||||
}
|
||||
if 0x3134b <= x && x < 0x31350 {
|
||||
return false;
|
||||
}
|
||||
if 0x323b0 <= x && x < 0xe0100 {
|
||||
return false;
|
||||
}
|
||||
if 0xe01f0 <= x && x < 0x110000 {
|
||||
return false;
|
||||
}
|
||||
true
|
||||
0x2a6e0..0x2a700 => false,
|
||||
0x2b73a..0x2b740 => false,
|
||||
0x2b81e..0x2b820 => false,
|
||||
0x2cea2..0x2ceb0 => false,
|
||||
0x2ebe1..0x2ebf0 => false,
|
||||
0x2ee5e..0x2f800 => false,
|
||||
0x2fa1e..0x30000 => false,
|
||||
0x3134b..0x31350 => false,
|
||||
0x323b0..0xe0100 => false,
|
||||
0xe01f0..0x110000 => false,
|
||||
_ => true,
|
||||
}
|
||||
}
|
||||
|
||||
#[rustfmt::skip]
|
||||
const SINGLETONS0U: &[(u8, u8)] = &[
|
||||
const SINGLETONS0_UPPER: &[(u8, u8)] = &[
|
||||
(0x00, 1),
|
||||
(0x03, 5),
|
||||
(0x05, 6),
|
||||
@ -129,7 +152,7 @@ const SINGLETONS0U: &[(u8, u8)] = &[
|
||||
(0xff, 9),
|
||||
];
|
||||
#[rustfmt::skip]
|
||||
const SINGLETONS0L: &[u8] = &[
|
||||
const SINGLETONS0_LOWER: &[u8] = &[
|
||||
0xad, 0x78, 0x79, 0x8b, 0x8d, 0xa2, 0x30, 0x57,
|
||||
0x58, 0x8b, 0x8c, 0x90, 0x1c, 0xdd, 0x0e, 0x0f,
|
||||
0x4b, 0x4c, 0xfb, 0xfc, 0x2e, 0x2f, 0x3f, 0x5c,
|
||||
@ -169,7 +192,7 @@ const SINGLETONS0L: &[u8] = &[
|
||||
0xfe, 0xff,
|
||||
];
|
||||
#[rustfmt::skip]
|
||||
const SINGLETONS1U: &[(u8, u8)] = &[
|
||||
const SINGLETONS1_UPPER: &[(u8, u8)] = &[
|
||||
(0x00, 6),
|
||||
(0x01, 1),
|
||||
(0x03, 1),
|
||||
@ -216,7 +239,7 @@ const SINGLETONS1U: &[(u8, u8)] = &[
|
||||
(0xfb, 1),
|
||||
];
|
||||
#[rustfmt::skip]
|
||||
const SINGLETONS1L: &[u8] = &[
|
||||
const SINGLETONS1_LOWER: &[u8] = &[
|
||||
0x0c, 0x27, 0x3b, 0x3e, 0x4e, 0x4f, 0x8f, 0x9e,
|
||||
0x9e, 0x9f, 0x7b, 0x8b, 0x93, 0x96, 0xa2, 0xb2,
|
||||
0xba, 0x86, 0xb1, 0x06, 0x07, 0x09, 0x36, 0x3d,
|
||||
|
Loading…
Reference in New Issue
Block a user