mirror of
https://github.com/rust-lang/rust.git
synced 2025-02-18 09:53:26 +00:00
Respond to review feedback, and improve implementation somewhat
This commit is contained in:
parent
002aaf2c65
commit
ebbccaf6bf
@ -3,21 +3,25 @@ use test::{black_box, Bencher};
|
|||||||
|
|
||||||
macro_rules! define_benches {
|
macro_rules! define_benches {
|
||||||
($( fn $name: ident($arg: ident: &str) $body: block )+) => {
|
($( fn $name: ident($arg: ident: &str) $body: block )+) => {
|
||||||
|
define_benches!(mod en_tiny, en::TINY, $($name $arg $body)+);
|
||||||
define_benches!(mod en_small, en::SMALL, $($name $arg $body)+);
|
define_benches!(mod en_small, en::SMALL, $($name $arg $body)+);
|
||||||
define_benches!(mod en_medium, en::MEDIUM, $($name $arg $body)+);
|
define_benches!(mod en_medium, en::MEDIUM, $($name $arg $body)+);
|
||||||
define_benches!(mod en_large, en::LARGE, $($name $arg $body)+);
|
define_benches!(mod en_large, en::LARGE, $($name $arg $body)+);
|
||||||
define_benches!(mod en_huge, en::HUGE, $($name $arg $body)+);
|
define_benches!(mod en_huge, en::HUGE, $($name $arg $body)+);
|
||||||
|
|
||||||
|
define_benches!(mod zh_tiny, zh::TINY, $($name $arg $body)+);
|
||||||
define_benches!(mod zh_small, zh::SMALL, $($name $arg $body)+);
|
define_benches!(mod zh_small, zh::SMALL, $($name $arg $body)+);
|
||||||
define_benches!(mod zh_medium, zh::MEDIUM, $($name $arg $body)+);
|
define_benches!(mod zh_medium, zh::MEDIUM, $($name $arg $body)+);
|
||||||
define_benches!(mod zh_large, zh::LARGE, $($name $arg $body)+);
|
define_benches!(mod zh_large, zh::LARGE, $($name $arg $body)+);
|
||||||
define_benches!(mod zh_huge, zh::HUGE, $($name $arg $body)+);
|
define_benches!(mod zh_huge, zh::HUGE, $($name $arg $body)+);
|
||||||
|
|
||||||
|
define_benches!(mod ru_tiny, ru::TINY, $($name $arg $body)+);
|
||||||
define_benches!(mod ru_small, ru::SMALL, $($name $arg $body)+);
|
define_benches!(mod ru_small, ru::SMALL, $($name $arg $body)+);
|
||||||
define_benches!(mod ru_medium, ru::MEDIUM, $($name $arg $body)+);
|
define_benches!(mod ru_medium, ru::MEDIUM, $($name $arg $body)+);
|
||||||
define_benches!(mod ru_large, ru::LARGE, $($name $arg $body)+);
|
define_benches!(mod ru_large, ru::LARGE, $($name $arg $body)+);
|
||||||
define_benches!(mod ru_huge, ru::HUGE, $($name $arg $body)+);
|
define_benches!(mod ru_huge, ru::HUGE, $($name $arg $body)+);
|
||||||
|
|
||||||
|
define_benches!(mod emoji_tiny, emoji::TINY, $($name $arg $body)+);
|
||||||
define_benches!(mod emoji_small, emoji::SMALL, $($name $arg $body)+);
|
define_benches!(mod emoji_small, emoji::SMALL, $($name $arg $body)+);
|
||||||
define_benches!(mod emoji_medium, emoji::MEDIUM, $($name $arg $body)+);
|
define_benches!(mod emoji_medium, emoji::MEDIUM, $($name $arg $body)+);
|
||||||
define_benches!(mod emoji_large, emoji::LARGE, $($name $arg $body)+);
|
define_benches!(mod emoji_large, emoji::LARGE, $($name $arg $body)+);
|
||||||
@ -43,12 +47,12 @@ macro_rules! define_benches {
|
|||||||
}
|
}
|
||||||
|
|
||||||
define_benches! {
|
define_benches! {
|
||||||
fn case00_cur_libcore(s: &str) {
|
fn case00_libcore(s: &str) {
|
||||||
cur_libcore(s)
|
libcore(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn case01_old_libcore(s: &str) {
|
fn case01_filter_count_cont_bytes(s: &str) {
|
||||||
old_libcore(s)
|
filter_count_cont_bytes(s)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn case02_iter_increment(s: &str) {
|
fn case02_iter_increment(s: &str) {
|
||||||
@ -60,14 +64,16 @@ define_benches! {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn cur_libcore(s: &str) -> usize {
|
fn libcore(s: &str) -> usize {
|
||||||
s.chars().count()
|
s.chars().count()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn utf8_is_cont_byte(byte: u8) -> bool {
|
fn utf8_is_cont_byte(byte: u8) -> bool {
|
||||||
(byte as i8) < -64
|
(byte as i8) < -64
|
||||||
}
|
}
|
||||||
fn old_libcore(s: &str) -> usize {
|
|
||||||
|
fn filter_count_cont_bytes(s: &str) -> usize {
|
||||||
s.as_bytes().iter().filter(|&&byte| !utf8_is_cont_byte(byte)).count()
|
s.as_bytes().iter().filter(|&&byte| !utf8_is_cont_byte(byte)).count()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
//! Exposes a number of modules with different kinds of strings.
|
//! Exposes a number of modules with different kinds of strings.
|
||||||
//!
|
//!
|
||||||
//! Each module contains `&str` constants named `SMALL`, `MEDIUM`, `LARGE`, and
|
//! Each module contains `&str` constants named `TINY`, `SMALL`, `MEDIUM`,
|
||||||
//! `HUGE`.
|
//! `LARGE`, and `HUGE`.
|
||||||
//!
|
//!
|
||||||
|
//! - The `TINY` string is generally around 8 bytes.
|
||||||
//! - The `SMALL` string is generally around 30-40 bytes.
|
//! - The `SMALL` string is generally around 30-40 bytes.
|
||||||
//! - The `MEDIUM` string is generally around 600-700 bytes.
|
//! - The `MEDIUM` string is generally around 600-700 bytes.
|
||||||
//! - The `LARGE` string is the `MEDIUM` string repeated 8x, and is around 5kb.
|
//! - The `LARGE` string is the `MEDIUM` string repeated 8x, and is around 5kb.
|
||||||
@ -27,6 +28,7 @@ macro_rules! define_consts {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub mod en {
|
pub mod en {
|
||||||
|
pub const TINY: &str = "Mary had";
|
||||||
pub const SMALL: &str = "Mary had a little lamb, Little lamb";
|
pub const SMALL: &str = "Mary had a little lamb, Little lamb";
|
||||||
define_consts! {
|
define_consts! {
|
||||||
"Rust is blazingly fast and memory-efficient: with no runtime or garbage
|
"Rust is blazingly fast and memory-efficient: with no runtime or garbage
|
||||||
@ -42,6 +44,7 @@ pub mod en {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub mod zh {
|
pub mod zh {
|
||||||
|
pub const TINY: &str = "速度惊";
|
||||||
pub const SMALL: &str = "速度惊人且内存利用率极高";
|
pub const SMALL: &str = "速度惊人且内存利用率极高";
|
||||||
define_consts! {
|
define_consts! {
|
||||||
"Rust 速度惊人且内存利用率极高。由于\
|
"Rust 速度惊人且内存利用率极高。由于\
|
||||||
@ -59,6 +62,7 @@ pub mod zh {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub mod ru {
|
pub mod ru {
|
||||||
|
pub const TINY: &str = "Сотни";
|
||||||
pub const SMALL: &str = "Сотни компаний по";
|
pub const SMALL: &str = "Сотни компаний по";
|
||||||
define_consts! {
|
define_consts! {
|
||||||
"Сотни компаний по всему миру используют Rust в реальных\
|
"Сотни компаний по всему миру используют Rust в реальных\
|
||||||
@ -72,6 +76,7 @@ pub mod ru {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub mod emoji {
|
pub mod emoji {
|
||||||
|
pub const TINY: &str = "😀😃";
|
||||||
pub const SMALL: &str = "😀😃😄😁😆😅🤣😂🙂🙃😉😊😇🥰😍🤩😘";
|
pub const SMALL: &str = "😀😃😄😁😆😅🤣😂🙂🙃😉😊😇🥰😍🤩😘";
|
||||||
define_consts! {
|
define_consts! {
|
||||||
"😀😃😄😁😆😅🤣😂🙂🙃😉😊😇🥰😍🤩😘😗☺😚😙🥲😋😛😜🤪😝🤑🤗🤭🤫🤔🤐🤨😐😑😶😶🌫️😏😒\
|
"😀😃😄😁😆😅🤣😂🙂🙃😉😊😇🥰😍🤩😘😗☺😚😙🥲😋😛😜🤪😝🤑🤗🤭🤫🤔🤐🤨😐😑😶😶🌫️😏😒\
|
||||||
|
@ -17,27 +17,57 @@
|
|||||||
//! Note: Because the term "leading byte" can sometimes be ambiguous (for
|
//! Note: Because the term "leading byte" can sometimes be ambiguous (for
|
||||||
//! example, it could also refer to the first byte of a slice), we'll often use
|
//! example, it could also refer to the first byte of a slice), we'll often use
|
||||||
//! the term "non-continuation byte" to refer to these bytes in the code.
|
//! the term "non-continuation byte" to refer to these bytes in the code.
|
||||||
|
use core::intrinsics::unlikely;
|
||||||
|
|
||||||
|
const USIZE_SIZE: usize = core::mem::size_of::<usize>();
|
||||||
|
const UNROLL_INNER: usize = 4;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub(super) fn count_chars(s: &str) -> usize {
|
pub(super) fn count_chars(s: &str) -> usize {
|
||||||
|
if s.len() < USIZE_SIZE * UNROLL_INNER {
|
||||||
|
// Avoid entering the optimized implementation for strings where the
|
||||||
|
// difference is not likely to matter, or where it might even be slower.
|
||||||
|
// That said, a ton of thought was not spent on the particular threshold
|
||||||
|
// here, beyond "this value seems to make sense".
|
||||||
|
char_count_general_case(s.as_bytes())
|
||||||
|
} else {
|
||||||
|
do_count_chars(s)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn do_count_chars(s: &str) -> usize {
|
||||||
// For correctness, `CHUNK_SIZE` must be:
|
// For correctness, `CHUNK_SIZE` must be:
|
||||||
|
//
|
||||||
// - Less than or equal to 255, otherwise we'll overflow bytes in `counts`.
|
// - Less than or equal to 255, otherwise we'll overflow bytes in `counts`.
|
||||||
// - A multiple of `UNROLL_INNER`, otherwise our `break` inside the
|
// - A multiple of `UNROLL_INNER`, otherwise our `break` inside the
|
||||||
// `body.chunks(CHUNK_SIZE)` loop would be incorrect.
|
// `body.chunks(CHUNK_SIZE)` loop would be incorrect.
|
||||||
//
|
//
|
||||||
// For performance, `CHUNK_SIZE` should be:
|
// For performance, `CHUNK_SIZE` should be:
|
||||||
// - Relatively cheap to `%` against.
|
// - Relatively cheap to `/` against (so some simple sum of powers of two).
|
||||||
// - Large enough to avoid paying for the cost of the `sum_bytes_in_usize`
|
// - Large enough to avoid paying for the cost of the `sum_bytes_in_usize`
|
||||||
// too often.
|
// too often.
|
||||||
const CHUNK_SIZE: usize = 192;
|
const CHUNK_SIZE: usize = 192;
|
||||||
const UNROLL_INNER: usize = 4;
|
|
||||||
|
|
||||||
// Check the properties of `CHUNK_SIZE` / `UNROLL_INNER` that are required
|
// Check the properties of `CHUNK_SIZE` and `UNROLL_INNER` that are required
|
||||||
// for correctness.
|
// for correctness.
|
||||||
const _: [(); 1] = [(); (CHUNK_SIZE < 256 && (CHUNK_SIZE % UNROLL_INNER) == 0) as usize];
|
const _: () = assert!(CHUNK_SIZE < 256);
|
||||||
|
const _: () = assert!(CHUNK_SIZE % UNROLL_INNER == 0);
|
||||||
|
|
||||||
// SAFETY: transmuting `[u8]` to `[usize]` is safe except for size
|
// SAFETY: transmuting `[u8]` to `[usize]` is safe except for size
|
||||||
// differences which are handled by `align_to`.
|
// differences which are handled by `align_to`.
|
||||||
let (head, body, tail) = unsafe { s.as_bytes().align_to::<usize>() };
|
let (head, body, tail) = unsafe { s.as_bytes().align_to::<usize>() };
|
||||||
|
|
||||||
|
// This should be quite rare, and basically exists to handle the degenerate
|
||||||
|
// cases where align_to fails (as well as miri under symbolic alignment
|
||||||
|
// mode).
|
||||||
|
//
|
||||||
|
// The `unlikely` helps discourage LLVM from inlining the body, which is
|
||||||
|
// nice, as we would rather not mark the `char_count_general_case` function
|
||||||
|
// as cold.
|
||||||
|
if unlikely(body.is_empty() || head.len() > USIZE_SIZE || tail.len() > USIZE_SIZE) {
|
||||||
|
return char_count_general_case(s.as_bytes());
|
||||||
|
}
|
||||||
|
|
||||||
let mut total = char_count_general_case(head) + char_count_general_case(tail);
|
let mut total = char_count_general_case(head) + char_count_general_case(tail);
|
||||||
// Split `body` into `CHUNK_SIZE` chunks to reduce the frequency with which
|
// Split `body` into `CHUNK_SIZE` chunks to reduce the frequency with which
|
||||||
// we call `sum_bytes_in_usize`.
|
// we call `sum_bytes_in_usize`.
|
||||||
@ -45,11 +75,8 @@ pub(super) fn count_chars(s: &str) -> usize {
|
|||||||
// We accumulate intermediate sums in `counts`, where each byte contains
|
// We accumulate intermediate sums in `counts`, where each byte contains
|
||||||
// a subset of the sum of this chunk, like a `[u8; size_of::<usize>()]`.
|
// a subset of the sum of this chunk, like a `[u8; size_of::<usize>()]`.
|
||||||
let mut counts = 0;
|
let mut counts = 0;
|
||||||
let unrolled_chunks = chunk.array_chunks::<UNROLL_INNER>();
|
|
||||||
// If there's a remainder (which can only happen for the last item in
|
let (unrolled_chunks, remainder) = chunk.as_chunks::<UNROLL_INNER>();
|
||||||
// `chunks`, because `CHUNK_SIZE % UNROLL_INNER == 0`), then we need to
|
|
||||||
// account for that (although we don't use it until later).
|
|
||||||
let remainder = unrolled_chunks.remainder();
|
|
||||||
for unrolled in unrolled_chunks {
|
for unrolled in unrolled_chunks {
|
||||||
for &word in unrolled {
|
for &word in unrolled {
|
||||||
// Because `CHUNK_SIZE` is < 256, this addition can't cause the
|
// Because `CHUNK_SIZE` is < 256, this addition can't cause the
|
||||||
@ -85,8 +112,8 @@ pub(super) fn count_chars(s: &str) -> usize {
|
|||||||
// true)
|
// true)
|
||||||
#[inline]
|
#[inline]
|
||||||
fn contains_non_continuation_byte(w: usize) -> usize {
|
fn contains_non_continuation_byte(w: usize) -> usize {
|
||||||
let lsb = 0x0101_0101_0101_0101u64 as usize;
|
const LSB: usize = 0x0101_0101_0101_0101u64 as usize;
|
||||||
((!w >> 7) | (w >> 6)) & lsb
|
((!w >> 7) | (w >> 6)) & LSB
|
||||||
}
|
}
|
||||||
|
|
||||||
// Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but
|
// Morally equivalent to `values.to_ne_bytes().into_iter().sum::<usize>()`, but
|
||||||
@ -97,7 +124,7 @@ fn sum_bytes_in_usize(values: usize) -> usize {
|
|||||||
const SKIP_BYTES: usize = 0x00ff_00ff_00ff_00ff_u64 as usize;
|
const SKIP_BYTES: usize = 0x00ff_00ff_00ff_00ff_u64 as usize;
|
||||||
|
|
||||||
let pair_sum: usize = (values & SKIP_BYTES) + ((values >> 8) & SKIP_BYTES);
|
let pair_sum: usize = (values & SKIP_BYTES) + ((values >> 8) & SKIP_BYTES);
|
||||||
pair_sum.wrapping_mul(LSB_SHORTS) >> ((core::mem::size_of::<usize>() - 2) * 8)
|
pair_sum.wrapping_mul(LSB_SHORTS) >> ((USIZE_SIZE - 2) * 8)
|
||||||
}
|
}
|
||||||
|
|
||||||
// This is the most direct implementation of the concept of "count the number of
|
// This is the most direct implementation of the concept of "count the number of
|
||||||
@ -105,12 +132,5 @@ fn sum_bytes_in_usize(values: usize) -> usize {
|
|||||||
// head and tail of the input string (the first and last item in the tuple
|
// head and tail of the input string (the first and last item in the tuple
|
||||||
// returned by `slice::align_to`).
|
// returned by `slice::align_to`).
|
||||||
fn char_count_general_case(s: &[u8]) -> usize {
|
fn char_count_general_case(s: &[u8]) -> usize {
|
||||||
const CONT_MASK_U8: u8 = 0b0011_1111;
|
s.iter().filter(|&&byte| !super::validations::utf8_is_cont_byte(byte)).count()
|
||||||
const TAG_CONT_U8: u8 = 0b1000_0000;
|
|
||||||
let mut leads = 0;
|
|
||||||
for &byte in s {
|
|
||||||
let is_lead = (byte & !CONT_MASK_U8) != TAG_CONT_U8;
|
|
||||||
leads += is_lead as usize;
|
|
||||||
}
|
|
||||||
leads
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user