2022-10-23 17:16:49 +00:00
|
|
|
use core::borrow::Borrow;
|
2017-02-03 23:04:22 +00:00
|
|
|
use core::iter::*;
|
2022-10-17 20:47:39 +00:00
|
|
|
use core::mem;
|
|
|
|
use core::num::Wrapping;
|
2019-12-07 04:18:12 +00:00
|
|
|
use test::{black_box, Bencher};
|
2017-02-03 23:04:22 +00:00
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_rposition(b: &mut Bencher) {
|
|
|
|
let it: Vec<usize> = (0..300).collect();
|
|
|
|
b.iter(|| {
|
|
|
|
it.iter().rposition(|&x| x <= 150);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_skip_while(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
let it = 0..100;
|
|
|
|
let mut sum = 0;
|
2019-12-07 04:18:12 +00:00
|
|
|
it.skip_while(|&x| {
|
|
|
|
sum += x;
|
|
|
|
sum < 4000
|
|
|
|
})
|
|
|
|
.all(|_| true);
|
2017-02-03 23:04:22 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_multiple_take(b: &mut Bencher) {
|
|
|
|
let mut it = (0..42).cycle();
|
|
|
|
b.iter(|| {
|
|
|
|
let n = it.next().unwrap();
|
|
|
|
for _ in 0..n {
|
|
|
|
it.clone().take(it.next().unwrap()).all(|_| true);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2019-12-07 04:18:12 +00:00
|
|
|
/// Cheap deterministic key function: multiplies by 31 and reduces mod 127,
/// so consecutive inputs map to non-monotonic keys.
fn scatter(x: i32) -> i32 {
    let spread = x * 31;
    spread % 127
}
|
2017-02-03 23:04:22 +00:00
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_max_by_key(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
let it = 0..100;
|
2019-03-12 16:52:10 +00:00
|
|
|
it.map(black_box).max_by_key(|&x| scatter(x))
|
2017-02-03 23:04:22 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2021-06-23 20:26:46 +00:00
|
|
|
// https://www.reddit.com/r/rust/comments/31syce/using_iterators_to_find_the_index_of_the_min_or/
|
2017-02-03 23:04:22 +00:00
|
|
|
#[bench]
|
|
|
|
fn bench_max_by_key2(b: &mut Bencher) {
|
|
|
|
fn max_index_iter(array: &[i32]) -> usize {
|
|
|
|
array.iter().enumerate().max_by_key(|&(_, item)| item).unwrap().0
|
|
|
|
}
|
|
|
|
|
|
|
|
let mut data = vec![0; 1638];
|
|
|
|
data[514] = 9999;
|
|
|
|
|
|
|
|
b.iter(|| max_index_iter(&data));
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_max(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
let it = 0..100;
|
2019-03-12 16:52:10 +00:00
|
|
|
it.map(black_box).map(scatter).max()
|
2017-02-03 23:04:22 +00:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Copies bytes from `xs` into `ys` element-wise via a zipped iterator;
/// stops at the length of the shorter slice.
pub fn copy_zip(xs: &[u8], ys: &mut [u8]) {
    for (dst, src) in ys.iter_mut().zip(xs) {
        *dst = *src;
    }
}
|
|
|
|
|
|
|
|
/// Adds each element of `xs` into the corresponding element of `ys`
/// via a zipped iterator; stops at the length of the shorter slice.
pub fn add_zip(xs: &[f32], ys: &mut [f32]) {
    for (acc, addend) in ys.iter_mut().zip(xs) {
        *acc += *addend;
    }
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_zip_copy(b: &mut Bencher) {
|
|
|
|
let source = vec![0u8; 16 * 1024];
|
|
|
|
let mut dst = black_box(vec![0u8; 16 * 1024]);
|
2019-12-07 04:18:12 +00:00
|
|
|
b.iter(|| copy_zip(&source, &mut dst))
|
2017-02-03 23:04:22 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_zip_add(b: &mut Bencher) {
|
|
|
|
let source = vec![1.; 16 * 1024];
|
|
|
|
let mut dst = vec![0.; 16 * 1024];
|
2019-12-07 04:18:12 +00:00
|
|
|
b.iter(|| add_zip(&source, &mut dst));
|
2017-02-03 23:04:22 +00:00
|
|
|
}
|
2017-06-21 20:22:27 +00:00
|
|
|
|
|
|
|
/// `Iterator::for_each` implemented as a plain loop.
///
/// External iteration only: each element is pulled with `next()`, which is
/// exactly what a `for` loop desugars to, so no specialized `fold` can help.
fn for_each_loop<I, F>(iter: I, mut f: F)
where
    I: Iterator,
    F: FnMut(I::Item),
{
    let mut iter = iter;
    while let Some(item) = iter.next() {
        f(item);
    }
}
|
|
|
|
|
|
|
|
/// `Iterator::for_each` implemented with `fold` for internal iteration.
/// (except when `by_ref()` effectively disables that optimization.)
fn for_each_fold<I, F>(iter: I, mut f: F)
where
    I: Iterator,
    F: FnMut(I::Item),
{
    // Folding over `()` lets specialized `fold` implementations (e.g.
    // `Chain`'s) drive the iteration internally — that internal-iteration
    // path is precisely what this helper exists to benchmark.
    iter.fold((), move |(), item| f(item));
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_for_each_chain_loop(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
let mut acc = 0;
|
|
|
|
let iter = (0i64..1000000).chain(0..1000000).map(black_box);
|
|
|
|
for_each_loop(iter, |x| acc += x);
|
|
|
|
acc
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_for_each_chain_fold(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
let mut acc = 0;
|
|
|
|
let iter = (0i64..1000000).chain(0..1000000).map(black_box);
|
|
|
|
for_each_fold(iter, |x| acc += x);
|
|
|
|
acc
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_for_each_chain_ref_fold(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
let mut acc = 0;
|
|
|
|
let mut iter = (0i64..1000000).chain(0..1000000).map(black_box);
|
|
|
|
for_each_fold(iter.by_ref(), |x| acc += x);
|
|
|
|
acc
|
|
|
|
});
|
|
|
|
}
|
Customize `<FlatMap as Iterator>::fold`
`FlatMap` can use internal iteration for its `fold`, which shows a
performance advantage in the new benchmarks:
test iter::bench_flat_map_chain_ref_sum ... bench: 4,354,111 ns/iter (+/- 108,871)
test iter::bench_flat_map_chain_sum ... bench: 468,167 ns/iter (+/- 2,274)
test iter::bench_flat_map_ref_sum ... bench: 449,616 ns/iter (+/- 6,257)
test iter::bench_flat_map_sum ... bench: 348,010 ns/iter (+/- 1,227)
... where the "ref" benches are using `by_ref()` that isn't optimized.
So this change shows a decent advantage on its own, but much more when
combined with a `chain` iterator that also optimizes `fold`.
2017-09-14 20:51:32 +00:00
|
|
|
|
Add more custom folding to `core::iter` adaptors
Many of the iterator adaptors will perform faster folds if they forward
to their inner iterator's folds, especially for inner types like `Chain`
which are optimized too. The following types are newly specialized:
| Type | `fold` | `rfold` |
| ----------- | ------ | ------- |
| `Enumerate` | ✓ | ✓ |
| `Filter` | ✓ | ✓ |
| `FilterMap` | ✓ | ✓ |
| `FlatMap` | exists | ✓ |
| `Fuse` | ✓ | ✓ |
| `Inspect` | ✓ | ✓ |
| `Peekable` | ✓ | N/A¹ |
| `Skip` | ✓ | N/A² |
| `SkipWhile` | ✓ | N/A¹ |
¹ not a `DoubleEndedIterator`
² `Skip::next_back` doesn't pull skipped items at all, but this couldn't
be avoided if `Skip::rfold` were to call its inner iterator's `rfold`.
Benchmarks
----------
In the following results, plain `_sum` computes the sum of a million
integers -- note that `sum()` is implemented with `fold()`. The
`_ref_sum` variants do the same on a `by_ref()` iterator, which is
limited to calling `next()` one by one, without specialized `fold`.
The `chain` variants perform the same tests on two iterators chained
together, to show a greater benefit of forwarding `fold` internally.
test iter::bench_enumerate_chain_ref_sum ... bench: 2,216,264 ns/iter (+/- 29,228)
test iter::bench_enumerate_chain_sum ... bench: 922,380 ns/iter (+/- 2,676)
test iter::bench_enumerate_ref_sum ... bench: 476,094 ns/iter (+/- 7,110)
test iter::bench_enumerate_sum ... bench: 476,438 ns/iter (+/- 3,334)
test iter::bench_filter_chain_ref_sum ... bench: 2,266,095 ns/iter (+/- 6,051)
test iter::bench_filter_chain_sum ... bench: 745,594 ns/iter (+/- 2,013)
test iter::bench_filter_ref_sum ... bench: 889,696 ns/iter (+/- 1,188)
test iter::bench_filter_sum ... bench: 667,325 ns/iter (+/- 1,894)
test iter::bench_filter_map_chain_ref_sum ... bench: 2,259,195 ns/iter (+/- 353,440)
test iter::bench_filter_map_chain_sum ... bench: 1,223,280 ns/iter (+/- 1,972)
test iter::bench_filter_map_ref_sum ... bench: 611,607 ns/iter (+/- 2,507)
test iter::bench_filter_map_sum ... bench: 611,610 ns/iter (+/- 472)
test iter::bench_fuse_chain_ref_sum ... bench: 2,246,106 ns/iter (+/- 22,395)
test iter::bench_fuse_chain_sum ... bench: 634,887 ns/iter (+/- 1,341)
test iter::bench_fuse_ref_sum ... bench: 444,816 ns/iter (+/- 1,748)
test iter::bench_fuse_sum ... bench: 316,954 ns/iter (+/- 2,616)
test iter::bench_inspect_chain_ref_sum ... bench: 2,245,431 ns/iter (+/- 21,371)
test iter::bench_inspect_chain_sum ... bench: 631,645 ns/iter (+/- 4,928)
test iter::bench_inspect_ref_sum ... bench: 317,437 ns/iter (+/- 702)
test iter::bench_inspect_sum ... bench: 315,942 ns/iter (+/- 4,320)
test iter::bench_peekable_chain_ref_sum ... bench: 2,243,585 ns/iter (+/- 12,186)
test iter::bench_peekable_chain_sum ... bench: 634,848 ns/iter (+/- 1,712)
test iter::bench_peekable_ref_sum ... bench: 444,808 ns/iter (+/- 480)
test iter::bench_peekable_sum ... bench: 317,133 ns/iter (+/- 3,309)
test iter::bench_skip_chain_ref_sum ... bench: 1,778,734 ns/iter (+/- 2,198)
test iter::bench_skip_chain_sum ... bench: 761,850 ns/iter (+/- 1,645)
test iter::bench_skip_ref_sum ... bench: 478,207 ns/iter (+/- 119,252)
test iter::bench_skip_sum ... bench: 315,614 ns/iter (+/- 3,054)
test iter::bench_skip_while_chain_ref_sum ... bench: 2,486,370 ns/iter (+/- 4,845)
test iter::bench_skip_while_chain_sum ... bench: 633,915 ns/iter (+/- 5,892)
test iter::bench_skip_while_ref_sum ... bench: 666,926 ns/iter (+/- 804)
test iter::bench_skip_while_sum ... bench: 444,405 ns/iter (+/- 571)
2017-09-26 03:53:08 +00:00
|
|
|
/// Helper to benchmark `sum` for iterators taken by value which
/// can optimize `fold`, and by reference which cannot.
macro_rules! bench_sums {
    ($bench_sum:ident, $bench_ref_sum:ident, $iter:expr) => {
        #[bench]
        fn $bench_sum(b: &mut Bencher) {
            // By value: `sum()` forwards to the iterator's (possibly
            // specialized) `fold`, enabling internal iteration.
            b.iter(|| -> i64 { $iter.map(black_box).sum() });
        }

        #[bench]
        fn $bench_ref_sum(b: &mut Bencher) {
            // By reference: `by_ref()` erases access to the concrete type's
            // `fold`, so iteration falls back to one `next()` per item.
            b.iter(|| -> i64 { $iter.map(black_box).by_ref().sum() });
        }
    };
}
|
|
|
|
|
Add more custom folding to `core::iter` adaptors
Many of the iterator adaptors will perform faster folds if they forward
to their inner iterator's folds, especially for inner types like `Chain`
which are optimized too. The following types are newly specialized:
| Type | `fold` | `rfold` |
| ----------- | ------ | ------- |
| `Enumerate` | ✓ | ✓ |
| `Filter` | ✓ | ✓ |
| `FilterMap` | ✓ | ✓ |
| `FlatMap` | exists | ✓ |
| `Fuse` | ✓ | ✓ |
| `Inspect` | ✓ | ✓ |
| `Peekable` | ✓ | N/A¹ |
| `Skip` | ✓ | N/A² |
| `SkipWhile` | ✓ | N/A¹ |
¹ not a `DoubleEndedIterator`
² `Skip::next_back` doesn't pull skipped items at all, but this couldn't
be avoided if `Skip::rfold` were to call its inner iterator's `rfold`.
Benchmarks
----------
In the following results, plain `_sum` computes the sum of a million
integers -- note that `sum()` is implemented with `fold()`. The
`_ref_sum` variants do the same on a `by_ref()` iterator, which is
limited to calling `next()` one by one, without specialized `fold`.
The `chain` variants perform the same tests on two iterators chained
together, to show a greater benefit of forwarding `fold` internally.
test iter::bench_enumerate_chain_ref_sum ... bench: 2,216,264 ns/iter (+/- 29,228)
test iter::bench_enumerate_chain_sum ... bench: 922,380 ns/iter (+/- 2,676)
test iter::bench_enumerate_ref_sum ... bench: 476,094 ns/iter (+/- 7,110)
test iter::bench_enumerate_sum ... bench: 476,438 ns/iter (+/- 3,334)
test iter::bench_filter_chain_ref_sum ... bench: 2,266,095 ns/iter (+/- 6,051)
test iter::bench_filter_chain_sum ... bench: 745,594 ns/iter (+/- 2,013)
test iter::bench_filter_ref_sum ... bench: 889,696 ns/iter (+/- 1,188)
test iter::bench_filter_sum ... bench: 667,325 ns/iter (+/- 1,894)
test iter::bench_filter_map_chain_ref_sum ... bench: 2,259,195 ns/iter (+/- 353,440)
test iter::bench_filter_map_chain_sum ... bench: 1,223,280 ns/iter (+/- 1,972)
test iter::bench_filter_map_ref_sum ... bench: 611,607 ns/iter (+/- 2,507)
test iter::bench_filter_map_sum ... bench: 611,610 ns/iter (+/- 472)
test iter::bench_fuse_chain_ref_sum ... bench: 2,246,106 ns/iter (+/- 22,395)
test iter::bench_fuse_chain_sum ... bench: 634,887 ns/iter (+/- 1,341)
test iter::bench_fuse_ref_sum ... bench: 444,816 ns/iter (+/- 1,748)
test iter::bench_fuse_sum ... bench: 316,954 ns/iter (+/- 2,616)
test iter::bench_inspect_chain_ref_sum ... bench: 2,245,431 ns/iter (+/- 21,371)
test iter::bench_inspect_chain_sum ... bench: 631,645 ns/iter (+/- 4,928)
test iter::bench_inspect_ref_sum ... bench: 317,437 ns/iter (+/- 702)
test iter::bench_inspect_sum ... bench: 315,942 ns/iter (+/- 4,320)
test iter::bench_peekable_chain_ref_sum ... bench: 2,243,585 ns/iter (+/- 12,186)
test iter::bench_peekable_chain_sum ... bench: 634,848 ns/iter (+/- 1,712)
test iter::bench_peekable_ref_sum ... bench: 444,808 ns/iter (+/- 480)
test iter::bench_peekable_sum ... bench: 317,133 ns/iter (+/- 3,309)
test iter::bench_skip_chain_ref_sum ... bench: 1,778,734 ns/iter (+/- 2,198)
test iter::bench_skip_chain_sum ... bench: 761,850 ns/iter (+/- 1,645)
test iter::bench_skip_ref_sum ... bench: 478,207 ns/iter (+/- 119,252)
test iter::bench_skip_sum ... bench: 315,614 ns/iter (+/- 3,054)
test iter::bench_skip_while_chain_ref_sum ... bench: 2,486,370 ns/iter (+/- 4,845)
test iter::bench_skip_while_chain_sum ... bench: 633,915 ns/iter (+/- 5,892)
test iter::bench_skip_while_ref_sum ... bench: 666,926 ns/iter (+/- 804)
test iter::bench_skip_while_sum ... bench: 444,405 ns/iter (+/- 571)
2017-09-26 03:53:08 +00:00
|
|
|
// FlatMap: 1000 outer items, each expanding to a 1000-element range
// (one million items total).
bench_sums! {
    bench_flat_map_sum,
    bench_flat_map_ref_sum,
    (0i64..1000).flat_map(|x| x..x+1000)
}
|
|
|
|
|
Add more custom folding to `core::iter` adaptors
Many of the iterator adaptors will perform faster folds if they forward
to their inner iterator's folds, especially for inner types like `Chain`
which are optimized too. The following types are newly specialized:
| Type | `fold` | `rfold` |
| ----------- | ------ | ------- |
| `Enumerate` | ✓ | ✓ |
| `Filter` | ✓ | ✓ |
| `FilterMap` | ✓ | ✓ |
| `FlatMap` | exists | ✓ |
| `Fuse` | ✓ | ✓ |
| `Inspect` | ✓ | ✓ |
| `Peekable` | ✓ | N/A¹ |
| `Skip` | ✓ | N/A² |
| `SkipWhile` | ✓ | N/A¹ |
¹ not a `DoubleEndedIterator`
² `Skip::next_back` doesn't pull skipped items at all, but this couldn't
be avoided if `Skip::rfold` were to call its inner iterator's `rfold`.
Benchmarks
----------
In the following results, plain `_sum` computes the sum of a million
integers -- note that `sum()` is implemented with `fold()`. The
`_ref_sum` variants do the same on a `by_ref()` iterator, which is
limited to calling `next()` one by one, without specialized `fold`.
The `chain` variants perform the same tests on two iterators chained
together, to show a greater benefit of forwarding `fold` internally.
test iter::bench_enumerate_chain_ref_sum ... bench: 2,216,264 ns/iter (+/- 29,228)
test iter::bench_enumerate_chain_sum ... bench: 922,380 ns/iter (+/- 2,676)
test iter::bench_enumerate_ref_sum ... bench: 476,094 ns/iter (+/- 7,110)
test iter::bench_enumerate_sum ... bench: 476,438 ns/iter (+/- 3,334)
test iter::bench_filter_chain_ref_sum ... bench: 2,266,095 ns/iter (+/- 6,051)
test iter::bench_filter_chain_sum ... bench: 745,594 ns/iter (+/- 2,013)
test iter::bench_filter_ref_sum ... bench: 889,696 ns/iter (+/- 1,188)
test iter::bench_filter_sum ... bench: 667,325 ns/iter (+/- 1,894)
test iter::bench_filter_map_chain_ref_sum ... bench: 2,259,195 ns/iter (+/- 353,440)
test iter::bench_filter_map_chain_sum ... bench: 1,223,280 ns/iter (+/- 1,972)
test iter::bench_filter_map_ref_sum ... bench: 611,607 ns/iter (+/- 2,507)
test iter::bench_filter_map_sum ... bench: 611,610 ns/iter (+/- 472)
test iter::bench_fuse_chain_ref_sum ... bench: 2,246,106 ns/iter (+/- 22,395)
test iter::bench_fuse_chain_sum ... bench: 634,887 ns/iter (+/- 1,341)
test iter::bench_fuse_ref_sum ... bench: 444,816 ns/iter (+/- 1,748)
test iter::bench_fuse_sum ... bench: 316,954 ns/iter (+/- 2,616)
test iter::bench_inspect_chain_ref_sum ... bench: 2,245,431 ns/iter (+/- 21,371)
test iter::bench_inspect_chain_sum ... bench: 631,645 ns/iter (+/- 4,928)
test iter::bench_inspect_ref_sum ... bench: 317,437 ns/iter (+/- 702)
test iter::bench_inspect_sum ... bench: 315,942 ns/iter (+/- 4,320)
test iter::bench_peekable_chain_ref_sum ... bench: 2,243,585 ns/iter (+/- 12,186)
test iter::bench_peekable_chain_sum ... bench: 634,848 ns/iter (+/- 1,712)
test iter::bench_peekable_ref_sum ... bench: 444,808 ns/iter (+/- 480)
test iter::bench_peekable_sum ... bench: 317,133 ns/iter (+/- 3,309)
test iter::bench_skip_chain_ref_sum ... bench: 1,778,734 ns/iter (+/- 2,198)
test iter::bench_skip_chain_sum ... bench: 761,850 ns/iter (+/- 1,645)
test iter::bench_skip_ref_sum ... bench: 478,207 ns/iter (+/- 119,252)
test iter::bench_skip_sum ... bench: 315,614 ns/iter (+/- 3,054)
test iter::bench_skip_while_chain_ref_sum ... bench: 2,486,370 ns/iter (+/- 4,845)
test iter::bench_skip_while_chain_sum ... bench: 633,915 ns/iter (+/- 5,892)
test iter::bench_skip_while_ref_sum ... bench: 666,926 ns/iter (+/- 804)
test iter::bench_skip_while_sum ... bench: 444,405 ns/iter (+/- 571)
2017-09-26 03:53:08 +00:00
|
|
|
// FlatMap over `once(x).chain(once(x))`: each of a million items is
// duplicated through a tiny chained inner iterator.
bench_sums! {
    bench_flat_map_chain_sum,
    bench_flat_map_chain_ref_sum,
    (0i64..1000000).flat_map(|x| once(x).chain(once(x)))
}
|
|
|
|
|
Add more custom folding to `core::iter` adaptors
Many of the iterator adaptors will perform faster folds if they forward
to their inner iterator's folds, especially for inner types like `Chain`
which are optimized too. The following types are newly specialized:
| Type | `fold` | `rfold` |
| ----------- | ------ | ------- |
| `Enumerate` | ✓ | ✓ |
| `Filter` | ✓ | ✓ |
| `FilterMap` | ✓ | ✓ |
| `FlatMap` | exists | ✓ |
| `Fuse` | ✓ | ✓ |
| `Inspect` | ✓ | ✓ |
| `Peekable` | ✓ | N/A¹ |
| `Skip` | ✓ | N/A² |
| `SkipWhile` | ✓ | N/A¹ |
¹ not a `DoubleEndedIterator`
² `Skip::next_back` doesn't pull skipped items at all, but this couldn't
be avoided if `Skip::rfold` were to call its inner iterator's `rfold`.
Benchmarks
----------
In the following results, plain `_sum` computes the sum of a million
integers -- note that `sum()` is implemented with `fold()`. The
`_ref_sum` variants do the same on a `by_ref()` iterator, which is
limited to calling `next()` one by one, without specialized `fold`.
The `chain` variants perform the same tests on two iterators chained
together, to show a greater benefit of forwarding `fold` internally.
test iter::bench_enumerate_chain_ref_sum ... bench: 2,216,264 ns/iter (+/- 29,228)
test iter::bench_enumerate_chain_sum ... bench: 922,380 ns/iter (+/- 2,676)
test iter::bench_enumerate_ref_sum ... bench: 476,094 ns/iter (+/- 7,110)
test iter::bench_enumerate_sum ... bench: 476,438 ns/iter (+/- 3,334)
test iter::bench_filter_chain_ref_sum ... bench: 2,266,095 ns/iter (+/- 6,051)
test iter::bench_filter_chain_sum ... bench: 745,594 ns/iter (+/- 2,013)
test iter::bench_filter_ref_sum ... bench: 889,696 ns/iter (+/- 1,188)
test iter::bench_filter_sum ... bench: 667,325 ns/iter (+/- 1,894)
test iter::bench_filter_map_chain_ref_sum ... bench: 2,259,195 ns/iter (+/- 353,440)
test iter::bench_filter_map_chain_sum ... bench: 1,223,280 ns/iter (+/- 1,972)
test iter::bench_filter_map_ref_sum ... bench: 611,607 ns/iter (+/- 2,507)
test iter::bench_filter_map_sum ... bench: 611,610 ns/iter (+/- 472)
test iter::bench_fuse_chain_ref_sum ... bench: 2,246,106 ns/iter (+/- 22,395)
test iter::bench_fuse_chain_sum ... bench: 634,887 ns/iter (+/- 1,341)
test iter::bench_fuse_ref_sum ... bench: 444,816 ns/iter (+/- 1,748)
test iter::bench_fuse_sum ... bench: 316,954 ns/iter (+/- 2,616)
test iter::bench_inspect_chain_ref_sum ... bench: 2,245,431 ns/iter (+/- 21,371)
test iter::bench_inspect_chain_sum ... bench: 631,645 ns/iter (+/- 4,928)
test iter::bench_inspect_ref_sum ... bench: 317,437 ns/iter (+/- 702)
test iter::bench_inspect_sum ... bench: 315,942 ns/iter (+/- 4,320)
test iter::bench_peekable_chain_ref_sum ... bench: 2,243,585 ns/iter (+/- 12,186)
test iter::bench_peekable_chain_sum ... bench: 634,848 ns/iter (+/- 1,712)
test iter::bench_peekable_ref_sum ... bench: 444,808 ns/iter (+/- 480)
test iter::bench_peekable_sum ... bench: 317,133 ns/iter (+/- 3,309)
test iter::bench_skip_chain_ref_sum ... bench: 1,778,734 ns/iter (+/- 2,198)
test iter::bench_skip_chain_sum ... bench: 761,850 ns/iter (+/- 1,645)
test iter::bench_skip_ref_sum ... bench: 478,207 ns/iter (+/- 119,252)
test iter::bench_skip_sum ... bench: 315,614 ns/iter (+/- 3,054)
test iter::bench_skip_while_chain_ref_sum ... bench: 2,486,370 ns/iter (+/- 4,845)
test iter::bench_skip_while_chain_sum ... bench: 633,915 ns/iter (+/- 5,892)
test iter::bench_skip_while_ref_sum ... bench: 666,926 ns/iter (+/- 804)
test iter::bench_skip_while_sum ... bench: 444,405 ns/iter (+/- 571)
2017-09-26 03:53:08 +00:00
|
|
|
// Enumerate: weight each item by its index before summing.
bench_sums! {
    bench_enumerate_sum,
    bench_enumerate_ref_sum,
    (0i64..1000000).enumerate().map(|(i, x)| x * i as i64)
}
|
|
|
|
|
|
|
|
// Enumerate over two chained ranges: same index weighting, with `Chain`'s
// specialized `fold` underneath.
bench_sums! {
    bench_enumerate_chain_sum,
    bench_enumerate_chain_ref_sum,
    (0i64..1000000).chain(0..1000000).enumerate().map(|(i, x)| x * i as i64)
}
|
|
|
|
|
|
|
|
// Filter: keep every third item of a million-element range.
bench_sums! {
    bench_filter_sum,
    bench_filter_ref_sum,
    (0i64..1000000).filter(|x| x % 3 == 0)
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_filter_chain_sum,
|
|
|
|
bench_filter_chain_ref_sum,
|
2019-02-27 10:46:37 +00:00
|
|
|
(0i64..1000000).chain(0..1000000).filter(|x| x % 3 == 0)
|
Add more custom folding to `core::iter` adaptors
Many of the iterator adaptors will perform faster folds if they forward
to their inner iterator's folds, especially for inner types like `Chain`
which are optimized too. The following types are newly specialized:
| Type | `fold` | `rfold` |
| ----------- | ------ | ------- |
| `Enumerate` | ✓ | ✓ |
| `Filter` | ✓ | ✓ |
| `FilterMap` | ✓ | ✓ |
| `FlatMap` | exists | ✓ |
| `Fuse` | ✓ | ✓ |
| `Inspect` | ✓ | ✓ |
| `Peekable` | ✓ | N/A¹ |
| `Skip` | ✓ | N/A² |
| `SkipWhile` | ✓ | N/A¹ |
¹ not a `DoubleEndedIterator`
² `Skip::next_back` doesn't pull skipped items at all, but this couldn't
be avoided if `Skip::rfold` were to call its inner iterator's `rfold`.
Benchmarks
----------
In the following results, plain `_sum` computes the sum of a million
integers -- note that `sum()` is implemented with `fold()`. The
`_ref_sum` variants do the same on a `by_ref()` iterator, which is
limited to calling `next()` one by one, without specialized `fold`.
The `chain` variants perform the same tests on two iterators chained
together, to show a greater benefit of forwarding `fold` internally.
test iter::bench_enumerate_chain_ref_sum ... bench: 2,216,264 ns/iter (+/- 29,228)
test iter::bench_enumerate_chain_sum ... bench: 922,380 ns/iter (+/- 2,676)
test iter::bench_enumerate_ref_sum ... bench: 476,094 ns/iter (+/- 7,110)
test iter::bench_enumerate_sum ... bench: 476,438 ns/iter (+/- 3,334)
test iter::bench_filter_chain_ref_sum ... bench: 2,266,095 ns/iter (+/- 6,051)
test iter::bench_filter_chain_sum ... bench: 745,594 ns/iter (+/- 2,013)
test iter::bench_filter_ref_sum ... bench: 889,696 ns/iter (+/- 1,188)
test iter::bench_filter_sum ... bench: 667,325 ns/iter (+/- 1,894)
test iter::bench_filter_map_chain_ref_sum ... bench: 2,259,195 ns/iter (+/- 353,440)
test iter::bench_filter_map_chain_sum ... bench: 1,223,280 ns/iter (+/- 1,972)
test iter::bench_filter_map_ref_sum ... bench: 611,607 ns/iter (+/- 2,507)
test iter::bench_filter_map_sum ... bench: 611,610 ns/iter (+/- 472)
test iter::bench_fuse_chain_ref_sum ... bench: 2,246,106 ns/iter (+/- 22,395)
test iter::bench_fuse_chain_sum ... bench: 634,887 ns/iter (+/- 1,341)
test iter::bench_fuse_ref_sum ... bench: 444,816 ns/iter (+/- 1,748)
test iter::bench_fuse_sum ... bench: 316,954 ns/iter (+/- 2,616)
test iter::bench_inspect_chain_ref_sum ... bench: 2,245,431 ns/iter (+/- 21,371)
test iter::bench_inspect_chain_sum ... bench: 631,645 ns/iter (+/- 4,928)
test iter::bench_inspect_ref_sum ... bench: 317,437 ns/iter (+/- 702)
test iter::bench_inspect_sum ... bench: 315,942 ns/iter (+/- 4,320)
test iter::bench_peekable_chain_ref_sum ... bench: 2,243,585 ns/iter (+/- 12,186)
test iter::bench_peekable_chain_sum ... bench: 634,848 ns/iter (+/- 1,712)
test iter::bench_peekable_ref_sum ... bench: 444,808 ns/iter (+/- 480)
test iter::bench_peekable_sum ... bench: 317,133 ns/iter (+/- 3,309)
test iter::bench_skip_chain_ref_sum ... bench: 1,778,734 ns/iter (+/- 2,198)
test iter::bench_skip_chain_sum ... bench: 761,850 ns/iter (+/- 1,645)
test iter::bench_skip_ref_sum ... bench: 478,207 ns/iter (+/- 119,252)
test iter::bench_skip_sum ... bench: 315,614 ns/iter (+/- 3,054)
test iter::bench_skip_while_chain_ref_sum ... bench: 2,486,370 ns/iter (+/- 4,845)
test iter::bench_skip_while_chain_sum ... bench: 633,915 ns/iter (+/- 5,892)
test iter::bench_skip_while_ref_sum ... bench: 666,926 ns/iter (+/- 804)
test iter::bench_skip_while_sum ... bench: 444,405 ns/iter (+/- 571)
2017-09-26 03:53:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_filter_map_sum,
|
|
|
|
bench_filter_map_ref_sum,
|
|
|
|
(0i64..1000000).filter_map(|x| x.checked_mul(x))
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_filter_map_chain_sum,
|
|
|
|
bench_filter_map_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(0..1000000).filter_map(|x| x.checked_mul(x))
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_fuse_sum,
|
|
|
|
bench_fuse_ref_sum,
|
|
|
|
(0i64..1000000).fuse()
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_fuse_chain_sum,
|
|
|
|
bench_fuse_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(0..1000000).fuse()
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_inspect_sum,
|
|
|
|
bench_inspect_ref_sum,
|
|
|
|
(0i64..1000000).inspect(|_| {})
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_inspect_chain_sum,
|
|
|
|
bench_inspect_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(0..1000000).inspect(|_| {})
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_peekable_sum,
|
|
|
|
bench_peekable_ref_sum,
|
|
|
|
(0i64..1000000).peekable()
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_peekable_chain_sum,
|
|
|
|
bench_peekable_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(0..1000000).peekable()
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_skip_sum,
|
|
|
|
bench_skip_ref_sum,
|
|
|
|
(0i64..1000000).skip(1000)
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_skip_chain_sum,
|
|
|
|
bench_skip_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(0..1000000).skip(1000)
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_skip_while_sum,
|
|
|
|
bench_skip_while_ref_sum,
|
|
|
|
(0i64..1000000).skip_while(|&x| x < 1000)
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_skip_while_chain_sum,
|
|
|
|
bench_skip_while_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(0..1000000).skip_while(|&x| x < 1000)
|
Customize `<FlatMap as Iterator>::fold`
`FlatMap` can use internal iteration for its `fold`, which shows a
performance advantage in the new benchmarks:
test iter::bench_flat_map_chain_ref_sum ... bench: 4,354,111 ns/iter (+/- 108,871)
test iter::bench_flat_map_chain_sum ... bench: 468,167 ns/iter (+/- 2,274)
test iter::bench_flat_map_ref_sum ... bench: 449,616 ns/iter (+/- 6,257)
test iter::bench_flat_map_sum ... bench: 348,010 ns/iter (+/- 1,227)
... where the "ref" benches are using `by_ref()` that isn't optimized.
So this change shows a decent advantage on its own, but much more when
combined with a `chain` iterator that also optimizes `fold`.
2017-09-14 20:51:32 +00:00
|
|
|
}
|
2017-10-23 05:47:27 +00:00
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_take_while_chain_sum,
|
|
|
|
bench_take_while_chain_ref_sum,
|
|
|
|
(0i64..1000000).chain(1000000..).take_while(|&x| x < 1111111)
|
|
|
|
}
|
2018-03-01 09:57:25 +00:00
|
|
|
|
2018-12-08 11:09:44 +00:00
|
|
|
bench_sums! {
|
|
|
|
bench_cycle_take_sum,
|
|
|
|
bench_cycle_take_ref_sum,
|
2021-01-08 09:50:35 +00:00
|
|
|
(0..10000).cycle().take(1000000)
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_cycle_skip_take_sum,
|
|
|
|
bench_cycle_skip_take_ref_sum,
|
|
|
|
(0..100000).cycle().skip(1000000).take(1000000)
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_cycle_take_skip_sum,
|
|
|
|
bench_cycle_take_skip_ref_sum,
|
|
|
|
(0..100000).cycle().take(1000000).skip(100000)
|
|
|
|
}
|
|
|
|
|
|
|
|
bench_sums! {
|
|
|
|
bench_skip_cycle_skip_zip_add_sum,
|
|
|
|
bench_skip_cycle_skip_zip_add_ref_sum,
|
|
|
|
(0..100000).skip(100).cycle().skip(100)
|
|
|
|
.zip((0..100000).cycle().skip(10))
|
|
|
|
.map(|(a,b)| a+b)
|
|
|
|
.skip(100000)
|
|
|
|
.take(1000000)
|
2018-12-08 11:09:44 +00:00
|
|
|
}
|
|
|
|
|
2018-03-01 09:57:25 +00:00
|
|
|
// Checks whether Skip<Zip<A,B>> is as fast as Zip<Skip<A>, Skip<B>>, from
|
|
|
|
// https://users.rust-lang.org/t/performance-difference-between-iterator-zip-and-skip-order/15743
|
|
|
|
#[bench]
|
|
|
|
fn bench_zip_then_skip(b: &mut Bencher) {
|
|
|
|
let v: Vec<_> = (0..100_000).collect();
|
|
|
|
let t: Vec<_> = (0..100_000).collect();
|
|
|
|
|
|
|
|
b.iter(|| {
|
2019-12-07 04:18:12 +00:00
|
|
|
let s = v
|
|
|
|
.iter()
|
|
|
|
.zip(t.iter())
|
|
|
|
.skip(10000)
|
2018-03-01 09:57:25 +00:00
|
|
|
.take_while(|t| *t.0 < 10100)
|
|
|
|
.map(|(a, b)| *a + *b)
|
|
|
|
.sum::<u64>();
|
|
|
|
assert_eq!(s, 2009900);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
#[bench]
|
|
|
|
fn bench_skip_then_zip(b: &mut Bencher) {
|
|
|
|
let v: Vec<_> = (0..100_000).collect();
|
|
|
|
let t: Vec<_> = (0..100_000).collect();
|
|
|
|
|
|
|
|
b.iter(|| {
|
2019-12-07 04:18:12 +00:00
|
|
|
let s = v
|
|
|
|
.iter()
|
|
|
|
.skip(10000)
|
|
|
|
.zip(t.iter().skip(10000))
|
2018-03-01 09:57:25 +00:00
|
|
|
.take_while(|t| *t.0 < 10100)
|
|
|
|
.map(|(a, b)| *a + *b)
|
|
|
|
.sum::<u64>();
|
|
|
|
assert_eq!(s, 2009900);
|
|
|
|
});
|
|
|
|
}
|
2019-02-27 10:44:30 +00:00
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_filter_count(b: &mut Bencher) {
|
2019-12-07 04:18:12 +00:00
|
|
|
b.iter(|| (0i64..1000000).map(black_box).filter(|x| x % 3 == 0).count())
|
2019-02-27 10:44:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_filter_ref_count(b: &mut Bencher) {
|
2019-12-07 04:18:12 +00:00
|
|
|
b.iter(|| (0i64..1000000).map(black_box).by_ref().filter(|x| x % 3 == 0).count())
|
2019-02-27 10:44:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_filter_chain_count(b: &mut Bencher) {
|
2019-12-07 04:18:12 +00:00
|
|
|
b.iter(|| (0i64..1000000).chain(0..1000000).map(black_box).filter(|x| x % 3 == 0).count())
|
2019-02-27 10:44:30 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_filter_chain_ref_count(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
(0i64..1000000).chain(0..1000000).map(black_box).by_ref().filter(|x| x % 3 == 0).count()
|
|
|
|
})
|
2019-02-27 12:22:18 +00:00
|
|
|
}
|
2019-03-17 18:13:44 +00:00
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_partial_cmp(b: &mut Bencher) {
|
|
|
|
b.iter(|| (0..100000).map(black_box).partial_cmp((0..100000).map(black_box)))
|
|
|
|
}
|
|
|
|
|
2022-08-21 10:18:36 +00:00
|
|
|
#[bench]
|
|
|
|
fn bench_chain_partial_cmp(b: &mut Bencher) {
|
|
|
|
b.iter(|| {
|
|
|
|
(0..50000).chain(50000..100000).map(black_box).partial_cmp((0..100000).map(black_box))
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
2019-03-17 18:13:44 +00:00
|
|
|
#[bench]
|
|
|
|
fn bench_lt(b: &mut Bencher) {
|
|
|
|
b.iter(|| (0..100000).map(black_box).lt((0..100000).map(black_box)))
|
|
|
|
}
|
2022-01-05 01:28:30 +00:00
|
|
|
|
|
|
|
#[bench]
|
|
|
|
fn bench_trusted_random_access_adapters(b: &mut Bencher) {
|
|
|
|
let vec1: Vec<_> = (0usize..100000).collect();
|
|
|
|
let vec2 = black_box(vec1.clone());
|
|
|
|
b.iter(|| {
|
|
|
|
let mut iter = vec1
|
|
|
|
.iter()
|
|
|
|
.copied()
|
|
|
|
.enumerate()
|
|
|
|
.map(|(idx, e)| idx.wrapping_add(e))
|
|
|
|
.zip(vec2.iter().copied())
|
|
|
|
.map(|(a, b)| a.wrapping_add(b))
|
|
|
|
.fuse();
|
|
|
|
let mut acc: usize = 0;
|
|
|
|
let size = iter.size();
|
|
|
|
for i in 0..size {
|
|
|
|
// SAFETY: TRA requirements are satisfied by 0..size iteration and then dropping the
|
|
|
|
// iterator.
|
|
|
|
acc = acc.wrapping_add(unsafe { iter.__iterator_get_unchecked(i) });
|
|
|
|
}
|
|
|
|
acc
|
|
|
|
})
|
|
|
|
}
|
2022-10-17 20:47:39 +00:00
|
|
|
|
|
|
|
/// Exercises the iter::Copied specialization for slice::Iter
|
|
|
|
#[bench]
|
2022-10-23 17:16:49 +00:00
|
|
|
fn bench_copied_chunks(b: &mut Bencher) {
|
|
|
|
let v = vec![1u8; 1024];
|
|
|
|
|
|
|
|
b.iter(|| {
|
|
|
|
let mut iter = black_box(&v).iter().copied();
|
|
|
|
let mut acc = Wrapping(0);
|
|
|
|
// This uses a while-let loop to side-step the TRA specialization in ArrayChunks
|
|
|
|
while let Ok(chunk) = iter.next_chunk::<{ mem::size_of::<u64>() }>() {
|
|
|
|
let d = u64::from_ne_bytes(chunk);
|
|
|
|
acc += Wrapping(d.rotate_left(7).wrapping_add(1));
|
|
|
|
}
|
|
|
|
acc
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Exercises the TrustedRandomAccess specialization in ArrayChunks
|
|
|
|
#[bench]
|
|
|
|
fn bench_trusted_random_access_chunks(b: &mut Bencher) {
|
2022-10-17 20:47:39 +00:00
|
|
|
let v = vec![1u8; 1024];
|
|
|
|
|
|
|
|
b.iter(|| {
|
|
|
|
black_box(&v)
|
|
|
|
.iter()
|
2022-10-23 17:16:49 +00:00
|
|
|
// this shows that we're not relying on the slice::Iter specialization in Copied
|
|
|
|
.map(|b| *b.borrow())
|
2022-10-17 20:47:39 +00:00
|
|
|
.array_chunks::<{ mem::size_of::<u64>() }>()
|
|
|
|
.map(|ary| {
|
|
|
|
let d = u64::from_ne_bytes(ary);
|
|
|
|
Wrapping(d.rotate_left(7).wrapping_add(1))
|
|
|
|
})
|
|
|
|
.sum::<Wrapping<u64>>()
|
|
|
|
})
|
|
|
|
}
|