rust/library
Matthias Krüger 8b8adfd05d
Rollup merge of #120308 - utkarshgupta137:duration-opt, r=m-ou-se
core/time: avoid divisions in Duration::new

In our (decently large) code base, we use `SystemTime::UNIX_EPOCH.elapsed()` in a lot of places & often in a loop or in the hot path. On [Unix](https://github.com/rust-lang/rust/blob/1.75.0/library/std/src/sys/unix/time.rs#L153-L162) at least, it seems we do calculations before hand to ensure that nanos is within the valid range, yet `Duration::new()` still checks it again, using 2 divisions. It seems like adding a branch can make this function 33% faster on ARM64 in the cases where nanos is already in the valid range & seems to have no effect in the other case.

Benchmarks:
M1 Pro (14-inch base model):
```
duration/current/checked
                        time:   [1.5945 ns 1.6167 ns 1.6407 ns]
Found 5 outliers among 100 measurements (5.00%)
  2 (2.00%) high mild
  3 (3.00%) high severe
duration/current/unchecked
                        time:   [1.5941 ns 1.6051 ns 1.6179 ns]
Found 2 outliers among 100 measurements (2.00%)
  1 (1.00%) high mild
  1 (1.00%) high severe

duration/branched/checked
                        time:   [1.1997 ns 1.2048 ns 1.2104 ns]
Found 8 outliers among 100 measurements (8.00%)
  4 (4.00%) high mild
  4 (4.00%) high severe
duration/branched/unchecked
                        time:   [1.5881 ns 1.5957 ns 1.6039 ns]
Found 6 outliers among 100 measurements (6.00%)
  3 (3.00%) high mild
  3 (3.00%) high severe
```
EC2 c7gd.16xlarge (Graviton 3):
```
duration/current/checked
                        time:   [2.7996 ns 2.8000 ns 2.8003 ns]
Found 5 outliers among 100 measurements (5.00%)
  2 (2.00%) low severe
  3 (3.00%) low mild
duration/current/unchecked
                        time:   [2.9922 ns 2.9925 ns 2.9928 ns]
Found 7 outliers among 100 measurements (7.00%)
  4 (4.00%) low severe
  1 (1.00%) low mild
  2 (2.00%) high mild

duration/branched/checked
                        time:   [2.0830 ns 2.0843 ns 2.0857 ns]
Found 3 outliers among 100 measurements (3.00%)
  1 (1.00%) low severe
  1 (1.00%) low mild
  1 (1.00%) high mild
duration/branched/unchecked
                        time:   [2.9879 ns 2.9886 ns 2.9893 ns]
Found 5 outliers among 100 measurements (5.00%)
  3 (3.00%) low severe
  2 (2.00%) low mild
```
EC2 r7iz.16xlarge (Intel Xeon Scalable-based (Sapphire Rapids)):
```
duration/current/checked
                        time:   [980.60 ps 980.79 ps 980.99 ps]
Found 10 outliers among 100 measurements (10.00%)
  4 (4.00%) low severe
  2 (2.00%) low mild
  3 (3.00%) high mild
  1 (1.00%) high severe
duration/current/unchecked
                        time:   [979.53 ps 979.74 ps 979.96 ps]
Found 6 outliers among 100 measurements (6.00%)
  2 (2.00%) low severe
  1 (1.00%) low mild
  2 (2.00%) high mild
  1 (1.00%) high severe

duration/branched/checked
                        time:   [938.72 ps 938.96 ps 939.22 ps]
Found 4 outliers among 100 measurements (4.00%)
  1 (1.00%) low mild
  1 (1.00%) high mild
  2 (2.00%) high severe
duration/branched/unchecked
                        time:   [1.0103 ns 1.0110 ns 1.0118 ns]
Found 10 outliers among 100 measurements (10.00%)
  2 (2.00%) low mild
  7 (7.00%) high mild
  1 (1.00%) high severe
```

Bench code (ran using stable 1.75.0 & criterion latest 0.5.1):
I couldn't find any benches for `Duration` in this repo, so I just copied the relevant types & recreated it.
```rust
use criterion::{black_box, criterion_group, criterion_main, Criterion};

pub fn duration_bench(c: &mut Criterion) {
    const NANOS_PER_SEC: u32 = 1_000_000_000;

    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
    #[repr(transparent)]
    struct Nanoseconds(u32);

    impl Default for Nanoseconds {
        #[inline]
        fn default() -> Self {
            // SAFETY: 0 is within the valid range
            unsafe { Nanoseconds(0) }
        }
    }

    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
    pub struct Duration {
        secs: u64,
        nanos: Nanoseconds, // Always 0 <= nanos < NANOS_PER_SEC
    }

    impl Duration {
        #[inline]
        pub const fn new_current(secs: u64, nanos: u32) -> Duration {
            let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) {
                Some(secs) => secs,
                None => panic!("overflow in Duration::new"),
            };
            let nanos = nanos % NANOS_PER_SEC;
            // SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range
            Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
        }

        #[inline]
        pub const fn new_branched(secs: u64, nanos: u32) -> Duration {
            if nanos < NANOS_PER_SEC {
                // SAFETY: nanos < NANOS_PER_SEC, therefore nanos is within the valid range
                Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
            } else {
                let secs = match secs.checked_add((nanos / NANOS_PER_SEC) as u64) {
                    Some(secs) => secs,
                    None => panic!("overflow in Duration::new"),
                };
                let nanos = nanos % NANOS_PER_SEC;
                // SAFETY: nanos % NANOS_PER_SEC < NANOS_PER_SEC, therefore nanos is within the valid range
                Duration { secs, nanos: unsafe { Nanoseconds(nanos) } }
            }
        }
    }

    let mut group = c.benchmark_group("duration/current");
    group.bench_function("checked", |b| {
        b.iter(|| black_box(Duration::new_current(black_box(1_000_000_000), black_box(1_000_000))));
    });
    group.bench_function("unchecked", |b| {
        b.iter(|| {
            black_box(Duration::new_current(black_box(1_000_000_000), black_box(2_000_000_000)))
        });
    });
    drop(group);
    let mut group = c.benchmark_group("duration/branched");
    group.bench_function("checked", |b| {
        b.iter(|| {
            black_box(Duration::new_branched(black_box(1_000_000_000), black_box(1_000_000)))
        });
    });
    group.bench_function("unchecked", |b| {
        b.iter(|| {
            black_box(Duration::new_branched(black_box(1_000_000_000), black_box(2_000_000_000)))
        });
    });
}

criterion_group!(duration_benches, duration_bench);
criterion_main!(duration_benches);
```
2024-02-09 14:41:49 +01:00
..
alloc Rollup merge of #118960 - tvallotton:local_waker, r=Mark-Simulacrum 2024-02-05 11:07:26 +01:00
backtrace@6145fe6bac Update backtrace submodule 2023-11-21 16:33:42 +01:00
core Rollup merge of #120308 - utkarshgupta137:duration-opt, r=m-ou-se 2024-02-09 14:41:49 +01:00
panic_abort rustc: implement support for riscv32im_risc0_zkvm_elf 2024-01-22 10:07:36 -08:00
panic_unwind Update test for E0796 and static_mut_ref lint 2024-01-07 17:29:25 +03:00
portable-simd Disable conversions between portable_simd and stdarch on big-endian ARM 2024-01-30 04:47:01 +00:00
proc_macro Switch OwnedStore handle count to AtomicU32 2024-01-31 13:36:37 +00:00
profiler_builtins Bump cfg(bootstrap) 2023-08-23 20:05:14 -04:00
rtstartup library: Fix warnings in rtstartup 2024-01-06 01:32:03 +03:00
rustc-std-workspace-alloc Replace libstd, libcore, liballoc in line comments. 2022-12-30 14:00:42 +01:00
rustc-std-workspace-core
rustc-std-workspace-std
std Auto merge of #120238 - joboet:always_confirm_lock_success, r=Mark-Simulacrum 2024-02-09 10:27:16 +00:00
stdarch@5ef6eb42bd Update stdarch submodule 2024-01-30 03:33:12 +00:00
sysroot Expose compiler-builtins-weak-intrinsics feature for -Zbuild-std 2023-06-23 11:15:34 +01:00
test Actually abort in panic-abort-tests 2024-01-30 18:19:49 -08:00
unwind Enable Static Builds for FreeBSD 2024-01-11 15:26:16 +00:00