Auto merge of #3548 - RalfJung:many-seeds, r=RalfJung

make many-seeds a mode of ./miri run rather than a separate command Also parallelize it so we use all cores to try seeds at the same time. Fixes https://github.com/rust-lang/miri/issues/3509 by not alternating between different build modes (with/without dev-dependencies) all the time.
2024-11-26 00:34:06 +00:00 · 2024-05-04 07:04:10 +00:00 · 2024-05-04 07:04:10 +00:00 · 38715f714f
commit 38715f714f
parent 74701dcef2 f7a3aa9811
6 changed files with 134 additions and 96 deletions
--- a/src/tools/miri/ci/ci.sh
+++ b/src/tools/miri/ci/ci.sh
@ -13,15 +13,6 @@ function endgroup {

 begingroup "Building Miri"

-# Special Windows hacks
-if [ "$HOST_TARGET" = i686-pc-windows-msvc ]; then
-  # The $BASH variable is `/bin/bash` here, but that path does not actually work. There are some
-  # hacks in place somewhere to try to paper over this, but the hacks dont work either (see
-  # <https://github.com/rust-lang/miri/pull/3402>). So we hard-code the correct location for Github
-  # CI instead.
-  BASH="C:/Program Files/Git/usr/bin/bash"
-fi
-
 # Global configuration
 export RUSTFLAGS="-D warnings"
 export CARGO_INCREMENTAL=0
@ -71,10 +62,9 @@ function run_tests {
    time MIRIFLAGS="${MIRIFLAGS-} -O -Zmir-opt-level=4 -Cdebug-assertions=yes" MIRI_SKIP_UI_CHECKS=1 ./miri test -- tests/{pass,panic}
  fi
  if [ -n "${MANY_SEEDS-}" ]; then
-    # Also run some many-seeds tests. 64 seeds means this takes around a minute per test.
-    # (Need to invoke via explicit `bash -c` for Windows.)
+    # Also run some many-seeds tests.
    time for FILE in tests/many-seeds/*.rs; do
-      MIRI_SEEDS=$MANY_SEEDS ./miri many-seeds "$BASH" -c "./miri run '$FILE'"
+      ./miri run "--many-seeds=0..$MANY_SEEDS" "$FILE"
    done
  fi
  if [ -n "${TEST_BENCH-}" ]; then
@ -135,7 +125,7 @@ case $HOST_TARGET in
    GC_STRESS=1 MIR_OPT=1 MANY_SEEDS=64 TEST_BENCH=1 CARGO_MIRI_ENV=1 run_tests
    # Extra tier 1
    # With reduced many-seed count to avoid spending too much time on that.
-    # (All OSes are run with 64 seeds at least once though via the macOS runner.)
+    # (All OSes and ABIs are run with 64 seeds at least once though via the macOS runner.)
    MANY_SEEDS=16 MIRI_TEST_TARGET=i686-unknown-linux-gnu run_tests
    MANY_SEEDS=16 MIRI_TEST_TARGET=aarch64-unknown-linux-gnu run_tests
    MANY_SEEDS=16 MIRI_TEST_TARGET=x86_64-apple-darwin run_tests
@ -164,9 +154,9 @@ case $HOST_TARGET in
    ;;
  i686-pc-windows-msvc)
    # Host
-    # Only smoke-test `many-seeds`; 64 runs of just the scoped-thread-leak test take 15min here!
-    # See <https://github.com/rust-lang/miri/issues/3509>.
-    GC_STRESS=1 MIR_OPT=1 MANY_SEEDS=1 TEST_BENCH=1 run_tests
+    # With reduced many-seeds count as this is the slowest runner already.
+    # (The macOS runner checks windows-msvc with full many-seeds count.)
+    GC_STRESS=1 MIR_OPT=1 MANY_SEEDS=16 TEST_BENCH=1 run_tests
    # Extra tier 1
    # We really want to ensure a Linux target works on a Windows host,
    # and a 64bit target works on a 32bit host.
--- a/src/tools/miri/miri-script/Cargo.lock
+++ b/src/tools/miri/miri-script/Cargo.lock
@ -466,15 +466,15 @@ checksum = "0770833d60a970638e989b3fa9fd2bb1aaadcf88963d1659fd7d9990196ed2d6"

 [[package]]
 name = "xshell"
-version = "0.2.5"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ce2107fe03e558353b4c71ad7626d58ed82efaf56c54134228608893c77023ad"
+checksum = "6db0ab86eae739efd1b054a8d3d16041914030ac4e01cd1dca0cf252fd8b6437"
 dependencies = [
 "xshell-macros",
 ]

 [[package]]
 name = "xshell-macros"
-version = "0.2.5"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e2c411759b501fb9501aac2b1b2d287a6e93e5bdcf13c25306b23e1b716dd0e"
+checksum = "9d422e8e38ec76e2f06ee439ccc765e9c6a9638b9e7c9f2e8255e4d41e8bd852"
--- a/src/tools/miri/miri-script/Cargo.toml
+++ b/src/tools/miri/miri-script/Cargo.toml
@ -19,7 +19,7 @@ itertools = "0.11"
 path_macro = "1.0"
 shell-words = "1.1"
 anyhow = "1.0"
-xshell = "0.2"
+xshell = "0.2.6"
 rustc_version = "0.4"
 dunce = "1.0.4"
 directories = "5"
--- a/src/tools/miri/miri-script/src/commands.rs
+++ b/src/tools/miri/miri-script/src/commands.rs
@ -2,6 +2,7 @@ use std::env;
 use std::ffi::OsString;
 use std::io::Write;
 use std::ops::Not;
+use std::ops::Range;
 use std::path::PathBuf;
 use std::process;
 use std::thread;
@ -150,7 +151,6 @@ impl Command {
            | Command::Fmt { .. }
            | Command::Clippy { .. }
            | Command::Cargo { .. } => Self::auto_actions()?,
-            | Command::ManySeeds { .. }
            | Command::Toolchain { .. }
            | Command::Bench { .. }
            | Command::RustcPull { .. }
@ -162,11 +162,11 @@ impl Command {
            Command::Build { flags } => Self::build(flags),
            Command::Check { flags } => Self::check(flags),
            Command::Test { bless, flags } => Self::test(bless, flags),
-            Command::Run { dep, verbose, flags } => Self::run(dep, verbose, flags),
+            Command::Run { dep, verbose, many_seeds, flags } =>
+                Self::run(dep, verbose, many_seeds, flags),
            Command::Fmt { flags } => Self::fmt(flags),
            Command::Clippy { flags } => Self::clippy(flags),
            Command::Cargo { flags } => Self::cargo(flags),
-            Command::ManySeeds { command } => Self::many_seeds(command),
            Command::Bench { benches } => Self::bench(benches),
            Command::Toolchain { flags } => Self::toolchain(flags),
            Command::RustcPull { commit } => Self::rustc_pull(commit.clone()),
@ -367,43 +367,6 @@ impl Command {
        Ok(())
    }

-    fn many_seeds(command: Vec<OsString>) -> Result<()> {
-        let seed_start: u64 = env::var("MIRI_SEED_START")
-            .unwrap_or_else(|_| "0".into())
-            .parse()
-            .context("failed to parse MIRI_SEED_START")?;
-        let seed_end: u64 = match (env::var("MIRI_SEEDS"), env::var("MIRI_SEED_END")) {
-            (Ok(_), Ok(_)) => bail!("Only one of MIRI_SEEDS and MIRI_SEED_END may be set"),
-            (Ok(seeds), Err(_)) =>
-                seed_start + seeds.parse::<u64>().context("failed to parse MIRI_SEEDS")?,
-            (Err(_), Ok(seed_end)) => seed_end.parse().context("failed to parse MIRI_SEED_END")?,
-            (Err(_), Err(_)) => seed_start + 256,
-        };
-        if seed_end <= seed_start {
-            bail!("the end of the seed range must be larger than the start.");
-        }
-
-        let Some((command_name, trailing_args)) = command.split_first() else {
-            bail!("expected many-seeds command to be non-empty");
-        };
-        let sh = Shell::new()?;
-        sh.set_var("MIRI_AUTO_OPS", "no"); // just in case we get recursively invoked
-        for seed in seed_start..seed_end {
-            println!("Trying seed: {seed}");
-            let mut miriflags = env::var_os("MIRIFLAGS").unwrap_or_default();
-            miriflags.push(format!(" -Zlayout-seed={seed} -Zmiri-seed={seed}"));
-            let status = cmd!(sh, "{command_name} {trailing_args...}")
-                .env("MIRIFLAGS", miriflags)
-                .quiet()
-                .run();
-            if let Err(err) = status {
-                println!("Failing seed: {seed}");
-                return Err(err.into());
-            }
-        }
-        Ok(())
-    }
-
    fn bench(benches: Vec<OsString>) -> Result<()> {
        // The hyperfine to use
        let hyperfine = env::var("HYPERFINE");
@ -495,7 +458,12 @@ impl Command {
        Ok(())
    }

-    fn run(dep: bool, verbose: bool, mut flags: Vec<OsString>) -> Result<()> {
+    fn run(
+        dep: bool,
+        verbose: bool,
+        many_seeds: Option<Range<u32>>,
+        mut flags: Vec<OsString>,
+    ) -> Result<()> {
        let mut e = MiriEnv::new()?;
        // Scan for "--target" to overwrite the "MIRI_TEST_TARGET" env var so
        // that we set the MIRI_SYSROOT up the right way. We must make sure that
@ -526,26 +494,46 @@ impl Command {
        flags.push("--sysroot".into());
        flags.push(miri_sysroot.into());

-        // Then run the actual command. Also add MIRIFLAGS.
+        // Compute everything needed to run the actual command. Also add MIRIFLAGS.
        let miri_manifest = path!(e.miri_dir / "Cargo.toml");
        let miri_flags = e.sh.var("MIRIFLAGS").unwrap_or_default();
        let miri_flags = flagsplit(&miri_flags);
        let toolchain = &e.toolchain;
        let extra_flags = &e.cargo_extra_flags;
        let quiet_flag = if verbose { None } else { Some("--quiet") };
-        let mut cmd = if dep {
-            cmd!(
-                e.sh,
-                "cargo +{toolchain} {quiet_flag...} test {extra_flags...} --manifest-path {miri_manifest} --test ui -- --miri-run-dep-mode {miri_flags...} {flags...}"
-            )
-        } else {
-            cmd!(
-                e.sh,
-                "cargo +{toolchain} {quiet_flag...} run {extra_flags...} --manifest-path {miri_manifest} -- {miri_flags...} {flags...}"
-            )
+        // This closure runs the command with the given `seed_flag` added between the MIRIFLAGS and
+        // the `flags` given on the command-line.
+        let run_miri = |sh: &Shell, seed_flag: Option<String>| -> Result<()> {
+            // The basic command that executes the Miri driver.
+            let mut cmd = if dep {
+                cmd!(
+                    sh,
+                    "cargo +{toolchain} {quiet_flag...} test {extra_flags...} --manifest-path {miri_manifest} --test ui -- --miri-run-dep-mode"
+                )
+            } else {
+                cmd!(
+                    sh,
+                    "cargo +{toolchain} {quiet_flag...} run {extra_flags...} --manifest-path {miri_manifest} --"
+                )
+            };
+            cmd.set_quiet(!verbose);
+            // Add Miri flags
+            let cmd = cmd.args(&miri_flags).args(seed_flag).args(&flags);
+            // And run the thing.
+            Ok(cmd.run()?)
        };
-        cmd.set_quiet(!verbose);
-        cmd.run()?;
+        // Run the closure once or many times.
+        if let Some(seed_range) = many_seeds {
+            e.run_many_times(seed_range, |sh, seed| {
+                eprintln!("Trying seed: {seed}");
+                run_miri(sh, Some(format!("-Zmiri-seed={seed}"))).map_err(|err| {
+                    eprintln!("FAILING SEED: {seed}");
+                    err
+                })
+            })?;
+        } else {
+            run_miri(&e.sh, None)?;
+        }
        Ok(())
    }

--- a/src/tools/miri/miri-script/src/main.rs
+++ b/src/tools/miri/miri-script/src/main.rs
@ -3,10 +3,10 @@
 mod commands;
 mod util;

-use std::env;
 use std::ffi::OsString;
+use std::{env, ops::Range};

-use anyhow::{anyhow, bail, Result};
+use anyhow::{anyhow, bail, Context, Result};

 #[derive(Clone, Debug)]
 pub enum Command {
@ -39,6 +39,7 @@ pub enum Command {
    Run {
        dep: bool,
        verbose: bool,
+        many_seeds: Option<Range<u32>>,
        /// Flags that are passed through to `miri`.
        flags: Vec<OsString>,
    },
@ -55,10 +56,6 @@ pub enum Command {
    /// Runs just `cargo <flags>` with the Miri-specific environment variables.
    /// Mainly meant to be invoked by rust-analyzer.
    Cargo { flags: Vec<OsString> },
-    /// Runs <command> over and over again with different seeds for Miri. The MIRIFLAGS
-    /// variable is set to its original value appended with ` -Zmiri-seed=$SEED` for
-    /// many different seeds.
-    ManySeeds { command: Vec<OsString> },
    /// Runs the benchmarks from bench-cargo-miri in hyperfine. hyperfine needs to be installed.
    Bench {
        /// List of benchmarks to run. By default all benchmarks are run.
@ -91,9 +88,11 @@ Just check miri. <flags> are passed to `cargo check`.
 Build miri, set up a sysroot and then run the test suite. <flags> are passed
 to the final `cargo test` invocation.

-./miri run [--dep] [-v|--verbose] <flags>:
+./miri run [--dep] [-v|--verbose] [--many-seeds|--many-seeds=..to|--many-seeds=from..to] <flags>:
 Build miri, set up a sysroot and then run the driver with the given <flags>.
 (Also respects MIRIFLAGS environment variable.)
+If `--many-seeds` is present, Miri is run many times in parallel with different seeds.
+The range defaults to `0..256`.

 ./miri fmt <flags>:
 Format all sources and tests. <flags> are passed to `rustfmt`.
@ -111,13 +110,6 @@ install`. Sets up the rpath such that the installed binary should work in any
 working directory. Note that the binaries are placed in the `miri` toolchain
 sysroot, to prevent conflicts with other toolchains.

-./miri many-seeds <command>:
-Runs <command> over and over again with different seeds for Miri. The MIRIFLAGS
-variable is set to its original value appended with ` -Zmiri-seed=$SEED` for
-many different seeds. MIRI_SEED_START controls the first seed to try (default: 0).
-MIRI_SEEDS controls how many seeds are being tried (default: 256);
-alternatively, MIRI_SEED_END controls the end of the (exclusive) seed range to try.
-
 ./miri bench <benches>:
 Runs the benchmarks from bench-cargo-miri in hyperfine. hyperfine needs to be installed.
 <benches> can explicitly list the benchmarks to run; by default, all of them are run.
@ -165,22 +157,37 @@ fn main() -> Result<()> {
        Some("run") => {
            let mut dep = false;
            let mut verbose = false;
-            while let Some(arg) = args.peek().and_then(|a| a.to_str()) {
-                match arg {
-                    "--dep" => dep = true,
-                    "-v" | "--verbose" => verbose = true,
-                    _ => break, // not for us
+            let mut many_seeds = None;
+            while let Some(arg) = args.peek().and_then(|s| s.to_str()) {
+                if arg == "--dep" {
+                    dep = true;
+                } else if arg == "-v" || arg == "--verbose" {
+                    verbose = true;
+                } else if arg == "--many-seeds" {
+                    many_seeds = Some(0..256);
+                } else if let Some(val) = arg.strip_prefix("--many-seeds=") {
+                    let (from, to) = val.split_once("..").ok_or_else(|| {
+                        anyhow!("invalid format for `--many-seeds`: expected `from..to`")
+                    })?;
+                    let from: u32 = if from.is_empty() {
+                        0
+                    } else {
+                        from.parse().context("invalid `from` in `--many-seeds=from..to")?
+                    };
+                    let to: u32 = to.parse().context("invalid `to` in `--many-seeds=from..to")?;
+                    many_seeds = Some(from..to);
+                } else {
+                    break; // not for us
                }
                // Consume the flag, look at the next one.
                args.next().unwrap();
            }
-            Command::Run { dep, verbose, flags: args.collect() }
+            Command::Run { dep, verbose, many_seeds, flags: args.collect() }
        }
        Some("fmt") => Command::Fmt { flags: args.collect() },
        Some("clippy") => Command::Clippy { flags: args.collect() },
        Some("cargo") => Command::Cargo { flags: args.collect() },
        Some("install") => Command::Install { flags: args.collect() },
-        Some("many-seeds") => Command::ManySeeds { command: args.collect() },
        Some("bench") => Command::Bench { benches: args.collect() },
        Some("toolchain") => Command::Toolchain { flags: args.collect() },
        Some("rustc-pull") => {
--- a/src/tools/miri/miri-script/src/util.rs
+++ b/src/tools/miri/miri-script/src/util.rs
@ -1,5 +1,8 @@
 use std::ffi::{OsStr, OsString};
+use std::ops::Range;
 use std::path::{Path, PathBuf};
+use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
+use std::thread;

 use anyhow::{anyhow, Context, Result};
 use dunce::canonicalize;
@ -189,4 +192,54 @@ impl MiriEnv {

        Ok(())
    }
+
+    /// Run the given closure many times in parallel with access to the shell, once for each value in the `range`.
+    pub fn run_many_times(
+        &self,
+        range: Range<u32>,
+        run: impl Fn(&Shell, u32) -> Result<()> + Sync,
+    ) -> Result<()> {
+        // `next` is atomic so threads can concurrently fetch their next value to run.
+        let next = AtomicU32::new(range.start);
+        let end = range.end; // exclusive!
+        let failed = AtomicBool::new(false);
+        thread::scope(|s| {
+            let mut handles = Vec::new();
+            // Spawn one worker per core.
+            for _ in 0..thread::available_parallelism()?.get() {
+                // Create a copy of the shell for this thread.
+                let local_shell = self.sh.clone();
+                let handle = s.spawn(|| -> Result<()> {
+                    let local_shell = local_shell; // move the copy into this thread.
+                    // Each worker thread keeps asking for numbers until we're all done.
+                    loop {
+                        let cur = next.fetch_add(1, Ordering::Relaxed);
+                        if cur >= end {
+                            // We hit the upper limit and are done.
+                            break;
+                        }
+                        // Run the command with this seed.
+                        run(&local_shell, cur).map_err(|err| {
+                            // If we failed, tell everyone about this.
+                            failed.store(true, Ordering::Relaxed);
+                            err
+                        })?;
+                        // Check if some other command failed (in which case we'll stop as well).
+                        if failed.load(Ordering::Relaxed) {
+                            return Ok(());
+                        }
+                    }
+                    Ok(())
+                });
+                handles.push(handle);
+            }
+            // Wait for all workers to be done.
+            for handle in handles {
+                handle.join().unwrap()?;
+            }
+            // If all workers succeeded, we can't have failed.
+            assert!(!failed.load(Ordering::Relaxed));
+            Ok(())
+        })
+    }
 }