From 91d2fb2e2b0dd5d285a6c5c1489ff06c806490e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Sat, 3 Jun 2023 09:41:44 +0200 Subject: [PATCH] Port PGO/LTO/BOLT optimized build pipeline to Rust --- .github/workflows/ci.yml | 2 +- Cargo.lock | 440 ++++++- Cargo.toml | 1 + src/bootstrap/Cargo.lock | 4 + src/bootstrap/builder.rs | 1 + src/bootstrap/metrics.rs | 98 +- src/bootstrap/render_tests.rs | 6 +- src/bootstrap/test.rs | 8 +- src/bootstrap/tool.rs | 1 + .../host-x86_64/dist-x86_64-linux/Dockerfile | 7 +- src/ci/github-actions/ci.yml | 2 +- src/ci/stage-build.py | 1035 ----------------- src/tools/build_helper/Cargo.toml | 2 + src/tools/build_helper/src/lib.rs | 1 + src/tools/build_helper/src/metrics.rs | 92 ++ src/tools/opt-dist/Cargo.toml | 22 + src/tools/opt-dist/README.md | 7 + src/tools/opt-dist/src/environment/linux.rs | 54 + src/tools/opt-dist/src/environment/mod.rs | 75 ++ src/tools/opt-dist/src/environment/windows.rs | 78 ++ src/tools/opt-dist/src/exec.rs | 169 +++ src/tools/opt-dist/src/main.rs | 175 +++ src/tools/opt-dist/src/metrics.rs | 106 ++ src/tools/opt-dist/src/tests.rs | 101 ++ src/tools/opt-dist/src/timer.rs | 167 +++ src/tools/opt-dist/src/training.rs | 202 ++++ src/tools/opt-dist/src/utils/io.rs | 48 + src/tools/opt-dist/src/utils/mod.rs | 36 + src/tools/tidy/src/deps.rs | 2 + 29 files changed, 1802 insertions(+), 1140 deletions(-) delete mode 100755 src/ci/stage-build.py create mode 100644 src/tools/build_helper/src/metrics.rs create mode 100644 src/tools/opt-dist/Cargo.toml create mode 100644 src/tools/opt-dist/README.md create mode 100644 src/tools/opt-dist/src/environment/linux.rs create mode 100644 src/tools/opt-dist/src/environment/mod.rs create mode 100644 src/tools/opt-dist/src/environment/windows.rs create mode 100644 src/tools/opt-dist/src/exec.rs create mode 100644 src/tools/opt-dist/src/main.rs create mode 100644 src/tools/opt-dist/src/metrics.rs create mode 100644 src/tools/opt-dist/src/tests.rs create mode 100644 src/tools/opt-dist/src/timer.rs create mode 100644 src/tools/opt-dist/src/training.rs create mode 100644 src/tools/opt-dist/src/utils/io.rs create mode 100644 src/tools/opt-dist/src/utils/mod.rs diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 016d1f53758..3929b064c26 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -408,7 +408,7 @@ jobs: - name: dist-x86_64-msvc env: RUST_CONFIGURE_ARGS: "--build=x86_64-pc-windows-msvc --host=x86_64-pc-windows-msvc --target=x86_64-pc-windows-msvc --enable-full-tools --enable-profiler" - SCRIPT: PGO_HOST=x86_64-pc-windows-msvc python src/ci/stage-build.py python x.py dist bootstrap --include-default-paths + SCRIPT: python x.py build --set rust.debug=true opt-dist && PGO_HOST=x86_64-pc-windows-msvc ./build/x86_64-pc-windows-msvc/stage0-tools-bin/opt-dist python x.py dist bootstrap --include-default-paths DIST_REQUIRE_ALL_TOOLS: 1 os: windows-2019-8core-32gb - name: dist-i686-msvc diff --git a/Cargo.lock b/Cargo.lock index 5c936ca40e1..3d3f6fbe7ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -172,6 +172,9 @@ name = "anyhow" version = "1.0.71" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c7d0618f0e0b7e8ff11427422b64564d5fb0be1940354bfe2e0529b18a9d9b8" +dependencies = [ + "backtrace", +] [[package]] name = "ar_archive_writer" @@ -258,6 +261,12 @@ dependencies = [ "rustc-demangle", ] +[[package]] +name = "base64" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "604178f6c5c21f02dc555784810edfb88d34ac2c73b2eae109655649ee73ce3d" + [[package]] name = "basic-toml" version = "0.1.2" @@ -328,6 +337,10 @@ dependencies = [ [[package]] name = "build_helper" version = "0.1.0" +dependencies = [ + "serde", + "serde_derive", +] [[package]] name = "bump-stage0" @@ -685,6 +698,16 @@ dependencies = [ "rand_xorshift", ] +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "core-foundation-sys" version = "0.8.4" @@ -985,6 +1008,15 @@ dependencies = [ "log", ] +[[package]] +name = "encoding_rs" +version = "0.8.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394" +dependencies = [ + "cfg-if", +] + [[package]] name = "env_logger" version = "0.7.1" @@ -1174,6 +1206,21 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foreign-types" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + [[package]] name = "form_urlencoded" version = "1.2.0" @@ -1199,6 +1246,12 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0845fa252299212f0389d64ba26f34fa32cfe41588355f21ed507c59a0f64541" +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "futf" version = "0.1.5" @@ -1396,6 +1449,25 @@ dependencies = [ "serde", ] +[[package]] +name = "h2" +version = "0.3.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d357c7ae988e7d2182f7d7871d0b963962420b0678b0997ce7de72001aeab782" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap 1.9.3", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "handlebars" version = "4.3.7" @@ -1494,6 +1566,49 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "http" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd6effc99afb63425aff9b05836f029929e345a6148a14b7ecd5ab67af944482" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm 0.2.7", +] + [[package]] name = "humantime" version = "1.3.0" @@ -1509,6 +1624,43 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" +[[package]] +name = "hyper" +version = "0.14.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abfba89e19b959ca163c7752ba59d737c1ceea53a5d31a149c805446fc958064" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "iana-time-zone" version = "0.1.57" @@ -1717,6 +1869,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "ipnet" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12b6ee2129af8d4fb011108c73d99a1b83a85977f23b82460c0ae2e25bb4b57f" + [[package]] name = "is-terminal" version = "0.4.8" @@ -1866,6 +2024,12 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a" +[[package]] +name = "libm" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7012b1bbb0719e1097c47611d3898568c546d597c2e74d66f6087edd5233ff4" + [[package]] name = "libz-sys" version = "1.1.9" @@ -2104,6 +2268,17 @@ dependencies = [ "rustc-std-workspace-core", ] +[[package]] +name = "mio" +version = "0.8.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "927a765cd3fc26206e66b296465fa9d3e5ab003e651c1b3c060e7956d96b19d2" +dependencies = [ + "libc", + "wasi", + "windows-sys 0.48.0", +] + [[package]] name = "miow" version = "0.5.0" @@ -2141,6 +2316,24 @@ dependencies = [ "regex", ] +[[package]] +name = "native-tls" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "07226173c32f2926027b63cce4bcd8076c3552846cbe7925f3aaffeac0a3b92e" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + "security-framework-sys", + "tempfile", +] + [[package]] name = "new_debug_unreachable" version = "1.0.4" @@ -2169,6 +2362,15 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "ntapi" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8a3895c6391c39d7fe7ebc444a87eb2991b2a0bc718fdabd071eec617fc68e4" +dependencies = [ + "winapi", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -2240,6 +2442,32 @@ dependencies = [ "winapi", ] +[[package]] +name = "openssl" +version = "0.10.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "345df152bc43501c5eb9e4654ff05f794effb78d4efe3d53abc158baddc0703d" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.8", +] + [[package]] name = "openssl-probe" version = "0.1.5" @@ -2258,6 +2486,28 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opt-dist" +version = "0.1.0" +dependencies = [ + "anyhow", + "build_helper", + "camino", + "env_logger 0.10.0", + "fs_extra", + "glob", + "humansize", + "humantime 2.1.0", + "log", + "reqwest", + "serde", + "serde_json", + "sysinfo", + "tar", + "xz", + "zip", +] + [[package]] name = "overload" version = "0.1.1" @@ -2277,7 +2527,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1914cd452d8fccd6f9db48147b29fd4ae05bea9dc5d9ad578509f72415de282" dependencies = [ "cfg-if", - "libm", + "libm 0.1.4", ] [[package]] @@ -2730,6 +2980,43 @@ dependencies = [ "walkdir", ] +[[package]] +name = "reqwest" +version = "0.11.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cde824a14b7c14f85caff81225f411faacc04a2013f41670f41443742b1c1c55" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "log", + "mime", + "native-tls", + "once_cell", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + [[package]] name = "rls" version = "2.0.0" @@ -4296,6 +4583,29 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "security-framework" +version = "2.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc758eb7bffce5b308734e9b0c1468893cae9ff70ebf13e7090be8dcbcc83a8" +dependencies = [ + "bitflags 1.3.2", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f51d0c0d83bec45f16480d0ce0058397a69e48fcdc52d1dc8855fb68acbd31a7" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "self_cell" version = "0.10.2" @@ -4352,6 +4662,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + [[package]] name = "sha1" version = "0.10.5" @@ -4630,6 +4952,20 @@ dependencies = [ "unicode-xid", ] +[[package]] +name = "sysinfo" +version = "0.29.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9557d0845b86eea8182f7b10dff120214fb6cd9fd937b6f4917714e546a38695" +dependencies = [ + "cfg-if", + "core-foundation-sys", + "libc", + "ntapi", + "once_cell", + "winapi", +] + [[package]] name = "sysroot" version = "0.0.0" @@ -4849,7 +5185,36 @@ dependencies = [ "autocfg", "backtrace", "bytes", + "libc", + "mio", + "num_cpus", "pin-project-lite", + "socket2", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbae76ab933c85776efabc971569dd6119c580d8f5d448769dec1764bf796ef2" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f988a1a1adc2fb21f9c12aa96441da33a1728193ae0b95d2be22dbd17fcb4e5c" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", ] [[package]] @@ -4901,6 +5266,12 @@ version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + [[package]] name = "tracing" version = "0.1.37" @@ -4986,6 +5357,12 @@ dependencies = [ "tracing-subscriber", ] +[[package]] +name = "try-lock" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" + [[package]] name = "twox-hash" version = "1.6.3" @@ -5304,6 +5681,15 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "want" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e" +dependencies = [ + "try-lock", +] + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -5340,6 +5726,18 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f219e0d211ba40266969f6dbdd90636da12f75bee4fc9d6c23d1260dadb51454" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.87" @@ -5369,6 +5767,16 @@ version = "0.2.87" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ca6ad05a4870b2bf5fe995117d3728437bd27d7cd5f06f13c17443ef369775a1" +[[package]] +name = "web-sys" +version = "0.3.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e33b99f4b23ba3eec1a53ac264e35a755f00e966e0065077d6027c0f575b0b97" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "winapi" version = "0.3.9" @@ -5563,6 +5971,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] + [[package]] name = "writeable" version = "0.5.2" @@ -5578,6 +5995,15 @@ dependencies = [ "libc", ] +[[package]] +name = "xz" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c887690ff2a2e233e8e49633461521f98ec57fbff9d59a884c9a4f04ec1da34" +dependencies = [ + "xz2", +] + [[package]] name = "xz2" version = "0.1.7" @@ -5683,3 +6109,15 @@ dependencies = [ "syn 1.0.109", "synstructure 0.12.6", ] + +[[package]] +name = "zip" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" +dependencies = [ + "byteorder", + "crc32fast", + "crossbeam-utils", + "flate2", +] diff --git a/Cargo.toml b/Cargo.toml index 20b1c656d35..53bf9a8af2c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ members = [ "src/tools/suggest-tests", "src/tools/generate-windows-sys", "src/tools/rustdoc-gui-test", + "src/tools/opt-dist", ] exclude = [ diff --git a/src/bootstrap/Cargo.lock b/src/bootstrap/Cargo.lock index 2b2e9e9f988..66d97a2a0d0 100644 --- a/src/bootstrap/Cargo.lock +++ b/src/bootstrap/Cargo.lock @@ -85,6 +85,10 @@ dependencies = [ [[package]] name = "build_helper" version = "0.1.0" +dependencies = [ + "serde", + "serde_derive", +] [[package]] name = "cc" diff --git a/src/bootstrap/builder.rs b/src/bootstrap/builder.rs index 05b66f94727..c9bd329d430 100644 --- a/src/bootstrap/builder.rs +++ b/src/bootstrap/builder.rs @@ -665,6 +665,7 @@ impl<'a> Builder<'a> { llvm::Lld, llvm::CrtBeginEnd, tool::RustdocGUITest, + tool::OptimizedDist ), Kind::Check | Kind::Clippy | Kind::Fix => describe!( check::Std, diff --git a/src/bootstrap/metrics.rs b/src/bootstrap/metrics.rs index 5990f33b9bc..b73df7fe822 100644 --- a/src/bootstrap/metrics.rs +++ b/src/bootstrap/metrics.rs @@ -7,7 +7,10 @@ use crate::builder::{Builder, Step}; use crate::util::t; use crate::Build; -use serde_derive::{Deserialize, Serialize}; +use build_helper::metrics::{ + JsonInvocation, JsonInvocationSystemStats, JsonNode, JsonRoot, JsonStepSystemStats, Test, + TestOutcome, TestSuite, TestSuiteMetadata, +}; use std::cell::RefCell; use std::fs::File; use std::io::BufWriter; @@ -241,98 +244,7 @@ struct StepMetrics { test_suites: Vec, } -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -struct JsonRoot { - #[serde(default)] // For version 0 the field was not present. - format_version: usize, - system_stats: JsonInvocationSystemStats, - invocations: Vec, -} - -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -struct JsonInvocation { - // Unix timestamp in seconds - // - // This is necessary to easily correlate this invocation with logs or other data. - start_time: u64, - duration_including_children_sec: f64, - children: Vec, -} - -#[derive(Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -enum JsonNode { - RustbuildStep { - #[serde(rename = "type")] - type_: String, - debug_repr: String, - - duration_excluding_children_sec: f64, - system_stats: JsonStepSystemStats, - - children: Vec, - }, - TestSuite(TestSuite), -} - -#[derive(Serialize, Deserialize)] -struct TestSuite { - metadata: TestSuiteMetadata, - tests: Vec, -} - -#[derive(Serialize, Deserialize)] -#[serde(tag = "kind", rename_all = "snake_case")] -pub(crate) enum TestSuiteMetadata { - CargoPackage { - crates: Vec, - target: String, - host: String, - stage: u32, - }, - Compiletest { - suite: String, - mode: String, - compare_mode: Option, - target: String, - host: String, - stage: u32, - }, -} - -#[derive(Serialize, Deserialize)] -pub(crate) struct Test { - name: String, - #[serde(flatten)] - outcome: TestOutcome, -} - -#[derive(Serialize, Deserialize)] -#[serde(tag = "outcome", rename_all = "snake_case")] -pub(crate) enum TestOutcome { - Passed, - Failed, - Ignored { ignore_reason: Option }, -} - -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -struct JsonInvocationSystemStats { - cpu_threads_count: usize, - cpu_model: String, - - memory_total_bytes: u64, -} - -#[derive(Serialize, Deserialize)] -#[serde(rename_all = "snake_case")] -struct JsonStepSystemStats { - cpu_utilization_percent: f64, -} - -#[derive(Deserialize)] +#[derive(serde_derive::Deserialize)] struct OnlyFormatVersion { #[serde(default)] // For version 0 the field was not present. format_version: usize, diff --git a/src/bootstrap/render_tests.rs b/src/bootstrap/render_tests.rs index 98a468c883d..ccd067053ef 100644 --- a/src/bootstrap/render_tests.rs +++ b/src/bootstrap/render_tests.rs @@ -141,9 +141,9 @@ impl<'a> Renderer<'a> { self.builder.metrics.record_test( &test.name, match outcome { - Outcome::Ok | Outcome::BenchOk => crate::metrics::TestOutcome::Passed, - Outcome::Failed => crate::metrics::TestOutcome::Failed, - Outcome::Ignored { reason } => crate::metrics::TestOutcome::Ignored { + Outcome::Ok | Outcome::BenchOk => build_helper::metrics::TestOutcome::Passed, + Outcome::Failed => build_helper::metrics::TestOutcome::Failed, + Outcome::Ignored { reason } => build_helper::metrics::TestOutcome::Ignored { ignore_reason: reason.map(|s| s.to_string()), }, }, diff --git a/src/bootstrap/test.rs b/src/bootstrap/test.rs index eed7a584b60..0907291b54d 100644 --- a/src/bootstrap/test.rs +++ b/src/bootstrap/test.rs @@ -340,7 +340,7 @@ impl Step for Cargo { #[cfg(feature = "build-metrics")] builder.metrics.begin_test_suite( - crate::metrics::TestSuiteMetadata::CargoPackage { + build_helper::metrics::TestSuiteMetadata::CargoPackage { crates: vec!["cargo".into()], target: self.host.triple.to_string(), host: self.host.triple.to_string(), @@ -1827,7 +1827,7 @@ note: if you're sure you want to do this, please open an issue as to why. In the #[cfg(feature = "build-metrics")] builder.metrics.begin_test_suite( - crate::metrics::TestSuiteMetadata::Compiletest { + build_helper::metrics::TestSuiteMetadata::Compiletest { suite: suite.into(), mode: mode.into(), compare_mode: None, @@ -1852,7 +1852,7 @@ note: if you're sure you want to do this, please open an issue as to why. In the #[cfg(feature = "build-metrics")] builder.metrics.begin_test_suite( - crate::metrics::TestSuiteMetadata::Compiletest { + build_helper::metrics::TestSuiteMetadata::Compiletest { suite: suite.into(), mode: mode.into(), compare_mode: Some(compare_mode.into()), @@ -2200,7 +2200,7 @@ fn run_cargo_test( #[cfg(feature = "build-metrics")] builder.metrics.begin_test_suite( - crate::metrics::TestSuiteMetadata::CargoPackage { + build_helper::metrics::TestSuiteMetadata::CargoPackage { crates: crates.iter().map(|c| c.to_string()).collect(), target: target.triple.to_string(), host: compiler.host.triple.to_string(), diff --git a/src/bootstrap/tool.rs b/src/bootstrap/tool.rs index 06c03178862..915dceae389 100644 --- a/src/bootstrap/tool.rs +++ b/src/bootstrap/tool.rs @@ -303,6 +303,7 @@ bootstrap_tool!( SuggestTests, "src/tools/suggest-tests", "suggest-tests"; GenerateWindowsSys, "src/tools/generate-windows-sys", "generate-windows-sys"; RustdocGUITest, "src/tools/rustdoc-gui-test", "rustdoc-gui-test", is_unstable_tool = true, allow_features = "test"; + OptimizedDist, "src/tools/opt-dist", "opt-dist"; ); #[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Ord, PartialOrd)] diff --git a/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile b/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile index c2fd2e3a91a..1fcb99f6d5f 100644 --- a/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile +++ b/src/ci/docker/host-x86_64/dist-x86_64-linux/Dockerfile @@ -54,7 +54,8 @@ COPY host-x86_64/dist-x86_64-linux/build-clang.sh /tmp/ RUN ./build-clang.sh ENV CC=clang CXX=clang++ -# rustc-perf version from 2023-03-15 +# rustc-perf version from 2023-05-30 +# Should also be changed in the opt-dist tool for other environments. ENV PERF_COMMIT 8b2ac3042e1ff2c0074455a0a3618adef97156b1 RUN curl -LS -o perf.zip https://github.com/rust-lang/rustc-perf/archive/$PERF_COMMIT.zip && \ unzip perf.zip && \ @@ -81,7 +82,9 @@ ENV RUST_CONFIGURE_ARGS \ --set rust.jemalloc \ --set rust.use-lld=true \ --set rust.lto=thin -ENV SCRIPT python3 ../src/ci/stage-build.py python3 ../x.py dist \ + +ENV SCRIPT python3 ../x.py build --set rust.debug=true opt-dist && \ + ./build/$HOSTS/stage0-tools-bin/opt-dist python3 ../x.py dist \ --host $HOSTS --target $HOSTS \ --include-default-paths \ build-manifest bootstrap diff --git a/src/ci/github-actions/ci.yml b/src/ci/github-actions/ci.yml index 8027e699666..b5b478e60f4 100644 --- a/src/ci/github-actions/ci.yml +++ b/src/ci/github-actions/ci.yml @@ -643,7 +643,7 @@ jobs: --target=x86_64-pc-windows-msvc --enable-full-tools --enable-profiler - SCRIPT: PGO_HOST=x86_64-pc-windows-msvc python src/ci/stage-build.py python x.py dist bootstrap --include-default-paths + SCRIPT: python x.py build --set rust.debug=true opt-dist && PGO_HOST=x86_64-pc-windows-msvc ./build/x86_64-pc-windows-msvc/stage0-tools-bin/opt-dist python x.py dist bootstrap --include-default-paths DIST_REQUIRE_ALL_TOOLS: 1 <<: *job-windows-8c diff --git a/src/ci/stage-build.py b/src/ci/stage-build.py deleted file mode 100755 index 8640a3e0f34..00000000000 --- a/src/ci/stage-build.py +++ /dev/null @@ -1,1035 +0,0 @@ -#!/usr/bin/env python3 -# ignore-tidy-linelength - -# Compatible with Python 3.6+ - -import contextlib -import getpass -import glob -import json -import logging -import os -import pprint -import shutil -import subprocess -import sys -import time -import traceback -import urllib.request -from io import StringIO -from pathlib import Path -from typing import Callable, ContextManager, Dict, Iterable, Iterator, List, Optional, \ - Tuple, Union - -PGO_HOST = os.environ["PGO_HOST"] - -LOGGER = logging.getLogger("stage-build") - -LLVM_PGO_CRATES = [ - "syn-1.0.89", - "cargo-0.60.0", - "serde-1.0.136", - "ripgrep-13.0.0", - "regex-1.5.5", - "clap-3.1.6", - "hyper-0.14.18" -] - -RUSTC_PGO_CRATES = [ - "externs", - "ctfe-stress-5", - "cargo-0.60.0", - "token-stream-stress", - "match-stress", - "tuple-stress", - "diesel-1.4.8", - "bitmaps-3.1.0" -] - -LLVM_BOLT_CRATES = LLVM_PGO_CRATES - - -def is_try_build() -> bool: - return os.environ.get("DIST_TRY_BUILD", "0") != "0" - - -class Pipeline: - # Paths - def checkout_path(self) -> Path: - """ - The root checkout, where the source is located. - """ - raise NotImplementedError - - def downloaded_llvm_dir(self) -> Path: - """ - Directory where the host LLVM is located. - """ - raise NotImplementedError - - def build_root(self) -> Path: - """ - The main directory where the build occurs. - """ - raise NotImplementedError - - def build_artifacts(self) -> Path: - return self.build_root() / "build" / PGO_HOST - - def rustc_stage_0(self) -> Path: - return self.build_artifacts() / "stage0" / "bin" / "rustc" - - def cargo_stage_0(self) -> Path: - return self.build_artifacts() / "stage0" / "bin" / "cargo" - - def rustc_stage_2(self) -> Path: - return self.build_artifacts() / "stage2" / "bin" / "rustc" - - def opt_artifacts(self) -> Path: - raise NotImplementedError - - def llvm_profile_dir_root(self) -> Path: - return self.opt_artifacts() / "llvm-pgo" - - def llvm_profile_merged_file(self) -> Path: - return self.opt_artifacts() / "llvm-pgo.profdata" - - def rustc_perf_dir(self) -> Path: - return self.opt_artifacts() / "rustc-perf" - - def build_rustc_perf(self): - raise NotImplementedError() - - def rustc_profile_dir_root(self) -> Path: - return self.opt_artifacts() / "rustc-pgo" - - def rustc_profile_merged_file(self) -> Path: - return self.opt_artifacts() / "rustc-pgo.profdata" - - def rustc_profile_template_path(self) -> Path: - """ - The profile data is written into a single filepath that is being repeatedly merged when each - rustc invocation ends. Empirically, this can result in some profiling data being lost. That's - why we override the profile path to include the PID. This will produce many more profiling - files, but the resulting profile will produce a slightly faster rustc binary. - """ - return self.rustc_profile_dir_root() / "default_%m_%p.profraw" - - def supports_bolt(self) -> bool: - raise NotImplementedError - - def llvm_bolt_profile_merged_file(self) -> Path: - return self.opt_artifacts() / "bolt.profdata" - - def metrics_path(self) -> Path: - return self.build_root() / "build" / "metrics.json" - - def executable_extension(self) -> str: - raise NotImplementedError - - def skipped_tests(self) -> Iterable[str]: - return () - - -class LinuxPipeline(Pipeline): - def checkout_path(self) -> Path: - return Path("/checkout") - - def downloaded_llvm_dir(self) -> Path: - return Path("/rustroot") - - def build_root(self) -> Path: - return self.checkout_path() / "obj" - - def opt_artifacts(self) -> Path: - return Path("/tmp/tmp-multistage/opt-artifacts") - - def build_rustc_perf(self): - # /tmp/rustc-perf comes from the Dockerfile - shutil.copytree("/tmp/rustc-perf", self.rustc_perf_dir()) - cmd(["chown", "-R", f"{getpass.getuser()}:", self.rustc_perf_dir()]) - - with change_cwd(self.rustc_perf_dir()): - cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict( - RUSTC=str(self.rustc_stage_0()), - RUSTC_BOOTSTRAP="1" - )) - - def supports_bolt(self) -> bool: - return True - - def executable_extension(self) -> str: - return "" - - def skipped_tests(self) -> Iterable[str]: - # This test fails because of linker errors, as of June 2023. - yield "tests/ui/process/nofile-limit.rs" - - -class WindowsPipeline(Pipeline): - def __init__(self): - self.checkout_dir = Path(os.getcwd()) - - def checkout_path(self) -> Path: - return self.checkout_dir - - def downloaded_llvm_dir(self) -> Path: - return self.checkout_path() / "citools" / "clang-rust" - - def build_root(self) -> Path: - return self.checkout_path() - - def opt_artifacts(self) -> Path: - return self.checkout_path() / "opt-artifacts" - - def rustc_stage_0(self) -> Path: - return super().rustc_stage_0().with_suffix(".exe") - - def cargo_stage_0(self) -> Path: - return super().cargo_stage_0().with_suffix(".exe") - - def rustc_stage_2(self) -> Path: - return super().rustc_stage_2().with_suffix(".exe") - - def build_rustc_perf(self): - # rustc-perf version from 2023-03-15 - perf_commit = "8b2ac3042e1ff2c0074455a0a3618adef97156b1" - rustc_perf_zip_path = self.opt_artifacts() / "perf.zip" - - def download_rustc_perf(): - download_file( - f"https://github.com/rust-lang/rustc-perf/archive/{perf_commit}.zip", - rustc_perf_zip_path - ) - with change_cwd(self.opt_artifacts()): - unpack_archive(rustc_perf_zip_path) - move_path(Path(f"rustc-perf-{perf_commit}"), self.rustc_perf_dir()) - delete_file(rustc_perf_zip_path) - - retry_action(download_rustc_perf, "Download rustc-perf") - - with change_cwd(self.rustc_perf_dir()): - cmd([self.cargo_stage_0(), "build", "-p", "collector"], env=dict( - RUSTC=str(self.rustc_stage_0()), - RUSTC_BOOTSTRAP="1" - )) - - def rustc_profile_template_path(self) -> Path: - """ - On Windows, we don't have enough space to use separate files for each rustc invocation. - Therefore, we use a single file for the generated profiles. - """ - return self.rustc_profile_dir_root() / "default_%m.profraw" - - def supports_bolt(self) -> bool: - return False - - def executable_extension(self) -> str: - return ".exe" - - def skipped_tests(self) -> Iterable[str]: - # This test fails as of June 2023 - yield "tests\\codegen\\vec-shrink-panik.rs" - - -def get_timestamp() -> float: - return time.time() - - -Duration = float - - -def iterate_timers(timer: "Timer", name: str, level: int = 0) -> Iterator[ - Tuple[int, str, Duration]]: - """ - Hierarchically iterate the children of a timer, in a depth-first order. - """ - yield (level, name, timer.total_duration()) - for (child_name, child_timer) in timer.children: - yield from iterate_timers(child_timer, child_name, level=level + 1) - - -class Timer: - def __init__(self, parent_names: Tuple[str, ...] = ()): - self.children: List[Tuple[str, Timer]] = [] - self.section_active = False - self.parent_names = parent_names - self.duration_excluding_children: Duration = 0 - - @contextlib.contextmanager - def section(self, name: str) -> ContextManager["Timer"]: - assert not self.section_active - self.section_active = True - - start = get_timestamp() - exc = None - - child_timer = Timer(parent_names=self.parent_names + (name,)) - full_name = " > ".join(child_timer.parent_names) - try: - LOGGER.info(f"Section `{full_name}` starts") - yield child_timer - except BaseException as exception: - exc = exception - raise - finally: - end = get_timestamp() - duration = end - start - - child_timer.duration_excluding_children = duration - child_timer.total_duration() - self.add_child(name, child_timer) - if exc is None: - LOGGER.info(f"Section `{full_name}` ended: OK ({duration:.2f}s)") - else: - LOGGER.info(f"Section `{full_name}` ended: FAIL ({duration:.2f}s)") - self.section_active = False - - def total_duration(self) -> Duration: - return self.duration_excluding_children + sum( - c.total_duration() for (_, c) in self.children) - - def has_children(self) -> bool: - return len(self.children) > 0 - - def print_stats(self): - rows = [] - for (child_name, child_timer) in self.children: - for (level, name, duration) in iterate_timers(child_timer, child_name, level=0): - label = f"{' ' * level}{name}:" - rows.append((label, duration)) - - # Empty row - rows.append(("", "")) - - total_duration_label = "Total duration:" - total_duration = self.total_duration() - rows.append((total_duration_label, humantime(total_duration))) - - space_after_label = 2 - max_label_length = max(16, max(len(label) for (label, _) in rows)) + space_after_label - - table_width = max_label_length + 23 - divider = "-" * table_width - - with StringIO() as output: - print(divider, file=output) - for (label, duration) in rows: - if isinstance(duration, Duration): - pct = (duration / total_duration) * 100 - value = f"{duration:>12.2f}s ({pct:>5.2f}%)" - else: - value = f"{duration:>{len(total_duration_label) + 7}}" - print(f"{label:<{max_label_length}} {value}", file=output) - print(divider, file=output, end="") - LOGGER.info(f"Timer results\n{output.getvalue()}") - - def add_child(self, name: str, timer: "Timer"): - self.children.append((name, timer)) - - def add_duration(self, name: str, duration: Duration): - timer = Timer(parent_names=self.parent_names + (name,)) - timer.duration_excluding_children = duration - self.add_child(name, timer) - - -class BuildStep: - def __init__(self, type: str, children: List["BuildStep"], duration: float): - self.type = type - self.children = children - self.duration = duration - - def find_all_by_type(self, type: str) -> Iterator["BuildStep"]: - if type == self.type: - yield self - for child in self.children: - yield from child.find_all_by_type(type) - - def __repr__(self): - return f"BuildStep(type={self.type}, duration={self.duration}, children={len(self.children)})" - - -def load_last_metrics(path: Path) -> BuildStep: - """ - Loads the metrics of the most recent bootstrap execution from a metrics.json file. - """ - with open(path, "r") as f: - metrics = json.load(f) - invocation = metrics["invocations"][-1] - - def parse(entry) -> Optional[BuildStep]: - if "kind" not in entry or entry["kind"] != "rustbuild_step": - return None - type = entry.get("type", "") - duration = entry.get("duration_excluding_children_sec", 0) - children = [] - - for child in entry.get("children", ()): - step = parse(child) - if step is not None: - children.append(step) - duration += step.duration - return BuildStep(type=type, children=children, duration=duration) - - children = [parse(child) for child in invocation.get("children", ())] - return BuildStep( - type="root", - children=children, - duration=invocation.get("duration_including_children_sec", 0) - ) - - -@contextlib.contextmanager -def change_cwd(dir: Path): - """ - Temporarily change working directory to `dir`. - """ - cwd = os.getcwd() - LOGGER.debug(f"Changing working dir from `{cwd}` to `{dir}`") - os.chdir(dir) - try: - yield - finally: - LOGGER.debug(f"Reverting working dir to `{cwd}`") - os.chdir(cwd) - - -def humantime(time_s: float) -> str: - hours = time_s // 3600 - time_s = time_s % 3600 - minutes = time_s // 60 - seconds = time_s % 60 - - result = "" - if hours > 0: - result += f"{int(hours)}h " - if minutes > 0: - result += f"{int(minutes)}m " - result += f"{round(seconds)}s" - return result - - -def move_path(src: Path, dst: Path): - LOGGER.info(f"Moving `{src}` to `{dst}`") - shutil.move(src, dst) - - -def delete_file(path: Path): - LOGGER.info(f"Deleting file `{path}`") - os.unlink(path) - - -def delete_directory(path: Path): - LOGGER.info(f"Deleting directory `{path}`") - shutil.rmtree(path) - - -def unpack_archive(archive: Path, target_dir: Optional[Path] = None): - LOGGER.info(f"Unpacking archive `{archive}`") - shutil.unpack_archive(str(archive), extract_dir=str(target_dir) if target_dir is not None else None) - - -def download_file(src: str, target: Path): - LOGGER.info(f"Downloading `{src}` into `{target}`") - urllib.request.urlretrieve(src, str(target)) - - -def retry_action(action, name: str, max_fails: int = 5): - LOGGER.info(f"Attempting to perform action `{name}` with retry") - for iteration in range(max_fails): - LOGGER.info(f"Attempt {iteration + 1}/{max_fails}") - try: - action() - return - except BaseException: # also catch ctrl+c/sysexit - LOGGER.error(f"Action `{name}` has failed\n{traceback.format_exc()}") - - raise Exception(f"Action `{name}` has failed after {max_fails} attempts") - - -def cmd( - args: List[Union[str, Path]], - env: Optional[Dict[str, str]] = None, - output_path: Optional[Path] = None -): - args = [str(arg) for arg in args] - - environment = os.environ.copy() - - cmd_str = "" - if env is not None: - environment.update(env) - cmd_str += " ".join(f"{k}={v}" for (k, v) in (env or {}).items()) - cmd_str += " " - cmd_str += " ".join(args) - if output_path is not None: - cmd_str += f" > {output_path}" - LOGGER.info(f"Executing `{cmd_str}`") - - if output_path is not None: - with open(output_path, "w") as f: - return subprocess.run( - args, - env=environment, - check=True, - stdout=f - ) - return subprocess.run(args, env=environment, check=True) - - -class BenchmarkRunner: - def run_rustc(self, pipeline: Pipeline): - raise NotImplementedError - - def run_llvm(self, pipeline: Pipeline): - raise NotImplementedError - - def run_bolt(self, pipeline: Pipeline): - raise NotImplementedError - - -class DefaultBenchmarkRunner(BenchmarkRunner): - def run_rustc(self, pipeline: Pipeline): - # Here we're profiling the `rustc` frontend, so we also include `Check`. - # The benchmark set includes various stress tests that put the frontend under pressure. - run_compiler_benchmarks( - pipeline, - profiles=["Check", "Debug", "Opt"], - scenarios=["All"], - crates=RUSTC_PGO_CRATES, - env=dict( - LLVM_PROFILE_FILE=str(pipeline.rustc_profile_template_path()) - ) - ) - - def run_llvm(self, pipeline: Pipeline): - run_compiler_benchmarks( - pipeline, - profiles=["Debug", "Opt"], - scenarios=["Full"], - crates=LLVM_PGO_CRATES - ) - - def run_bolt(self, pipeline: Pipeline): - run_compiler_benchmarks( - pipeline, - profiles=["Check", "Debug", "Opt"], - scenarios=["Full"], - crates=LLVM_BOLT_CRATES - ) - - -def run_compiler_benchmarks( - pipeline: Pipeline, - profiles: List[str], - scenarios: List[str], - crates: List[str], - env: Optional[Dict[str, str]] = None -): - env = env if env is not None else {} - - # Compile libcore, both in opt-level=0 and opt-level=3 - with change_cwd(pipeline.build_root()): - cmd([ - pipeline.rustc_stage_2(), - "--edition", "2021", - "--crate-type", "lib", - str(pipeline.checkout_path() / "library/core/src/lib.rs"), - "--out-dir", pipeline.opt_artifacts() - ], env=dict(RUSTC_BOOTSTRAP="1", **env)) - - cmd([ - pipeline.rustc_stage_2(), - "--edition", "2021", - "--crate-type", "lib", - "-Copt-level=3", - str(pipeline.checkout_path() / "library/core/src/lib.rs"), - "--out-dir", pipeline.opt_artifacts() - ], env=dict(RUSTC_BOOTSTRAP="1", **env)) - - # Run rustc-perf benchmarks - # Benchmark using profile_local with eprintln, which essentially just means - # don't actually benchmark -- just make sure we run rustc a bunch of times. - with change_cwd(pipeline.rustc_perf_dir()): - cmd([ - pipeline.cargo_stage_0(), - "run", - "-p", "collector", "--bin", "collector", "--", - "profile_local", "eprintln", - pipeline.rustc_stage_2(), - "--id", "Test", - "--cargo", pipeline.cargo_stage_0(), - "--profiles", ",".join(profiles), - "--scenarios", ",".join(scenarios), - "--include", ",".join(crates) - ], env=dict( - RUST_LOG="collector=debug", - RUSTC=str(pipeline.rustc_stage_0()), - RUSTC_BOOTSTRAP="1", - **env - )) - - -# https://stackoverflow.com/a/31631711/1107768 -def format_bytes(size: int) -> str: - """Return the given bytes as a human friendly KiB, MiB or GiB string.""" - KB = 1024 - MB = KB ** 2 # 1,048,576 - GB = KB ** 3 # 1,073,741,824 - TB = KB ** 4 # 1,099,511,627,776 - - if size < KB: - return f"{size} B" - elif KB <= size < MB: - return f"{size / KB:.2f} KiB" - elif MB <= size < GB: - return f"{size / MB:.2f} MiB" - elif GB <= size < TB: - return f"{size / GB:.2f} GiB" - else: - return str(size) - - -# https://stackoverflow.com/a/63307131/1107768 -def count_files(path: Path) -> int: - return sum(1 for p in path.rglob("*") if p.is_file()) - - -def count_files_with_prefix(path: Path) -> int: - return sum(1 for p in glob.glob(f"{path}*") if Path(p).is_file()) - - -# https://stackoverflow.com/a/55659577/1107768 -def get_path_size(path: Path) -> int: - if path.is_dir(): - return sum(p.stat().st_size for p in path.rglob("*")) - return path.stat().st_size - - -def get_path_prefix_size(path: Path) -> int: - """ - Get size of all files beginning with the prefix `path`. - Alternative to shell `du -sh *`. - """ - return sum(Path(p).stat().st_size for p in glob.glob(f"{path}*")) - - -def get_files(directory: Path, filter: Optional[Callable[[Path], bool]] = None) -> Iterable[Path]: - for file in os.listdir(directory): - path = directory / file - if filter is None or filter(path): - yield path - - -def bootstrap_build( - pipeline: Pipeline, - args: List[str], - env: Optional[Dict[str, str]] = None, - targets: Iterable[str] = ("library/std", ) -): - if env is None: - env = {} - else: - env = dict(env) - env["RUST_BACKTRACE"] = "1" - arguments = [ - sys.executable, - pipeline.checkout_path() / "x.py", - "build", - "--target", PGO_HOST, - "--host", PGO_HOST, - "--stage", "2", - ] + list(targets) + args - cmd(arguments, env=env) - - -def create_pipeline() -> Pipeline: - if sys.platform == "linux": - return LinuxPipeline() - elif sys.platform in ("cygwin", "win32"): - return WindowsPipeline() - else: - raise Exception(f"Optimized build is not supported for platform {sys.platform}") - - -def gather_llvm_profiles(pipeline: Pipeline, runner: BenchmarkRunner): - LOGGER.info("Running benchmarks with PGO instrumented LLVM") - - runner.run_llvm(pipeline) - - profile_path = pipeline.llvm_profile_merged_file() - LOGGER.info(f"Merging LLVM PGO profiles to {profile_path}") - cmd([ - pipeline.downloaded_llvm_dir() / "bin" / "llvm-profdata", - "merge", - "-o", profile_path, - pipeline.llvm_profile_dir_root() - ]) - - LOGGER.info("LLVM PGO statistics") - LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}") - LOGGER.info( - f"{pipeline.llvm_profile_dir_root()}: {format_bytes(get_path_size(pipeline.llvm_profile_dir_root()))}") - LOGGER.info(f"Profile file count: {count_files(pipeline.llvm_profile_dir_root())}") - - # We don't need the individual .profraw files now that they have been merged - # into a final .profdata - delete_directory(pipeline.llvm_profile_dir_root()) - - -def gather_rustc_profiles(pipeline: Pipeline, runner: BenchmarkRunner): - LOGGER.info("Running benchmarks with PGO instrumented rustc") - - runner.run_rustc(pipeline) - - profile_path = pipeline.rustc_profile_merged_file() - LOGGER.info(f"Merging Rustc PGO profiles to {profile_path}") - cmd([ - pipeline.build_artifacts() / "llvm" / "bin" / "llvm-profdata", - "merge", - "-o", profile_path, - pipeline.rustc_profile_dir_root() - ]) - - LOGGER.info("Rustc PGO statistics") - LOGGER.info(f"{profile_path}: {format_bytes(get_path_size(profile_path))}") - LOGGER.info( - f"{pipeline.rustc_profile_dir_root()}: {format_bytes(get_path_size(pipeline.rustc_profile_dir_root()))}") - LOGGER.info(f"Profile file count: {count_files(pipeline.rustc_profile_dir_root())}") - - # We don't need the individual .profraw files now that they have been merged - # into a final .profdata - delete_directory(pipeline.rustc_profile_dir_root()) - - -def gather_llvm_bolt_profiles(pipeline: Pipeline, runner: BenchmarkRunner): - LOGGER.info("Running benchmarks with BOLT instrumented LLVM") - - runner.run_bolt(pipeline) - - merged_profile_path = pipeline.llvm_bolt_profile_merged_file() - profile_files_path = Path("/tmp/prof.fdata") - LOGGER.info(f"Merging LLVM BOLT profiles to {merged_profile_path}") - - profile_files = sorted(glob.glob(f"{profile_files_path}*")) - cmd([ - "merge-fdata", - *profile_files, - ], output_path=merged_profile_path) - - LOGGER.info("LLVM BOLT statistics") - LOGGER.info(f"{merged_profile_path}: {format_bytes(get_path_size(merged_profile_path))}") - LOGGER.info( - f"{profile_files_path}: {format_bytes(get_path_prefix_size(profile_files_path))}") - LOGGER.info(f"Profile file count: {count_files_with_prefix(profile_files_path)}") - - -def clear_llvm_files(pipeline: Pipeline): - """ - Rustbuild currently doesn't support rebuilding LLVM when PGO options - change (or any other llvm-related options); so just clear out the relevant - directories ourselves. - """ - LOGGER.info("Clearing LLVM build files") - delete_directory(pipeline.build_artifacts() / "llvm") - delete_directory(pipeline.build_artifacts() / "lld") - - -def print_binary_sizes(pipeline: Pipeline): - bin_dir = pipeline.build_artifacts() / "stage2" / "bin" - binaries = get_files(bin_dir) - - lib_dir = pipeline.build_artifacts() / "stage2" / "lib" - libraries = get_files(lib_dir, lambda p: p.suffix == ".so") - - paths = sorted(binaries) + sorted(libraries) - with StringIO() as output: - for path in paths: - path_str = f"{path.name}:" - print(f"{path_str:<50}{format_bytes(path.stat().st_size):>14}", file=output) - LOGGER.info(f"Rustc binary size\n{output.getvalue()}") - - -def print_free_disk_space(pipeline: Pipeline): - usage = shutil.disk_usage(pipeline.opt_artifacts()) - total = usage.total - used = usage.used - free = usage.free - - logging.info( - f"Free disk space: {format_bytes(free)} out of total {format_bytes(total)} ({(used / total) * 100:.2f}% used)") - - -def log_metrics(step: BuildStep): - substeps: List[Tuple[int, BuildStep]] = [] - - def visit(step: BuildStep, level: int): - substeps.append((level, step)) - for child in step.children: - visit(child, level=level + 1) - - visit(step, 0) - - output = StringIO() - for (level, step) in substeps: - label = f"{'.' * level}{step.type}" - print(f"{label:<65}{step.duration:>8.2f}s", file=output) - logging.info(f"Build step durations\n{output.getvalue()}") - - -def record_metrics(pipeline: Pipeline, timer: Timer): - metrics = load_last_metrics(pipeline.metrics_path()) - if metrics is None: - return - llvm_steps = tuple(metrics.find_all_by_type("bootstrap::llvm::Llvm")) - llvm_duration = sum(step.duration for step in llvm_steps) - - rustc_steps = tuple(metrics.find_all_by_type("bootstrap::compile::Rustc")) - rustc_duration = sum(step.duration for step in rustc_steps) - - # The LLVM step is part of the Rustc step - rustc_duration = max(0, rustc_duration - llvm_duration) - - if llvm_duration > 0: - timer.add_duration("LLVM", llvm_duration) - if rustc_duration > 0: - timer.add_duration("Rustc", rustc_duration) - - log_metrics(metrics) - - -def run_tests(pipeline: Pipeline): - """ - After `dist` is executed, we extract its archived components into a sysroot directory, - and then use that extracted rustc as a stage0 compiler. - Then we run a subset of tests using that compiler, to have a basic smoke test which checks - whether the optimization pipeline hasn't broken something. - """ - build_dir = pipeline.build_root() / "build" - dist_dir = build_dir / "dist" - - def extract_dist_dir(name: str) -> Path: - target_dir = build_dir / "optimized-dist" - target_dir.mkdir(parents=True, exist_ok=True) - unpack_archive(dist_dir / f"{name}.tar.xz", target_dir=target_dir) - extracted_path = target_dir / name - assert extracted_path.is_dir() - return extracted_path - - # Extract rustc, libstd, cargo and src archives to create the optimized sysroot - rustc_dir = extract_dist_dir(f"rustc-nightly-{PGO_HOST}") / "rustc" - libstd_dir = extract_dist_dir(f"rust-std-nightly-{PGO_HOST}") / f"rust-std-{PGO_HOST}" - cargo_dir = extract_dist_dir(f"cargo-nightly-{PGO_HOST}") / "cargo" - extracted_src_dir = extract_dist_dir("rust-src-nightly") / "rust-src" - - # We need to manually copy libstd to the extracted rustc sysroot - shutil.copytree( - libstd_dir / "lib" / "rustlib" / PGO_HOST / "lib", - rustc_dir / "lib" / "rustlib" / PGO_HOST / "lib" - ) - - # Extract sources - they aren't in the `rustc-nightly-{host}` tarball, so we need to manually copy libstd - # sources to the extracted sysroot. We need sources available so that `-Zsimulate-remapped-rust-src-base` - # works correctly. - shutil.copytree( - extracted_src_dir / "lib" / "rustlib" / "src", - rustc_dir / "lib" / "rustlib" / "src" - ) - - rustc_path = rustc_dir / "bin" / f"rustc{pipeline.executable_extension()}" - assert rustc_path.is_file() - cargo_path = cargo_dir / "bin" / f"cargo{pipeline.executable_extension()}" - assert cargo_path.is_file() - - # Specify path to a LLVM config so that LLVM is not rebuilt. - # It doesn't really matter which LLVM config we choose, because no sysroot will be compiled. - llvm_config = pipeline.build_artifacts() / "llvm" / "bin" / f"llvm-config{pipeline.executable_extension()}" - assert llvm_config.is_file() - - config_content = f"""profile = "user" -changelog-seen = 2 - -[build] -rustc = "{rustc_path.as_posix()}" -cargo = "{cargo_path.as_posix()}" - -[target.{PGO_HOST}] -llvm-config = "{llvm_config.as_posix()}" -""" - logging.info(f"Using following `config.toml` for running tests:\n{config_content}") - - # Simulate a stage 0 compiler with the extracted optimized dist artifacts. - with open("config.toml", "w") as f: - f.write(config_content) - - args = [ - sys.executable, - pipeline.checkout_path() / "x.py", - "test", - "--stage", "0", - "tests/assembly", - "tests/codegen", - "tests/codegen-units", - "tests/incremental", - "tests/mir-opt", - "tests/pretty", - "tests/run-pass-valgrind", - "tests/ui", - ] - for test_path in pipeline.skipped_tests(): - args.extend(["--exclude", test_path]) - cmd(args=args, env=dict( - COMPILETEST_FORCE_STAGE0="1" - )) - - -def execute_build_pipeline(timer: Timer, pipeline: Pipeline, runner: BenchmarkRunner, dist_build_args: List[str]): - # Clear and prepare tmp directory - shutil.rmtree(pipeline.opt_artifacts(), ignore_errors=True) - os.makedirs(pipeline.opt_artifacts(), exist_ok=True) - - pipeline.build_rustc_perf() - - """ - Stage 1: Build PGO instrumented rustc - - We use a normal build of LLVM, because gathering PGO profiles for LLVM and `rustc` at the same time - can cause issues. - """ - with timer.section("Stage 1 (rustc PGO)") as stage1: - with stage1.section("Build PGO instrumented rustc and LLVM") as rustc_pgo_instrument: - bootstrap_build(pipeline, args=[ - "--rust-profile-generate", - pipeline.rustc_profile_dir_root() - ]) - record_metrics(pipeline, rustc_pgo_instrument) - - with stage1.section("Gather profiles"): - gather_rustc_profiles(pipeline, runner) - print_free_disk_space(pipeline) - - with stage1.section("Build PGO optimized rustc") as rustc_pgo_use: - bootstrap_build(pipeline, args=[ - "--rust-profile-use", - pipeline.rustc_profile_merged_file() - ]) - record_metrics(pipeline, rustc_pgo_use) - dist_build_args += [ - "--rust-profile-use", - pipeline.rustc_profile_merged_file() - ] - - """ - Stage 2: Gather LLVM PGO profiles - """ - with timer.section("Stage 2 (LLVM PGO)") as stage2: - # Clear normal LLVM artifacts - clear_llvm_files(pipeline) - - with stage2.section("Build PGO instrumented LLVM") as llvm_pgo_instrument: - bootstrap_build(pipeline, args=[ - "--llvm-profile-generate", - # We want to keep the already built PGO-optimized `rustc`. - "--keep-stage", "0", - "--keep-stage", "1" - ], env=dict( - LLVM_PROFILE_DIR=str(pipeline.llvm_profile_dir_root() / "prof-%p") - )) - record_metrics(pipeline, llvm_pgo_instrument) - - with stage2.section("Gather profiles"): - gather_llvm_profiles(pipeline, runner) - - dist_build_args += [ - "--llvm-profile-use", - pipeline.llvm_profile_merged_file(), - ] - print_free_disk_space(pipeline) - - # Clear PGO-instrumented LLVM artifacts - clear_llvm_files(pipeline) - - """ - Stage 3: Build BOLT instrumented LLVM - - We build a PGO optimized LLVM in this step, then instrument it with BOLT and gather BOLT profiles. - Note that we don't remove LLVM artifacts after this step, so that they are reused in the final dist build. - BOLT instrumentation is performed "on-the-fly" when the LLVM library is copied to the sysroot of rustc, - therefore the LLVM artifacts on disk are not "tainted" with BOLT instrumentation and they can be reused. - """ - if pipeline.supports_bolt(): - with timer.section("Stage 3 (LLVM BOLT)") as stage3: - with stage3.section("Build BOLT instrumented LLVM") as llvm_bolt_instrument: - bootstrap_build(pipeline, args=[ - "--llvm-profile-use", - pipeline.llvm_profile_merged_file(), - "--llvm-bolt-profile-generate", - # We want to keep the already built PGO-optimized `rustc`. - "--keep-stage", "0", - "--keep-stage", "1" - ]) - record_metrics(pipeline, llvm_bolt_instrument) - - with stage3.section("Gather profiles"): - gather_llvm_bolt_profiles(pipeline, runner) - - dist_build_args += [ - "--llvm-bolt-profile-use", - pipeline.llvm_bolt_profile_merged_file() - ] - print_free_disk_space(pipeline) - - # We want to keep the already built PGO-optimized `rustc`. - dist_build_args += [ - "--keep-stage", "0", - "--keep-stage", "1" - ] - - """ - Final stage: Build PGO optimized rustc + PGO/BOLT optimized LLVM - """ - with timer.section("Final stage (dist build)") as final_stage: - cmd(dist_build_args) - record_metrics(pipeline, final_stage) - - # Try builds can be in various broken states, so we don't want to gatekeep them with tests - if not is_try_build(): - with timer.section("Run tests"): - run_tests(pipeline) - - -def run(runner: BenchmarkRunner): - logging.basicConfig( - level=logging.DEBUG, - format="%(name)s %(levelname)-4s: %(message)s", - ) - - LOGGER.info(f"Running multi-stage build using Python {sys.version}") - LOGGER.info(f"Environment values\n{pprint.pformat(dict(os.environ), indent=2)}") - - build_args = sys.argv[1:] - - # Skip components that are not needed for try builds to speed them up - if is_try_build(): - LOGGER.info("Skipping building of unimportant components for a try build") - for target in ("rust-docs", "rustc-docs", "rust-docs-json", "rust-analyzer", - "rustc-src", "clippy", "miri", "rustfmt"): - build_args.extend(["--exclude", target]) - - timer = Timer() - pipeline = create_pipeline() - - try: - execute_build_pipeline(timer, pipeline, runner, build_args) - except BaseException as e: - LOGGER.error("The multi-stage build has failed") - raise e - finally: - timer.print_stats() - print_free_disk_space(pipeline) - - print_binary_sizes(pipeline) - - -if __name__ == "__main__": - runner = DefaultBenchmarkRunner() - run(runner) diff --git a/src/tools/build_helper/Cargo.toml b/src/tools/build_helper/Cargo.toml index 99f6fea2ecf..66894e1abc4 100644 --- a/src/tools/build_helper/Cargo.toml +++ b/src/tools/build_helper/Cargo.toml @@ -6,3 +6,5 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +serde = "1" +serde_derive = "1" diff --git a/src/tools/build_helper/src/lib.rs b/src/tools/build_helper/src/lib.rs index 3fa970373b3..575f3677155 100644 --- a/src/tools/build_helper/src/lib.rs +++ b/src/tools/build_helper/src/lib.rs @@ -1,3 +1,4 @@ pub mod ci; pub mod git; +pub mod metrics; pub mod util; diff --git a/src/tools/build_helper/src/metrics.rs b/src/tools/build_helper/src/metrics.rs new file mode 100644 index 00000000000..2d0c66a8f33 --- /dev/null +++ b/src/tools/build_helper/src/metrics.rs @@ -0,0 +1,92 @@ +use serde_derive::{Deserialize, Serialize}; + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct JsonRoot { + #[serde(default)] // For version 0 the field was not present. + pub format_version: usize, + pub system_stats: JsonInvocationSystemStats, + pub invocations: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct JsonInvocation { + // Unix timestamp in seconds + // + // This is necessary to easily correlate this invocation with logs or other data. + pub start_time: u64, + pub duration_including_children_sec: f64, + pub children: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum JsonNode { + RustbuildStep { + #[serde(rename = "type")] + type_: String, + debug_repr: String, + + duration_excluding_children_sec: f64, + system_stats: JsonStepSystemStats, + + children: Vec, + }, + TestSuite(TestSuite), +} + +#[derive(Serialize, Deserialize)] +pub struct TestSuite { + pub metadata: TestSuiteMetadata, + pub tests: Vec, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum TestSuiteMetadata { + CargoPackage { + crates: Vec, + target: String, + host: String, + stage: u32, + }, + Compiletest { + suite: String, + mode: String, + compare_mode: Option, + target: String, + host: String, + stage: u32, + }, +} + +#[derive(Serialize, Deserialize)] +pub struct Test { + pub name: String, + #[serde(flatten)] + pub outcome: TestOutcome, +} + +#[derive(Serialize, Deserialize)] +#[serde(tag = "outcome", rename_all = "snake_case")] +pub enum TestOutcome { + Passed, + Failed, + Ignored { ignore_reason: Option }, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct JsonInvocationSystemStats { + pub cpu_threads_count: usize, + pub cpu_model: String, + + pub memory_total_bytes: u64, +} + +#[derive(Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct JsonStepSystemStats { + pub cpu_utilization_percent: f64, +} diff --git a/src/tools/opt-dist/Cargo.toml b/src/tools/opt-dist/Cargo.toml new file mode 100644 index 00000000000..5a1794d3336 --- /dev/null +++ b/src/tools/opt-dist/Cargo.toml @@ -0,0 +1,22 @@ +[package] +name = "opt-dist" +version = "0.1.0" +edition = "2021" + +[dependencies] +build_helper = { path = "../build_helper" } +env_logger = "0.10" +log = "0.4" +anyhow = { version = "1", features = ["backtrace"] } +humantime = "2" +humansize = "2" +sysinfo = { version = "0.29", default-features = false } +fs_extra = "1" +camino = "1" +reqwest = { version = "0.11", features = ["blocking"] } +zip = { version = "0.6", default-features = false, features = ["deflate"] } +tar = "0.4" +xz = "0.1" +serde = { version = "1", features = ["derive"] } +serde_json = "1" +glob = "0.3" diff --git a/src/tools/opt-dist/README.md b/src/tools/opt-dist/README.md new file mode 100644 index 00000000000..05a75870da1 --- /dev/null +++ b/src/tools/opt-dist/README.md @@ -0,0 +1,7 @@ +# Optimized build pipeline +This binary implements a heavily optimized build pipeline for `rustc` and `LLVM` artifacts that are used for both for +benchmarking using the perf. bot and for final distribution to users. + +It uses LTO, PGO and BOLT to optimize the compiler and LLVM as much as possible. +This logic is not part of bootstrap, because it needs to invoke bootstrap multiple times, force-rebuild various +artifacts repeatedly and sometimes go around bootstrap's cache mechanism. diff --git a/src/tools/opt-dist/src/environment/linux.rs b/src/tools/opt-dist/src/environment/linux.rs new file mode 100644 index 00000000000..d4c55c46f7c --- /dev/null +++ b/src/tools/opt-dist/src/environment/linux.rs @@ -0,0 +1,54 @@ +use crate::environment::Environment; +use crate::exec::cmd; +use crate::utils::io::copy_directory; +use camino::{Utf8Path, Utf8PathBuf}; + +pub(super) struct LinuxEnvironment; + +impl Environment for LinuxEnvironment { + fn python_binary(&self) -> &'static str { + "python3" + } + + fn checkout_path(&self) -> Utf8PathBuf { + Utf8PathBuf::from("/checkout") + } + + fn downloaded_llvm_dir(&self) -> Utf8PathBuf { + Utf8PathBuf::from("/rustroot") + } + + fn opt_artifacts(&self) -> Utf8PathBuf { + Utf8PathBuf::from("/tmp/tmp-multistage/opt-artifacts") + } + + fn build_root(&self) -> Utf8PathBuf { + self.checkout_path().join("obj") + } + + fn prepare_rustc_perf(&self) -> anyhow::Result<()> { + // /tmp/rustc-perf comes from the x64 dist Dockerfile + copy_directory(Utf8Path::new("/tmp/rustc-perf"), &self.rustc_perf_dir())?; + cmd(&[self.cargo_stage_0().as_str(), "build", "-p", "collector"]) + .workdir(&self.rustc_perf_dir()) + .env("RUSTC", &self.rustc_stage_0().into_string()) + .env("RUSTC_BOOTSTRAP", "1") + .run()?; + Ok(()) + } + + fn supports_bolt(&self) -> bool { + true + } + + fn executable_extension(&self) -> &'static str { + "" + } + + fn skipped_tests(&self) -> &'static [&'static str] { + &[ + // Fails because of linker errors, as of June 2023. + "tests/ui/process/nofile-limit.rs", + ] + } +} diff --git a/src/tools/opt-dist/src/environment/mod.rs b/src/tools/opt-dist/src/environment/mod.rs new file mode 100644 index 00000000000..f66b9ab41ea --- /dev/null +++ b/src/tools/opt-dist/src/environment/mod.rs @@ -0,0 +1,75 @@ +use camino::Utf8PathBuf; + +#[cfg(target_family = "unix")] +mod linux; +#[cfg(target_family = "windows")] +mod windows; + +pub trait Environment { + fn host_triple(&self) -> String { + std::env::var("PGO_HOST").expect("PGO_HOST environment variable missing") + } + + fn python_binary(&self) -> &'static str; + + /// The rustc checkout, where the compiler source is located. + fn checkout_path(&self) -> Utf8PathBuf; + + /// Path to the downloaded host LLVM. + fn downloaded_llvm_dir(&self) -> Utf8PathBuf; + + /// Directory where the optimization artifacts (PGO/BOLT profiles, etc.) + /// will be stored. + fn opt_artifacts(&self) -> Utf8PathBuf; + + /// The main directory where the build occurs. + fn build_root(&self) -> Utf8PathBuf; + + fn build_artifacts(&self) -> Utf8PathBuf { + self.build_root().join("build").join(self.host_triple()) + } + + fn cargo_stage_0(&self) -> Utf8PathBuf { + self.build_artifacts() + .join("stage0") + .join("bin") + .join(format!("cargo{}", self.executable_extension())) + } + + fn rustc_stage_0(&self) -> Utf8PathBuf { + self.build_artifacts() + .join("stage0") + .join("bin") + .join(format!("rustc{}", self.executable_extension())) + } + + fn rustc_stage_2(&self) -> Utf8PathBuf { + self.build_artifacts() + .join("stage2") + .join("bin") + .join(format!("rustc{}", self.executable_extension())) + } + + /// Path to the built rustc-perf benchmark suite. + fn rustc_perf_dir(&self) -> Utf8PathBuf { + self.opt_artifacts().join("rustc-perf") + } + + /// Download and/or compile rustc-perf. + fn prepare_rustc_perf(&self) -> anyhow::Result<()>; + + fn supports_bolt(&self) -> bool; + + /// What is the extension of binary executables in this environment? + fn executable_extension(&self) -> &'static str; + + /// List of test paths that should be skipped when testing the optimized artifacts. + fn skipped_tests(&self) -> &'static [&'static str]; +} + +pub fn create_environment() -> Box { + #[cfg(target_family = "unix")] + return Box::new(linux::LinuxEnvironment); + #[cfg(target_family = "windows")] + return Box::new(windows::WindowsEnvironment::new()); +} diff --git a/src/tools/opt-dist/src/environment/windows.rs b/src/tools/opt-dist/src/environment/windows.rs new file mode 100644 index 00000000000..36f4c0f29cc --- /dev/null +++ b/src/tools/opt-dist/src/environment/windows.rs @@ -0,0 +1,78 @@ +use crate::environment::Environment; +use crate::exec::cmd; +use crate::utils::io::move_directory; +use camino::Utf8PathBuf; +use std::io::Cursor; +use zip::ZipArchive; + +pub(super) struct WindowsEnvironment { + checkout_dir: Utf8PathBuf, +} + +impl WindowsEnvironment { + pub fn new() -> Self { + Self { checkout_dir: std::env::current_dir().unwrap().try_into().unwrap() } + } +} + +impl Environment for WindowsEnvironment { + fn python_binary(&self) -> &'static str { + "python" + } + + fn checkout_path(&self) -> Utf8PathBuf { + self.checkout_dir.clone() + } + + fn downloaded_llvm_dir(&self) -> Utf8PathBuf { + self.checkout_path().join("citools").join("clang-rust") + } + + fn opt_artifacts(&self) -> Utf8PathBuf { + self.checkout_path().join("opt-artifacts") + } + + fn build_root(&self) -> Utf8PathBuf { + self.checkout_path() + } + + fn prepare_rustc_perf(&self) -> anyhow::Result<()> { + // FIXME: add some mechanism for synchronization of this commit SHA with + // Linux (which builds rustc-perf in a Dockerfile) + // rustc-perf version from 2023-05-30 + const PERF_COMMIT: &str = "8b2ac3042e1ff2c0074455a0a3618adef97156b1"; + + let url = format!("https://github.com/rust-lang/rustc-perf/archive/{PERF_COMMIT}.zip"); + let response = reqwest::blocking::get(url)?.error_for_status()?.bytes()?.to_vec(); + + let mut archive = ZipArchive::new(Cursor::new(response))?; + archive.extract(self.rustc_perf_dir())?; + move_directory( + &self.rustc_perf_dir().join(format!("rustc-perf-{PERF_COMMIT}")), + &self.rustc_perf_dir(), + )?; + + cmd(&[self.cargo_stage_0().as_str(), "build", "-p", "collector"]) + .workdir(&self.rustc_perf_dir()) + .env("RUSTC", &self.rustc_stage_0().into_string()) + .env("RUSTC_BOOTSTRAP", "1") + .run()?; + + Ok(()) + } + + fn supports_bolt(&self) -> bool { + false + } + + fn executable_extension(&self) -> &'static str { + ".exe" + } + + fn skipped_tests(&self) -> &'static [&'static str] { + &[ + // Fails as of June 2023. + "tests\\codegen\\vec-shrink-panik.rs", + ] + } +} diff --git a/src/tools/opt-dist/src/exec.rs b/src/tools/opt-dist/src/exec.rs new file mode 100644 index 00000000000..d05ddbd4c0e --- /dev/null +++ b/src/tools/opt-dist/src/exec.rs @@ -0,0 +1,169 @@ +use crate::environment::Environment; +use crate::metrics::{load_metrics, record_metrics}; +use crate::timer::TimerSection; +use crate::training::{LlvmBoltProfile, LlvmPGOProfile, RustcPGOProfile}; +use camino::{Utf8Path, Utf8PathBuf}; +use std::collections::BTreeMap; +use std::fs::File; +use std::process::{Command, Stdio}; + +#[derive(Default)] +pub struct CmdBuilder { + args: Vec, + env: BTreeMap, + workdir: Option, + output: Option, +} + +impl CmdBuilder { + pub fn arg(mut self, arg: &str) -> Self { + self.args.push(arg.to_string()); + self + } + + pub fn env(mut self, name: &str, value: &str) -> Self { + self.env.insert(name.to_string(), value.to_string()); + self + } + + pub fn workdir(mut self, path: &Utf8Path) -> Self { + self.workdir = Some(path.to_path_buf()); + self + } + + pub fn redirect_output(mut self, path: Utf8PathBuf) -> Self { + self.output = Some(path); + self + } + + pub fn run(self) -> anyhow::Result<()> { + let mut cmd_str = String::new(); + cmd_str.push_str( + &self + .env + .iter() + .map(|(key, value)| format!("{key}={value}")) + .collect::>() + .join(" "), + ); + if !self.env.is_empty() { + cmd_str.push(' '); + } + cmd_str.push_str(&self.args.join(" ")); + if let Some(ref path) = self.output { + cmd_str.push_str(&format!(" > {path:?}")); + } + cmd_str.push_str(&format!( + " [at {}]", + self.workdir + .clone() + .unwrap_or_else(|| std::env::current_dir().unwrap().try_into().unwrap()) + )); + log::info!("Executing `{cmd_str}`"); + + let mut cmd = Command::new(&self.args[0]); + cmd.stdin(Stdio::null()); + cmd.args(self.args.iter().skip(1)); + for (key, value) in &self.env { + cmd.env(key, value); + } + if let Some(ref output) = self.output { + cmd.stdout(File::create(output.clone().into_std_path_buf())?); + } + if let Some(ref workdir) = self.workdir { + cmd.current_dir(workdir.clone().into_std_path_buf()); + } + let exit_status = cmd.spawn()?.wait()?; + if !exit_status.success() { + Err(anyhow::anyhow!( + "Command {cmd_str} has failed with exit code {:?}", + exit_status.code(), + )) + } else { + Ok(()) + } + } +} + +pub fn cmd(args: &[&str]) -> CmdBuilder { + assert!(!args.is_empty()); + CmdBuilder { args: args.iter().map(|s| s.to_string()).collect(), ..Default::default() } +} + +pub struct Bootstrap { + cmd: CmdBuilder, + metrics_path: Utf8PathBuf, +} + +impl Bootstrap { + pub fn build(env: &dyn Environment) -> Self { + let metrics_path = env.build_root().join("build").join("metrics.json"); + let cmd = cmd(&[ + env.python_binary(), + env.checkout_path().join("x.py").as_str(), + "build", + "--target", + &env.host_triple(), + "--host", + &env.host_triple(), + "--stage", + "2", + "library/std", + ]) + .env("RUST_BACKTRACE", "full"); + Self { cmd, metrics_path } + } + + pub fn dist(env: &dyn Environment, dist_args: &[String]) -> Self { + let metrics_path = env.build_root().join("build").join("metrics.json"); + let cmd = cmd(&dist_args.iter().map(|arg| arg.as_str()).collect::>()) + .env("RUST_BACKTRACE", "full"); + Self { cmd, metrics_path } + } + + pub fn llvm_pgo_instrument(mut self, profile_dir: &Utf8Path) -> Self { + self.cmd = self + .cmd + .arg("--llvm-profile-generate") + .env("LLVM_PROFILE_DIR", profile_dir.join("prof-%p").as_str()); + self + } + + pub fn llvm_pgo_optimize(mut self, profile: &LlvmPGOProfile) -> Self { + self.cmd = self.cmd.arg("--llvm-profile-use").arg(profile.0.as_str()); + self + } + + pub fn rustc_pgo_instrument(mut self, profile_dir: &Utf8Path) -> Self { + self.cmd = self.cmd.arg("--rust-profile-generate").arg(profile_dir.as_str()); + self + } + + pub fn rustc_pgo_optimize(mut self, profile: &RustcPGOProfile) -> Self { + self.cmd = self.cmd.arg("--rust-profile-use").arg(profile.0.as_str()); + self + } + + pub fn llvm_bolt_instrument(mut self) -> Self { + self.cmd = self.cmd.arg("--llvm-bolt-profile-generate"); + self + } + + pub fn llvm_bolt_optimize(mut self, profile: &LlvmBoltProfile) -> Self { + self.cmd = self.cmd.arg("--llvm-bolt-profile-use").arg(profile.0.as_str()); + self + } + + /// Do not rebuild rustc, and use a previously built rustc sysroot instead. + pub fn avoid_rustc_rebuild(mut self) -> Self { + self.cmd = self.cmd.arg("--keep-stage").arg("0").arg("--keep-stage").arg("1"); + self + } + + pub fn run(self, timer: &mut TimerSection) -> anyhow::Result<()> { + self.cmd.run()?; + let metrics = load_metrics(&self.metrics_path)?; + record_metrics(&metrics, timer); + Ok(()) + } +} diff --git a/src/tools/opt-dist/src/main.rs b/src/tools/opt-dist/src/main.rs new file mode 100644 index 00000000000..39fa7e1fb2a --- /dev/null +++ b/src/tools/opt-dist/src/main.rs @@ -0,0 +1,175 @@ +use anyhow::Context; +use log::LevelFilter; + +use crate::environment::{create_environment, Environment}; +use crate::exec::Bootstrap; +use crate::tests::run_tests; +use crate::timer::Timer; +use crate::training::{gather_llvm_bolt_profiles, gather_llvm_profiles, gather_rustc_profiles}; +use crate::utils::io::reset_directory; +use crate::utils::{clear_llvm_files, format_env_variables, print_free_disk_space}; + +mod environment; +mod exec; +mod metrics; +mod tests; +mod timer; +mod training; +mod utils; + +fn is_try_build() -> bool { + std::env::var("DIST_TRY_BUILD").unwrap_or_else(|_| "0".to_string()) != "0" +} + +fn execute_pipeline( + env: &dyn Environment, + timer: &mut Timer, + dist_args: Vec, +) -> anyhow::Result<()> { + reset_directory(&env.opt_artifacts())?; + env.prepare_rustc_perf()?; + + // Stage 1: Build PGO instrumented rustc + // We use a normal build of LLVM, because gathering PGO profiles for LLVM and `rustc` at the + // same time can cause issues, because the host and in-tree LLVM versions can diverge. + let rustc_pgo_profile = timer.section("Stage 1 (Rustc PGO)", |stage| { + let rustc_profile_dir_root = env.opt_artifacts().join("rustc-pgo"); + + stage.section("Build PGO instrumented rustc and LLVM", |section| { + Bootstrap::build(env).rustc_pgo_instrument(&rustc_profile_dir_root).run(section) + })?; + + let profile = stage + .section("Gather profiles", |_| gather_rustc_profiles(env, &rustc_profile_dir_root))?; + print_free_disk_space()?; + + stage.section("Build PGO optimized rustc", |section| { + Bootstrap::build(env).rustc_pgo_optimize(&profile).run(section) + })?; + + Ok(profile) + })?; + + // Stage 2: Gather LLVM PGO profiles + // Here we build a PGO instrumented LLVM, reusing the previously PGO optimized rustc. + // Then we use the instrumented LLVM to gather LLVM PGO profiles. + let llvm_pgo_profile = timer.section("Stage 2 (LLVM PGO)", |stage| { + // Remove the previous, uninstrumented build of LLVM. + clear_llvm_files(env)?; + + let llvm_profile_dir_root = env.opt_artifacts().join("llvm-pgo"); + + stage.section("Build PGO instrumented LLVM", |section| { + Bootstrap::build(env) + .llvm_pgo_instrument(&llvm_profile_dir_root) + .avoid_rustc_rebuild() + .run(section) + })?; + + let profile = stage + .section("Gather profiles", |_| gather_llvm_profiles(env, &llvm_profile_dir_root))?; + + print_free_disk_space()?; + + // Proactively delete the instrumented artifacts, to avoid using them by accident in + // follow-up stages. + clear_llvm_files(env)?; + + Ok(profile) + })?; + + let llvm_bolt_profile = if env.supports_bolt() { + // Stage 3: Build BOLT instrumented LLVM + // We build a PGO optimized LLVM in this step, then instrument it with BOLT and gather BOLT profiles. + // Note that we don't remove LLVM artifacts after this step, so that they are reused in the final dist build. + // BOLT instrumentation is performed "on-the-fly" when the LLVM library is copied to the sysroot of rustc, + // therefore the LLVM artifacts on disk are not "tainted" with BOLT instrumentation and they can be reused. + timer.section("Stage 3 (LLVM BOLT)", |stage| { + stage.section("Build BOLT instrumented LLVM", |stage| { + Bootstrap::build(env) + .llvm_bolt_instrument() + .llvm_pgo_optimize(&llvm_pgo_profile) + .avoid_rustc_rebuild() + .run(stage) + })?; + + let profile = stage.section("Gather profiles", |_| gather_llvm_bolt_profiles(env))?; + print_free_disk_space()?; + + // LLVM is not being cleared here, we want to reuse the previous PGO-optimized build + + Ok(Some(profile)) + })? + } else { + None + }; + + let mut dist = Bootstrap::dist(env, &dist_args) + .llvm_pgo_optimize(&llvm_pgo_profile) + .rustc_pgo_optimize(&rustc_pgo_profile) + .avoid_rustc_rebuild(); + + if let Some(llvm_bolt_profile) = llvm_bolt_profile { + dist = dist.llvm_bolt_optimize(&llvm_bolt_profile); + } + + // Final stage: Assemble the dist artifacts + // The previous PGO optimized rustc build and PGO optimized LLVM builds should be reused. + timer.section("Stage 4 (final build)", |stage| dist.run(stage))?; + + // After dist has finished, run a subset of the test suite on the optimized artifacts to discover + // possible regressions. + // The tests are not executed for try builds, which can be in various broken states, so we don't + // want to gatekeep them with tests. + if !is_try_build() { + timer.section("Run tests", |_| run_tests(env))?; + } + + Ok(()) +} + +fn main() -> anyhow::Result<()> { + // Make sure that we get backtraces for easier debugging in CI + std::env::set_var("RUST_BACKTRACE", "1"); + + env_logger::builder() + .filter_level(LevelFilter::Info) + .format_timestamp_millis() + .parse_default_env() + .init(); + + let mut build_args: Vec = std::env::args().skip(1).collect(); + log::info!("Running optimized build pipeline with args `{}`", build_args.join(" ")); + log::info!("Environment values\n{}", format_env_variables()); + + if let Ok(config) = std::fs::read_to_string("config.toml") { + log::info!("Contents of `config.toml`:\n{config}"); + } + + // Skip components that are not needed for try builds to speed them up + if is_try_build() { + log::info!("Skipping building of unimportant components for a try build"); + for target in [ + "rust-docs", + "rustc-docs", + "rust-docs-json", + "rust-analyzer", + "rustc-src", + "clippy", + "miri", + "rustfmt", + ] { + build_args.extend(["--exclude".to_string(), target.to_string()]); + } + } + + let mut timer = Timer::new(); + let env = create_environment(); + + let result = execute_pipeline(env.as_ref(), &mut timer, build_args); + log::info!("Timer results\n{}", timer.format_stats()); + + print_free_disk_space()?; + + result.context("Optimized build pipeline has failed") +} diff --git a/src/tools/opt-dist/src/metrics.rs b/src/tools/opt-dist/src/metrics.rs new file mode 100644 index 00000000000..cabe07eda32 --- /dev/null +++ b/src/tools/opt-dist/src/metrics.rs @@ -0,0 +1,106 @@ +use crate::timer::TimerSection; +use build_helper::metrics::{JsonNode, JsonRoot}; +use camino::Utf8Path; +use std::time::Duration; + +#[derive(Clone, Debug)] +pub struct BuildStep { + r#type: String, + children: Vec, + duration: Duration, +} + +impl BuildStep { + pub fn find_all_by_type(&self, r#type: &str) -> Vec<&BuildStep> { + let mut result = Vec::new(); + self.find_by_type(r#type, &mut result); + result + } + fn find_by_type<'a>(&'a self, r#type: &str, result: &mut Vec<&'a BuildStep>) { + if self.r#type == r#type { + result.push(self); + } + for child in &self.children { + child.find_by_type(r#type, result); + } + } +} + +/// Loads the metrics of the most recent bootstrap execution from a metrics.json file. +pub fn load_metrics(path: &Utf8Path) -> anyhow::Result { + let content = std::fs::read(path.as_std_path())?; + let mut metrics = serde_json::from_slice::(&content)?; + let invocation = metrics + .invocations + .pop() + .ok_or_else(|| anyhow::anyhow!("No bootstrap invocation found in metrics file"))?; + + fn parse(node: JsonNode) -> Option { + match node { + JsonNode::RustbuildStep { + type_: kind, + children, + duration_excluding_children_sec, + .. + } => { + let children: Vec<_> = children.into_iter().filter_map(parse).collect(); + let children_duration = children.iter().map(|c| c.duration).sum::(); + Some(BuildStep { + r#type: kind.to_string(), + children, + duration: children_duration + + Duration::from_secs_f64(duration_excluding_children_sec), + }) + } + JsonNode::TestSuite(_) => None, + } + } + + let duration = Duration::from_secs_f64(invocation.duration_including_children_sec); + let children: Vec<_> = invocation.children.into_iter().filter_map(parse).collect(); + Ok(BuildStep { r#type: "root".to_string(), children, duration }) +} + +/// Logs the individual metrics in a table and add Rustc and LLVM durations to the passed +/// timer. +pub fn record_metrics(metrics: &BuildStep, timer: &mut TimerSection) { + let llvm_steps = metrics.find_all_by_type("bootstrap::llvm::Llvm"); + let llvm_duration: Duration = llvm_steps.into_iter().map(|s| s.duration).sum(); + + let rustc_steps = metrics.find_all_by_type("bootstrap::compile::Rustc"); + let rustc_duration: Duration = rustc_steps.into_iter().map(|s| s.duration).sum(); + + // The LLVM step is part of the Rustc step + let rustc_duration = rustc_duration.saturating_sub(llvm_duration); + + if !llvm_duration.is_zero() { + timer.add_duration("LLVM", llvm_duration); + } + if !rustc_duration.is_zero() { + timer.add_duration("Rustc", rustc_duration); + } + + log_metrics(metrics); +} + +fn log_metrics(metrics: &BuildStep) { + use std::fmt::Write; + + let mut substeps: Vec<(u32, &BuildStep)> = Vec::new(); + + fn visit<'a>(step: &'a BuildStep, level: u32, substeps: &mut Vec<(u32, &'a BuildStep)>) { + substeps.push((level, step)); + for child in &step.children { + visit(child, level + 1, substeps); + } + } + + visit(metrics, 0, &mut substeps); + + let mut output = String::new(); + for (level, step) in substeps { + let label = format!("{}{}", ".".repeat(level as usize), step.r#type); + writeln!(output, "{label:<65}{:>8.2}s", step.duration.as_secs_f64()).unwrap(); + } + log::info!("Build step durations\n{output}"); +} diff --git a/src/tools/opt-dist/src/tests.rs b/src/tools/opt-dist/src/tests.rs new file mode 100644 index 00000000000..6b35b13e586 --- /dev/null +++ b/src/tools/opt-dist/src/tests.rs @@ -0,0 +1,101 @@ +use crate::environment::Environment; +use crate::exec::cmd; +use crate::utils::io::{copy_directory, unpack_archive}; +use anyhow::Context; +use camino::Utf8PathBuf; + +/// Run tests on optimized dist artifacts. +pub fn run_tests(env: &dyn Environment) -> anyhow::Result<()> { + // After `dist` is executed, we extract its archived components into a sysroot directory, + // and then use that extracted rustc as a stage0 compiler. + // Then we run a subset of tests using that compiler, to have a basic smoke test which checks + // whether the optimization pipeline hasn't broken something. + let build_dir = env.build_root().join("build"); + let dist_dir = build_dir.join("dist"); + let unpacked_dist_dir = build_dir.join("unpacked-dist"); + std::fs::create_dir_all(&unpacked_dist_dir)?; + + let extract_dist_dir = |name: &str| -> anyhow::Result { + unpack_archive(&dist_dir.join(format!("{name}.tar.xz")), &unpacked_dist_dir)?; + let extracted_path = unpacked_dist_dir.join(name); + assert!(extracted_path.is_dir()); + Ok(extracted_path) + }; + let host_triple = env.host_triple(); + + // Extract rustc, libstd, cargo and src archives to create the optimized sysroot + let rustc_dir = extract_dist_dir(&format!("rustc-nightly-{host_triple}"))?.join("rustc"); + let libstd_dir = extract_dist_dir(&format!("rust-std-nightly-{host_triple}"))? + .join(format!("rust-std-{host_triple}")); + let cargo_dir = extract_dist_dir(&format!("cargo-nightly-{host_triple}"))?.join("cargo"); + let extracted_src_dir = extract_dist_dir("rust-src-nightly")?.join("rust-src"); + + // We need to manually copy libstd to the extracted rustc sysroot + copy_directory( + &libstd_dir.join("lib").join("rustlib").join(&host_triple).join("lib"), + &rustc_dir.join("lib").join("rustlib").join(&host_triple).join("lib"), + )?; + + // Extract sources - they aren't in the `rustc-nightly-{host}` tarball, so we need to manually copy libstd + // sources to the extracted sysroot. We need sources available so that `-Zsimulate-remapped-rust-src-base` + // works correctly. + copy_directory( + &extracted_src_dir.join("lib").join("rustlib").join("src"), + &rustc_dir.join("lib").join("rustlib").join("src"), + )?; + + let rustc_path = rustc_dir.join("bin").join(format!("rustc{}", env.executable_extension())); + assert!(rustc_path.is_file()); + let cargo_path = cargo_dir.join("bin").join(format!("cargo{}", env.executable_extension())); + assert!(cargo_path.is_file()); + + // Specify path to a LLVM config so that LLVM is not rebuilt. + // It doesn't really matter which LLVM config we choose, because no sysroot will be compiled. + let llvm_config = env + .build_artifacts() + .join("llvm") + .join("bin") + .join(format!("llvm-config{}", env.executable_extension())); + assert!(llvm_config.is_file()); + + let config_content = format!( + r#"profile = "user" +changelog-seen = 2 + +[build] +rustc = "{rustc}" +cargo = "{cargo}" + +[target.{host_triple}] +llvm-config = "{llvm_config}" +"#, + rustc = rustc_path.to_string().replace('\\', "/"), + cargo = cargo_path.to_string().replace('\\', "/"), + llvm_config = llvm_config.to_string().replace('\\', "/") + ); + log::info!("Using following `config.toml` for running tests:\n{config_content}"); + + // Simulate a stage 0 compiler with the extracted optimized dist artifacts. + std::fs::write("config.toml", config_content)?; + + let x_py = env.checkout_path().join("x.py"); + let mut args = vec![ + env.python_binary(), + x_py.as_str(), + "test", + "--stage", + "0", + "tests/assembly", + "tests/codegen", + "tests/codegen-units", + "tests/incremental", + "tests/mir-opt", + "tests/pretty", + "tests/run-pass-valgrind", + "tests/ui", + ]; + for test_path in env.skipped_tests() { + args.extend(["--exclude", test_path]); + } + cmd(&args).env("COMPILETEST_FORCE_STAGE0", "1").run().context("Cannot execute tests") +} diff --git a/src/tools/opt-dist/src/timer.rs b/src/tools/opt-dist/src/timer.rs new file mode 100644 index 00000000000..2b29ba8d59f --- /dev/null +++ b/src/tools/opt-dist/src/timer.rs @@ -0,0 +1,167 @@ +use std::ops::{Deref, DerefMut}; +use std::time::{Duration, SystemTime}; + +pub struct Timer { + root: TimerSection, +} + +impl Timer { + pub fn new() -> Self { + Timer { root: TimerSection::new(None) } + } + + pub fn format_stats(&self) -> String { + use std::fmt::Write; + + let mut items = Vec::new(); + for (name, child) in &self.root.children { + match child { + SectionEntry::SubSection(section) => { + section.collect_levels(0, name, &mut items); + } + SectionEntry::Duration(duration) => items.push((0, name, *duration)), + } + } + + let rows: Vec<(String, Duration)> = items + .into_iter() + .map(|(level, name, duration)| (format!("{}{name}:", " ".repeat(level)), duration)) + .collect(); + + let total_duration = self.total_duration(); + let total_duration_label = "Total duration:".to_string(); + + const SPACE_AFTER_LABEL: usize = 2; + let max_label_length = 16.max(rows.iter().map(|(label, _)| label.len()).max().unwrap_or(0)) + + SPACE_AFTER_LABEL; + + let table_width = max_label_length + 23; + let divider = "-".repeat(table_width); + + let mut output = String::new(); + writeln!(output, "{divider}").unwrap(); + for (label, duration) in rows { + let pct = (duration.as_millis() as f64 / total_duration.as_millis() as f64) * 100.0; + let duration_fmt = format!("{:>12.2}s ({pct:>5.2}%)", duration.as_secs_f64()); + writeln!(output, "{label:<0$} {duration_fmt}", max_label_length).unwrap(); + } + output.push('\n'); + + let total_duration = Duration::new(total_duration.as_secs(), 0); + let total_duration = format!( + "{:>1$}", + humantime::format_duration(total_duration).to_string(), + table_width - total_duration_label.len() + ); + writeln!(output, "{total_duration_label}{total_duration}").unwrap(); + + writeln!(output, "{divider}").unwrap(); + output + } +} + +impl Deref for Timer { + type Target = TimerSection; + + fn deref(&self) -> &Self::Target { + &self.root + } +} + +impl DerefMut for Timer { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.root + } +} + +pub struct TimerSection { + name: Option, + children: Vec<(String, SectionEntry)>, + duration_excluding_children: Duration, +} + +impl TimerSection { + pub fn new(name: Option) -> Self { + TimerSection { + name, + children: Default::default(), + duration_excluding_children: Duration::ZERO, + } + } + + pub fn section anyhow::Result, R>( + &mut self, + name: &str, + func: F, + ) -> anyhow::Result { + let full_name = match &self.name { + Some(current_name) => { + format!("{current_name} > {name}") + } + None => name.to_string(), + }; + log::info!("Section `{full_name}` starts"); + let mut child = TimerSection { + name: Some(full_name.clone()), + children: Default::default(), + duration_excluding_children: Duration::ZERO, + }; + + let start = SystemTime::now(); + let result = func(&mut child); + let duration = start.elapsed().unwrap(); + + let msg = match result { + Ok(_) => "OK", + Err(_) => "FAIL", + }; + + child.duration_excluding_children = duration.saturating_sub(child.total_duration()); + + log::info!("Section `{full_name}` ended: {msg} ({:.2}s)`", duration.as_secs_f64()); + self.children.push((name.to_string(), SectionEntry::SubSection(child))); + result + } + + pub fn add_duration(&mut self, name: &str, duration: Duration) { + self.children.push((name.to_string(), SectionEntry::Duration(duration))); + } + + fn total_duration(&self) -> Duration { + self.duration_excluding_children + + self.children.iter().map(|(_, child)| child.total_duration()).sum::() + } + + fn collect_levels<'a>( + &'a self, + level: usize, + name: &'a str, + items: &mut Vec<(usize, &'a str, Duration)>, + ) { + items.push((level, name, self.total_duration())); + for (name, child) in &self.children { + match &child { + SectionEntry::Duration(duration) => { + items.push((level + 1, name, *duration)); + } + SectionEntry::SubSection(section) => { + section.collect_levels(level + 1, name, items); + } + } + } + } +} + +enum SectionEntry { + Duration(Duration), + SubSection(TimerSection), +} + +impl SectionEntry { + fn total_duration(&self) -> Duration { + match self { + SectionEntry::Duration(duration) => *duration, + SectionEntry::SubSection(timer) => timer.total_duration(), + } + } +} diff --git a/src/tools/opt-dist/src/training.rs b/src/tools/opt-dist/src/training.rs new file mode 100644 index 00000000000..10f4a603695 --- /dev/null +++ b/src/tools/opt-dist/src/training.rs @@ -0,0 +1,202 @@ +use crate::environment::Environment; +use crate::exec::{cmd, CmdBuilder}; +use crate::utils::io::{count_files, delete_directory}; +use anyhow::Context; +use camino::{Utf8Path, Utf8PathBuf}; +use humansize::BINARY; + +const LLVM_PGO_CRATES: &[&str] = &[ + "syn-1.0.89", + "cargo-0.60.0", + "serde-1.0.136", + "ripgrep-13.0.0", + "regex-1.5.5", + "clap-3.1.6", + "hyper-0.14.18", +]; + +const RUSTC_PGO_CRATES: &[&str] = &[ + "externs", + "ctfe-stress-5", + "cargo-0.60.0", + "token-stream-stress", + "match-stress", + "tuple-stress", + "diesel-1.4.8", + "bitmaps-3.1.0", +]; + +const LLVM_BOLT_CRATES: &[&str] = LLVM_PGO_CRATES; + +fn init_compiler_benchmarks( + env: &dyn Environment, + profiles: &[&str], + scenarios: &[&str], + crates: &[&str], +) -> CmdBuilder { + // Run rustc-perf benchmarks + // Benchmark using profile_local with eprintln, which essentially just means + // don't actually benchmark -- just make sure we run rustc a bunch of times. + cmd(&[ + env.cargo_stage_0().as_str(), + "run", + "-p", + "collector", + "--bin", + "collector", + "--", + "profile_local", + "eprintln", + env.rustc_stage_2().as_str(), + "--id", + "Test", + "--cargo", + env.cargo_stage_0().as_str(), + "--profiles", + profiles.join(",").as_str(), + "--scenarios", + scenarios.join(",").as_str(), + "--include", + crates.join(",").as_str(), + ]) + .env("RUST_LOG", "collector=debug") + .env("RUSTC", env.rustc_stage_0().as_str()) + .env("RUSTC_BOOTSTRAP", "1") + .workdir(&env.rustc_perf_dir()) +} + +fn merge_llvm_profiles( + env: &dyn Environment, + merged_path: &Utf8Path, + profile_dir: &Utf8Path, +) -> anyhow::Result<()> { + cmd(&[ + env.downloaded_llvm_dir().join("bin/llvm-profdata").as_str(), + "merge", + "-o", + merged_path.as_str(), + profile_dir.as_str(), + ]) + .run() + .context("Cannot merge LLVM profiles")?; + Ok(()) +} + +fn log_profile_stats( + name: &str, + merged_profile: &Utf8Path, + profile_root: &Utf8Path, +) -> anyhow::Result<()> { + log::info!("{name} PGO statistics"); + log::info!( + "{merged_profile}: {}", + humansize::format_size(std::fs::metadata(merged_profile.as_std_path())?.len(), BINARY) + ); + log::info!( + "{profile_root}: {}", + humansize::format_size(fs_extra::dir::get_size(profile_root.as_std_path())?, BINARY) + ); + log::info!("Profile file count: {}", count_files(profile_root)?); + Ok(()) +} + +pub struct LlvmPGOProfile(pub Utf8PathBuf); + +pub fn gather_llvm_profiles( + env: &dyn Environment, + profile_root: &Utf8Path, +) -> anyhow::Result { + log::info!("Running benchmarks with PGO instrumented LLVM"); + + init_compiler_benchmarks(env, &["Debug", "Opt"], &["Full"], LLVM_PGO_CRATES) + .run() + .context("Cannot gather LLVM PGO profiles")?; + + let merged_profile = env.opt_artifacts().join("llvm-pgo.profdata"); + log::info!("Merging LLVM PGO profiles to {merged_profile}"); + + merge_llvm_profiles(env, &merged_profile, profile_root)?; + log_profile_stats("LLVM", &merged_profile, profile_root)?; + + // We don't need the individual .profraw files now that they have been merged + // into a final .profdata + delete_directory(profile_root)?; + + Ok(LlvmPGOProfile(merged_profile)) +} + +pub struct RustcPGOProfile(pub Utf8PathBuf); + +pub fn gather_rustc_profiles( + env: &dyn Environment, + profile_root: &Utf8Path, +) -> anyhow::Result { + log::info!("Running benchmarks with PGO instrumented rustc"); + + // The profile data is written into a single filepath that is being repeatedly merged when each + // rustc invocation ends. Empirically, this can result in some profiling data being lost. That's + // why we override the profile path to include the PID. This will produce many more profiling + // files, but the resulting profile will produce a slightly faster rustc binary. + let profile_template = profile_root.join("default_%m_%p.profraw"); + + // Here we're profiling the `rustc` frontend, so we also include `Check`. + // The benchmark set includes various stress tests that put the frontend under pressure. + init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["All"], RUSTC_PGO_CRATES) + .env("LLVM_PROFILE_FILE", profile_template.as_str()) + .run() + .context("Cannot gather rustc PGO profiles")?; + + let merged_profile = env.opt_artifacts().join("rustc-pgo.profdata"); + log::info!("Merging Rustc PGO profiles to {merged_profile}"); + + merge_llvm_profiles(env, &merged_profile, profile_root)?; + log_profile_stats("Rustc", &merged_profile, profile_root)?; + + // We don't need the individual .profraw files now that they have been merged + // into a final .profdata + delete_directory(profile_root)?; + + Ok(RustcPGOProfile(merged_profile)) +} + +pub struct LlvmBoltProfile(pub Utf8PathBuf); + +pub fn gather_llvm_bolt_profiles(env: &dyn Environment) -> anyhow::Result { + log::info!("Running benchmarks with BOLT instrumented LLVM"); + + init_compiler_benchmarks(env, &["Check", "Debug", "Opt"], &["Full"], LLVM_BOLT_CRATES) + .run() + .context("Cannot gather LLVM BOLT profiles")?; + + let merged_profile = env.opt_artifacts().join("bolt.profdata"); + let profile_root = Utf8PathBuf::from("/tmp/prof.fdata"); + log::info!("Merging LLVM BOLT profiles to {merged_profile}"); + + let profiles: Vec<_> = + glob::glob(&format!("{profile_root}*"))?.into_iter().collect::, _>>()?; + + let mut merge_args = vec!["merge-fdata"]; + merge_args.extend(profiles.iter().map(|p| p.to_str().unwrap())); + + cmd(&merge_args) + .redirect_output(merged_profile.clone()) + .run() + .context("Cannot merge BOLT profiles")?; + + log::info!("LLVM BOLT statistics"); + log::info!( + "{merged_profile}: {}", + humansize::format_size(std::fs::metadata(merged_profile.as_std_path())?.len(), BINARY) + ); + + let size = profiles + .iter() + .map(|p| std::fs::metadata(p).map(|metadata| metadata.len())) + .collect::, _>>()? + .into_iter() + .sum::(); + log::info!("{profile_root}: {}", humansize::format_size(size, BINARY)); + log::info!("Profile file count: {}", profiles.len()); + + Ok(LlvmBoltProfile(merged_profile)) +} diff --git a/src/tools/opt-dist/src/utils/io.rs b/src/tools/opt-dist/src/utils/io.rs new file mode 100644 index 00000000000..43546e5fcfa --- /dev/null +++ b/src/tools/opt-dist/src/utils/io.rs @@ -0,0 +1,48 @@ +use anyhow::Context; +use camino::Utf8Path; +use fs_extra::dir::CopyOptions; +use std::fs::File; + +/// Delete and re-create the directory. +pub fn reset_directory(path: &Utf8Path) -> anyhow::Result<()> { + log::info!("Resetting directory {path}"); + let _ = std::fs::remove_dir(path); + std::fs::create_dir_all(path)?; + Ok(()) +} + +pub fn copy_directory(src: &Utf8Path, dst: &Utf8Path) -> anyhow::Result<()> { + log::info!("Copying directory {src} to {dst}"); + fs_extra::dir::copy(src, dst, &CopyOptions::default().copy_inside(true))?; + Ok(()) +} + +#[allow(unused)] +pub fn move_directory(src: &Utf8Path, dst: &Utf8Path) -> anyhow::Result<()> { + log::info!("Moving directory {src} to {dst}"); + fs_extra::dir::move_dir(src, dst, &CopyOptions::default().content_only(true))?; + Ok(()) +} + +/// Counts all children of a directory (non-recursively). +pub fn count_files(dir: &Utf8Path) -> anyhow::Result { + Ok(std::fs::read_dir(dir)?.count() as u64) +} + +pub fn delete_directory(path: &Utf8Path) -> anyhow::Result<()> { + log::info!("Deleting directory `{path}`"); + std::fs::remove_dir_all(path.as_std_path()) + .context(format!("Cannot remove directory {path}"))?; + Ok(()) +} + +pub fn unpack_archive(path: &Utf8Path, dest_dir: &Utf8Path) -> anyhow::Result<()> { + log::info!("Unpacking directory `{path}` into `{dest_dir}`"); + + assert!(path.as_str().ends_with(".tar.xz")); + let file = File::open(path.as_std_path())?; + let file = xz::read::XzDecoder::new(file); + let mut archive = tar::Archive::new(file); + archive.unpack(dest_dir.as_std_path())?; + Ok(()) +} diff --git a/src/tools/opt-dist/src/utils/mod.rs b/src/tools/opt-dist/src/utils/mod.rs new file mode 100644 index 00000000000..9305d4989aa --- /dev/null +++ b/src/tools/opt-dist/src/utils/mod.rs @@ -0,0 +1,36 @@ +pub mod io; + +use crate::environment::Environment; +use crate::utils::io::delete_directory; +use humansize::BINARY; +use sysinfo::{DiskExt, RefreshKind, System, SystemExt}; + +pub fn format_env_variables() -> String { + let vars = std::env::vars().map(|(key, value)| format!("{key}={value}")).collect::>(); + vars.join("\n") +} + +pub fn print_free_disk_space() -> anyhow::Result<()> { + let sys = System::new_with_specifics(RefreshKind::default().with_disks_list().with_disks()); + let available_space: u64 = sys.disks().iter().map(|d| d.available_space()).sum(); + let total_space: u64 = sys.disks().iter().map(|d| d.total_space()).sum(); + let used_space = total_space - available_space; + + log::info!( + "Free disk space: {} out of total {} ({:.2}% used)", + humansize::format_size(available_space, BINARY), + humansize::format_size(total_space, BINARY), + (used_space as f64 / total_space as f64) * 100.0 + ); + Ok(()) +} + +pub fn clear_llvm_files(env: &dyn Environment) -> anyhow::Result<()> { + // Bootstrap currently doesn't support rebuilding LLVM when PGO options + // change (or any other llvm-related options); so just clear out the relevant + // directories ourselves. + log::info!("Clearing LLVM build files"); + delete_directory(&env.build_artifacts().join("llvm"))?; + delete_directory(&env.build_artifacts().join("lld"))?; + Ok(()) +} diff --git a/src/tools/tidy/src/deps.rs b/src/tools/tidy/src/deps.rs index ecc84c1618c..1dea956c117 100644 --- a/src/tools/tidy/src/deps.rs +++ b/src/tools/tidy/src/deps.rs @@ -40,10 +40,12 @@ const EXCEPTIONS: &[(&str, &str)] = &[ ("ar_archive_writer", "Apache-2.0 WITH LLVM-exception"), // rustc ("colored", "MPL-2.0"), // rustfmt ("dissimilar", "Apache-2.0"), // rustdoc, rustc_lexer (few tests) via expect-test, (dev deps) + ("encoding_rs", "(Apache-2.0 OR MIT) AND BSD-3-Clause"), // opt-dist ("fluent-langneg", "Apache-2.0"), // rustc (fluent translations) ("fortanix-sgx-abi", "MPL-2.0"), // libstd but only for `sgx` target. FIXME: this dependency violates the documentation comment above. ("instant", "BSD-3-Clause"), // rustc_driver/tracing-subscriber/parking_lot ("mdbook", "MPL-2.0"), // mdbook + ("openssl", "Apache-2.0"), // opt-dist ("ryu", "Apache-2.0 OR BSL-1.0"), // cargo/... (because of serde) ("self_cell", "Apache-2.0"), // rustc (fluent translations) ("snap", "BSD-3-Clause"), // rustc