From e419b3d1ec234d428a00c9d6a8527ec4f6dc7e1a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E8=AE=B8=E6=9D=B0=E5=8F=8B=20Jieyou=20Xu=20=28Joe=29?=
 <39484203+jieyouxu@users.noreply.github.com>
Date: Tue, 29 Oct 2024 16:10:05 +0800
Subject: [PATCH] compiletest: improve robustness of LLVM version handling

---
 Cargo.lock                                |   1 +
 src/tools/compiletest/Cargo.toml          |   1 +
 src/tools/compiletest/src/common.rs       |   3 +-
 src/tools/compiletest/src/header.rs       | 112 ++++++++++++++--------
 src/tools/compiletest/src/header/tests.rs |  78 ++++++++++++---
 src/tools/compiletest/src/lib.rs          |   2 +-
 src/tools/compiletest/src/tests.rs        |  13 ---
 7 files changed, 140 insertions(+), 70 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 22b3e8280d6..fc16ccf0a67 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -720,6 +720,7 @@ dependencies = [
  "miropt-test-tools",
  "regex",
  "rustfix",
+ "semver",
  "serde",
  "serde_json",
  "tracing",
diff --git a/src/tools/compiletest/Cargo.toml b/src/tools/compiletest/Cargo.toml
index b6a89dd49e6..cef6b525a7b 100644
--- a/src/tools/compiletest/Cargo.toml
+++ b/src/tools/compiletest/Cargo.toml
@@ -18,6 +18,7 @@ build_helper = { path = "../build_helper" }
 tracing = "0.1"
 tracing-subscriber = { version = "0.3.3", default-features = false, features = ["fmt", "env-filter", "smallvec", "parking_lot", "ansi"] }
 regex = "1.0"
+semver = { version = "1.0.23", features = ["serde"] }
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
 rustfix = "0.8.1"
diff --git a/src/tools/compiletest/src/common.rs b/src/tools/compiletest/src/common.rs
index 69ac4644941..e82b88eef79 100644
--- a/src/tools/compiletest/src/common.rs
+++ b/src/tools/compiletest/src/common.rs
@@ -7,6 +7,7 @@ use std::sync::OnceLock;
 use std::{fmt, iter};
 
 use build_helper::git::GitConfig;
+use semver::Version;
 use serde::de::{Deserialize, Deserializer, Error as _};
 use test::{ColorConfig, OutputFormat};
 
@@ -298,7 +299,7 @@ pub struct Config {
     pub lldb_version: Option<u32>,
 
     /// Version of LLVM
-    pub llvm_version: Option<u32>,
+    pub llvm_version: Option<Version>,
 
     /// Is LLVM a system LLVM
     pub system_llvm: bool,
diff --git a/src/tools/compiletest/src/header.rs b/src/tools/compiletest/src/header.rs
index a3bf36c0e90..bfcdd747eb4 100644
--- a/src/tools/compiletest/src/header.rs
+++ b/src/tools/compiletest/src/header.rs
@@ -6,6 +6,7 @@ use std::io::prelude::*;
 use std::path::{Path, PathBuf};
 use std::process::Command;
 
+use semver::Version;
 use tracing::*;
 
 use crate::common::{Config, Debugger, FailMode, Mode, PassMode};
@@ -1113,26 +1114,39 @@ fn parse_normalize_rule(header: &str) -> Option<(String, String)> {
     Some((regex, replacement))
 }
 
-pub fn extract_llvm_version(version: &str) -> Option<u32> {
-    let pat = |c: char| !c.is_ascii_digit() && c != '.';
-    let version_without_suffix = match version.find(pat) {
-        Some(pos) => &version[..pos],
+/// Given an llvm version string that looks like `1.2.3-rc1`, extract as semver. Note that this
+/// accepts more than just strict `semver` syntax (as in `major.minor.patch`); this permits omitting
+/// minor and patch version components so users can write e.g. `//@ min-llvm-version: 19` instead of
+/// having to write `//@ min-llvm-version: 19.0.0`.
+///
+/// Currently panics if the input string is malformed, though we really should not use panic as an
+/// error handling strategy.
+///
+/// FIXME(jieyouxu): improve error handling
+pub fn extract_llvm_version(version: &str) -> Version {
+    // The version substring we're interested in usually looks like the `1.2.3`, without any of the
+    // fancy suffix like `-rc1` or `meow`.
+    let version = version.trim();
+    let uninterested = |c: char| !c.is_ascii_digit() && c != '.';
+    let version_without_suffix = match version.split_once(uninterested) {
+        Some((prefix, _suffix)) => prefix,
         None => version,
     };
-    let components: Vec<u32> = version_without_suffix
+
+    let components: Vec<u64> = version_without_suffix
         .split('.')
-        .map(|s| s.parse().expect("Malformed version component"))
+        .map(|s| s.parse().expect("llvm version component should consist of only digits"))
         .collect();
-    let version = match *components {
-        [a] => a * 10_000,
-        [a, b] => a * 10_000 + b * 100,
-        [a, b, c] => a * 10_000 + b * 100 + c,
-        _ => panic!("Malformed version"),
-    };
-    Some(version)
+
+    match &components[..] {
+        [major] => Version::new(*major, 0, 0),
+        [major, minor] => Version::new(*major, *minor, 0),
+        [major, minor, patch] => Version::new(*major, *minor, *patch),
+        _ => panic!("malformed llvm version string, expected only 1-3 components: {version}"),
+    }
 }
 
-pub fn extract_llvm_version_from_binary(binary_path: &str) -> Option<u32> {
+pub fn extract_llvm_version_from_binary(binary_path: &str) -> Option<Version> {
     let output = Command::new(binary_path).arg("--version").output().ok()?;
     if !output.status.success() {
         return None;
@@ -1140,7 +1154,7 @@ pub fn extract_llvm_version_from_binary(binary_path: &str) -> Option<u32> {
     let version = String::from_utf8(output.stdout).ok()?;
     for line in version.lines() {
         if let Some(version) = line.split("LLVM version ").nth(1) {
-            return extract_llvm_version(version);
+            return Some(extract_llvm_version(version));
         }
     }
     None
@@ -1247,15 +1261,17 @@ pub fn llvm_has_libzstd(config: &Config) -> bool {
     false
 }
 
-/// Takes a directive of the form `"<version1> [- <version2>]"`,
-/// returns the numeric representation of `<version1>` and `<version2>` as
-/// tuple: `(<version1> as u32, <version2> as u32)`.
+/// Takes a directive of the form `"<version1> [- <version2>]"`, returns the numeric representation
+/// of `<version1>` and `<version2>` as tuple: `(<version1>, <version2>)`.
 ///
-/// If the `<version2>` part is omitted, the second component of the tuple
-/// is the same as `<version1>`.
-fn extract_version_range<F>(line: &str, parse: F) -> Option<(u32, u32)>
+/// If the `<version2>` part is omitted, the second component of the tuple is the same as
+/// `<version1>`.
+fn extract_version_range<'a, F, VersionTy: Clone>(
+    line: &'a str,
+    parse: F,
+) -> Option<(VersionTy, VersionTy)>
 where
-    F: Fn(&str) -> Option<u32>,
+    F: Fn(&'a str) -> Option<VersionTy>,
 {
     let mut splits = line.splitn(2, "- ").map(str::trim);
     let min = splits.next().unwrap();
@@ -1273,7 +1289,7 @@ where
     let max = match max {
         Some("") => return None,
         Some(max) => parse(max)?,
-        _ => min,
+        _ => min.clone(),
     };
 
     Some((min, max))
@@ -1489,43 +1505,55 @@ fn ignore_llvm(config: &Config, line: &str) -> IgnoreDecision {
             };
         }
     }
-    if let Some(actual_version) = config.llvm_version {
-        if let Some(rest) = line.strip_prefix("min-llvm-version:").map(str::trim) {
-            let min_version = extract_llvm_version(rest).unwrap();
-            // Ignore if actual version is smaller the minimum required
-            // version
-            if actual_version < min_version {
+    if let Some(actual_version) = &config.llvm_version {
+        // Note that these `min` versions will check for not just major versions.
+
+        if let Some(version_string) = config.parse_name_value_directive(line, "min-llvm-version") {
+            let min_version = extract_llvm_version(&version_string);
+            // Ignore if actual version is smaller than the minimum required version.
+            if *actual_version < min_version {
                 return IgnoreDecision::Ignore {
-                    reason: format!("ignored when the LLVM version is older than {rest}"),
+                    reason: format!(
+                        "ignored when the LLVM version {actual_version} is older than {min_version}"
+                    ),
                 };
             }
-        } else if let Some(rest) = line.strip_prefix("min-system-llvm-version:").map(str::trim) {
-            let min_version = extract_llvm_version(rest).unwrap();
+        } else if let Some(version_string) =
+            config.parse_name_value_directive(line, "min-system-llvm-version")
+        {
+            let min_version = extract_llvm_version(&version_string);
             // Ignore if using system LLVM and actual version
             // is smaller the minimum required version
-            if config.system_llvm && actual_version < min_version {
+            if config.system_llvm && *actual_version < min_version {
                 return IgnoreDecision::Ignore {
-                    reason: format!("ignored when the system LLVM version is older than {rest}"),
+                    reason: format!(
+                        "ignored when the system LLVM version {actual_version} is older than {min_version}"
+                    ),
                 };
             }
-        } else if let Some(rest) = line.strip_prefix("ignore-llvm-version:").map(str::trim) {
+        } else if let Some(version_range) =
+            config.parse_name_value_directive(line, "ignore-llvm-version")
+        {
             // Syntax is: "ignore-llvm-version: <version1> [- <version2>]"
             let (v_min, v_max) =
-                extract_version_range(rest, extract_llvm_version).unwrap_or_else(|| {
-                    panic!("couldn't parse version range: {:?}", rest);
-                });
+                extract_version_range(&version_range, |s| Some(extract_llvm_version(s)))
+                    .unwrap_or_else(|| {
+                        panic!("couldn't parse version range: \"{version_range}\"");
+                    });
             if v_max < v_min {
-                panic!("Malformed LLVM version range: max < min")
+                panic!("malformed LLVM version range where {v_max} < {v_min}")
             }
             // Ignore if version lies inside of range.
-            if actual_version >= v_min && actual_version <= v_max {
+            if *actual_version >= v_min && *actual_version <= v_max {
                 if v_min == v_max {
                     return IgnoreDecision::Ignore {
-                        reason: format!("ignored when the LLVM version is {rest}"),
+                        reason: format!("ignored when the LLVM version is {actual_version}"),
                     };
                 } else {
                     return IgnoreDecision::Ignore {
-                        reason: format!("ignored when the LLVM version is between {rest}"),
+                        reason: format!(
+                            "ignored when the LLVM version is between {v_min} and {v_max}"
+                        ),
                     };
                 }
             }
diff --git a/src/tools/compiletest/src/header/tests.rs b/src/tools/compiletest/src/header/tests.rs
index c3c9496c4d2..2e6effcab98 100644
--- a/src/tools/compiletest/src/header/tests.rs
+++ b/src/tools/compiletest/src/header/tests.rs
@@ -1,9 +1,13 @@
 use std::io::Read;
 use std::path::Path;
 
-use super::iter_header;
+use semver::Version;
+
+use super::{
+    EarlyProps, HeadersCache, extract_llvm_version, extract_version_range, iter_header,
+    parse_normalize_rule,
+};
 use crate::common::{Config, Debugger, Mode};
-use crate::header::{EarlyProps, HeadersCache, parse_normalize_rule};
 
 fn make_test_description<R: Read>(
     config: &Config,
@@ -408,18 +412,66 @@ fn channel() {
 }
 
 #[test]
-fn test_extract_version_range() {
-    use super::{extract_llvm_version, extract_version_range};
+fn test_extract_llvm_version() {
+    // Note: officially, semver *requires* that versions at the minimum have all three
+    // `major.minor.patch` numbers, though for test-writer's convenience we allow omitting the minor
+    // and patch numbers (which will be stubbed out as 0).
+    assert_eq!(extract_llvm_version("0"), Version::new(0, 0, 0));
+    assert_eq!(extract_llvm_version("0.0"), Version::new(0, 0, 0));
+    assert_eq!(extract_llvm_version("0.0.0"), Version::new(0, 0, 0));
+    assert_eq!(extract_llvm_version("1"), Version::new(1, 0, 0));
+    assert_eq!(extract_llvm_version("1.2"), Version::new(1, 2, 0));
+    assert_eq!(extract_llvm_version("1.2.3"), Version::new(1, 2, 3));
+    assert_eq!(extract_llvm_version("4.5.6git"), Version::new(4, 5, 6));
+    assert_eq!(extract_llvm_version("4.5.6-rc1"), Version::new(4, 5, 6));
+    assert_eq!(extract_llvm_version("123.456.789-rc1"), Version::new(123, 456, 789));
+    assert_eq!(extract_llvm_version("8.1.2-rust"), Version::new(8, 1, 2));
+    assert_eq!(extract_llvm_version("9.0.1-rust-1.43.0-dev"), Version::new(9, 0, 1));
+    assert_eq!(extract_llvm_version("9.3.1-rust-1.43.0-dev"), Version::new(9, 3, 1));
+    assert_eq!(extract_llvm_version("10.0.0-rust"), Version::new(10, 0, 0));
+    assert_eq!(extract_llvm_version("11.1.0"), Version::new(11, 1, 0));
+    assert_eq!(extract_llvm_version("12.0.0libcxx"), Version::new(12, 0, 0));
+    assert_eq!(extract_llvm_version("12.0.0-rc3"), Version::new(12, 0, 0));
+    assert_eq!(extract_llvm_version("13.0.0git"), Version::new(13, 0, 0));
+}
 
-    assert_eq!(extract_version_range("1.2.3 - 4.5.6", extract_llvm_version), Some((10203, 40506)));
-    assert_eq!(extract_version_range("0   - 4.5.6", extract_llvm_version), Some((0, 40506)));
-    assert_eq!(extract_version_range("1.2.3 -", extract_llvm_version), None);
-    assert_eq!(extract_version_range("1.2.3 - ", extract_llvm_version), None);
-    assert_eq!(extract_version_range("- 4.5.6", extract_llvm_version), None);
-    assert_eq!(extract_version_range("-", extract_llvm_version), None);
-    assert_eq!(extract_version_range(" - 4.5.6", extract_llvm_version), None);
-    assert_eq!(extract_version_range("   - 4.5.6", extract_llvm_version), None);
-    assert_eq!(extract_version_range("0  -", extract_llvm_version), None);
+#[test]
+#[should_panic]
+fn test_llvm_version_invalid_components() {
+    extract_llvm_version("4.x.6");
+}
+
+#[test]
+#[should_panic]
+fn test_llvm_version_invalid_prefix() {
+    extract_llvm_version("meow4.5.6");
+}
+
+#[test]
+#[should_panic]
+fn test_llvm_version_too_many_components() {
+    extract_llvm_version("4.5.6.7");
+}
+
+#[test]
+fn test_extract_version_range() {
+    let wrapped_extract = |s: &str| Some(extract_llvm_version(s));
+
+    assert_eq!(
+        extract_version_range("1.2.3 - 4.5.6", wrapped_extract),
+        Some((Version::new(1, 2, 3), Version::new(4, 5, 6)))
+    );
+    assert_eq!(
+        extract_version_range("0   - 4.5.6", wrapped_extract),
+        Some((Version::new(0, 0, 0), Version::new(4, 5, 6)))
+    );
+    assert_eq!(extract_version_range("1.2.3 -", wrapped_extract), None);
+    assert_eq!(extract_version_range("1.2.3 - ", wrapped_extract), None);
+    assert_eq!(extract_version_range("- 4.5.6", wrapped_extract), None);
+    assert_eq!(extract_version_range("-", wrapped_extract), None);
+    assert_eq!(extract_version_range(" - 4.5.6", wrapped_extract), None);
+    assert_eq!(extract_version_range("   - 4.5.6", wrapped_extract), None);
+    assert_eq!(extract_version_range("0  -", wrapped_extract), None);
 }
 
 #[test]
diff --git a/src/tools/compiletest/src/lib.rs b/src/tools/compiletest/src/lib.rs
index 490df313228..ccf8057bf5c 100644
--- a/src/tools/compiletest/src/lib.rs
+++ b/src/tools/compiletest/src/lib.rs
@@ -228,7 +228,7 @@ pub fn parse_config(args: Vec<String>) -> Config {
         Some(x) => panic!("argument for --color must be auto, always, or never, but found `{}`", x),
     };
     let llvm_version =
-        matches.opt_str("llvm-version").as_deref().and_then(header::extract_llvm_version).or_else(
+        matches.opt_str("llvm-version").as_deref().map(header::extract_llvm_version).or_else(
             || header::extract_llvm_version_from_binary(&matches.opt_str("llvm-filecheck")?),
         );
 
diff --git a/src/tools/compiletest/src/tests.rs b/src/tools/compiletest/src/tests.rs
index 680579c59ae..fec746904de 100644
--- a/src/tools/compiletest/src/tests.rs
+++ b/src/tools/compiletest/src/tests.rs
@@ -1,7 +1,6 @@
 use std::ffi::OsString;
 
 use crate::debuggers::{extract_gdb_version, extract_lldb_version};
-use crate::header::extract_llvm_version;
 use crate::is_test;
 
 #[test]
@@ -67,15 +66,3 @@ fn is_test_test() {
     assert!(!is_test(&OsString::from("#a_dog_gif")));
     assert!(!is_test(&OsString::from("~a_temp_file")));
 }
-
-#[test]
-fn test_extract_llvm_version() {
-    assert_eq!(extract_llvm_version("8.1.2-rust"), Some(80102));
-    assert_eq!(extract_llvm_version("9.0.1-rust-1.43.0-dev"), Some(90001));
-    assert_eq!(extract_llvm_version("9.3.1-rust-1.43.0-dev"), Some(90301));
-    assert_eq!(extract_llvm_version("10.0.0-rust"), Some(100000));
-    assert_eq!(extract_llvm_version("11.1.0"), Some(110100));
-    assert_eq!(extract_llvm_version("12.0.0libcxx"), Some(120000));
-    assert_eq!(extract_llvm_version("12.0.0-rc3"), Some(120000));
-    assert_eq!(extract_llvm_version("13.0.0git"), Some(130000));
-}