#!/usr/bin/env python3
"""Utilities for CI.

This dynamically prepares a list of routines that had a source file change based on
git history.
"""

import json
import subprocess as sp
import sys
from dataclasses import dataclass
from glob import glob, iglob
from inspect import cleandoc
from os import getenv
from pathlib import Path
from typing import TypedDict

USAGE = cleandoc(
    """
    usage:

    ./ci/ci-util.py <COMMAND> [flags]

    COMMAND:
        generate-matrix
            Calculate a matrix of which functions had source changes and print it
            as a JSON object.

        locate-baseline [--download] [--extract]
            Locate the most recent benchmark baseline available in CI and, if flags
            specify, download and extract it. Never exits with nonzero status if
            downloading fails.

            Note that `--extract` will overwrite files in `iai-home`.

        check-regressions [iai-home]
            Check the given `iai-home` directory (defaults to `iai-home`) for
            `summary.json` files and see if there are any regressions. This is used
            as a workaround for `iai-callgrind` not exiting with error status; see
            <https://github.com/iai-callgrind/iai-callgrind/issues/337>.
    """
)
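
# Illustrative CI usage (a sketch; the exact workflow wiring is an assumption and
# not defined by this script):
#     ./ci/ci-util.py generate-matrix >> "$GITHUB_OUTPUT"
#     ./ci/ci-util.py locate-baseline --download --extract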

REPO_ROOT = Path(__file__).parent.parent
GIT = ["git", "-C", REPO_ROOT]
DEFAULT_BRANCH = "master"
WORKFLOW_NAME = "CI"  # Workflow that generates the benchmark artifacts
ARTIFACT_GLOB = "baseline-icount*"

# Don't run exhaustive tests if these files change, even if they contain a function
# definition.
IGNORE_FILES = [
    "src/math/support/",
    "src/libm_helper.rs",
    "src/math/arch/intrinsics.rs",
]

TYPES = ["f16", "f32", "f64", "f128"]


def eprint(*args, **kwargs):
    """Print to stderr."""
    print(*args, file=sys.stderr, **kwargs)


class FunctionDef(TypedDict):
    """Type for an entry in `function-definitions.json`"""

    sources: list[str]
    type: str
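    # Illustrative entry shape (the routine name and path are placeholders, not
    # taken from the actual definitions file):
    #     "cosf": {"sources": ["src/math/cosf.rs"], "type": "f32"}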


@dataclass
class Context:
    gh_ref: str | None
    changed: list[Path]
    defs: dict[str, FunctionDef]

    def __init__(self) -> None:
        self.gh_ref = getenv("GITHUB_REF")
        self.changed = []
        self._init_change_list()

        with open(REPO_ROOT.joinpath("etc/function-definitions.json")) as f:
            defs = json.load(f)

        defs.pop("__comment", None)
        self.defs = defs

    def _init_change_list(self):
        """Create a list of files that have been changed. This uses `GITHUB_REF`
        when it points at a PR merge ref; otherwise no diff is available and the
        change list is left empty.
        """

        # For pull requests, GitHub creates a ref `refs/pull/1234/merge` (1234 being
        # the PR number), and sets this as `GITHUB_REF`.
        ref = self.gh_ref
        eprint(f"using ref `{ref}`")
        if ref is None or "merge" not in ref:
            # If the ref is not for `merge` then we are not in PR CI
            eprint("No diff available for ref")
            return

        # The ref is for a dummy merge commit. We can extract the merge base by
        # inspecting all parents (`^@`).
        merge_sha = sp.check_output(
            GIT + ["show-ref", "--hash", ref], text=True
        ).strip()
        merge_log = sp.check_output(GIT + ["log", "-1", merge_sha], text=True)
        eprint(f"Merge:\n{merge_log}\n")

        parents = (
            sp.check_output(GIT + ["rev-parse", f"{merge_sha}^@"], text=True)
            .strip()
            .splitlines()
        )
        assert len(parents) == 2, f"expected two-parent merge but got:\n{parents}"
        base = parents[0].strip()
        incoming = parents[1].strip()

        eprint(f"base: {base}, incoming: {incoming}")
        textlist = sp.check_output(
            GIT + ["diff", base, incoming, "--name-only"], text=True
        )
        self.changed = [Path(p) for p in textlist.splitlines()]
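
        # Roughly the shell-level equivalent of the sequence above (illustrative
        # only; `refs/pull/1234/merge` is a placeholder ref):
        #     merge=$(git show-ref --hash refs/pull/1234/merge)
        #     git rev-parse "$merge^@"     # prints two parents: base, incoming
        #     git diff "$base" "$incoming" --name-only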

    @staticmethod
    def _ignore_file(fname: str) -> bool:
        return any(fname.startswith(pfx) for pfx in IGNORE_FILES)

    def changed_routines(self) -> dict[str, list[str]]:
        """Create a list of routines for which one or more files have been updated,
        separated by type.
        """
        routines = set()
        for name, meta in self.defs.items():
            # Don't update if changes to the file should be ignored
            sources = (f for f in meta["sources"] if not self._ignore_file(f))

            # Select changed files
            changed = [f for f in sources if Path(f) in self.changed]

            if len(changed) > 0:
                eprint(f"changed files for {name}: {changed}")
                routines.add(name)

        ret = {}
        for r in sorted(routines):
            ret.setdefault(self.defs[r]["type"], []).append(r)

        return ret

    def make_workflow_output(self) -> str:
        """Create a JSON object with one entry per type, listing the routines
        affected by changed files (the list is empty if nothing changed for that
        type).
        """
        changed = self.changed_routines()
        ret = []
        for ty in TYPES:
            ty_changed = changed.get(ty, [])
            item = {
                "ty": ty,
                "changed": ",".join(ty_changed),
            }
            ret.append(item)
        output = json.dumps({"matrix": ret}, separators=(",", ":"))
        eprint(f"output: {output}")
        return output


def locate_baseline(flags: list[str]) -> None:
    """Find the most recent baseline from CI and download it if requested.

    This returns rather than erroring, even if the `gh` commands fail. This is to
    avoid erroring in CI if the baseline is unavailable (artifact time limit
    exceeded, first run on the branch, etc.).
    """

    download = False
    extract = False

    while len(flags) > 0:
        match flags[0]:
            case "--download":
                download = True
            case "--extract":
                extract = True
            case _:
                eprint(USAGE)
                exit(1)
        flags = flags[1:]

    if extract and not download:
        eprint("cannot extract without downloading")
        exit(1)

    try:
        # Locate the most recent job to complete with success on our branch
        latest_job = sp.check_output(
            [
                "gh",
                "run",
                "list",
                "--status=success",
                f"--branch={DEFAULT_BRANCH}",
                "--json=databaseId,url,headSha,conclusion,createdAt,"
                "status,workflowDatabaseId,workflowName",
                # Return the first array element matching our workflow name. NB: we
                # can't just use `--limit=1` because jq filtering happens after
                # limiting. We also can't just use `--workflow` because GH gets
                # confused by the different file names in history.
                f'--jq=[.[] | select(.workflowName == "{WORKFLOW_NAME}")][0]',
            ],
            text=True,
        )
    except sp.CalledProcessError as e:
        eprint(f"failed to run github command: {e}")
        return

    try:
        latest = json.loads(latest_job)
        eprint("latest job: ", json.dumps(latest, indent=4))
    except json.JSONDecodeError as e:
        eprint(f"failed to decode json '{latest_job}', {e}")
        return

    if not download:
        eprint("--download not specified, returning")
        return

    job_id = latest.get("databaseId")
    if job_id is None:
        eprint("skipping download step")
        return

    sp.run(
        ["gh", "run", "download", str(job_id), f"--pattern={ARTIFACT_GLOB}"],
        check=False,
    )

    if not extract:
        eprint("skipping extraction step")
        return

    # Find the baseline with the most recent timestamp. GH downloads the files to
    # e.g. `some-dirname/some-dirname.tar.xz`, so just glob the whole thing together.
    candidate_baselines = glob(f"{ARTIFACT_GLOB}/{ARTIFACT_GLOB}")
    if len(candidate_baselines) == 0:
        eprint("no possible baseline directories found")
        return

    candidate_baselines.sort(reverse=True)
    baseline_archive = candidate_baselines[0]
    eprint(f"extracting {baseline_archive}")
    sp.run(["tar", "xJvf", baseline_archive], check=True)
    eprint("baseline extracted successfully")


def check_iai_regressions(iai_home: str | Path | None):
    """Find regressions in iai `summary.json` files; exit with failure if any are
    found.
    """
    if iai_home is None:
        iai_home = "iai-home"
    iai_home = Path(iai_home)

    found_summaries = False
    regressions = []
    for summary_path in iglob("**/summary.json", root_dir=iai_home, recursive=True):
        found_summaries = True
        with open(iai_home / summary_path, "r") as f:
            summary = json.load(f)

        summary_regs = []
        run = summary["callgrind_summary"]["callgrind_run"]
        name_entry = {"name": f"{summary['function_name']}.{summary['id']}"}

        for segment in run["segments"]:
            summary_regs.extend(segment["regressions"])

        summary_regs.extend(run["total"]["regressions"])

        regressions.extend(name_entry | reg for reg in summary_regs)

    if not found_summaries:
        eprint(f"did not find any summary.json files within {iai_home}")
        exit(1)

    if len(regressions) > 0:
        eprint("Found regressions:", json.dumps(regressions, indent=4))
        exit(1)


def main():
    match sys.argv[1:]:
        case ["generate-matrix"]:
            ctx = Context()
            output = ctx.make_workflow_output()
            print(f"matrix={output}")
        case ["locate-baseline", *flags]:
            locate_baseline(flags)
        case ["check-regressions"]:
            check_iai_regressions(None)
        case ["check-regressions", iai_home]:
            check_iai_regressions(iai_home)
        case ["--help" | "-h"]:
            print(USAGE)
            exit()
        case _:
            eprint(USAGE)
            exit(1)


if __name__ == "__main__":
    main()