nix/maintainers/release-credits

184 lines
5.8 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env nix
#!nix develop --impure --expr
#!nix ``
#!nix let flake = builtins.getFlake ("git+file://" + toString ../.);
#!nix pkgs = flake.inputs.nixpkgs.legacyPackages.${builtins.currentSystem};
#!nix in pkgs.mkShell { nativeBuildInputs = [
#!nix (pkgs.python3.withPackages (ps: with ps; [ requests ]))
#!nix ]; }
#!nix `` --command python3
# This script lists out the contributors for a given release.
# It must be run from the root of the Nix repository.
import os
import sys
import json
import requests
github_token = os.environ.get("GITHUB_TOKEN")
if not github_token:
print("GITHUB_TOKEN is not set. If you hit the rate limit, set it", file=sys.stderr)
# Might be ok, as we have a cache.
# raise ValueError("GITHUB_TOKEN must be set")
# 1. Read the current version in .version
version = os.environ.get("VERSION")
if not version:
version = open(".version").read().strip()
print(f"Generating release credits for Nix {version}", file=sys.stderr)
# 2. Compute previous version
vcomponents = version.split(".")
if len(vcomponents) >= 2:
prev_version = f"{vcomponents[0]}.{int(vcomponents[1])-1}.0"
else:
raise ValueError(".version must have at least two components")
# For unreleased versions
endref = "HEAD"
# For older releases
# endref = version
# 2. Find the merge base between the current version and the previous version
mergeBase = os.popen(f"git merge-base {prev_version} {endref}").read().strip()
print(f"Merge base between {prev_version} and {endref} is {mergeBase}", file=sys.stderr)
# 3. Find the date of the merge base
mergeBaseDate = os.popen(f"git show -s --format=%ci {mergeBase}").read().strip()[0:10]
print(f"Merge base date is {mergeBaseDate}", file=sys.stderr)
# 4. Get the commits between the merge base and the current version
def get_commits():
raw = os.popen(f"git log --pretty=format:'%H\t%an\t%ae' {mergeBase}..{endref}").read().strip()
lines = raw.split("\n")
return [ { "hash": items[0], "author": items[1], "email": items[2] }
for line in lines
for items in (line.split("\t"),)
]
def commits_to_first_commit_by_email(commits):
by_email = dict()
for commit in commits:
email = commit["email"]
if email not in by_email:
by_email[email] = commit
return by_email
samples = commits_to_first_commit_by_email(get_commits())
# For quick testing, only pick two samples from the dict
# samples = dict(list(samples.items())[:2])
# Query the GitHub API to get handle
def get_github_commit(commit):
url = f"https://api.github.com/repos/NixOS/nix/commits/{commit['hash']}"
headers = {'Authorization': f'token {github_token}'}
response = requests.get(url, headers=headers)
response.raise_for_status()
return response.json()
class Cache:
def __init__(self, filename, require = True):
self.filename = filename
try:
with open(filename, "r") as f:
self.values = json.load(f)
except FileNotFoundError:
if require:
raise
self.values = dict()
def save(self):
with open(self.filename, "w") as f:
json.dump(self.values, f, indent=4)
print(f"Saved cache to {self.filename}", file=sys.stderr)
# The email to handle cache maps email addresses to either
# - a handle (string)
# - None (if no handle was found)
email_to_handle_cache = Cache("maintainers/data/release-credits-email-to-handle.json")
handles = set()
emails = dict()
for sample in samples:
s = samples[sample]
email = s["email"]
if not email in email_to_handle_cache.values:
print(f"Querying GitHub API for {s['hash']}, to get handle for {s['email']}")
ghc = get_github_commit(samples[sample])
gha = ghc["author"]
if gha and gha["login"]:
handle = gha["login"]
print(f"Handle: {handle}")
email_to_handle_cache.values[email] = handle
else:
print(f"Found no handle for {s['email']}")
email_to_handle_cache.values[email] = None
handle = email_to_handle_cache.values[email]
if handle is not None:
handles.add(handle)
else:
emails[email] = s["author"]
# print(email_to_handle_cache.values)
email_to_handle_cache.save()
handle_to_name_cache = Cache("maintainers/data/release-credits-handle-to-name.json")
print(f"Found {len(handles)} handles", file=sys.stderr)
for handle in handles:
if not handle in handle_to_name_cache.values:
print(f"Querying GitHub API for {handle}, to get name", file=sys.stderr)
url = f"https://api.github.com/users/{handle}"
headers = {'Authorization': f'token {github_token}'}
response = requests.get(url, headers=headers)
response.raise_for_status()
user = response.json()
name = user["name"]
print(f"Name: {name}", file=sys.stderr)
handle_to_name_cache.values[handle] = name
handle_to_name_cache.save()
entries = list()
2024-07-29 22:15:49 +00:00
def is_bot(handle):
return handle.endswith("[bot]")
for handle in handles:
name = handle_to_name_cache.values[handle]
if name is None:
# This way it looks more regular
name = handle
2024-07-29 22:15:49 +00:00
# Note to bots: I'm only doing this because it would distract humans, and
# therefore delay your creation. I'm sorry; it's for your benefit.
if not is_bot(handle):
entries += [ f"- {name} [**(@{handle})**](https://github.com/{handle})" ]
def shuffle(entries):
salt = os.urandom(16)
return sorted(entries, key=lambda x: hash((x, salt)))
# Fair ordering is undecidable
entries = shuffle(entries)
# For a sanity check, we could sort the entries by handle instead.
# entries = sorted(entries)
print("")
print(f"This release was made possible by the following {len(entries)} contributors:")
print("")
for entry in entries:
print(entry)
for email in emails:
print(f"- {emails[email]}")