tree-sitter: partially rewrite update script in python

The update script is getting out of hand, so we should rewrite it in
something that resembles sanity, i.e. Python.

This is the first step, rewriting the part that checks and fetches a
release.

Next up is integrating latestGithubRepos into that script.
This commit is contained in:
Profpatsch 2022-09-06 15:27:59 +02:00
parent f0adf4fcae
commit d325f6f702
2 changed files with 81 additions and 35 deletions

View File

@ -28,7 +28,7 @@ let
# to update: # to update:
# 1) change all these hashes # 1) change all these hashes
# 2) nix-build -A tree-sitter.updater.update-all-grammars # 2) nix-build -A tree-sitter.updater.update-all-grammars
# 3) OPTIONAL: Set GITHUB_TOKEN env variable to avoid api rate limit # 3) Set GITHUB_TOKEN env variable to avoid api rate limit (Use a Personal Access Token from https://github.com/settings/tokens It does not need any permissions)
# 4) run the ./result script that is output by that (it updates ./grammars) # 4) run the ./result script that is output by that (it updates ./grammars)
version = "0.20.7"; version = "0.20.7";
sha256 = "sha256-5ILiN5EfJ7WpeYBiXynfcLucdp8zmxVOj4gLkaFQYts="; sha256 = "sha256-5ILiN5EfJ7WpeYBiXynfcLucdp8zmxVOj4gLkaFQYts=";
@ -42,8 +42,8 @@ let
fetchSubmodules = true; fetchSubmodules = true;
}; };
update-all-grammars = import ./update.nix { update-all-grammars = callPackage ./update.nix {
inherit writeShellScript nix-prefetch-git curl jq xe src formats lib; inherit src;
}; };
fetchGrammar = (v: fetchgit { inherit (v) url rev sha256 fetchSubmodules; }); fetchGrammar = (v: fetchgit { inherit (v) url rev sha256 fetchSubmodules; });

View File

@ -1,7 +1,10 @@
{ writeShellScript { writeShellScript
, writeText
, writers
, nix-prefetch-git , nix-prefetch-git
, formats , formats
, lib , lib
, coreutils
, curl , curl
, jq , jq
, xe , xe
@ -404,28 +407,80 @@ let
# TODO # TODO
urlEscape = x: x; urlEscape = x: x;
# generic bash script to find the latest github release for a repo # update one tree-sitter grammar repo and print their nix-prefetch-git output
latestGithubRelease = { orga, repo }: writeShellScript "latest-github-release" '' updateGrammar = writers.writePython3 "latest-github-release" {
set -euo pipefail flakeIgnore = ["E501"];
} ''
from urllib.parse import quote
import json
import subprocess as sub
import os
import sys
args=( '--silent' ) debug = True if os.environ.get("DEBUG", False) else False
if [ -n "''${GITHUB_TOKEN:-}" ]; then
args+=( "-H" "Authorization: token ''${GITHUB_TOKEN}" )
fi
args+=( "https://api.github.com/repos/${urlEscape orga}/${urlEscape repo}/releases/latest" )
res=$(${curl}/bin/curl "''${args[@]}") jsonArg = sys.argv[1]
if [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "rate limit" ]]; then
echo "rate limited" >&2 def curl_args(orga, repo, token):
fi """Query the github API via curl"""
release="$(printf "%s" "$res" | ${jq}/bin/jq -r '.tag_name' | tr -d \")" yield "curl"
# github sometimes returns an empty list even tough there are releases if not debug:
if [ "$release" = "null" ]; then yield "--silent"
echo "uh-oh, latest for ${orga + "/" + repo} is not there, using HEAD" >&2 if token:
release="HEAD" yield "-H"
fi yield f"Authorization: token {token}"
echo "$release" yield f"https://api.github.com/repos/{quote(orga)}/{quote(repo)}/releases/latest"
def curl_result(orga, repo, output):
    """Parse the curl result of the github API.

    Args:
        orga: owner of the repository; only used in error messages.
        repo: name of the repository; only used in error messages.
        output: raw JSON body (str or bytes) printed by curl.

    Returns:
        The decoded JSON object, or an empty dict when there is nothing
        usable in the reply ("Not Found", or a reply that is not a JSON
        object), so that the caller's `.get("tag_name")` yields None.

    Exits (via sys.exit) when the API reports rate limiting, or reports any
    other error message without providing a `tag_name`.
    """
    res = json.loads(output)
    if not isinstance(res, dict):
        # e.g. an empty list; there is no release information to read, and
        # calling `.get` on it would raise AttributeError
        return {}
    message = res.get("message") or ""
    if "rate limit" in message:
        sys.exit("Rate limited by the Github API")
    if "Not Found" in message:
        # repository not there or no releases; if the repo is missing,
        # we'll notice when we try to clone it
        return {}
    if message and "tag_name" not in res:
        # any other API error (for example "Bad credentials" for an invalid
        # token) must not be mistaken for "this repository has no release"
        sys.exit(f"Github API error for {orga}/{repo}: {message}")
    return res
def nix_prefetch_args(url, version_rev):
    """Yield the argv for prefetching a git repository with nix-prefetch-git.

    Args:
        url: URL of the git repository to prefetch.
        version_rev: revision (tag name or "HEAD") passed to `--rev`.

    `--quiet` is only passed when the module-level `debug` flag is off.
    """
    # This script lives inside a Nix string, so the next line is interpolated
    # by Nix to the store path of the `nix-prefetch-git` package argument
    # (as the previous shell implementation did) instead of relying on
    # whatever `nix-prefetch-git` happens to be on PATH.
    yield "${nix-prefetch-git}/bin/nix-prefetch-git"
    if not debug:
        yield "--quiet"
    yield "--no-deepClone"
    yield "--url"
    yield url
    yield "--rev"
    yield version_rev
match json.loads(jsonArg):
case {"orga": orga, "repo": repo}:
token = os.environ.get("GITHUB_TOKEN", None)
curl_cmd = list(curl_args(orga, repo, token))
if debug:
print(curl_cmd, file=sys.stderr)
out = sub.check_output(curl_cmd)
release = curl_result(orga, repo, out).get("tag_name", None)
# github sometimes returns an empty list even tough there are releases
if not release:
print(f"uh-oh, latest for {orga}/{repo} is not there, using HEAD", file=sys.stderr)
release = "HEAD"
print(f"Fetching latest release ({release}) of {orga}/{repo} ", file=sys.stderr)
sub.check_call(
list(nix_prefetch_args(
url=f"https://github.com/{quote(orga)}/{quote(repo)}",
version_rev=release
))
)
case _:
sys.exit("input json must have `orga` and `repo` keys")
''; '';
# find the latest repos of a github organization # find the latest repos of a github organization
@ -452,18 +507,6 @@ let
|| echo "failed $res" || echo "failed $res"
''; '';
# update one tree-sitter grammar repo and print their nix-prefetch-git output
updateGrammar = { orga, repo }: writeShellScript "update-grammar.sh" ''
set -euo pipefail
latest="$(${latestGithubRelease { inherit orga repo; }})"
echo "Fetching latest release ($latest) of ${repo} " >&2
${nix-prefetch-git}/bin/nix-prefetch-git \
--quiet \
--no-deepClone \
--url "https://github.com/${urlEscape orga}/${urlEscape repo}" \
--rev "$latest"
'';
foreachSh = attrs: f: foreachSh = attrs: f:
lib.concatMapStringsSep "\n" f lib.concatMapStringsSep "\n" f
(lib.mapAttrsToList (k: v: { name = k; } // v) attrs); (lib.mapAttrsToList (k: v: { name = k; } // v) attrs);
@ -478,7 +521,10 @@ let
echo "writing files to $outputDir" 1>&2 echo "writing files to $outputDir" 1>&2
mkdir -p "$outputDir" mkdir -p "$outputDir"
${foreachSh allGrammars ${foreachSh allGrammars
({name, orga, repo}: ''${updateGrammar { inherit orga repo; }} > $outputDir/${name}.json'')} ({name, orga, repo}: ''
${updateGrammar} '${lib.generators.toJSON {} {inherit orga repo;}}' \
> $outputDir/${name}.json
'')}
( echo "{ lib }:" ( echo "{ lib }:"
echo "{" echo "{"
${foreachSh allGrammars ${foreachSh allGrammars