2020-12-28 08:50:57 +00:00
|
|
|
{ writeShellScript, nix-prefetch-git, formats, lib
|
2019-08-17 14:50:45 +00:00
|
|
|
, curl, jq, xe
|
|
|
|
, src }:
|
|
|
|
|
|
|
|
let
|
2020-12-26 19:11:43 +00:00
|
|
|
# Grammar repositories of the tree-sitter GitHub organization that we fetch.
# The list order is kept as-is: it is serialized to JSON below and is also
# iterated when the update script and the generated default.nix are produced.
knownTreeSitterOrgGrammarRepos = [
  "tree-sitter-javascript"
  "tree-sitter-c"
  "tree-sitter-swift"
  "tree-sitter-json"
  "tree-sitter-cpp"
  "tree-sitter-ruby"
  "tree-sitter-razor"
  "tree-sitter-go"
  "tree-sitter-c-sharp"
  "tree-sitter-python"
  "tree-sitter-typescript"
  "tree-sitter-rust"
  "tree-sitter-bash"
  "tree-sitter-php"
  "tree-sitter-java"
  "tree-sitter-scala"
  "tree-sitter-ocaml"
  "tree-sitter-julia"
  "tree-sitter-agda"
  "tree-sitter-fluent"
  "tree-sitter-html"
  "tree-sitter-haskell"
  "tree-sitter-regex"
  "tree-sitter-css"
  "tree-sitter-verilog"
  "tree-sitter-jsdoc"
  "tree-sitter-ql"
  "tree-sitter-embedded-template"
];

# The same list as a JSON file, so that shell scripts can hand it to jq.
knownTreeSitterOrgGrammarReposJson =
  jsonFile "known-tree-sitter-org-grammar-repos" knownTreeSitterOrgGrammarRepos;
|
2020-12-26 19:11:43 +00:00
|
|
|
|
|
|
|
# Repositories of the tree-sitter GitHub organization that are not grammars
# and are therefore deliberately skipped.
ignoredTreeSitterOrgRepos = [
  "tree-sitter"
  "tree-sitter-cli"
  "haskell-tree-sitter" # Haskell language bindings; the grammar is tree-sitter-haskell
  "ruby-tree-sitter" # Ruby language bindings; the grammar is tree-sitter-ruby
  "rust-tree-sitter" # (unmaintained) Rust language bindings; the grammar is tree-sitter-rust
  "node-tree-sitter" # Node.js language bindings; the grammar is tree-sitter-javascript
  "py-tree-sitter" # Python language bindings; the grammar is tree-sitter-python
  "afl-tree-sitter" # AFL fuzzing for tree-sitter
  "highlight-schema" # archived
  "tree-sitter.github.io" # website
];

# The same list as a JSON file, so that shell scripts can hand it to jq.
ignoredTreeSitterOrgReposJson =
  jsonFile "ignored-tree-sitter-org-repos" ignoredTreeSitterOrgRepos;
|
2020-12-26 19:11:43 +00:00
|
|
|
|
|
|
|
# Serialize the Nix value `val` to a JSON file named `name`, using the JSON
# generator from `formats`.
jsonFile = name: val:
  let
    json = formats.json { };
  in
  json.generate name val;
|
|
|
|
|
2020-12-26 19:41:19 +00:00
|
|
|
# Check a list of tree-sitter organization repositories.
#
# The script reads a JSON array of repository names on stdin and removes every
# name that appears in the known or the ignored list. If anything is left over
# it prints the remainder to stderr and exits with status 1.
checkTreeSitterRepos = writeShellScript "get-grammars.sh" ''
  set -euo pipefail

  # Array difference: input minus (known grammars + ignored repos).
  unlisted=$(${jq}/bin/jq \
    --slurpfile known "${knownTreeSitterOrgGrammarReposJson}" \
    --slurpfile ignore "${ignoredTreeSitterOrgReposJson}" \
    '. - ($known[0] + $ignore[0])' \
    )

  # jq prints an empty array as "[]"; that means every repo is accounted for.
  if [ "$unlisted" == "[]" ]; then
    exit 0
  fi

  echo "These repositories are neither known nor ignored:" 1>&2
  echo "$unlisted" 1>&2
  exit 1
'';
|
|
|
|
|
|
|
|
# TODO: implement real URL escaping.
# For now this is the identity function, so path segments are inserted into
# URLs unchanged.
urlEscape = segment: segment;
|
|
|
|
|
|
|
|
# Generic bash script to find the latest GitHub release of a repository.
#
# Arguments:
#   owner, repo: the GitHub repository to query.
#
# The resulting script prints the tag name of the latest release on stdout,
# or "HEAD" when the API response carries no tag name. It exits with a
# non-zero status when the GitHub API reports that we are rate limited.
latestGithubRelease = { owner, repo }: writeShellScript "latest-github-release" ''
  set -euo pipefail

  res=$(${curl}/bin/curl \
    --silent \
    "https://api.github.com/repos/${urlEscape owner}/${urlEscape repo}/releases/latest")

  # A rate-limit response has no tag_name either. Abort here so it is not
  # mistaken for "no release" and silently turned into HEAD below.
  if [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "rate limit" ]]; then
    echo "rate limited" >&2
    exit 1
  fi

  # --raw-output prints the bare tag; without it jq emits a JSON string
  # including the surrounding double quotes, which is not a usable git rev.
  # A missing tag_name is still printed as the word null.
  release=$(printf "%s" "$res" | ${jq}/bin/jq --raw-output '.tag_name')

  # github sometimes returns an empty list even though there are releases
  if [ "$release" = "null" ]; then
    echo "uh-oh, latest for ${owner + "/" + repo} is not there, using HEAD" >&2
    release="HEAD"
  fi

  echo "$release"
'';
|
|
|
|
|
2020-12-24 02:01:10 +00:00
|
|
|
# Find the repositories of a GitHub organization.
#
# Arguments:
#   orga: name of the GitHub organization.
#
# The resulting script prints a JSON array of repository names on stdout.
# Only a single request with per_page=100 is made; no further pages are
# fetched. The script exits with a non-zero status (diagnostics on stderr)
# when the API reports rate limiting or the response cannot be processed, so
# that callers capturing stdout never receive an error message as data.
latestGithubRepos = { orga }: writeShellScript "latest-github-repos" ''
  set -euo pipefail

  res=$(${curl}/bin/curl \
    --silent \
    'https://api.github.com/orgs/${urlEscape orga}/repos?per_page=100')

  if [[ "$(printf "%s" "$res" | ${jq}/bin/jq '.message?')" =~ "rate limit" ]]; then
    echo "rate limited" >&2
    exit 1
  fi

  # With pipefail the pipeline fails if jq cannot map over the response
  # (e.g. an error object instead of an array of repositories).
  if ! printf "%s" "$res" | ${jq}/bin/jq 'map(.name)'; then
    echo "failed $res" >&2
    exit 1
  fi
'';
|
|
|
|
|
2019-08-17 14:50:45 +00:00
|
|
|
# Update a single tree-sitter grammar repository.
#
# Arguments:
#   owner, repo: the GitHub repository holding the grammar.
#
# The resulting script asks the latest-release script for a revision, logs it
# to stderr and then runs nix-prefetch-git for that revision; the output of
# nix-prefetch-git goes to stdout.
updateGrammar = { owner, repo }:
  let
    releaseScript = latestGithubRelease { inherit owner repo; };
    repoUrl = "https://github.com/${urlEscape owner}/${urlEscape repo}";
  in
  writeShellScript "update-grammar.sh" ''
    set -euo pipefail

    rev="$(${releaseScript})"
    echo "Fetching latest release ($rev) of ${repo} …" >&2

    ${nix-prefetch-git}/bin/nix-prefetch-git \
      --quiet \
      --no-deepClone \
      --url "${repoUrl}" \
      --rev "$rev"
  '';
|
|
|
|
|
2020-12-28 08:50:57 +00:00
|
|
|
# Apply `f` to every element of `list` and join the resulting shell snippets
# with newlines, giving one snippet per line.
foreachSh = list: f:
  lib.concatStringsSep "\n" (map f list);
|
|
|
|
|
2019-08-17 14:50:45 +00:00
|
|
|
# Entry point: shell script that refreshes every known grammar.
#
# Steps performed by the generated script:
#   1. fetch the repository list of the tree-sitter organization,
#   2. abort if it contains a repo that is neither known nor ignored,
#   3. write the nix-prefetch-git output of each known grammar to
#      <this directory>/grammars/<repo>.json,
#   4. regenerate <this directory>/grammars/default.nix, an attribute set
#      that reads each of those JSON files.
#
# The output directory is derived from the location of this file at
# evaluation time (toString ./.).
update-all-grammars = writeShellScript "update-all-grammars.sh" ''
  set -euo pipefail

  echo "fetching list of grammars" 1>&2
  treeSitterRepos=$(${latestGithubRepos { orga = "tree-sitter"; }})

  echo "checking the tree-sitter repo list against the grammars we know" 1>&2
  printf '%s' "$treeSitterRepos" | ${checkTreeSitterRepos}

  outputDir="${toString ./.}/grammars"
  echo "writing files to $outputDir" 1>&2
  mkdir -p "$outputDir"

  # One fetch per known grammar. The redirection target is quoted so that a
  # checkout path containing whitespace does not break the redirect.
  ${foreachSh knownTreeSitterOrgGrammarRepos
    (repo: ''${updateGrammar { owner = "tree-sitter"; inherit repo; }} > "$outputDir/${repo}.json"'')}

  # Assemble default.nix: one attribute per grammar, read from its JSON file.
  ( echo "{"
    ${foreachSh knownTreeSitterOrgGrammarRepos
      (repo: ''
        # indentation hack
          printf " %s = (builtins.fromJSON (builtins.readFile ./%s.json));\n" "${repo}" "${repo}"'')}
    echo "}" ) \
    > "$outputDir/default.nix"
'';
|
|
|
|
|
|
|
|
in update-all-grammars
|