mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-12-25 15:13:46 +00:00
d03a07d5a7
This allow git checkout small parts of a large repo, and avoid fetching unnecessary blobs from server.
486 lines
13 KiB
Bash
Executable File
486 lines
13 KiB
Bash
Executable File
#! /usr/bin/env bash
|
|
|
|
set -e -o pipefail
|
|
|
|
url=
|
|
rev=
|
|
expHash=
|
|
hashType=$NIX_HASH_ALGO
|
|
deepClone=$NIX_PREFETCH_GIT_DEEP_CLONE
|
|
leaveDotGit=$NIX_PREFETCH_GIT_LEAVE_DOT_GIT
|
|
fetchSubmodules=
|
|
fetchLFS=
|
|
builder=
|
|
branchName=$NIX_PREFETCH_GIT_BRANCH_NAME
|
|
|
|
# ENV params
|
|
out=${out:-}
|
|
http_proxy=${http_proxy:-}
|
|
|
|
# allow overwritting cacert's ca-bundle.crt with a custom one
|
|
# this can be done by setting NIX_GIT_SSL_CAINFO and NIX_SSL_CERT_FILE enviroment variables for the nix-daemon
|
|
GIT_SSL_CAINFO=${NIX_GIT_SSL_CAINFO:-$GIT_SSL_CAINFO}
|
|
|
|
# populated by clone_user_rev()
|
|
fullRev=
|
|
humanReadableRev=
|
|
commitDate=
|
|
commitDateStrict8601=
|
|
|
|
if test -n "$deepClone"; then
|
|
deepClone=true
|
|
else
|
|
deepClone=
|
|
fi
|
|
|
|
if test "$leaveDotGit" != 1; then
|
|
leaveDotGit=
|
|
else
|
|
leaveDotGit=true
|
|
fi
|
|
|
|
usage(){
|
|
echo >&2 "syntax: nix-prefetch-git [options] [URL [REVISION [EXPECTED-HASH]]]
|
|
|
|
Options:
|
|
--out path Path where the output would be stored.
|
|
--url url Any url understood by 'git clone'.
|
|
--rev ref Any sha1 or references (such as refs/heads/master)
|
|
--hash h Expected hash.
|
|
--branch-name Branch name to check out into
|
|
--sparse-checkout Only fetch and checkout part of the repository.
|
|
--deepClone Clone the entire repository.
|
|
--no-deepClone Make a shallow clone of just the required ref.
|
|
--leave-dotGit Keep the .git directories.
|
|
--fetch-lfs Fetch git Large File Storage (LFS) files.
|
|
--fetch-submodules Fetch submodules.
|
|
--builder Clone as fetchgit does, but url, rev, and out option are mandatory.
|
|
--quiet Only print the final json summary.
|
|
"
|
|
exit 1
|
|
}
|
|
|
|
# some git commands print to stdout, which would contaminate our JSON output
|
|
clean_git(){
|
|
git "$@" >&2
|
|
}
|
|
|
|
argi=0
|
|
argfun=""
|
|
for arg; do
|
|
if test -z "$argfun"; then
|
|
case $arg in
|
|
--out) argfun=set_out;;
|
|
--url) argfun=set_url;;
|
|
--rev) argfun=set_rev;;
|
|
--hash) argfun=set_hashType;;
|
|
--branch-name) argfun=set_branchName;;
|
|
--deepClone) deepClone=true;;
|
|
--sparse-checkout) argfun=set_sparseCheckout;;
|
|
--quiet) QUIET=true;;
|
|
--no-deepClone) deepClone=;;
|
|
--leave-dotGit) leaveDotGit=true;;
|
|
--fetch-lfs) fetchLFS=true;;
|
|
--fetch-submodules) fetchSubmodules=true;;
|
|
--builder) builder=true;;
|
|
-h|--help) usage; exit;;
|
|
*)
|
|
: $((++argi))
|
|
case $argi in
|
|
1) url=$arg;;
|
|
2) rev=$arg;;
|
|
3) expHash=$arg;;
|
|
*) exit 1;;
|
|
esac
|
|
;;
|
|
esac
|
|
else
|
|
case $argfun in
|
|
set_*)
|
|
var=${argfun#set_}
|
|
eval "$var=$(printf %q "$arg")"
|
|
;;
|
|
esac
|
|
argfun=""
|
|
fi
|
|
done
|
|
|
|
if test -z "$url"; then
|
|
usage
|
|
fi
|
|
|
|
|
|
init_remote(){
|
|
local url=$1
|
|
clean_git init --initial-branch=master
|
|
clean_git remote add origin "$url"
|
|
if [ -n "$sparseCheckout" ]; then
|
|
git config remote.origin.partialclonefilter "blob:none"
|
|
echo "$sparseCheckout" | git sparse-checkout set --stdin
|
|
fi
|
|
( [ -n "$http_proxy" ] && clean_git config http.proxy "$http_proxy" ) || true
|
|
}
|
|
|
|
# Return the reference of an hash if it exists on the remote repository.
|
|
ref_from_hash(){
|
|
local hash=$1
|
|
git ls-remote origin | sed -n "\,$hash\t, { s,\(.*\)\t\(.*\),\2,; p; q}"
|
|
}
|
|
|
|
# Return the hash of a reference if it exists on the remote repository.
|
|
hash_from_ref(){
|
|
local ref=$1
|
|
git ls-remote origin | sed -n "\,\t$ref, { s,\(.*\)\t\(.*\),\1,; p; q}"
|
|
}
|
|
|
|
# Returns a name based on the url and reference
|
|
#
|
|
# This function needs to be in sync with nix's fetchgit implementation
|
|
# of urlToName() to re-use the same nix store paths.
|
|
url_to_name(){
|
|
local url=$1
|
|
local ref=$2
|
|
local base
|
|
base=$(basename "$url" .git | cut -d: -f2)
|
|
|
|
if [[ $ref =~ ^[a-z0-9]+$ ]]; then
|
|
echo "$base-${ref:0:7}"
|
|
else
|
|
echo "$base"
|
|
fi
|
|
}
|
|
|
|
# Fetch and checkout the right sha1
|
|
checkout_hash(){
|
|
local hash="$1"
|
|
local ref="$2"
|
|
|
|
if test -z "$hash"; then
|
|
hash=$(hash_from_ref "$ref")
|
|
fi
|
|
|
|
clean_git fetch ${builder:+--progress} --depth=1 origin "$hash" || \
|
|
clean_git fetch -t ${builder:+--progress} origin || return 1
|
|
|
|
local object_type=$(git cat-file -t "$hash")
|
|
if [[ "$object_type" == "commit" ]]; then
|
|
clean_git checkout -b "$branchName" "$hash" || return 1
|
|
elif [[ "$object_type" == "tree" ]]; then
|
|
clean_git config user.email "nix-prefetch-git@localhost"
|
|
clean_git config user.name "nix-prefetch-git"
|
|
local commit_id=$(git commit-tree "$hash" -m "Commit created from tree hash $hash")
|
|
clean_git checkout -b "$branchName" "$commit_id" || return 1
|
|
else
|
|
echo "Unrecognized git object type: $object_type"
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
# Fetch only a branch/tag and checkout it.
|
|
checkout_ref(){
|
|
local hash="$1"
|
|
local ref="$2"
|
|
|
|
if [[ -n "$deepClone" ]]; then
|
|
# The caller explicitly asked for a deep clone. Deep clones
|
|
# allow "git describe" and similar tools to work. See
|
|
# https://marc.info/?l=nix-dev&m=139641582514772
|
|
# for a discussion.
|
|
return 1
|
|
fi
|
|
|
|
if test -z "$ref"; then
|
|
ref=$(ref_from_hash "$hash")
|
|
fi
|
|
|
|
if test -n "$ref"; then
|
|
# --depth option is ignored on http repository.
|
|
clean_git fetch ${builder:+--progress} --depth 1 origin +"$ref" || return 1
|
|
clean_git checkout -b "$branchName" FETCH_HEAD || return 1
|
|
else
|
|
return 1
|
|
fi
|
|
}
|
|
|
|
# Update submodules
|
|
init_submodules(){
|
|
# Add urls into .git/config file
|
|
clean_git submodule init
|
|
|
|
# list submodule directories and their hashes
|
|
git submodule status |
|
|
while read -r l; do
|
|
local hash
|
|
local dir
|
|
local name
|
|
local url
|
|
|
|
# checkout each submodule
|
|
hash=$(echo "$l" | awk '{print $1}' | tr -d '-')
|
|
dir=$(echo "$l" | sed -n 's/^.[0-9a-f]\+ \(.*[^)]*\)\( (.*)\)\?$/\1/p')
|
|
name=$(
|
|
git config -f .gitmodules --get-regexp submodule\..*\.path |
|
|
sed -n "s,^\(.*\)\.path $dir\$,\\1,p")
|
|
url=$(git config --get "${name}.url")
|
|
|
|
clone "$dir" "$url" "$hash" ""
|
|
done
|
|
}
|
|
|
|
clone(){
|
|
local top=$PWD
|
|
local dir="$1"
|
|
local url="$2"
|
|
local hash="$3"
|
|
local ref="$4"
|
|
|
|
cd "$dir"
|
|
|
|
# Initialize the repository.
|
|
init_remote "$url"
|
|
|
|
# Download data from the repository.
|
|
checkout_ref "$hash" "$ref" ||
|
|
checkout_hash "$hash" "$ref" || (
|
|
echo 1>&2 "Unable to checkout $hash$ref from $url."
|
|
exit 1
|
|
)
|
|
|
|
# Checkout linked sources.
|
|
if test -n "$fetchSubmodules"; then
|
|
init_submodules
|
|
fi
|
|
|
|
if [ -z "$builder" ] && [ -f .topdeps ]; then
|
|
if tg help &>/dev/null; then
|
|
echo "populating TopGit branches..."
|
|
tg remote --populate origin
|
|
else
|
|
echo "WARNING: would populate TopGit branches but TopGit is not available" >&2
|
|
echo "WARNING: install TopGit to fix the problem" >&2
|
|
fi
|
|
fi
|
|
|
|
cd "$top"
|
|
}
|
|
|
|
# Remove all remote branches, remove tags not reachable from HEAD, do a full
|
|
# repack and then garbage collect unreferenced objects.
|
|
make_deterministic_repo(){
|
|
local repo="$1"
|
|
|
|
# run in sub-shell to not touch current working directory
|
|
(
|
|
cd "$repo"
|
|
# Remove files that contain timestamps or otherwise have non-deterministic
|
|
# properties.
|
|
rm -rf .git/logs/ .git/hooks/ .git/index .git/FETCH_HEAD .git/ORIG_HEAD \
|
|
.git/refs/remotes/origin/HEAD .git/config
|
|
|
|
# Remove all remote branches.
|
|
git branch -r | while read -r branch; do
|
|
clean_git branch -rD "$branch"
|
|
done
|
|
|
|
# Remove tags not reachable from HEAD. If we're exactly on a tag, don't
|
|
# delete it.
|
|
maybe_tag=$(git tag --points-at HEAD)
|
|
git tag --contains HEAD | while read -r tag; do
|
|
if [ "$tag" != "$maybe_tag" ]; then
|
|
clean_git tag -d "$tag"
|
|
fi
|
|
done
|
|
|
|
# Do a full repack. Must run single-threaded, or else we lose determinism.
|
|
clean_git config pack.threads 1
|
|
clean_git repack -A -d -f
|
|
rm -f .git/config
|
|
|
|
# Garbage collect unreferenced objects.
|
|
# Note: --keep-largest-pack prevents non-deterministic ordering of packs
|
|
# listed in .git/objects/info/packs by only using a single pack
|
|
clean_git gc --prune=all --keep-largest-pack
|
|
)
|
|
}
|
|
|
|
|
|
clone_user_rev() {
|
|
local dir="$1"
|
|
local url="$2"
|
|
local rev="${3:-HEAD}"
|
|
|
|
if [ -n "$fetchLFS" ]; then
|
|
tmpHomePath="$(mktemp -d "${TMPDIR:-/tmp}/nix-prefetch-git-tmp-home-XXXXXXXXXX")"
|
|
exit_handlers+=(remove_tmpHomePath)
|
|
HOME="$tmpHomePath"
|
|
git lfs install
|
|
fi
|
|
|
|
# Perform the checkout.
|
|
case "$rev" in
|
|
HEAD|refs/*)
|
|
clone "$dir" "$url" "" "$rev" 1>&2;;
|
|
*)
|
|
if test -z "$(echo "$rev" | tr -d 0123456789abcdef)"; then
|
|
clone "$dir" "$url" "$rev" "" 1>&2
|
|
else
|
|
# if revision is not hexadecimal it might be a tag
|
|
clone "$dir" "$url" "" "refs/tags/$rev" 1>&2
|
|
fi;;
|
|
esac
|
|
|
|
pushd "$dir" >/dev/null
|
|
fullRev=$( (git rev-parse "$rev" 2>/dev/null || git rev-parse "refs/heads/$branchName") | tail -n1)
|
|
humanReadableRev=$(git describe "$fullRev" 2> /dev/null || git describe --tags "$fullRev" 2> /dev/null || echo -- none --)
|
|
commitDate=$(git show -1 --no-patch --pretty=%ci "$fullRev")
|
|
commitDateStrict8601=$(git show -1 --no-patch --pretty=%cI "$fullRev")
|
|
popd >/dev/null
|
|
|
|
# Allow doing additional processing before .git removal
|
|
eval "$NIX_PREFETCH_GIT_CHECKOUT_HOOK"
|
|
if test -z "$leaveDotGit"; then
|
|
echo "removing \`.git'..." >&2
|
|
find "$dir" -name .git -print0 | xargs -0 rm -rf
|
|
else
|
|
find "$dir" -name .git | while read -r gitdir; do
|
|
make_deterministic_repo "$(readlink -f "$gitdir/..")"
|
|
done
|
|
fi
|
|
}
|
|
|
|
exit_handlers=()
|
|
|
|
run_exit_handlers() {
|
|
exit_status=$?
|
|
for handler in "${exit_handlers[@]}"; do
|
|
eval "$handler $exit_status"
|
|
done
|
|
}
|
|
|
|
trap run_exit_handlers EXIT
|
|
|
|
quiet_exit_handler() {
|
|
exec 2>&3 3>&-
|
|
if [ $1 -ne 0 ]; then
|
|
cat "$errfile" >&2
|
|
fi
|
|
rm -f "$errfile"
|
|
}
|
|
|
|
quiet_mode() {
|
|
errfile="$(mktemp "${TMPDIR:-/tmp}/git-checkout-err-XXXXXXXX")"
|
|
exit_handlers+=(quiet_exit_handler)
|
|
exec 3>&2 2>"$errfile"
|
|
}
|
|
|
|
json_escape() {
|
|
local s="$1"
|
|
s="${s//\\/\\\\}" # \
|
|
s="${s//\"/\\\"}" # "
|
|
s="${s//^H/\\\b}" # \b (backspace)
|
|
s="${s//^L/\\\f}" # \f (form feed)
|
|
s="${s//
|
|
/\\\n}" # \n (newline)
|
|
s="${s//^M/\\\r}" # \r (carriage return)
|
|
s="${s// /\\t}" # \t (tab)
|
|
echo "$s"
|
|
}
|
|
|
|
print_results() {
|
|
hash="$1"
|
|
if ! test -n "$QUIET"; then
|
|
echo "" >&2
|
|
echo "git revision is $fullRev" >&2
|
|
if test -n "$finalPath"; then
|
|
echo "path is $finalPath" >&2
|
|
fi
|
|
echo "git human-readable version is $humanReadableRev" >&2
|
|
echo "Commit date is $commitDate" >&2
|
|
if test -n "$hash"; then
|
|
echo "hash is $hash" >&2
|
|
fi
|
|
fi
|
|
if test -n "$hash"; then
|
|
cat <<EOF
|
|
{
|
|
"url": "$(json_escape "$url")",
|
|
"rev": "$(json_escape "$fullRev")",
|
|
"date": "$(json_escape "$commitDateStrict8601")",
|
|
"path": "$(json_escape "$finalPath")",
|
|
"$(json_escape "$hashType")": "$(json_escape "$hash")",
|
|
"fetchLFS": $([[ -n "$fetchLFS" ]] && echo true || echo false),
|
|
"fetchSubmodules": $([[ -n "$fetchSubmodules" ]] && echo true || echo false),
|
|
"deepClone": $([[ -n "$deepClone" ]] && echo true || echo false),
|
|
"leaveDotGit": $([[ -n "$leaveDotGit" ]] && echo true || echo false)
|
|
}
|
|
EOF
|
|
fi
|
|
}
|
|
|
|
remove_tmpPath() {
|
|
rm -rf "$tmpPath"
|
|
}
|
|
|
|
remove_tmpHomePath() {
|
|
rm -rf "$tmpHomePath"
|
|
}
|
|
|
|
if test -n "$QUIET"; then
|
|
quiet_mode
|
|
fi
|
|
|
|
if test -z "$branchName"; then
|
|
branchName=fetchgit
|
|
fi
|
|
|
|
if test -n "$builder"; then
|
|
test -n "$out" -a -n "$url" -a -n "$rev" || usage
|
|
mkdir -p "$out"
|
|
clone_user_rev "$out" "$url" "$rev"
|
|
else
|
|
if test -z "$hashType"; then
|
|
hashType=sha256
|
|
fi
|
|
|
|
# If the hash was given, a file with that hash may already be in the
|
|
# store.
|
|
if test -n "$expHash"; then
|
|
finalPath=$(nix-store --print-fixed-path --recursive "$hashType" "$expHash" "$(url_to_name "$url" "$rev")")
|
|
if ! nix-store --check-validity "$finalPath" 2> /dev/null; then
|
|
finalPath=
|
|
fi
|
|
hash=$expHash
|
|
fi
|
|
|
|
# If we don't know the hash or a path with that hash doesn't exist,
|
|
# download the file and add it to the store.
|
|
if test -z "$finalPath"; then
|
|
|
|
tmpPath="$(mktemp -d "${TMPDIR:-/tmp}/git-checkout-tmp-XXXXXXXX")"
|
|
exit_handlers+=(remove_tmpPath)
|
|
|
|
tmpFile="$tmpPath/$(url_to_name "$url" "$rev")"
|
|
mkdir -p "$tmpFile"
|
|
|
|
# Perform the checkout.
|
|
clone_user_rev "$tmpFile" "$url" "$rev"
|
|
|
|
# Compute the hash.
|
|
hash=$(nix-hash --type $hashType --base32 "$tmpFile")
|
|
|
|
# Add the downloaded file to the Nix store.
|
|
finalPath=$(nix-store --add-fixed --recursive "$hashType" "$tmpFile")
|
|
|
|
if test -n "$expHash" -a "$expHash" != "$hash"; then
|
|
echo "hash mismatch for URL \`$url'. Got \`$hash'; expected \`$expHash'." >&2
|
|
exit 1
|
|
fi
|
|
fi
|
|
|
|
print_results "$hash"
|
|
|
|
if test -n "$PRINT_PATH"; then
|
|
echo "$finalPath"
|
|
fi
|
|
fi
|