diff --git a/pkgs/build-support/rust/fetch-cargo-vendor-util.py b/pkgs/build-support/rust/fetch-cargo-vendor-util.py
new file mode 100644
index 000000000000..33cac9b83d01
--- /dev/null
+++ b/pkgs/build-support/rust/fetch-cargo-vendor-util.py
@@ -0,0 +1,284 @@
+import functools
+import hashlib
+import json
+import multiprocessing as mp
+import re
+import shutil
+import subprocess
+import sys
+import tomllib
+from pathlib import Path
+from typing import Any, TypedDict, cast
+
+import requests
+
+eprint = functools.partial(print, file=sys.stderr)
+
+
+def load_toml(path: Path) -> dict[str, Any]:
+    with open(path, "rb") as f:
+        return tomllib.load(f)
+
+
+def download_file_with_checksum(url: str, destination_path: Path) -> str:
+    sha256_hash = hashlib.sha256()
+    with requests.get(url, stream=True) as response:
+        if not response.ok:
+            raise Exception(f"Failed to fetch file from {url}. Status code: {response.status_code}")
+        with open(destination_path, "wb") as file:
+            for chunk in response.iter_content(1024):  # Download in chunks
+                if chunk:  # Filter out keep-alive chunks
+                    file.write(chunk)
+                    sha256_hash.update(chunk)
+
+    # Compute the final checksum
+    checksum = sha256_hash.hexdigest()
+    return checksum
+
+
+def get_download_url_for_tarball(pkg: dict[str, Any]) -> str:
+    # TODO: support other registries
+    # maybe fetch config.json from the registry root and get the dl key
+    # See: https://doc.rust-lang.org/cargo/reference/registry-index.html#index-configuration
+    if pkg["source"] != "registry+https://github.com/rust-lang/crates.io-index":
+        raise Exception("Only the default crates.io registry is supported.")
+
+    return f"https://crates.io/api/v1/crates/{pkg["name"]}/{pkg["version"]}/download"
+
+
+def download_tarball(pkg: dict[str, Any], out_dir: Path) -> None:
+
+    url = get_download_url_for_tarball(pkg)
+    filename = f"{pkg["name"]}-{pkg["version"]}.tar.gz"
+
+    # TODO: allow legacy checksum specification, see importCargoLock for example
+    # also, don't forget about the other usage of the checksum
+    expected_checksum = pkg["checksum"]
+
+    tarball_out_dir = out_dir / "tarballs" / filename
+    eprint(f"Fetching {url} -> tarballs/{filename}")
+
+    calculated_checksum = download_file_with_checksum(url, tarball_out_dir)
+
+    if calculated_checksum != expected_checksum:
+        raise Exception(f"Hash mismatch! File fetched from {url} had checksum {calculated_checksum}, expected {expected_checksum}.")
+
+
+def download_git_tree(url: str, git_sha_rev: str, out_dir: Path) -> None:
+
+    tree_out_dir = out_dir / "git" / git_sha_rev
+    eprint(f"Fetching {url}#{git_sha_rev} -> git/{git_sha_rev}")
+
+    cmd = ["nix-prefetch-git", "--builder", "--quiet", "--url", url, "--rev", git_sha_rev, "--out", str(tree_out_dir)]
+    subprocess.check_output(cmd)
+
+
+GIT_SOURCE_REGEX = re.compile("git\\+(?P<url>[^?]+)(\\?(?P<type>rev|tag|branch)=(?P<value>.*))?#(?P<git_sha_rev>.*)")
+
+
+class GitSourceInfo(TypedDict):
+    url: str
+    type: str | None
+    value: str | None
+    git_sha_rev: str
+
+
+def parse_git_source(source: str) -> GitSourceInfo:
+    match = GIT_SOURCE_REGEX.match(source)
+    if match is None:
+        raise Exception(f"Unable to process git source: {source}.")
+    return cast(GitSourceInfo, match.groupdict(default=None))
+
+
+def create_vendor_staging(lockfile_path: Path, out_dir: Path) -> None:
+    cargo_toml = load_toml(lockfile_path)
+
+    git_packages: list[dict[str, Any]] = []
+    registry_packages: list[dict[str, Any]] = []
+
+    for pkg in cargo_toml["package"]:
+        # ignore local dependencies
+        if "source" not in pkg.keys():
+            eprint(f"Skipping local dependency: {pkg["name"]}")
+            continue
+        source = pkg["source"]
+
+        if source.startswith("git+"):
+            git_packages.append(pkg)
+        elif source.startswith("registry+"):
+            registry_packages.append(pkg)
+        else:
+            raise Exception(f"Can't process source: {source}.")
+
+    git_sha_rev_to_url: dict[str, str] = {}
+    for pkg in git_packages:
+        source_info = parse_git_source(pkg["source"])
+        git_sha_rev_to_url[source_info["git_sha_rev"]] = source_info["url"]
+
+    out_dir.mkdir(exist_ok=True)
+    shutil.copy(lockfile_path, out_dir / "Cargo.lock")
+
+    # create a pool with at most 10 concurrent jobs
+    with mp.Pool(min(10, mp.cpu_count())) as pool:
+
+        if len(git_packages) != 0:
+            (out_dir / "git").mkdir()
+            # run download jobs in parallel
+            git_args_gen = ((url, git_sha_rev, out_dir) for git_sha_rev, url in git_sha_rev_to_url.items())
+            pool.starmap(download_git_tree, git_args_gen)
+
+        if len(registry_packages) != 0:
+            (out_dir / "tarballs").mkdir()
+            # run download jobs in parallel
+            tarball_args_gen = ((pkg, out_dir) for pkg in registry_packages)
+            pool.starmap(download_tarball, tarball_args_gen)
+
+
+def get_manifest_metadata(manifest_path: Path) -> dict[str, Any]:
+    cmd = ["cargo", "metadata", "--format-version", "1", "--no-deps", "--manifest-path", str(manifest_path)]
+    output = subprocess.check_output(cmd)
+    return json.loads(output)
+
+
+def try_get_crate_manifest_path_from_manifest_path(manifest_path: Path, crate_name: str) -> Path | None:
+    metadata = get_manifest_metadata(manifest_path)
+
+    for pkg in metadata["packages"]:
+        if pkg["name"] == crate_name:
+            return Path(pkg["manifest_path"])
+
+    return None
+
+
+def find_crate_manifest_in_tree(tree: Path, crate_name: str) -> Path:
+    # in some cases Cargo.toml is not located at the top level, so we also look at subdirectories
+    manifest_paths = tree.glob("**/Cargo.toml")
+
+    for manifest_path in manifest_paths:
+        res = try_get_crate_manifest_path_from_manifest_path(manifest_path, crate_name)
+        if res is not None:
+            return res
+
+    raise Exception(f"Couldn't find manifest for crate {crate_name} inside {tree}.")
+
+
+def copy_and_patch_git_crate_subtree(git_tree: Path, crate_name: str, crate_out_dir: Path) -> None:
+    crate_manifest_path = find_crate_manifest_in_tree(git_tree, crate_name)
+    crate_tree = crate_manifest_path.parent
+
+    eprint(f"Copying to {crate_out_dir}")
+    shutil.copytree(crate_tree, crate_out_dir)
+    crate_out_dir.chmod(0o755)
+
+    with open(crate_manifest_path, "r") as f:
+        manifest_data = f.read()
+
+    if "workspace" in manifest_data:
+        crate_manifest_metadata = get_manifest_metadata(crate_manifest_path)
+        workspace_root = Path(crate_manifest_metadata["workspace_root"])
+
+        root_manifest_path = workspace_root / "Cargo.toml"
+        manifest_path = crate_out_dir / "Cargo.toml"
+
+        manifest_path.chmod(0o644)
+        eprint(f"Patching {manifest_path}")
+
+        cmd = ["replace-workspace-values", str(manifest_path), str(root_manifest_path)]
+        subprocess.check_output(cmd)
+
+
+def extract_crate_tarball_contents(tarball_path: Path, crate_out_dir: Path) -> None:
+    eprint(f"Unpacking to {crate_out_dir}")
+    crate_out_dir.mkdir()
+    cmd = ["tar", "xf", str(tarball_path), "-C", str(crate_out_dir), "--strip-components=1"]
+    subprocess.check_output(cmd)
+
+
+def create_vendor(vendor_staging_dir: Path, out_dir: Path) -> None:
+    lockfile_path = vendor_staging_dir / "Cargo.lock"
+    out_dir.mkdir(exist_ok=True)
+    shutil.copy(lockfile_path, out_dir / "Cargo.lock")
+
+    cargo_toml = load_toml(lockfile_path)
+
+    config_lines = [
+        '[source.vendored-sources]',
+        'directory = "@vendor@"',
+        '[source.crates-io]',
+        'replace-with = "vendored-sources"',
+    ]
+
+    seen_source_keys = set()
+    for pkg in cargo_toml["package"]:
+
+        # ignore local dependencies
+        if "source" not in pkg.keys():
+            continue
+
+        source: str = pkg["source"]
+
+        dir_name = f"{pkg["name"]}-{pkg["version"]}"
+        crate_out_dir = out_dir / dir_name
+
+        if source.startswith("git+"):
+
+            source_info = parse_git_source(pkg["source"])
+            git_sha_rev = source_info["git_sha_rev"]
+            git_tree = vendor_staging_dir / "git" / git_sha_rev
+
+            copy_and_patch_git_crate_subtree(git_tree, pkg["name"], crate_out_dir)
+
+            # git based crates allow having no checksum information
+            with open(crate_out_dir / ".cargo-checksum.json", "w") as f:
+                json.dump({"files": {}}, f)
+
+            source_key = source[0:source.find("#")]
+
+            if source_key in seen_source_keys:
+                continue
+
+            seen_source_keys.add(source_key)
+
+            config_lines.append(f'[source."{source_key}"]')
+            config_lines.append(f'git = "{source_info["url"]}"')
+            if source_info["type"] is not None:
+                config_lines.append(f'{source_info["type"]} = "{source_info["value"]}"')
+            config_lines.append('replace-with = "vendored-sources"')
+
+        elif source.startswith("registry+"):
+
+            filename = f"{pkg["name"]}-{pkg["version"]}.tar.gz"
+            tarball_path = vendor_staging_dir / "tarballs" / filename
+
+            extract_crate_tarball_contents(tarball_path, crate_out_dir)
+
+            # non-git based crates need the package checksum at minimum
+            with open(crate_out_dir / ".cargo-checksum.json", "w") as f:
+                json.dump({"files": {}, "package": pkg["checksum"]}, f)
+
+        else:
+            raise Exception(f"Can't process source: {source}.")
+
+    (out_dir / ".cargo").mkdir()
+    with open(out_dir / ".cargo" / "config.toml", "w") as config_file:
+        config_file.writelines(line + "\n" for line in config_lines)
+
+
+def main() -> None:
+    subcommand = sys.argv[1]
+
+    subcommand_func_dict = {
+        "create-vendor-staging": lambda: create_vendor_staging(lockfile_path=Path(sys.argv[2]), out_dir=Path(sys.argv[3])),
+        "create-vendor": lambda: create_vendor(vendor_staging_dir=Path(sys.argv[2]), out_dir=Path(sys.argv[3]))
+    }
+
+    subcommand_func = subcommand_func_dict.get(subcommand)
+
+    if subcommand_func is None:
+        raise Exception(f"Unknown subcommand: '{subcommand}'. Must be one of {list(subcommand_func_dict.keys())}")
+
+    subcommand_func()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pkgs/build-support/rust/fetch-cargo-vendor.nix b/pkgs/build-support/rust/fetch-cargo-vendor.nix
new file mode 100644
index 000000000000..4197e0b62e1b
--- /dev/null
+++ b/pkgs/build-support/rust/fetch-cargo-vendor.nix
@@ -0,0 +1,92 @@
+{
+  lib,
+  stdenvNoCC,
+  runCommand,
+  writers,
+  python3Packages,
+  cargo,
+  nix-prefetch-git,
+  cacert,
+}:
+
+let
+  replaceWorkspaceValues = writers.writePython3Bin "replace-workspace-values" {
+    libraries = with python3Packages; [
+      tomli
+      tomli-w
+    ];
+    flakeIgnore = [
+      "E501"
+      "W503"
+    ];
+  } (builtins.readFile ./replace-workspace-values.py);
+
+  fetchCargoVendorUtil = writers.writePython3Bin "fetch-cargo-vendor-util" {
+    libraries = with python3Packages; [
+      requests
+    ];
+    flakeIgnore = [
+      "E501"
+    ];
+  } (builtins.readFile ./fetch-cargo-vendor-util.py);
+in
+
+{
+  name ? if args ? pname && args ? version then "${args.pname}-${args.version}" else "cargo-deps",
+  hash ? (throw "fetchCargoVendor requires a `hash` value to be set for ${name}"),
+  nativeBuildInputs ? [ ],
+  ...
+}@args:
+
+# TODO: add asserts about pname version and name
+
+let
+  removedArgs = [
+    "name"
+    "pname"
+    "version"
+    "nativeBuildInputs"
+    "hash"
+  ];
+
+  vendorStaging = stdenvNoCC.mkDerivation (
+    {
+      name = "${name}-vendor-staging";
+
+      nativeBuildInputs = [
+        fetchCargoVendorUtil
+        nix-prefetch-git
+        cacert
+      ] ++ nativeBuildInputs;
+
+      buildPhase = ''
+        runHook preBuild
+
+        fetch-cargo-vendor-util create-vendor-staging ./Cargo.lock "$out"
+
+        runHook postBuild
+      '';
+
+      dontInstall = true;
+      dontFixup = true;
+
+      outputHash = hash;
+      outputHashAlgo = if hash == "" then "sha256" else null;
+      outputHashMode = "recursive";
+    }
+    // builtins.removeAttrs args removedArgs
+  );
+in
+
+runCommand "${name}-vendor"
+  {
+    inherit vendorStaging;
+    nativeBuildInputs = [
+      fetchCargoVendorUtil
+      cargo
+      replaceWorkspaceValues
+    ];
+  }
+  ''
+    fetch-cargo-vendor-util create-vendor "$vendorStaging" "$out"
+  ''
diff --git a/pkgs/development/compilers/rust/make-rust-platform.nix b/pkgs/development/compilers/rust/make-rust-platform.nix
index 6ed724aae821..81c1ac960f9a 100644
--- a/pkgs/development/compilers/rust/make-rust-platform.nix
+++ b/pkgs/development/compilers/rust/make-rust-platform.nix
@@ -18,6 +18,8 @@ rec {
     inherit cargo;
   };
 
+  fetchCargoVendor = buildPackages.callPackage ../../../build-support/rust/fetch-cargo-vendor.nix { inherit cargo; };
+
   buildRustPackage = callPackage ../../../build-support/rust/build-rust-package {
     inherit stdenv cargoBuildHook cargoCheckHook cargoInstallHook cargoNextestHook cargoSetupHook
       fetchCargoTarball importCargoLock rustc cargo cargo-auditable;