mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-23 15:33:13 +00:00
prefetch-npm-deps: fix reproducibility
v1 lockfiles can contain multiple references to the same version of a package, and these references can contain different `integrity` values, such as one having SHA-1 and SHA-512, while another just has SHA-512. Given that HashMap iteration order isn't defined, this causes reproducibility issues, as a different integrity value could be chosen each time. Thanks to @lilyinstarlight for discovering this issue originally, as well as the idea for the sorting-based implementation.
This commit is contained in:
parent
d6b863fd9b
commit
7efebca89c
@ -105,7 +105,7 @@ fn main() -> anyhow::Result<()> {
|
||||
eprintln!("{}", package.name);
|
||||
|
||||
let tarball = package.tarball()?;
|
||||
let integrity = package.integrity();
|
||||
let integrity = package.integrity().map(ToString::to_string);
|
||||
|
||||
cache
|
||||
.put(
|
||||
|
@ -1,7 +1,14 @@
|
||||
use anyhow::{bail, Context};
|
||||
use anyhow::{anyhow, bail, Context};
|
||||
use rayon::slice::ParallelSliceMut;
|
||||
use serde::Deserialize;
|
||||
use std::{collections::HashMap, fmt};
|
||||
use serde::{
|
||||
de::{self, Visitor},
|
||||
Deserialize, Deserializer,
|
||||
};
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{HashMap, HashSet},
|
||||
fmt,
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
pub(super) fn packages(content: &str) -> anyhow::Result<Vec<Package>> {
|
||||
@ -33,6 +40,13 @@ pub(super) fn packages(content: &str) -> anyhow::Result<Vec<Package>> {
|
||||
x.resolved
|
||||
.partial_cmp(&y.resolved)
|
||||
.expect("resolved should be comparable")
|
||||
.then(
|
||||
// v1 lockfiles can contain multiple references to the same version of a package, with
|
||||
// different integrity values (e.g. a SHA-1 and a SHA-512 in one, but just a SHA-512 in another)
|
||||
y.integrity
|
||||
.partial_cmp(&x.integrity)
|
||||
.expect("integrity should be comparable"),
|
||||
)
|
||||
});
|
||||
|
||||
packages.dedup_by(|x, y| x.resolved == y.resolved);
|
||||
@ -54,7 +68,7 @@ struct OldPackage {
|
||||
#[serde(default)]
|
||||
bundled: bool,
|
||||
resolved: Option<UrlOrString>,
|
||||
integrity: Option<String>,
|
||||
integrity: Option<HashCollection>,
|
||||
dependencies: Option<HashMap<String, OldPackage>>,
|
||||
}
|
||||
|
||||
@ -63,7 +77,7 @@ pub(super) struct Package {
|
||||
#[serde(default)]
|
||||
pub(super) name: Option<String>,
|
||||
pub(super) resolved: Option<UrlOrString>,
|
||||
pub(super) integrity: Option<String>,
|
||||
pub(super) integrity: Option<HashCollection>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||
@ -82,6 +96,102 @@ impl fmt::Display for UrlOrString {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(super) struct HashCollection(HashSet<Hash>);
|
||||
|
||||
impl HashCollection {
|
||||
pub(super) fn into_best(self) -> Option<Hash> {
|
||||
self.0.into_iter().max()
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for HashCollection {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
let lhs = self.0.iter().max()?;
|
||||
let rhs = other.0.iter().max()?;
|
||||
|
||||
lhs.partial_cmp(rhs)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for HashCollection {
|
||||
fn deserialize<D>(deserializer: D) -> Result<HashCollection, D::Error>
|
||||
where
|
||||
D: Deserializer<'de>,
|
||||
{
|
||||
deserializer.deserialize_string(HashCollectionVisitor)
|
||||
}
|
||||
}
|
||||
|
||||
struct HashCollectionVisitor;
|
||||
|
||||
impl<'de> Visitor<'de> for HashCollectionVisitor {
|
||||
type Value = HashCollection;
|
||||
|
||||
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||
formatter.write_str("a single SRI hash or a collection of them (separated by spaces)")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, value: &str) -> Result<HashCollection, E>
|
||||
where
|
||||
E: de::Error,
|
||||
{
|
||||
let hashes = value
|
||||
.split_ascii_whitespace()
|
||||
.map(Hash::new)
|
||||
.collect::<anyhow::Result<_>>()
|
||||
.map_err(E::custom)?;
|
||||
|
||||
Ok(HashCollection(hashes))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Deserialize, PartialEq, Eq, Hash)]
|
||||
pub struct Hash(String);
|
||||
|
||||
// Hash algorithms, in ascending preference.
|
||||
const ALGOS: &[&str] = &["sha1", "sha512"];
|
||||
|
||||
impl Hash {
|
||||
fn new(s: impl AsRef<str>) -> anyhow::Result<Hash> {
|
||||
let algo = s
|
||||
.as_ref()
|
||||
.split_once('-')
|
||||
.ok_or_else(|| anyhow!("expected SRI hash, got {:?}", s.as_ref()))?
|
||||
.0;
|
||||
|
||||
if ALGOS.iter().any(|&a| algo == a) {
|
||||
Ok(Hash(s.as_ref().to_string()))
|
||||
} else {
|
||||
Err(anyhow!("unknown hash algorithm {algo:?}"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Hash {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.0.fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialOrd for Hash {
|
||||
fn partial_cmp(&self, other: &Hash) -> Option<Ordering> {
|
||||
let lhs = self.0.split_once('-')?.0;
|
||||
let rhs = other.0.split_once('-')?.0;
|
||||
|
||||
ALGOS
|
||||
.iter()
|
||||
.position(|&s| lhs == s)?
|
||||
.partial_cmp(&ALGOS.iter().position(|&s| rhs == s)?)
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for Hash {
|
||||
fn cmp(&self, other: &Hash) -> Ordering {
|
||||
self.partial_cmp(other).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(clippy::case_sensitive_file_extension_comparisons)]
|
||||
fn to_new_packages(
|
||||
old_packages: HashMap<String, OldPackage>,
|
||||
@ -149,8 +259,13 @@ fn get_initial_url() -> anyhow::Result<Url> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{get_initial_url, to_new_packages, OldPackage, Package, UrlOrString};
|
||||
use std::collections::HashMap;
|
||||
use super::{
|
||||
get_initial_url, to_new_packages, Hash, HashCollection, OldPackage, Package, UrlOrString,
|
||||
};
|
||||
use std::{
|
||||
cmp::Ordering,
|
||||
collections::{HashMap, HashSet},
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
@ -188,4 +303,23 @@ mod tests {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hash_preference() {
|
||||
assert_eq!(
|
||||
Hash(String::from("sha1-foo")).partial_cmp(&Hash(String::from("sha512-foo"))),
|
||||
Some(Ordering::Less)
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
HashCollection({
|
||||
let mut set = HashSet::new();
|
||||
set.insert(Hash(String::from("sha512-foo")));
|
||||
set.insert(Hash(String::from("sha1-bar")));
|
||||
set
|
||||
})
|
||||
.into_best(),
|
||||
Some(Hash(String::from("sha512-foo")))
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@ -87,7 +87,7 @@ pub struct Package {
|
||||
|
||||
#[derive(Debug)]
|
||||
enum Specifics {
|
||||
Registry { integrity: String },
|
||||
Registry { integrity: lock::Hash },
|
||||
Git { workdir: TempDir },
|
||||
}
|
||||
|
||||
@ -134,11 +134,11 @@ impl Package {
|
||||
Specifics::Git { workdir }
|
||||
}
|
||||
None => Specifics::Registry {
|
||||
integrity: get_ideal_hash(
|
||||
&pkg.integrity
|
||||
.expect("non-git dependencies should have assosciated integrity"),
|
||||
)?
|
||||
.to_string(),
|
||||
integrity: pkg
|
||||
.integrity
|
||||
.expect("non-git dependencies should have assosciated integrity")
|
||||
.into_best()
|
||||
.expect("non-git dependencies should have non-empty assosciated integrity"),
|
||||
},
|
||||
};
|
||||
|
||||
@ -181,9 +181,9 @@ impl Package {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn integrity(&self) -> Option<String> {
|
||||
pub fn integrity(&self) -> Option<&lock::Hash> {
|
||||
match &self.specifics {
|
||||
Specifics::Registry { integrity } => Some(integrity.clone()),
|
||||
Specifics::Registry { integrity } => Some(integrity),
|
||||
Specifics::Git { .. } => None,
|
||||
}
|
||||
}
|
||||
@ -304,25 +304,9 @@ fn get_hosted_git_url(url: &Url) -> anyhow::Result<Option<Url>> {
|
||||
}
|
||||
}
|
||||
|
||||
fn get_ideal_hash(integrity: &str) -> anyhow::Result<&str> {
|
||||
let split: Vec<_> = integrity.split_ascii_whitespace().collect();
|
||||
|
||||
if split.len() == 1 {
|
||||
Ok(split[0])
|
||||
} else {
|
||||
for hash in ["sha512-", "sha1-"] {
|
||||
if let Some(h) = split.iter().find(|s| s.starts_with(hash)) {
|
||||
return Ok(h);
|
||||
}
|
||||
}
|
||||
|
||||
Err(anyhow!("not sure which hash to select out of {split:?}"))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{get_hosted_git_url, get_ideal_hash};
|
||||
use super::get_hosted_git_url;
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
@ -353,18 +337,4 @@ mod tests {
|
||||
"GitLab URLs should be marked as invalid (lol)"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ideal_hashes() {
|
||||
for (input, expected) in [
|
||||
("sha512-foo sha1-bar", Some("sha512-foo")),
|
||||
("sha1-bar md5-foo", Some("sha1-bar")),
|
||||
("sha1-bar", Some("sha1-bar")),
|
||||
("sha512-foo", Some("sha512-foo")),
|
||||
("foo-bar sha1-bar", Some("sha1-bar")),
|
||||
("foo-bar baz-foo", None),
|
||||
] {
|
||||
assert_eq!(get_ideal_hash(input).ok(), expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user