mirror of
https://github.com/NixOS/nixpkgs.git
synced 2024-11-23 15:33:13 +00:00
prefetch-npm-deps: fix reproducibility
v1 lockfiles can contain multiple references to the same version of a package, and these references can contain different `integrity` values, such as one having SHA-1 and SHA-512, while another just has SHA-512. Given that HashMap iteration order isn't defined, this causes reproducibility issues, as a different integrity value could be chosen each time. Thanks to @lilyinstarlight for originally discovering this issue, as well as for the idea for the sorting-based implementation.
This commit is contained in:
parent
d6b863fd9b
commit
7efebca89c
@ -105,7 +105,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
eprintln!("{}", package.name);
|
eprintln!("{}", package.name);
|
||||||
|
|
||||||
let tarball = package.tarball()?;
|
let tarball = package.tarball()?;
|
||||||
let integrity = package.integrity();
|
let integrity = package.integrity().map(ToString::to_string);
|
||||||
|
|
||||||
cache
|
cache
|
||||||
.put(
|
.put(
|
||||||
|
@ -1,7 +1,14 @@
|
|||||||
use anyhow::{bail, Context};
|
use anyhow::{anyhow, bail, Context};
|
||||||
use rayon::slice::ParallelSliceMut;
|
use rayon::slice::ParallelSliceMut;
|
||||||
use serde::Deserialize;
|
use serde::{
|
||||||
use std::{collections::HashMap, fmt};
|
de::{self, Visitor},
|
||||||
|
Deserialize, Deserializer,
|
||||||
|
};
|
||||||
|
use std::{
|
||||||
|
cmp::Ordering,
|
||||||
|
collections::{HashMap, HashSet},
|
||||||
|
fmt,
|
||||||
|
};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
pub(super) fn packages(content: &str) -> anyhow::Result<Vec<Package>> {
|
pub(super) fn packages(content: &str) -> anyhow::Result<Vec<Package>> {
|
||||||
@ -33,6 +40,13 @@ pub(super) fn packages(content: &str) -> anyhow::Result<Vec<Package>> {
|
|||||||
x.resolved
|
x.resolved
|
||||||
.partial_cmp(&y.resolved)
|
.partial_cmp(&y.resolved)
|
||||||
.expect("resolved should be comparable")
|
.expect("resolved should be comparable")
|
||||||
|
.then(
|
||||||
|
// v1 lockfiles can contain multiple references to the same version of a package, with
|
||||||
|
// different integrity values (e.g. a SHA-1 and a SHA-512 in one, but just a SHA-512 in another)
|
||||||
|
y.integrity
|
||||||
|
.partial_cmp(&x.integrity)
|
||||||
|
.expect("integrity should be comparable"),
|
||||||
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
packages.dedup_by(|x, y| x.resolved == y.resolved);
|
packages.dedup_by(|x, y| x.resolved == y.resolved);
|
||||||
@ -54,7 +68,7 @@ struct OldPackage {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
bundled: bool,
|
bundled: bool,
|
||||||
resolved: Option<UrlOrString>,
|
resolved: Option<UrlOrString>,
|
||||||
integrity: Option<String>,
|
integrity: Option<HashCollection>,
|
||||||
dependencies: Option<HashMap<String, OldPackage>>,
|
dependencies: Option<HashMap<String, OldPackage>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -63,7 +77,7 @@ pub(super) struct Package {
|
|||||||
#[serde(default)]
|
#[serde(default)]
|
||||||
pub(super) name: Option<String>,
|
pub(super) name: Option<String>,
|
||||||
pub(super) resolved: Option<UrlOrString>,
|
pub(super) resolved: Option<UrlOrString>,
|
||||||
pub(super) integrity: Option<String>,
|
pub(super) integrity: Option<HashCollection>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||||
@ -82,6 +96,102 @@ impl fmt::Display for UrlOrString {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The set of SRI hashes parsed from a single `integrity` string.
///
/// Wraps a `HashSet<Hash>`; equality is plain set equality (derived).
#[derive(Debug, PartialEq, Eq)]
|
||||||
|
pub(super) struct HashCollection(HashSet<Hash>);
|
||||||
|
|
||||||
|
impl HashCollection {
|
||||||
|
/// Consumes the collection and returns its greatest hash according to
/// `Hash`'s ordering, or `None` when the collection is empty.
pub(super) fn into_best(self) -> Option<Hash> {
|
||||||
|
self.0.into_iter().max()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for HashCollection {
|
||||||
|
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||||
|
let lhs = self.0.iter().max()?;
|
||||||
|
let rhs = other.0.iter().max()?;
|
||||||
|
|
||||||
|
lhs.partial_cmp(rhs)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'de> Deserialize<'de> for HashCollection {
|
||||||
|
fn deserialize<D>(deserializer: D) -> Result<HashCollection, D::Error>
|
||||||
|
where
|
||||||
|
D: Deserializer<'de>,
|
||||||
|
{
|
||||||
|
deserializer.deserialize_string(HashCollectionVisitor)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Serde visitor that turns a string value into a `HashCollection`.
struct HashCollectionVisitor;
|
||||||
|
|
||||||
|
impl<'de> Visitor<'de> for HashCollectionVisitor {
|
||||||
|
type Value = HashCollection;
|
||||||
|
|
||||||
|
/// Describes the accepted input for serde's "invalid type" error messages.
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
formatter.write_str("a single SRI hash or a collection of them (separated by spaces)")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Splits `value` on ASCII whitespace and parses every piece with `Hash::new`.
///
/// The first piece that fails to parse aborts the whole conversion; its
/// error is converted with `E::custom`. A `value` with no non-whitespace
/// pieces produces an empty collection.
fn visit_str<E>(self, value: &str) -> Result<HashCollection, E>
|
||||||
|
where
|
||||||
|
E: de::Error,
|
||||||
|
{
|
||||||
|
let hashes = value
|
||||||
|
.split_ascii_whitespace()
|
||||||
|
.map(Hash::new)
|
||||||
|
// Collecting into a `Result` short-circuits on the first `Err`.
.collect::<anyhow::Result<_>>()
|
||||||
|
.map_err(E::custom)?;
|
||||||
|
|
||||||
|
Ok(HashCollection(hashes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A single SRI hash string of the form `<algorithm>-<rest>`.
///
/// Equality and hashing are derived from the whole string.
// NOTE(review): the derived `Deserialize` builds a `Hash` straight from a string
// and so presumably bypasses the algorithm check in `Hash::new` — confirm that
// nothing deserializes a bare `Hash`.
#[derive(Debug, Deserialize, PartialEq, Eq, Hash)]
|
||||||
|
pub struct Hash(String);
|
||||||
|
|
||||||
|
// Hash algorithms, in ascending preference.
// `Hash::new` accepts only these prefixes, and `Hash`'s ordering compares
// positions in this slice, so a later entry compares greater.
|
||||||
|
const ALGOS: &[&str] = &["sha1", "sha512"];
|
||||||
|
|
||||||
|
impl Hash {
|
||||||
|
/// Builds a `Hash` from an SRI string after validating its algorithm prefix.
///
/// The text before the first `-` must be one of `ALGOS`; the text after it
/// is not inspected.
///
/// # Errors
///
/// Fails when `s` contains no `-`, or when the prefix is not in `ALGOS`.
fn new(s: impl AsRef<str>) -> anyhow::Result<Hash> {
|
||||||
|
let algo = s
|
||||||
|
.as_ref()
|
||||||
|
.split_once('-')
|
||||||
|
.ok_or_else(|| anyhow!("expected SRI hash, got {:?}", s.as_ref()))?
|
||||||
|
// Keep only the algorithm part (left of the first `-`).
.0;
|
||||||
|
|
||||||
|
if ALGOS.iter().any(|&a| algo == a) {
|
||||||
|
Ok(Hash(s.as_ref().to_string()))
|
||||||
|
} else {
|
||||||
|
Err(anyhow!("unknown hash algorithm {algo:?}"))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Hash {
|
||||||
|
/// Writes the wrapped SRI string by delegating to its own `fmt`.
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
self.0.fmt(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PartialOrd for Hash {
|
||||||
|
fn partial_cmp(&self, other: &Hash) -> Option<Ordering> {
|
||||||
|
let lhs = self.0.split_once('-')?.0;
|
||||||
|
let rhs = other.0.split_once('-')?.0;
|
||||||
|
|
||||||
|
ALGOS
|
||||||
|
.iter()
|
||||||
|
.position(|&s| lhs == s)?
|
||||||
|
.partial_cmp(&ALGOS.iter().position(|&s| rhs == s)?)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ord for Hash {
|
||||||
|
fn cmp(&self, other: &Hash) -> Ordering {
|
||||||
|
self.partial_cmp(other).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(clippy::case_sensitive_file_extension_comparisons)]
|
#[allow(clippy::case_sensitive_file_extension_comparisons)]
|
||||||
fn to_new_packages(
|
fn to_new_packages(
|
||||||
old_packages: HashMap<String, OldPackage>,
|
old_packages: HashMap<String, OldPackage>,
|
||||||
@ -149,8 +259,13 @@ fn get_initial_url() -> anyhow::Result<Url> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{get_initial_url, to_new_packages, OldPackage, Package, UrlOrString};
|
use super::{
|
||||||
use std::collections::HashMap;
|
get_initial_url, to_new_packages, Hash, HashCollection, OldPackage, Package, UrlOrString,
|
||||||
|
};
|
||||||
|
use std::{
|
||||||
|
cmp::Ordering,
|
||||||
|
collections::{HashMap, HashSet},
|
||||||
|
};
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -188,4 +303,23 @@ mod tests {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks that SHA-512 is preferred over SHA-1, both when comparing two hashes
// directly and when picking the best hash out of a collection.
#[test]
|
||||||
|
fn hash_preference() {
|
||||||
|
// Direct comparison: a `sha1` hash is less than a `sha512` hash.
assert_eq!(
|
||||||
|
Hash(String::from("sha1-foo")).partial_cmp(&Hash(String::from("sha512-foo"))),
|
||||||
|
Some(Ordering::Less)
|
||||||
|
);
|
||||||
|
|
||||||
|
// Selection: `into_best` returns the `sha512` entry of a mixed set.
assert_eq!(
|
||||||
|
HashCollection({
|
||||||
|
let mut set = HashSet::new();
|
||||||
|
set.insert(Hash(String::from("sha512-foo")));
|
||||||
|
set.insert(Hash(String::from("sha1-bar")));
|
||||||
|
set
|
||||||
|
})
|
||||||
|
.into_best(),
|
||||||
|
Some(Hash(String::from("sha512-foo")))
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -87,7 +87,7 @@ pub struct Package {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
enum Specifics {
|
enum Specifics {
|
||||||
Registry { integrity: String },
|
Registry { integrity: lock::Hash },
|
||||||
Git { workdir: TempDir },
|
Git { workdir: TempDir },
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -134,11 +134,11 @@ impl Package {
|
|||||||
Specifics::Git { workdir }
|
Specifics::Git { workdir }
|
||||||
}
|
}
|
||||||
None => Specifics::Registry {
|
None => Specifics::Registry {
|
||||||
integrity: get_ideal_hash(
|
integrity: pkg
|
||||||
&pkg.integrity
|
.integrity
|
||||||
.expect("non-git dependencies should have assosciated integrity"),
|
.expect("non-git dependencies should have assosciated integrity")
|
||||||
)?
|
.into_best()
|
||||||
.to_string(),
|
.expect("non-git dependencies should have non-empty assosciated integrity"),
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -181,9 +181,9 @@ impl Package {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn integrity(&self) -> Option<String> {
|
pub fn integrity(&self) -> Option<&lock::Hash> {
|
||||||
match &self.specifics {
|
match &self.specifics {
|
||||||
Specifics::Registry { integrity } => Some(integrity.clone()),
|
Specifics::Registry { integrity } => Some(integrity),
|
||||||
Specifics::Git { .. } => None,
|
Specifics::Git { .. } => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -304,25 +304,9 @@ fn get_hosted_git_url(url: &Url) -> anyhow::Result<Option<Url>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Picks one hash out of a whitespace-separated `integrity` string.
///
/// A string holding exactly one entry returns that entry as-is, without
/// looking at its algorithm. Otherwise the first entry starting with
/// `sha512-` is returned, or failing that the first starting with `sha1-`.
///
/// # Errors
///
/// Fails when no entry can be selected, which includes an input with no
/// entries at all.
fn get_ideal_hash(integrity: &str) -> anyhow::Result<&str> {
|
|
||||||
let split: Vec<_> = integrity.split_ascii_whitespace().collect();
|
|
||||||
|
|
||||||
if split.len() == 1 {
|
|
||||||
Ok(split[0])
|
|
||||||
} else {
|
|
||||||
// Prefixes are tried in descending preference.
for hash in ["sha512-", "sha1-"] {
|
|
||||||
if let Some(h) = split.iter().find(|s| s.starts_with(hash)) {
|
|
||||||
return Ok(h);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Err(anyhow!("not sure which hash to select out of {split:?}"))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use super::{get_hosted_git_url, get_ideal_hash};
|
use super::get_hosted_git_url;
|
||||||
use url::Url;
|
use url::Url;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -353,18 +337,4 @@ mod tests {
|
|||||||
"GitLab URLs should be marked as invalid (lol)"
|
"GitLab URLs should be marked as invalid (lol)"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Table-driven check of `get_ideal_hash`: each row is an integrity string and
// the hash expected to be chosen from it (`None` when selection must fail).
#[test]
|
|
||||||
fn ideal_hashes() {
|
|
||||||
for (input, expected) in [
|
|
||||||
("sha512-foo sha1-bar", Some("sha512-foo")),
|
|
||||||
("sha1-bar md5-foo", Some("sha1-bar")),
|
|
||||||
("sha1-bar", Some("sha1-bar")),
|
|
||||||
("sha512-foo", Some("sha512-foo")),
|
|
||||||
("foo-bar sha1-bar", Some("sha1-bar")),
|
|
||||||
("foo-bar baz-foo", None),
|
|
||||||
] {
|
|
||||||
// `.ok()` drops the error so only success/failure is compared.
assert_eq!(get_ideal_hash(input).ok(), expected);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user