Merge pull request #205190 from NixOS/lib.path.relativeNormalise

lib.path.subpath.{isValid,normalise}: init
This commit is contained in:
Robert Hensing 2023-01-03 13:46:11 +01:00 committed by GitHub
commit f61d4d346b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 906 additions and 10 deletions

1
.github/CODEOWNERS vendored
View File

@ -28,6 +28,7 @@
/lib/cli.nix @edolstra @nbp @Profpatsch
/lib/debug.nix @edolstra @nbp @Profpatsch
/lib/asserts.nix @edolstra @nbp @Profpatsch
/lib/path.* @infinisil @fricklerhandwerk
# Nixpkgs Internals
/default.nix @nbp

View File

@ -12,6 +12,7 @@ let
{ name = "lists"; description = "list manipulation functions"; }
{ name = "debug"; description = "debugging functions"; }
{ name = "options"; description = "NixOS / nixpkgs option handling"; }
{ name = "path"; description = "path functions"; }
{ name = "filesystem"; description = "filesystem functions"; }
{ name = "sources"; description = "source filtering functions"; }
{ name = "cli"; description = "command-line serialization functions"; }

View File

@ -10,7 +10,11 @@ with pkgs; stdenv.mkDerivation {
installPhase = ''
function docgen {
# TODO: wrap lib.$1 in <literal>, make nixdoc not escape it
nixdoc -c "$1" -d "lib.$1: $2" -f "$1.nix" > "$out/$1.xml"
if [[ -e "../lib/$1.nix" ]]; then
nixdoc -c "$1" -d "lib.$1: $2" -f "$1.nix" > "$out/$1.xml"
else
nixdoc -c "$1" -d "lib.$1: $2" -f "$1/default.nix" > "$out/$1.xml"
fi
echo "<xi:include href='$1.xml' />" >> "$out/index.xml"
}

View File

@ -2,19 +2,21 @@
let
revision = pkgs.lib.trivial.revisionWithDefault (nixpkgs.revision or "master");
libDefPos = set:
builtins.map
(name: {
name = name;
libDefPos = prefix: set:
builtins.concatMap
(name: [{
name = builtins.concatStringsSep "." (prefix ++ [name]);
location = builtins.unsafeGetAttrPos name set;
})
(builtins.attrNames set);
}] ++ nixpkgsLib.optionals
(builtins.length prefix == 0 && builtins.isAttrs set.${name})
(libDefPos (prefix ++ [name]) set.${name})
) (builtins.attrNames set);
libset = toplib:
builtins.map
(subsetname: {
subsetname = subsetname;
functions = libDefPos toplib.${subsetname};
functions = libDefPos [] toplib.${subsetname};
})
(builtins.map (x: x.name) libsets);

View File

@ -27,7 +27,6 @@ let
maintainers = import ../maintainers/maintainer-list.nix;
teams = callLibs ../maintainers/team-list.nix;
meta = callLibs ./meta.nix;
sources = callLibs ./sources.nix;
versions = callLibs ./versions.nix;
# module system
@ -53,7 +52,9 @@ let
fetchers = callLibs ./fetchers.nix;
# Eval-time filesystem handling
path = callLibs ./path;
filesystem = callLibs ./filesystem.nix;
sources = callLibs ./sources.nix;
# back-compat aliases
platforms = self.systems.doubles;

196
lib/path/README.md Normal file
View File

@ -0,0 +1,196 @@
# Path library
This document explains why the `lib.path` library is designed the way it is.
The purpose of this library is to process [filesystem paths]. It does not read files from the filesystem.
It exists to support the native Nix [path value type] with extra functionality.
[filesystem paths]: https://en.m.wikipedia.org/wiki/Path_(computing)
[path value type]: https://nixos.org/manual/nix/stable/language/values.html#type-path
As an extension of the path value type, it inherits the same intended use cases and limitations:
- Only use paths to access files at evaluation time, such as the local project source.
- Paths cannot point to derivations, so they are unfit to represent dependencies.
- A path implicitly imports the referenced files into the Nix store when interpolated to a string. Therefore paths are not suitable to access files at build- or run-time, as you risk importing the path from the evaluation system instead.
Overall, this library works with two types of paths:
- Absolute paths are represented with the Nix [path value type]. Nix automatically normalises these paths.
- Subpaths are represented with the [string value type] since path value types don't support relative paths. This library normalises these paths as safely as possible. Absolute paths in strings are not supported.
A subpath refers to a specific file or directory within an absolute base directory.
It is a stricter form of a relative path, notably [without support for `..` components][parents] since those could escape the base directory.
[string value type]: https://nixos.org/manual/nix/stable/language/values.html#type-string
This library is designed to be as safe and intuitive as possible, throwing errors when operations are attempted that would produce surprising results, and giving the expected result otherwise.
This library is designed to work well as a dependency for the `lib.filesystem` and `lib.sources` library components. Contrary to these library components, `lib.path` does not read any paths from the filesystem.
This library makes only these assumptions about paths and no others:
- `dirOf path` returns the path to the parent directory of `path`, unless `path` is the filesystem root, in which case `path` is returned.
- There can be multiple filesystem roots: `p == dirOf p` and `q == dirOf q` does not imply `p == q`.
- While there's only a single filesystem root in stable Nix, the [lazy trees feature](https://github.com/NixOS/nix/pull/6530) introduces [additional filesystem roots](https://github.com/NixOS/nix/pull/6530#discussion_r1041442173).
- `path + ("/" + string)` returns the path to the `string` subdirectory in `path`.
- If `string` contains no `/` characters, then `dirOf (path + ("/" + string)) == path`.
- If `string` contains no `/` characters, then `baseNameOf (path + ("/" + string)) == string`.
- `path1 == path2` returns `true` only if `path1` points to the same filesystem path as `path2`.
Notably we do not make the assumption that we can turn paths into strings using `toString path`.
## Design decisions
Each subsection here contains a decision along with arguments and counter-arguments for (+) and against (-) that decision.
### Leading dots for relative paths
[leading-dots]: #leading-dots-for-relative-paths
Observing: Since subpaths are a form of relative paths, they can have a leading `./` to indicate that they are relative paths. This is generally not necessary for tools though.
Considering: Paths should be as explicit, consistent and unambiguous as possible.
Decision: Returned subpaths should always have a leading `./`.
<details>
<summary>Arguments</summary>
- (+) In shells, just running `foo` as a command wouldn't execute the file `foo`, whereas `./foo` would execute the file. In contrast, `foo/bar` does execute that file without the need for `./`. This can lead to confusion about when a `./` needs to be prefixed. If a `./` is always included, this becomes a non-issue. This effectively then means that paths don't overlap with command names.
- (+) Prepending with `./` makes the subpaths always valid as relative Nix path expressions.
- (+) Using paths in command line arguments could give problems if not escaped properly, e.g. if a path was `--version`. This is not a problem with `./--version`. This effectively then means that paths don't overlap with GNU-style command line options.
- (-) `./` is not required to resolve relative paths, resolution always has an implicit `./` as prefix.
- (-) It's less noisy without the `./`, e.g. in error messages.
- (+) But similarly, it could be confusing whether something was even a path.
e.g. `foo` could be anything, but `./foo` is more clearly a path.
- (+) Makes it more uniform with absolute paths (those always start with `/`).
- (-) That is not relevant for practical purposes.
- (+) `find` also outputs results with `./`.
- (-) But only if you give it an argument of `.`. If you give it the argument `some-directory`, it won't prefix that.
- (-) `realpath --relative-to` doesn't prefix relative paths with `./`.
- (+) There is no need to return the same result as `realpath`.
</details>
### Representation of the current directory
[curdir]: #representation-of-the-current-directory
Observing: The subpath that produces the base directory can be represented with `.` or `./` or `./.`.
Considering: Paths should be as consistent and unambiguous as possible.
Decision: It should be `./.`.
<details>
<summary>Arguments</summary>
- (+) `./` would be inconsistent with [the decision to not persist trailing slashes][trailing-slashes].
- (-) `.` is how `realpath` normalises paths.
- (+) `.` can be interpreted as a shell command (it's a builtin for sourcing files in `bash` and `zsh`).
- (+) `.` would be the only path without a `/`. It could not be used as a Nix path expression, since those require at least one `/` to be parsed as such.
- (-) `./.` is rather long.
- (-) We don't require users to type this though, as it's only output by the library.
As inputs all three variants are supported for subpaths (and we can't do anything about absolute paths)
- (-) `builtins.dirOf "foo" == "."`, so `.` would be consistent with that.
- (+) `./.` is consistent with the [decision to have leading `./`][leading-dots].
- (+) `./.` is a valid Nix path expression, although this property does not hold for every relative path or subpath.
</details>
### Subpath representation
[relrepr]: #subpath-representation
Observing: Subpaths such as `foo/bar` can be represented in various ways:
- string: `"foo/bar"`
- list with all the components: `[ "foo" "bar" ]`
- attribute set: `{ type = "relative-path"; components = [ "foo" "bar" ]; }`
Considering: Paths should be as safe to use as possible. We should generate string outputs in the library and not encourage users to do that themselves.
Decision: Paths are represented as strings.
<details>
<summary>Arguments</summary>
- (+) It's simpler for the users of the library. One doesn't have to convert a path to a string before it can be used.
- (+) Naively converting the list representation to a string with `concatStringsSep "/"` would break for `[]`, requiring library users to be more careful.
- (+) It doesn't encourage people to do their own path processing and instead use the library.
With a list representation it would seem easy to just use `lib.lists.init` to get the parent directory, but then it breaks for `.`, which would be represented as `[ ]`.
- (+) `+` is convenient and doesn't work on lists and attribute sets.
- (-) Shouldn't use `+` anyways, we export safer functions for path manipulation.
</details>
### Parent directory
[parents]: #parent-directory
Observing: Relative paths can have `..` components, which refer to the parent directory.
Considering: Paths should be as safe and unambiguous as possible.
Decision: `..` path components in string paths are not supported, neither as inputs nor as outputs. Hence, string paths are called subpaths, rather than relative paths.
<details>
<summary>Arguments</summary>
- (+) If we wanted relative paths to behave according to the "physical" interpretation (as a directory tree with relations between nodes), it would require resolving symlinks, since e.g. `foo/..` would not be the same as `.` if `foo` is a symlink.
- (-) The "logical" interpretation is also valid (treating paths as a sequence of names), and is used by some software. It is simpler, and not using symlinks at all is safer.
- (+) Mixing both models can lead to surprises.
- (+) We can't resolve symlinks without filesystem access.
- (+) Nix also doesn't support reading symlinks at evaluation time.
- (-) We could just not handle such cases, e.g. `equals "foo" "foo/bar/.." == false`. The paths are different, we don't need to check whether the paths point to the same thing.
- (+) Assume we said `relativeTo /foo /bar == "../bar"`. If this is used like `/bar/../foo` in the end, and `bar` turns out to be a symlink to somewhere else, this won't be accurate.
- (-) We could decide to not support such ambiguous operations, or mark them as such, e.g. the normal `relativeTo` will error on such a case, but there could be `extendedRelativeTo` supporting that.
- (-) `..` are a part of paths, a path library should therefore support it.
- (+) If we can convincingly argue that all such use cases are better done e.g. with runtime tools, the library not supporting it can nudge people towards using those.
- (-) We could allow "..", but only in the prefix.
- (+) Then we'd have to throw an error for doing `append /some/path "../foo"`, making it non-composable.
- (+) The same is for returning paths with `..`: `relativeTo /foo /bar => "../bar"` would produce a non-composable path.
- (+) We argue that `..` is not needed at the Nix evaluation level, since we'd always start evaluation from the project root and don't go up from there.
- (+) `..` is supported in Nix paths, turning them into absolute paths.
- (-) This is ambiguous in the presence of symlinks.
- (+) If you need `..` for building or runtime, you can use build-/run-time tooling to create those (e.g. `realpath` with `--relative-to`), or use absolute paths instead.
This also gives you the ability to correctly handle symlinks.
</details>
### Trailing slashes
[trailing-slashes]: #trailing-slashes
Observing: Subpaths can contain trailing slashes, like `foo/`, indicating that the path points to a directory and not a file.
Considering: Paths should be as consistent as possible, there should only be a single normalisation for the same path.
Decision: All functions remove trailing slashes in their results.
<details>
<summary>Arguments</summary>
- (+) It allows normalisations to be unique, in that there's only a single normalisation for the same path. If trailing slashes were preserved, both `foo/bar` and `foo/bar/` would be valid but different normalisations for the same path.
- Comparison to other frameworks to figure out the least surprising behavior:
- (+) Nix itself doesn't support trailing slashes when parsing and doesn't preserve them when appending paths.
- (-) [Rust's std::path](https://doc.rust-lang.org/std/path/index.html) does preserve them during [construction](https://doc.rust-lang.org/std/path/struct.Path.html#method.new).
- (+) Doesn't preserve them when returning individual [components](https://doc.rust-lang.org/std/path/struct.Path.html#method.components).
- (+) Doesn't preserve them when [canonicalizing](https://doc.rust-lang.org/std/path/struct.Path.html#method.canonicalize).
- (+) [Python 3's pathlib](https://docs.python.org/3/library/pathlib.html#module-pathlib) doesn't preserve them during [construction](https://docs.python.org/3/library/pathlib.html#pathlib.PurePath).
- Notably it represents the individual components as a list internally.
- (-) [Haskell's filepath](https://hackage.haskell.org/package/filepath-1.4.100.0) has [explicit support](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#g:6) for handling trailing slashes.
- (-) Does preserve them for [normalisation](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html#v:normalise).
- (-) [NodeJS's Path library](https://nodejs.org/api/path.html) preserves trailing slashes for [normalisation](https://nodejs.org/api/path.html#pathnormalizepath).
- (+) For [parsing a path](https://nodejs.org/api/path.html#pathparsepath) into its significant elements, trailing slashes are not preserved.
- (+) Nix's builtin function `dirOf` gives an unexpected result for paths with trailing slashes: `dirOf "foo/bar/" == "foo/bar"`.
Inconsistently, `baseNameOf` works correctly though: `baseNameOf "foo/bar/" == "bar"`.
- (-) We are writing a path library to improve handling of paths though, so we shouldn't use these functions and discourage their use.
- (-) Unexpected result when normalising intermediate paths, like `subpath.normalise ("foo" + "/") + "bar" == "./foobar"`.
- (+) This is not a practical use case though.
- (+) Don't use `+` to append paths, this library has a `join` function for that.
- (-) Users might use `+` out of habit though.
- (+) The `realpath` command also removes trailing slashes.
- (+) Even with a trailing slash, the path is the same, it's only an indication that it's a directory.
</details>
## Other implementations and references
- [Rust](https://doc.rust-lang.org/std/path/struct.Path.html)
- [Python](https://docs.python.org/3/library/pathlib.html)
- [Haskell](https://hackage.haskell.org/package/filepath-1.4.100.0/docs/System-FilePath.html)
- [Nodejs](https://nodejs.org/api/path.html)
- [POSIX.1-2017](https://pubs.opengroup.org/onlinepubs/9699919799/nframe.html)

218
lib/path/default.nix Normal file
View File

@ -0,0 +1,218 @@
# Functions for working with paths, see ./README.md
{ lib }:
let
inherit (builtins)
isString
split
match
;
inherit (lib.lists)
length
head
last
genList
elemAt
;
inherit (lib.strings)
concatStringsSep
substring
;
inherit (lib.asserts)
assertMsg
;
# Explain why `value` cannot be used as a subpath.
# Evaluates to a human-readable message, or to `null` when `value` is a valid subpath
subpathInvalidReason = value:
  let
    # Both bindings are lazy, they are only forced by the branches below after
    # `value` has been checked to be a non-empty string
    isAbsolute = substring 0 1 value == "/";
    # `match` has to match the entire string, so this only detects `..` when it
    # is a whole path component. Such components are not supported, see
    # ./README.md#parent-directory
    hasParentComponent = match "(.*/)?\\.\\.(/.*)?" value != null;
  in
  if ! isString value then
    "The given value is of type ${builtins.typeOf value}, but a string was expected"
  else if value == "" then
    "The given string is empty"
  else if isAbsolute then
    "The given string \"${value}\" starts with a `/`, representing an absolute path"
  else if hasParentComponent then
    "The given string \"${value}\" contains a `..` component, which is not allowed in subpaths"
  else
    null;
# Split a subpath string into its list of normalised components.
# The result contains neither "." nor "" components.
# This function does not check the validity of its argument itself, in
# particular it does not reject ".." components. `subpath.normalise` validates
# the string with `subpathInvalidReason` before calling this function
splitRelPath = path:
let
# Split the string into its parts using regex for efficiency. This regex
# matches patterns like "/", "/./", "/././", with arbitrarily many "/"s
# together. These are the main special cases:
# - Leading "./" gets split into a leading "." part
# - Trailing "/." or "/" get split into a trailing "." or ""
# part respectively
#
# For valid subpaths these are the only cases where "." and "" parts can occur
parts = split "/+(\\./+)*" path;
# `split` creates a list of 2 * k + 1 elements, containing the k +
# 1 parts, interleaved with k matches where k is the number of
# (non-overlapping) matches. This calculation here gets the number of parts
# back from the list length
# floor( (2 * k + 1) / 2 ) + 1 == floor( k + 1/2 ) + 1 == k + 1
partCount = length parts / 2 + 1;
# To assemble the final list of components we want to:
# - Skip a potential leading ".", normalising "./foo" to "foo"
# - Skip a potential trailing "." or "", normalising "foo/" and "foo/." to
# "foo". See ./README.md#trailing-slashes
skipStart = if head parts == "." then 1 else 0;
skipEnd = if last parts == "." || last parts == "" then 1 else 0;
# We can now know the length of the result by removing the number of
# skipped parts from the total number
componentCount = partCount - skipEnd - skipStart;
in
# Special case of a single "." path component. Here `parts` consists of the
# single part ".", which is counted by both skipStart and skipEnd because
# they don't verify that they refer to different parts. This would leave a
# componentCount of -1
if path == "." then []
# Generate the result list directly. This is more efficient than a
# combination of `filter`, `init` and `tail`, because here we don't
# allocate any intermediate lists
else genList (index:
# To get to the element we need to add the number of parts we skip and
# multiply by two due to the interleaved layout of `parts`
elemAt parts ((skipStart + index) * 2)
) componentCount;
# Turn a list of path components into a normalised subpath string
joinRelPath = components:
  let
    # Without any components there is nothing to join. The empty string is not
    # a valid relative path, so the current directory is spelled `.` instead
    joined =
      if components == [] then "."
      else concatStringsSep "/" components;
  in
  # Results always carry a leading `./`, see ./README.md#leading-dots-for-relative-paths
  "./" + joined;
in /* No rec! Add dependencies on this file at the top. */ {
/* Whether a value is a valid subpath string.
A value is a valid subpath string if all of these hold:
- The value is a string
- The string is not empty
- The string doesn't start with a `/`
- The string doesn't contain any `..` path components
Values of any type are accepted, values that are not strings give `false`
instead of an error.
Type:
subpath.isValid :: String -> Bool
Example:
# Not a string
subpath.isValid null
=> false
# Empty string
subpath.isValid ""
=> false
# Absolute path
subpath.isValid "/foo"
=> false
# Contains a `..` path component
subpath.isValid "../foo"
=> false
# Valid subpath
subpath.isValid "foo/bar"
=> true
# Doesn't need to be normalised
subpath.isValid "./foo//bar/"
=> true
*/
subpath.isValid = value:
subpathInvalidReason value == null;
/* Bring a subpath into its normal form. An error is thrown for strings that
   are not valid subpaths according to `lib.path.subpath.isValid`.

   Normalising a subpath means to:

   - Collapse repeated `/` into a single one

   - Drop redundant `.` components

   - Drop a trailing `/` or `/.`

   - Prepend a leading `./`

   Laws:

   - (Idempotency) Normalising an already normalised subpath doesn't change it:

         subpath.normalise (subpath.normalise p) == subpath.normalise p

   - (Uniqueness) Subpaths that lead to the same file system node have the same normalisation:

         subpath.normalise p != subpath.normalise q -> $(realpath ${p}) != $(realpath ${q})

   - The result of appending to a Nix path value is not changed:

         base + ("/" + p) == base + ("/" + subpath.normalise p)

   - The path according to `realpath` is not changed:

         $(realpath ${p}) == $(realpath ${subpath.normalise p})

   - Errors occur for invalid subpaths only:

         (builtins.tryEval (subpath.normalise p)).success == subpath.isValid p

   Type:
     subpath.normalise :: String -> String

   Example:
     # limit repeating `/` to a single one
     subpath.normalise "foo//bar"
     => "./foo/bar"

     # remove redundant `.` components
     subpath.normalise "foo/./bar"
     => "./foo/bar"

     # add leading `./`
     subpath.normalise "foo/bar"
     => "./foo/bar"

     # remove trailing `/`
     subpath.normalise "foo/bar/"
     => "./foo/bar"

     # remove trailing `/.`
     subpath.normalise "foo/bar/."
     => "./foo/bar"

     # Return the current directory as `./.`
     subpath.normalise "."
     => "./."

     # error on `..` path components
     subpath.normalise "foo/../bar"
     => <error>

     # error on empty string
     subpath.normalise ""
     => <error>

     # error on absolute path
     subpath.normalise "/foo"
     => <error>
*/
subpath.normalise = path:
  let
    # `null` for a valid subpath, otherwise a message describing the problem.
    # The message is only interpolated below when the assertion fails
    invalidReason = subpathInvalidReason path;
  in
  assert assertMsg (invalidReason == null)
    "lib.path.subpath.normalise: Argument is not a valid subpath string: ${invalidReason}";
  joinRelPath (splitRelPath path);
}

View File

@ -0,0 +1,34 @@
# Derivation running the unit tests (./unit.nix) and the property tests
# (./prop.sh) of the `lib.path` library
{
# The nixpkgs tree that `pkgs` is imported from by default
nixpkgs ? ../../..,
# The system `pkgs` is instantiated for by default
system ? builtins.currentSystem,
# Package set providing `runCommand` and the tools used by the tests.
# By default imported without any user configuration or overlays
pkgs ? import nixpkgs {
config = {};
overlays = [];
inherit system;
},
# The `lib` directory under test. It gets copied into the build directory
# and is passed to the tests via `TEST_LIB`
libpath ? ../..,
# Random seed for the property tests. With the default `null`, no argument is
# passed to prop.sh, which then chooses a random seed itself
seed ? null,
}:
pkgs.runCommand "lib-path-tests" {
nativeBuildInputs = with pkgs; [
nix
jq
bc
];
} ''
# Needed to make Nix evaluation work
export NIX_STATE_DIR=$(mktemp -d)
cp -r ${libpath} lib
export TEST_LIB=$PWD/lib
echo "Running unit tests lib/path/tests/unit.nix"
nix-instantiate --eval lib/path/tests/unit.nix \
--argstr libpath "$TEST_LIB"
echo "Running property tests lib/path/tests/prop.sh"
bash lib/path/tests/prop.sh ${toString seed}
touch $out
''

View File

@ -0,0 +1,64 @@
# Generate random path-like strings, separated by null characters.
#
# Invocation:
#
# awk -f ./generate.awk -v <variable>=<value> | tr '\0' '\n'
#
# Customizable variables (all default to 0):
# - seed: Deterministic random seed to use for generation
# - count: Number of paths to generate
# - extradotweight: Give extra weight to dots being generated
# - extraslashweight: Give extra weight to slashes being generated
# - extranullweight: Give extra weight to null being generated, making paths shorter
# The whole generator runs in the BEGIN block, no input is read.
# It repeatedly draws a random value and, depending on the weighted range the
# value falls into, prints a printable ASCII character, a ".", a "/" or the
# terminating null character, until `count` strings have been terminated.
BEGIN {
# Random seed, passed explicitly for reproducibility
srand(seed)
# Don't include special characters below 32
minascii = 32
# Exclusive upper bound of the generated characters, so DEL at 127 is not included
maxascii = 127
upperascii = maxascii - minascii
# add extra weight for ., in addition to the one weight from the ascii range
upperdot = upperascii + extradotweight
# add extra weight for /, in addition to the one weight from the ascii range
upperslash = upperdot + extraslashweight
# add extra weight for null, indicating the end of the string
# Must be at least 1 to have strings end at all
total = upperslash + 1 + extranullweight
# new=1 indicates that it's a new string
new=1
while (count > 0) {
# Random integer between [0, total)
value = int(rand() * total)
if (value < upperascii) {
# Ascii range
printf("%c", value + minascii)
new=0
} else if (value < upperdot) {
# Dot range
printf "."
new=0
} else if (value < upperslash) {
# If it's the start of a new path, only generate a / in 10% of cases
# This is always an invalid subpath, which is not a very interesting case
# The second rand() call is only made at the start of a new string
if (new && rand() > 0.1) continue
# Note that `new` is left unchanged here, so a string consisting only of
# slashes still counts as new and can't be terminated yet
printf "/"
} else {
# Do not generate empty strings
if (new) continue
printf "\x00"
count--
new=1
}
}
}

60
lib/path/tests/prop.nix Normal file
View File

@ -0,0 +1,60 @@
# Given a directory of files that each contain a path-like string, check some
# properties of the path library using those strings and return a single
# attribute set with one attribute per string, of the following form:
#
# { <string> = <lib.path.subpath.normalise string>; }
#
# If `normalise` fails to evaluate, the attribute value is set to `""`.
# If not, the resulting value is normalised again to check idempotency, and
# the result of the first normalisation is used as the attribute value.
{
  # The path to the nixpkgs lib to use
  libpath,

  # A flat directory containing files with randomly-generated
  # path-like values
  dir,
}:
let
  lib = import libpath;

  # read each file into a string
  strings = map (name:
    builtins.readFile (dir + "/${name}")
  ) (builtins.attrNames (builtins.readDir dir));

  inherit (lib.path.subpath) normalise isValid;
  inherit (lib.asserts) assertMsg;

  # Check the laws of `lib.path.subpath.normalise` for a single string.
  # Returns the normalisation of `str`, or `""` if `str` can't be normalised.
  # A violated law aborts the evaluation with a message describing it
  normaliseAndCheck = str:
    let
      originalValid = isValid str;

      # `tryOnce.value` is only a string if the first normalisation succeeded.
      # All uses of it below are guarded by `originalValid ->`, whose right-hand
      # side is only evaluated for valid subpaths
      tryOnce = builtins.tryEval (normalise str);
      tryTwice = builtins.tryEval (normalise tryOnce.value);

      absConcatOrig = /. + ("/" + str);
      absConcatNormalised = /. + ("/" + tryOnce.value);
    in
      # Check the lib.path.subpath.normalise property to only error on invalid subpaths
      assert assertMsg
        (originalValid -> tryOnce.success)
        "Even though string \"${str}\" is valid as a subpath, the normalisation for it failed";
      assert assertMsg
        (! originalValid -> ! tryOnce.success)
        "Even though string \"${str}\" is invalid as a subpath, the normalisation for it succeeded";

      # Check normalisation idempotency
      assert assertMsg
        (originalValid -> tryTwice.success)
        "For valid subpath \"${str}\", the normalisation \"${tryOnce.value}\" was not a valid subpath";
      assert assertMsg
        (originalValid -> tryOnce.value == tryTwice.value)
        "For valid subpath \"${str}\", normalising it once gives \"${tryOnce.value}\" but normalising it twice gives a different result: \"${tryTwice.value}\"";

      # Check that normalisation doesn't change a string when appended to an absolute Nix path value
      # The paths are converted with `toString` for the message: interpolating
      # a path value directly would try to import the (non-existent) path into
      # the Nix store, which fails before the actual message can be shown
      assert assertMsg
        (originalValid -> absConcatOrig == absConcatNormalised)
        "For valid subpath \"${str}\", appending to an absolute Nix path value gives \"${toString absConcatOrig}\", but appending the normalised result \"${tryOnce.value}\" gives a different value \"${toString absConcatNormalised}\"";

      # Return an empty string when failed
      if tryOnce.success then tryOnce.value else "";

in lib.genAttrs strings normaliseAndCheck

179
lib/path/tests/prop.sh Executable file
View File

@ -0,0 +1,179 @@
#!/usr/bin/env bash
# Property tests for the `lib.path` library
#
# It generates random path-like strings and runs the functions on
# them, checking that the expected laws of the functions hold
# Abort on failing commands, unset variables and failing pipeline stages
set -euo pipefail
# Let command substitutions inherit `set -e` as well
shopt -s inherit_errexit
# The directory containing this script
# https://stackoverflow.com/a/246128
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# The `lib` directory to test can be given with the TEST_LIB environment
# variable, it defaults to the one this script is a part of
if test -z "${TEST_LIB:-}"; then
TEST_LIB=$SCRIPT_DIR/../..
fi
# All files are created in a temporary directory which is removed again when
# the script exits
tmp="$(mktemp -d)"
clean_up() {
rm -rf "$tmp"
}
trap clean_up EXIT
# The realpath checks create directories relative to the working directory,
# so an empty one is used for that
mkdir -p "$tmp/work"
cd "$tmp/work"
# Defaulting to a random seed but the first argument can override this
seed=${1:-$RANDOM}
echo >&2 "Using seed $seed, use \`lib/path/tests/prop.sh $seed\` to reproduce this result"
# The number of random paths to generate. This specific number was chosen to
# be fast enough while still generating enough variety to detect bugs.
count=500
debug=0
# debug=1 # print some extra info
# debug=2 # print generated values
# Fine tuning parameters to balance the number of generated invalid paths
# to the variance in generated paths.
# These are passed on to generate.awk as variables of the same name
extradotweight=64 # Larger value: more dots
extraslashweight=64 # Larger value: more slashes
extranullweight=16 # Larger value: shorter strings
# Report a failed test case on stderr and terminate with exit status 1.
# All arguments are printed as the description of the failure
die() {
    echo "test case failed: " "$@" 1>&2
    exit 1
}
if [[ "$debug" -ge 1 ]]; then
    echo >&2 "Generating $count random path-like strings"
fi

# Read stream of null-terminated strings entry-by-entry into bash,
# write it to a file and the `strings` array.
# The file for each string is named after its index in the array.
declare -a strings=()
mkdir -p "$tmp/strings"
while IFS= read -r -d $'\0' str; do
    # `printf '%s'` writes the string verbatim. `echo -n` must not be used
    # here, because it would interpret generated strings such as "-n", "-e"
    # or "-E" as options and write an empty file for them
    printf '%s' "$str" > "$tmp/strings/${#strings[@]}"
    strings+=("$str")
done < <(awk \
    -f "$SCRIPT_DIR"/generate.awk \
    -v seed="$seed" \
    -v count="$count" \
    -v extradotweight="$extradotweight" \
    -v extraslashweight="$extraslashweight" \
    -v extranullweight="$extranullweight")

if [[ "$debug" -ge 1 ]]; then
    echo >&2 "Trying to normalise the generated path-like strings with Nix"
fi

# Precalculate all normalisations with a single Nix call. Calling Nix for each
# string individually would take way too long
nix-instantiate --eval --strict --json \
    --argstr libpath "$TEST_LIB" \
    --argstr dir "$tmp/strings" \
    "$SCRIPT_DIR"/prop.nix \
    >"$tmp/result.json"

# Uses some jq magic to turn the resulting attribute set into an associative
# bash array assignment
# Keys and values are quoted for the shell with jq's `@sh`
declare -A normalised_result="($(jq '
    to_entries
    | map("[\(.key | @sh)]=\(.value | @sh)")
    | join(" \n")' -r < "$tmp/result.json"))"
# Looks up a normalisation result for a string
# Checks that the normalisation is only failing iff it's an invalid subpath
#
# Exit status:
# - 0: The string is a valid subpath, its normalisation is printed to stdout
# - 2: The string is an invalid subpath and the normalisation correctly failed
# - 1: (from `die`) The library and the validity check below disagree
#
# This function is called in a command substitution, where `die` only
# terminates the subshell. The expected failure therefore uses a status that
# is distinct from the one of `die`, so that the caller can tell a correctly
# rejected string apart from a failed test case and propagate the latter.
normalise() {
    local str=$1
    local valid normalised
    # Uses the same check for validity as in the library implementation
    if [[ "$str" == "" || "$str" == /* || "$str" =~ ^(.*/)?\.\.(/.*)?$ ]]; then
        valid=
    else
        valid=1
    fi

    normalised=${normalised_result[$str]}
    # An empty string indicates failure, this is encoded in ./prop.nix
    if [[ -n "$normalised" ]]; then
        if [[ -n "$valid" ]]; then
            echo "$normalised"
        else
            die "For invalid subpath \"$str\", lib.path.subpath.normalise returned this result: \"$normalised\""
        fi
    else
        if [[ -n "$valid" ]]; then
            die "For valid subpath \"$str\", lib.path.subpath.normalise failed"
        else
            if [[ "$debug" -ge 2 ]]; then
                echo >&2 "String \"$str\" is not a valid subpath"
            fi
            # Invalid and it correctly failed, we let the caller continue if they catch the exit code
            return 2
        fi
    fi
}

# Intermediate result populated by test_idempotency_realpath
# and used in test_normalise_uniqueness
#
# Contains a mapping from a normalised subpath to the realpath result it represents
declare -A norm_to_real

# Checks for each generated string that is a valid subpath that `realpath`
# gives the same result for the string and for its normalisation.
# Fills `norm_to_real` with the results along the way.
test_idempotency_realpath() {
    if [[ "$debug" -ge 1 ]]; then
        echo >&2 "Checking idempotency of each result and making sure the realpath result isn't changed"
    fi

    # Count invalid subpaths to display stats
    invalid=0
    local status
    for str in "${strings[@]}"; do
        status=0
        result=$(normalise "$str") || status=$?
        if [[ "$status" -eq 2 ]]; then
            # Correctly rejected invalid subpath, nothing more to check
            ((invalid++)) || true
            continue
        elif [[ "$status" -ne 0 ]]; then
            # The test case failed inside the command substitution, where the
            # reason was already printed. Only the subshell was terminated by
            # that, so the failure needs to be propagated here
            exit "$status"
        fi

        # Check the law that it doesn't change the result of a realpath
        mkdir -p -- "$str" "$result"
        real_orig=$(realpath -- "$str")
        real_norm=$(realpath -- "$result")

        if [[ "$real_orig" != "$real_norm" ]]; then
            die "realpath of the original string \"$str\" (\"$real_orig\") is not the same as realpath of the normalisation \"$result\" (\"$real_norm\")"
        fi

        if [[ "$debug" -ge 2 ]]; then
            echo >&2 "String \"$str\" gets normalised to \"$result\" and file path \"$real_orig\""
        fi
        norm_to_real["$result"]="$real_orig"
    done
    if [[ "$debug" -ge 1 ]]; then
        echo >&2 "$(bc <<< "scale=1; 100 / $count * $invalid")% of the total $count generated strings were invalid subpath strings, and were therefore ignored"
    fi
}
# Checks the uniqueness law of lib.path.subpath.normalise: Two different
# normalisations must never have the same realpath.
#
# Reads the global `norm_to_real` mapping. Instead of comparing every pair of
# its entries, the inverse mapping from realpath to normalisation is built in
# a single pass, failing as soon as a realpath was already recorded for
# another normalisation.
test_normalise_uniqueness() {
    if [[ "$debug" -ge 1 ]]; then
        echo >&2 "Checking for the uniqueness law"
    fi

    local -A real_to_norm=()
    local norm_p norm_q real_p
    for norm_p in "${!norm_to_real[@]}"; do
        real_p=${norm_to_real["$norm_p"]}
        # The keys of `norm_to_real` are unique and can't be empty, so a
        # non-empty lookup result is always a different normalisation that
        # has the same realpath
        norm_q=${real_to_norm["$real_p"]:-}
        if [[ -n "$norm_q" ]]; then
            die "Normalisations \"$norm_q\" and \"$norm_p\" are different, but the realpath of them is the same: \"$real_p\""
        fi
        real_to_norm["$real_p"]=$norm_p
    done
}
test_idempotency_realpath
test_normalise_uniqueness
echo >&2 tests ok

125
lib/path/tests/unit.nix Normal file
View File

@ -0,0 +1,125 @@
# Unit tests for lib.path functions. Use `nix-build` in this directory to
# run these
#
# `libpath` is imported and used as `lib`. The expression evaluates to a success
# message if all cases pass, and throws with the pretty-printed failures otherwise
{ libpath }:
let
  lib = import libpath;
  inherit (lib.path.subpath) isValid normalise;

  # Case: `subpath.isValid value` has to evaluate to `expected`
  validity = value: expected: {
    expr = isValid value;
    inherit expected;
  };

  # Case: `subpath.normalise value` has to evaluate to `expected`
  normalisesTo = value: expected: {
    expr = normalise value;
    inherit expected;
  };

  # Case: evaluating `subpath.normalise value` has to fail
  normaliseFails = value: {
    expr = (builtins.tryEval (normalise value)).success;
    expected = false;
  };

  # lib.runTests returns the list of failed cases, empty if everything passed
  failures = lib.runTests {
    # subpath.isValid: documented examples
    testSubpathIsValidExample1 = validity null false;
    testSubpathIsValidExample2 = validity "" false;
    testSubpathIsValidExample3 = validity "/foo" false;
    testSubpathIsValidExample4 = validity "../foo" false;
    testSubpathIsValidExample5 = validity "foo/bar" true;
    testSubpathIsValidExample6 = validity "./foo//bar/" true;

    # subpath.isValid: only a component that is exactly `..` is rejected
    testSubpathIsValidTwoDotsEnd = validity "foo/.." false;
    testSubpathIsValidTwoDotsMiddle = validity "foo/../bar" false;
    testSubpathIsValidTwoDotsPrefix = validity "..foo" true;
    testSubpathIsValidTwoDotsSuffix = validity "foo.." true;
    testSubpathIsValidTwoDotsPrefixComponent = validity "foo/..bar/baz" true;
    testSubpathIsValidTwoDotsSuffixComponent = validity "foo/bar../baz" true;
    testSubpathIsValidThreeDots = validity "..." true;
    testSubpathIsValidFourDots = validity "...." true;
    testSubpathIsValidThreeDotsComponent = validity "foo/.../bar" true;
    testSubpathIsValidFourDotsComponent = validity "foo/..../bar" true;

    # subpath.normalise: documented examples
    testSubpathNormaliseExample1 = normalisesTo "foo//bar" "./foo/bar";
    testSubpathNormaliseExample2 = normalisesTo "foo/./bar" "./foo/bar";
    testSubpathNormaliseExample3 = normalisesTo "foo/bar" "./foo/bar";
    testSubpathNormaliseExample4 = normalisesTo "foo/bar/" "./foo/bar";
    testSubpathNormaliseExample5 = normalisesTo "foo/bar/." "./foo/bar";
    testSubpathNormaliseExample6 = normalisesTo "." "./.";
    testSubpathNormaliseExample7 = normaliseFails "foo/../bar";
    testSubpathNormaliseExample8 = normaliseFails "";
    testSubpathNormaliseExample9 = normaliseFails "/foo";

    # subpath.normalise: additional cases
    testSubpathNormaliseIsValidDots =
      normalisesTo "./foo/.bar/.../baz...qux" "./foo/.bar/.../baz...qux";
    testSubpathNormaliseWrongType = normaliseFails null;
    testSubpathNormaliseTwoDots = normaliseFails "..";
  };
in
  if failures != [] then
    throw "Path unit tests failed: ${lib.generators.toPretty {} failures}"
  else
    "Unit tests successful"

View File

@ -15,6 +15,9 @@ pkgs.runCommand "nixpkgs-lib-tests" {
inherit pkgs;
lib = import ../.;
})
(import ../path/tests {
inherit pkgs;
})
];
} ''
datadir="${pkgs.nix}/share"

View File

@ -1,4 +1,4 @@
{ lib, stdenv, fetchFromGitHub, rustPlatform, darwin }:
{ lib, stdenv, fetchFromGitHub, fetchpatch, rustPlatform, darwin }:
rustPlatform.buildRustPackage rec {
pname = "nixdoc";
@ -11,6 +11,14 @@ rustPlatform.buildRustPackage rec {
sha256 = "14d4dq06jdqazxvv7fq5872zy0capxyb0fdkp8qg06gxl1iw201s";
};
patches = [
# Support nested identifiers https://github.com/nix-community/nixdoc/pull/27
(fetchpatch {
url = "https://github.com/nix-community/nixdoc/pull/27/commits/ea542735bf675fe2ccd37edaffb9138d1a8c1b7e.patch";
sha256 = "1fmz44jv2r9qsnjxvkkjfb0safy69l4x4vx1g5gisrp8nwdn94rj";
})
];
buildInputs = lib.optionals stdenv.isDarwin [ darwin.Security ];
cargoSha256 = "1nv6g8rmjjbwqmjkrpqncypqvx5c7xp2zlx5h6rw2j9d1wlys0v5";