diff --git a/Makefile b/Makefile
index d3542c3e9..33c52865e 100644
--- a/Makefile
+++ b/Makefile
@@ -18,6 +18,7 @@ makefiles = \
   src/libexpr/local.mk \
   src/libcmd/local.mk \
   src/nix/local.mk \
+  src/nix-find-roots/local.mk \
   src/resolve-system-dependencies/local.mk \
   scripts/local.mk \
   misc/bash/local.mk \
diff --git a/flake.nix b/flake.nix
index 0bc70768e..17a7850ae 100644
--- a/flake.nix
+++ b/flake.nix
@@ -152,6 +152,24 @@
             '';
           };
 
+          nix-find-roots = prev.stdenv.mkDerivation {
+            name = "nix-find-roots-${version}";
+            inherit version;
+
+            src = "${self}/src/nix-find-roots";
+
+            CXXFLAGS = prev.lib.optionalString prev.stdenv.hostPlatform.isStatic "-static";
+
+            buildPhase = ''
+              $CXX $CXXFLAGS -std=c++17 *.cc **/*.cc -I lib -o nix-find-roots
+            '';
+
+            installPhase = ''
+              mkdir -p $out/bin
+              cp nix-find-roots $out/bin/
+            '';
+          };
+
           libgit2-nix = final.libgit2.overrideAttrs (attrs: {
             src = libgit2;
             version = libgit2.lastModifiedDate;
@@ -366,6 +384,7 @@
           default = nix;
         } // (lib.optionalAttrs (builtins.elem system linux64BitSystems) {
           nix-static = nixpkgsFor.${system}.static.nix;
+          nix-find-roots = nixpkgsFor.${system}.static.nix-find-roots;
           dockerImage =
             let
               pkgs = nixpkgsFor.${system}.native;
diff --git a/src/nix-find-roots/.gitignore b/src/nix-find-roots/.gitignore
new file mode 100644
index 000000000..fbbee5484
--- /dev/null
+++ b/src/nix-find-roots/.gitignore
@@ -0,0 +1 @@
+nix-find-roots
diff --git a/src/nix-find-roots/lib/find-roots.cc b/src/nix-find-roots/lib/find-roots.cc
new file mode 100644
index 000000000..f57b06326
--- /dev/null
+++ b/src/nix-find-roots/lib/find-roots.cc
@@ -0,0 +1,242 @@
+/*
+ * A very simple utility to trace all the gc roots through the file-system.
+ * The reason for this program is that tracing these roots is the only part of
+ * Nix that needs to run as root (because it requires reading through the
+ * user home directories to resolve the indirect roots)
+ *
+ * This program intentionally doesn't depend on any Nix library to reduce
the attack surface.
+ */
+
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <regex>
+#include <set>
+#include <sstream>
+
+#include "find-roots.hh"
+
+namespace nix::roots_tracer {
+namespace fs = std::filesystem;
+using std::set, std::string;
+
+/* Escape every regex metacharacter of `raw` with a backslash, so that it matches literally. */
+string quoteRegexChars(const string & raw)
+{
+    static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])");
+    return std::regex_replace(raw, specialRegex, R"(\$&)");
+}
+/* Regex for `storeDir`, then `/`, then a name starting with a digit or a lowercase letter. */
+std::regex storePathRegex(const fs::path & storeDir)
+{
+    return std::regex(quoteRegexChars(storeDir.string()) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
+}
+
+/* Whether the components of `storeDir` are a leading prefix of those of `dir`. */
+bool isInStore(const fs::path & storeDir, const fs::path & dir)
+{
+    return std::mismatch(storeDir.begin(), storeDir.end(), dir.begin(), dir.end()).first == storeDir.end();
+}
+
+/* Record in `res` the store paths reachable from `root`, whose `symlink_status` is `status`.
+ * Stops once `recursionsLeft` is negative; only following a symlink uses up a recursion. */
+void traceStaticRoot(
+    const TracerConfig & opts, int recursionsLeft, TraceResult & res,
+    const fs::path & root, const fs::file_status & status)
+{
+    opts.debug("Considering file " + root.string());
+
+    if (recursionsLeft < 0)
+        return;
+
+    switch (status.type()) {
+        case fs::file_type::directory:
+            {
+                // Non-recursive iterator: the call below already descends into sub-directories,
+                // so a recursive one would walk every nested directory several times over.
+                auto directory_iterator = fs::directory_iterator(root);
+                for (auto & child : directory_iterator)
+                    traceStaticRoot(opts, recursionsLeft, res, child.path(), child.symlink_status());
+            }
+            break;
+        case fs::file_type::symlink:
+            {
+                // A relative link target is resolved against the directory containing the link.
+                auto target = root.parent_path() / fs::read_symlink(root);
+                auto not_found = [&](std::string msg) {
+                    opts.debug("Error accessing the file " + target.string() + ": " + msg);
+                    opts.debug("(When resolving the symlink " + root.string() + ")");
+                    res.deadLinks.insert(root);
+                };
+                try {
+                    auto target_status = fs::symlink_status(target);
+                    if (target_status.type() == fs::file_type::not_found)
+                        not_found("Not found");
+
+                    if (isInStore(opts.storeDir, target)) {
+                        res.storeRoots[target].insert(root);
+                        return;
+                    } else {
+                        traceStaticRoot(opts, recursionsLeft - 1, res, target, target_status);
+                    }
+                } catch (fs::filesystem_error & e) {
+                    not_found(e.what());
+                }
+            }
+            break;
+        case fs::file_type::regular:
+            {
+                auto possibleStorePath = opts.storeDir / root.filename();
+                if (fs::exists(possibleStorePath))
+                    res.storeRoots[possibleStorePath].insert(root);
+            }
+            break;
+        case fs::file_type::not_found:
+        case fs::file_type::block:
+        case fs::file_type::character:
+        case fs::file_type::fifo:
+        case fs::file_type::socket:
+        case fs::file_type::unknown:
+        case fs::file_type::none:
+        default:
+            break;
+    }
+}
+
+/* Same, but queries the status of `root` first; a filesystem error raised while tracing `root` is only logged. */
+void traceStaticRoot(
+    const TracerConfig & opts, int recursionsLeft, TraceResult & res, const fs::path & root)
+{
+    try {
+        auto status = fs::symlink_status(root);
+        traceStaticRoot(opts, recursionsLeft, res, root, status);
+    } catch (fs::filesystem_error & e) {
+        opts.debug("Error accessing the file " + root.string() + ": " + e.what());
+    }
+}
+
+/*
+ * Return the set of all the store paths that are reachable from the given set
+ * of filesystem paths, by:
+ * - descending into the directories
+ * - following the symbolic links (at most twice)
+ * - reading the name of regular files (when encountering a file
+ *   `/foo/bar/abcdef`, the algorithm will try to access `/nix/store/abcdef`)
+ *
+ * Also returns the set of all dead links encountered during the process (so
+ * that they can be removed if it makes sense).
+ */
+TraceResult traceStaticRoots(TracerConfig opts, set<fs::path> roots)
+{
+    int maxRecursionLevel = 2;
+    TraceResult res;
+    for (auto & root : roots)
+        traceStaticRoot(opts, maxRecursionLevel, res, root);
+    return res;
+}
+
+/**
+ * Scan the content of the given file for all the occurrences of something that looks
+ * like a store path (i.e.
that matches `storePathRegex(opts.storeDir)`) and add them
+ * to `res`
+ */
+void scanFileContent(const TracerConfig & opts, const fs::path & fileToScan, Roots & res)
+{
+    if (!fs::exists(fileToScan))
+        return;
+
+    std::ostringstream contentStream;
+    {
+        // Read the whole file in one go; the stream is closed at the end of this scope.
+        std::ifstream input;
+        input.open(fileToScan);
+        input >> contentStream.rdbuf();
+    }
+    std::string content = contentStream.str();
+    auto regex = storePathRegex(opts.storeDir);
+    auto firstMatch = std::sregex_iterator { content.begin(), content.end(), regex };
+    auto fileEnd = std::sregex_iterator{};
+    for (auto i = firstMatch; i != fileEnd; ++i)
+        res[i->str()].emplace(fileToScan);
+}
+
+/**
+ * Scan the content of a `/proc/[pid]/maps` file for regions that are mmapped to
+ * a store path
+ */
+void scanMapsFile(const TracerConfig & opts, const fs::path & mapsFile, Roots & res)
+{
+    if (!fs::exists(mapsFile))
+        return;
+
+    // Five whitespace-separated fields, then an absolute path, which is the part captured.
+    static auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
+    std::stringstream mappedFile;
+    {
+        std::ifstream input;
+        input.open(mapsFile);
+        input >> mappedFile.rdbuf();
+    }
+    std::string line;
+    while (std::getline(mappedFile, line)) {
+        auto match = std::smatch{};
+        if (std::regex_match(line, match, mapRegex)) {
+            auto matchedPath = fs::path(match[1]);
+            if (isInStore(opts.storeDir, matchedPath))
+                res[matchedPath].emplace(mapsFile);
+        }
+    }
+}
+
+/* Collect the store paths referenced by the running processes (through the `exe`, `cwd`, `fd`,
+ * `environ` and `maps` entries of `/proc/<pid>`) and by a few kernel settings; empty without `/proc`. */
+Roots getRuntimeRoots(TracerConfig opts)
+{
+    auto procDir = fs::path("/proc");
+    if (!fs::exists(procDir))
+        return {};
+    Roots res;
+    auto digitsRegex = std::regex(R"(^\d+$)");
+    for (auto & procEntry : fs::directory_iterator(procDir)) {
+        // Only the directories whose name is a sequence of digits represent pids
+        if (!std::regex_match(procEntry.path().filename().string(), digitsRegex)
+            || !procEntry.is_directory())
+            continue;
+
+        opts.debug("Considering path " + procEntry.path().string());
+
+        // A set of paths used by the executable and possibly symlinks to a
+        // path in the store
+        set<fs::path> pathsToConsider;
+        pathsToConsider.insert(procEntry.path()/"exe");
+        pathsToConsider.insert(procEntry.path()/"cwd");
+        try {
+            auto fdDir = procEntry.path()/"fd";
+            for (auto & fdFile : fs::directory_iterator(fdDir))
+                pathsToConsider.insert(fdFile.path());
+        } catch (fs::filesystem_error & e) {
+            if (e.code().value() != ENOENT && e.code().value() != EACCES)
+                throw;
+        }
+        for (auto & path : pathsToConsider) try {
+            auto realPath = fs::read_symlink(path);
+            if (isInStore(opts.storeDir, realPath))
+                res[realPath].insert(path);
+        } catch (fs::filesystem_error &e) {
+            opts.debug(e.what());
+        }
+
+        // Scan the environment and the memory mappings of the process
+        scanFileContent(opts, procEntry.path()/"environ", res);
+        scanMapsFile(opts, procEntry.path()/"maps", res);
+    }
+
+    // Mostly useful for NixOS, but doesn’t hurt to check on other systems anyways
+    scanFileContent(opts, "/proc/sys/kernel/modprobe", res);
+    scanFileContent(opts, "/proc/sys/kernel/fbsplash", res);
+    scanFileContent(opts, "/proc/sys/kernel/poweroff_cmd", res);
+
+    return res;
+}
+
+}
diff --git a/src/nix-find-roots/lib/find-roots.hh b/src/nix-find-roots/lib/find-roots.hh
new file mode 100644
index 000000000..504fdd768
--- /dev/null
+++ b/src/nix-find-roots/lib/find-roots.hh
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <exception>
+#include <filesystem>
+#include <functional>
+#include <map>
+#include <set>
+#include <string>
+#include <string_view>
+
+namespace nix::roots_tracer {
+namespace fs = std::filesystem;
+using std::set, std::map, std::string;
+
+/* Exception carrying a plain message, which `what()` returns. */
+class Error : public std::exception {
+private:
+    const string message;
+
+public:
+    Error(std::string message)
+        : message(message)
+    {}
+
+    const char* what() const noexcept override
+    {
+        return message.c_str();
+    }
+};
+
+/* Logger that discards its message. */
+inline void logNone(std::string_view)
+{ }
+
+/* Settings shared by the tracing functions. */
+struct TracerConfig {
+    const fs::path storeDir = "/nix/store";
+    const fs::path stateDir = "/nix/var/nix";
+    // NOTE(review): `parseCmdLine` in main.cc defaults to ".../gc-trace-socket/socket" instead;
+    // confirm which of the two defaults is the intended one.
+    const fs::path socketPath = "/nix/var/nix/gc-socket/socket";
+
+    // Sinks for regular and for verbose messages; both discard everything by default.
+    std::function<void(std::string_view)> log = logNone;
+    std::function<void(std::string_view)> debug = logNone;
+};
+
+/*
+ * A value of type `Roots` is a mapping from a store path to the set of roots that keep
it alive
+ */
+using Roots = map<fs::path, set<fs::path>>;
+struct TraceResult {
+    Roots storeRoots;        // store path -> the roots that point to it
+    set<fs::path> deadLinks; // symlinks whose target could not be accessed
+};
+
+TraceResult traceStaticRoots(TracerConfig opts, set<fs::path> initialRoots);
+Roots getRuntimeRoots(TracerConfig opts);
+}
diff --git a/src/nix-find-roots/local.mk b/src/nix-find-roots/local.mk
new file mode 100644
index 000000000..f7abecff5
--- /dev/null
+++ b/src/nix-find-roots/local.mk
@@ -0,0 +1,24 @@
+libraries += libfindroots
+
+libfindroots_NAME = libnixfindroots
+
+libfindroots_DIR := $(d)/lib
+
+libfindroots_SOURCES := $(wildcard $(d)/lib/*.cc)
+
+ifdef HOST_DARWIN
+libfindroots_LDFLAGS += -lc++fs
+endif
+
+programs += nix-find-roots
+
+nix-find-roots_DIR := $(d)
+
+nix-find-roots_SOURCES := $(d)/main.cc
+
+nix-find-roots_LIBS := libfindroots
+
+nix-find-roots_CXXFLAGS += \
+  -I src/nix-find-roots/lib
+
+nix-find-roots_INSTALL_DIR := $(libexecdir)/nix
diff --git a/src/nix-find-roots/main.cc b/src/nix-find-roots/main.cc
new file mode 100644
index 000000000..c319759a7
--- /dev/null
+++ b/src/nix-find-roots/main.cc
@@ -0,0 +1,211 @@
+#include "find-roots.hh"
+
+#include <cerrno>
+#include <csignal>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+
+#include <getopt.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <unistd.h>
+
+using namespace nix::roots_tracer;
+
+/* Logger that writes `msg`, followed by a newline, to the standard error. */
+void logStderr(std::string_view msg)
+{
+    std::cerr << msg << std::endl;
+}
+
+/*
+ * Build the tracer configuration from the command line.
+ *
+ * `-v` enables debug messages on stderr; `-s`, `-d` and `-l` override the
+ * store directory, the state directory and the socket path. `-h` and
+ * unrecognised options print the usage message and exit with status 1.
+ */
+TracerConfig parseCmdLine(int argc, char** argv)
+{
+    std::function<void(std::string_view)> log = logStderr;
+    std::function<void(std::string_view)> debug = logNone;
+    fs::path storeDir = "/nix/store";
+    fs::path stateDir = "/nix/var/nix";
+    fs::path socketPath = "/nix/var/nix/gc-trace-socket/socket";
+
+    auto usage = [&]() {
+        std::cerr << "Usage: " << string(argv[0]) << " [--verbose|-v] [-s storeDir] [-d stateDir] [-l socketPath]" << std::endl;
+        exit(1);
+    };
+    static struct option long_options[] = {
+        { "verbose", no_argument, 0, 'v' },
+        { "socket_path", required_argument, 0, 'l' },
+        { "store_dir", required_argument, 0, 's' },
+        { "state_dir", required_argument, 0, 'd' },
+        { "help", no_argument, 0, 'h' },
+        { 0, 0, 0, 0 },
+    };
+
+    int option_index = 0;
+    int opt_char;
+    while ((opt_char = getopt_long(argc, argv, "vd:s:l:h",
+                    long_options, &option_index)) != -1) {
+        switch (opt_char) {
+            case 0:
+                break;
+            case '?':
+            case 'h':
+                usage();
+                break;
+            case 'v':
+                debug = logStderr;
+                break;
+            case 's':
+                storeDir = fs::path(optarg);
+                break;
+            case 'd':
+                stateDir = fs::path(optarg);
+                break;
+            case 'l':
+                socketPath = fs::path(optarg);
+                break;
+            default:
+                std::cerr << "Got invalid char: " << static_cast<char>(opt_char) << std::endl;
+                abort();
+        }
+    }
+    return TracerConfig {
+        .storeDir = storeDir,
+        .stateDir = stateDir,
+        .socketPath = socketPath,
+        .log = log,
+        .debug = debug,
+    };
+}
+
+/**
+ * Return `original` with every newline or tab character escaped
+ * (as `\n` and `\t`); every other character is copied unchanged.
+ */
+std::string escape(std::string original)
+{
+    std::string escaped;
+    escaped.reserve(original.size());
+    for (char c : original) {
+        switch (c) {
+            case '\n': escaped += "\\n"; break;
+            case '\t': escaped += "\\t"; break;
+            default: escaped += c; break;
+        }
+    }
+    return escaped;
+}
+
+constexpr int SD_LISTEN_FDS_START = 3; // Like in systemd
+
+/*
+ * Serve the list of gc roots over a Unix domain socket, either inherited
+ * through systemd socket activation or created at `opts.socketPath`.
+ *
+ * For each connection, writes one `<store path>\t<root>` line per root, then
+ * an empty line, then one line per dead link, and closes the connection.
+ */
+int main(int argc, char * * argv)
+{
+    const TracerConfig opts = parseCmdLine(argc, argv);
+    const set<fs::path> standardRoots = {
+        opts.stateDir / fs::path("profiles"),
+        opts.stateDir / fs::path("gcroots"),
+    };
+
+    int mySock;
+
+    // Handle socket-based activation by systemd.
+    auto rawListenFds = std::getenv("LISTEN_FDS");
+    if (rawListenFds) {
+        auto listenFds = std::string(rawListenFds);
+        // `LISTEN_PID` may be unset, in which case `getenv` returns a null pointer.
+        auto rawListenPid = std::getenv("LISTEN_PID");
+        if (!rawListenPid || rawListenPid != std::to_string(getpid()) || listenFds != "1")
+            throw Error("unexpected systemd environment variables");
+        mySock = SD_LISTEN_FDS_START;
+    } else {
+        mySock = socket(PF_UNIX, SOCK_STREAM, 0);
+        if (mySock == -1) {
+            throw Error("Cannot create Unix domain socket");
+        }
+        struct sockaddr_un addr = {};
+        addr.sun_family = AF_UNIX;
+
+        // `sun_path` is a fixed-size array: refuse paths that don't fit in it
+        // (terminating NUL included) rather than writing past its end.
+        const std::string socketPath = opts.socketPath.string();
+        if (socketPath.size() >= sizeof(addr.sun_path))
+            throw Error("socket path " + socketPath + " is too long");
+
+        unlink(socketPath.c_str());
+        std::memcpy(addr.sun_path, socketPath.c_str(), socketPath.size() + 1);
+        if (bind(mySock, (struct sockaddr*) &addr, sizeof(addr)) == -1) {
+            throw Error("Cannot bind to socket");
+        }
+
+        if (listen(mySock, 5) == -1)
+            throw Error("cannot listen on socket " + opts.socketPath.string());
+    }
+
+    // Ignore SIGPIPE so that an interrupted connection doesn’t stop the daemon
+    signal(SIGPIPE, SIG_IGN);
+
+    while (1) {
+        struct sockaddr_un remoteAddr;
+        socklen_t remoteAddrLen = sizeof(remoteAddr);
+        int remoteSocket = accept(
+                mySock,
+                (struct sockaddr*) & remoteAddr,
+                &remoteAddrLen
+                );
+
+        if (remoteSocket == -1) {
+            if (errno == EINTR) continue;
+            throw Error("Error accepting the connection");
+        }
+
+        opts.log("accepted connection");
+
+        // `send` may accept only part of the buffer: keep going until all of it
+        // is written, and give up on this chunk if the peer is gone.
+        auto printToSocket = [&](std::string_view s) {
+            while (!s.empty()) {
+                auto sent = send(remoteSocket, s.data(), s.size(), 0);
+                if (sent == -1 && errno == EINTR) continue;
+                if (sent <= 0) return;
+                s.remove_prefix(static_cast<size_t>(sent));
+            }
+        };
+
+        auto traceResult = traceStaticRoots(opts, standardRoots);
+        auto runtimeRoots = getRuntimeRoots(opts);
+        // `map::insert` would skip every store path that already has static
+        // roots, losing its runtime roots: merge the two sets key by key.
+        for (auto & [storePath, runtimeSources] : runtimeRoots)
+            traceResult.storeRoots[storePath].insert(runtimeSources.begin(), runtimeSources.end());
+        for (auto & [rootInStore, externalRoots] : traceResult.storeRoots) {
+            for (auto & externalRoot : externalRoots) {
+                printToSocket(escape(rootInStore.string()));
+                printToSocket("\t");
+                printToSocket(escape(externalRoot.string()));
+                printToSocket("\n");
+            }
+
+        }
+        printToSocket("\n");
+        for (auto & deadLink : traceResult.deadLinks) {
+            printToSocket(escape(deadLink.string()));
+            printToSocket("\n");
+        }
+
+        close(remoteSocket);
+    }
+}