Add an external executable to trace the gc roots back to the store

This commit is contained in:
Théophane Hufschmitt 2022-04-11 10:20:36 +02:00
parent 6a5210f48e
commit 9bbf398d71
7 changed files with 510 additions and 0 deletions

View File

@ -18,6 +18,7 @@ makefiles = \
src/libexpr/local.mk \
src/libcmd/local.mk \
src/nix/local.mk \
src/nix-find-roots/local.mk \
src/resolve-system-dependencies/local.mk \
scripts/local.mk \
misc/bash/local.mk \

View File

@ -152,6 +152,24 @@
'';
};
# Standalone derivation for the `nix-find-roots` helper, built directly from
# the sources under `src/nix-find-roots` with a single compiler invocation.
nix-find-roots = prev.stdenv.mkDerivation {
name = "nix-find-roots-${version}";
inherit version;
src = "${self}/src/nix-find-roots";
# `-static` is only passed when the host platform is a static one.
CXXFLAGS = prev.lib.optionalString prev.stdenv.hostPlatform.isStatic "-static";
# Compiles the `.cc` files matched by the two globs in one go, with `lib` on
# the include path.
buildPhase = ''
$CXX $CXXFLAGS -std=c++17 *.cc **/*.cc -I lib -o nix-find-roots
'';
# The resulting binary is the only installed file.
installPhase = ''
mkdir -p $out/bin
cp nix-find-roots $out/bin/
'';
};
libgit2-nix = final.libgit2.overrideAttrs (attrs: {
src = libgit2;
version = libgit2.lastModifiedDate;
@ -366,6 +384,7 @@
default = nix;
} // (lib.optionalAttrs (builtins.elem system linux64BitSystems) {
nix-static = nixpkgsFor.${system}.static.nix;
nix-find-roots = nixpkgsFor.${system}.static.nix-find-roots;
dockerImage =
let
pkgs = nixpkgsFor.${system}.native;

1
src/nix-find-roots/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
nix-find-roots

View File

@ -0,0 +1,242 @@
/*
 * A very simple utility to trace all the gc roots through the file-system.
 *
 * The reason for this program is that tracing these roots is the only part of
 * Nix that requires running as root (because it requires reading through the
 * users' home directories to resolve the indirect roots).
 *
 * This program intentionally doesn't depend on any Nix library, to reduce the
 * attack surface.
 */
#include <regex>
#include <unistd.h>
#include <vector>
#include <algorithm>
#include <fstream>
#include <optional>
#include "find-roots.hh"
namespace nix::roots_tracer {
namespace fs = std::filesystem;
using std::set, std::string;
/**
 * Return a copy of `raw` in which every character listed in the pattern below
 * (the regex metacharacters) is preceded by a backslash, so that the result
 * can be embedded in a larger regular expression and match `raw` literally.
 */
string quoteRegexChars(const string & raw)
{
    // Built once, on first use, and shared by all later calls.
    static const std::regex metaCharacters(R"([.^$\\*+?()\[\]{}|])");

    // In the replacement format, `$&` stands for the matched character.
    const string backslashThenMatch = R"(\$&)";

    return std::regex_replace(raw, metaCharacters, backslashThenMatch);
}
/**
 * Build the regular expression used to spot store paths inside arbitrary
 * text: the store directory (taken literally), a `/`, at least one character
 * in `[0-9a-z]`, then any number of the characters allowed by the second
 * character class.
 */
std::regex storePathRegex(const fs::path storeDir)
{
    const string literalStoreDir = quoteRegexChars(storeDir);
    const string entryName = R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)";
    return std::regex(literalStoreDir + entryName);
}
/**
 * Tell whether `dir` is `storeDir` itself or a path located below it.
 *
 * The check is purely lexical and works component by component: every
 * component of `storeDir` must be matched, in order, by the leading
 * components of `dir`. Neither path is normalised or resolved on disk, so
 * `/nix/storefoo/x` is not in `/nix/store`, and neither is a path that only
 * reaches the store through `..` components or symlinks.
 *
 * An empty `dir` is never in a (non-empty) store; an empty `storeDir`
 * contains everything.
 */
bool isInStore(fs::path storeDir, fs::path dir)
{
    // Walk both paths in lockstep and stop at the first differing component
    // (or at the end of the shorter path). `dir` is in the store exactly when
    // all of `storeDir` was consumed.
    return std::mismatch(storeDir.begin(), storeDir.end(), dir.begin(), dir.end()).first
        == storeDir.end();
}
/**
 * Record in `res` what can be reached from `root`, whose (non-followed) file
 * status is `status`:
 *
 * - directory: every entry is traced in turn, sub-directories included;
 * - symlink: if the target is lexically inside `opts.storeDir` it becomes a
 *   store root kept alive by `root`; otherwise the target is traced with one
 *   fewer recursion left. A target that cannot be accessed gets `root`
 *   recorded in `res.deadLinks`;
 * - regular file: if `opts.storeDir/<file name>` exists, that store path is
 *   recorded as kept alive by `root`;
 * - anything else is ignored.
 *
 * Nothing is done once `recursionsLeft` is negative. Filesystem errors raised
 * outside of the symlink resolution propagate to the caller.
 */
void traceStaticRoot(
    const TracerConfig & opts,
    int recursionsLeft,
    TraceResult & res,
    const fs::path & root,
    const fs::file_status & status
    )
{
    opts.debug("Considering file " + root.string());

    if (recursionsLeft < 0)
        return;

    switch (status.type()) {
        case fs::file_type::directory:
            // A flat iterator is enough: sub-directories are descended into
            // by the recursive call. Using a recursive iterator here as well
            // would visit the content of nested directories again and again.
            for (auto & child : fs::directory_iterator(root))
                traceStaticRoot(opts, recursionsLeft, res, child.path(), child.symlink_status());
            break;
        case fs::file_type::symlink:
            {
                // `operator/` keeps an absolute link target as it is and
                // appends a relative one to the directory holding the link.
                // NOTE(review): a relative target is not normalised, so a
                // link reaching the store through `..` is not recognised by
                // the lexical `isInStore` test below -- confirm this is
                // acceptable.
                auto target = root.parent_path() / fs::read_symlink(root);
                auto not_found = [&](std::string msg) {
                    opts.debug("Error accessing the file " + target.string() + ": " + msg);
                    opts.debug("(When resolving the symlink " + root.string() + ")");
                    res.deadLinks.insert(root);
                };
                try {
                    auto target_status = fs::symlink_status(target);
                    // NOTE(review): execution deliberately continues after
                    // reporting the dead link, so a dangling link into the
                    // store is both a dead link and a store root -- confirm
                    // this is intended.
                    if (target_status.type() == fs::file_type::not_found)
                        not_found("Not found");

                    if (isInStore(opts.storeDir, target)) {
                        res.storeRoots[target].insert(root);
                        return;
                    } else {
                        traceStaticRoot(opts, recursionsLeft - 1, res, target, target_status);
                    }
                } catch (const fs::filesystem_error & e) {
                    not_found(e.what());
                }
            }
            break;
        case fs::file_type::regular:
            {
                // The file name alone designates the store path.
                auto possibleStorePath = opts.storeDir / root.filename();
                if (fs::exists(possibleStorePath))
                    res.storeRoots[possibleStorePath].insert(root);
            }
            break;
        case fs::file_type::not_found:
        case fs::file_type::block:
        case fs::file_type::character:
        case fs::file_type::fifo:
        case fs::file_type::socket:
        case fs::file_type::unknown:
        case fs::file_type::none:
        default:
            break;
    }
}
void traceStaticRoot(
const TracerConfig & opts,
int recursionsLeft,
TraceResult & res,
const fs::path & root)
{
try {
auto status = fs::symlink_status(root);
traceStaticRoot(opts, recursionsLeft, res, root, status);
} catch (fs::filesystem_error & e) {
opts.debug("Error accessing the file " + root.string() + ": " + e.what());
}
}
/*
 * Trace every path of `roots` and gather the outcome in a single
 * `TraceResult`:
 * - `storeRoots` maps each store path that was reached to the filesystem
 *   paths that lead to it. Store paths are reached by descending into
 *   directories, by following symbolic links (within the recursion budget
 *   `maxRecursionLevel`), and through the name of regular files (when
 *   encountering a file `/foo/bar/abcdef`, the algorithm will try to access
 *   `<storeDir>/abcdef`);
 * - `deadLinks` holds the symbolic links whose target could not be accessed
 *   (so that they can be removed if it makes sense).
 */
TraceResult traceStaticRoots(TracerConfig opts, set<fs::path> roots)
{
    constexpr int maxRecursionLevel = 2;

    TraceResult collected;
    std::for_each(roots.begin(), roots.end(), [&](const fs::path & startingPoint) {
        traceStaticRoot(opts, maxRecursionLevel, collected, startingPoint);
    });
    return collected;
}
/**
* Scan the content of the given file for al the occurences of something that looks
* like a store path (i.e. that matches `storePathRegex(opts.storeDir)`) and add them
* to `res`
*/
void scanFileContent(const TracerConfig & opts, const fs::path & fileToScan, Roots & res)
{
if (!fs::exists(fileToScan))
return;
std::ostringstream contentStream;
{
std::ifstream fs;
fs.open(fileToScan);
fs >> contentStream.rdbuf();
}
std::string content = contentStream.str();
auto regex = storePathRegex(opts.storeDir);
auto firstMatch
= std::sregex_iterator { content.begin(), content.end(), regex };
auto fileEnd = std::sregex_iterator{};
for (auto i = firstMatch; i != fileEnd; ++i)
res[i->str()].emplace(fileToScan);
}
/**
* Scan the content of a `/proc/[pid]/maps` file for regions that are mmaped to
* a store path
*/
void scanMapsFile(const TracerConfig & opts, const fs::path & mapsFile, Roots & res)
{
if (!fs::exists(mapsFile))
return;
static auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
std::stringstream mappedFile;
{
std::ifstream fs;
fs.open(mapsFile);
fs >> mappedFile.rdbuf();
}
std::string line;
while (std::getline(mappedFile, line)) {
auto match = std::smatch{};
if (std::regex_match(line, match, mapRegex)) {
auto matchedPath = fs::path(match[1]);
if (isInStore(opts.storeDir, matchedPath))
res[fs::path(match[1])].emplace(mapsFile);
}
}
}
/**
 * Collect the store paths that are in use at runtime, as far as `/proc` can
 * tell. For every `/proc/<pid>` directory this looks at:
 * - the targets of the `exe` and `cwd` links and of every entry of `fd`;
 * - the store paths mentioned in `environ`;
 * - the store paths mapped in memory according to `maps`.
 *
 * A few kernel settings under `/proc/sys/kernel` are scanned as well.
 *
 * Returns an empty result when `/proc` doesn't exist.
 */
Roots getRuntimeRoots(TracerConfig opts)
{
    const auto procDir = fs::path("/proc");
    if (!fs::exists(procDir))
        return {};

    Roots res;
    const auto digitsRegex = std::regex(R"(^\d+$)");

    for (auto & procEntry : fs::directory_iterator(procDir)) {
        // Only the directories whose name is a sequence of digits represent
        // pids
        if (!std::regex_match(procEntry.path().filename().string(), digitsRegex)
            || !procEntry.is_directory())
            continue;

        opts.debug("Considering path " + procEntry.path().string());

        // A set of paths used by the executable and possibly symlinks to a
        // path in the store
        set<fs::path> pathsToConsider;
        pathsToConsider.insert(procEntry.path()/"exe");
        pathsToConsider.insert(procEntry.path()/"cwd");
        try {
            auto fdDir = procEntry.path()/"fd";
            for (auto & fdFile : fs::directory_iterator(fdDir))
                pathsToConsider.insert(fdFile.path());
        } catch (const fs::filesystem_error & e) {
            // The process may be gone by now (ENOENT, or ESRCH when it exits
            // while its directory is being read) or be out of reach
            // (EACCES): neither is a reason to abort the whole scan.
            const int code = e.code().value();
            if (code != ENOENT && code != EACCES && code != ESRCH)
                throw;
        }

        for (auto & path : pathsToConsider) {
            try {
                auto realPath = fs::read_symlink(path);
                if (isInStore(opts.storeDir, realPath))
                    res[realPath].insert(path);
            } catch (const fs::filesystem_error & e) {
                // Unreadable links are only worth a debug message.
                opts.debug(e.what());
            }
        }

        // Scan the environment of the executable
        scanFileContent(opts, procEntry.path()/"environ", res);
        // ... and the files it has mapped in memory
        scanMapsFile(opts, procEntry.path()/"maps", res);
    }

    // Mostly useful for NixOS, but doesn't hurt to check on other systems
    // anyways
    scanFileContent(opts, "/proc/sys/kernel/modprobe", res);
    scanFileContent(opts, "/proc/sys/kernel/fbsplash", res);
    scanFileContent(opts, "/proc/sys/kernel/poweroff_cmd", res);

    return res;
}
}

View File

@ -0,0 +1,48 @@
#include <filesystem>
#include <set>
#include <map>
#include <functional>
namespace nix::roots_tracer {
namespace fs = std::filesystem;
using std::set, std::map, std::string;
/**
 * Exception carrying a plain message, which `what()` hands back unchanged.
 */
class Error : public std::exception {
    // Owned copy of the message; `what()` points into it.
    const string message;

public:
    Error(std::string text)
        : message(text)
    { }

    const char* what() const noexcept override { return message.c_str(); }
};
/** Logging sink that discards every message it is given. */
inline void logNone(std::string_view /* msg */)
{
}
struct TracerConfig {
const fs::path storeDir = "/nix/store";
const fs::path stateDir = "/nix/var/nix";
const fs::path socketPath = "/nix/var/nix/gc-socket/socket";
std::function<void(std::string_view msg)> log = logNone;
std::function<void(std::string_view msg)> debug = logNone;
};
/*
 * `Roots` associates each store path with the set of roots that keep it
 * alive.
 */
using Roots = map<fs::path, std::set<fs::path>>;
/*
 * Outcome of tracing a set of filesystem roots.
 */
struct TraceResult {
// Store paths that were reached, each with the paths keeping it alive.
Roots storeRoots;
// Symbolic links whose target could not be accessed.
set<fs::path> deadLinks;
};
/*
 * Trace every path of `initialRoots` through the filesystem and return the
 * store paths reached that way, together with the dead links met on the way.
 */
TraceResult traceStaticRoots(TracerConfig opts, set<fs::path> initialRoots);
/*
 * Return the store paths referenced by the running processes, as seen through
 * `/proc` (empty when `/proc` doesn't exist).
 */
Roots getRuntimeRoots(TracerConfig opts);
}

View File

@ -0,0 +1,24 @@
# Build description for `nix-find-roots`: a small library made of the sources
# under `lib/`, and the program itself linked against it.
# NOTE(review): the `<name>_*` variables are presumably consumed by the
# project's generic make rules, which are not visible from this file.

# The tracing library: every `.cc` file found in `lib/`.
libraries += libfindroots
libfindroots_NAME = libnixfindroots
libfindroots_DIR := $(d)/lib
libfindroots_SOURCES := $(wildcard $(d)/lib/*.cc)
# Extra link flag only added when HOST_DARWIN is defined.
ifdef HOST_DARWIN
libfindroots_LDFLAGS += -lc++fs
endif
# The executable: `main.cc` plus the library above, with the library headers
# on the include path, installed under `$(libexecdir)/nix`.
programs += nix-find-roots
nix-find-roots_DIR := $(d)
nix-find-roots_SOURCES := $(d)/main.cc
nix-find-roots_LIBS := libfindroots
nix-find-roots_CXXFLAGS += \
-I src/nix-find-roots/lib
nix-find-roots_INSTALL_DIR := $(libexecdir)/nix

175
src/nix-find-roots/main.cc Normal file
View File

@ -0,0 +1,175 @@
#include "find-roots.hh"
#include <getopt.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <iostream>
#include <unistd.h>
#include <cstring>
#include <signal.h>
using namespace nix::roots_tracer;
/** Write `msg` and a newline to the standard error stream, then flush it. */
void logStderr(std::string_view msg)
{
    std::cerr << msg << '\n' << std::flush;
}
/**
 * Build the configuration of the program from its command line.
 *
 * Recognised options:
 * - `-v`, `--verbose`: send the debug messages to stderr;
 * - `-s`, `--store_dir <dir>`: store directory;
 * - `-d`, `--state_dir <dir>`: state directory;
 * - `-l`, `--socket_path <path>`: socket to listen on;
 * - `-h`, `--help`: print the usage.
 *
 * Regular log messages always go to stderr. On `--help` or on an unknown
 * option, the usage is printed and the process exits with status 1.
 */
TracerConfig parseCmdLine(int argc, char** argv)
{
    std::function<void(std::string_view msg)> log = logStderr;
    std::function<void(std::string_view msg)> debug = logNone;
    fs::path storeDir = "/nix/store";
    fs::path stateDir = "/nix/var/nix";
    // NOTE(review): the default declared in `TracerConfig` is
    // `/nix/var/nix/gc-socket/socket` -- confirm which of the two is the
    // intended location.
    fs::path socketPath = "/nix/var/nix/gc-trace-socket/socket";

    auto usage = [&]() {
        std::cerr << "Usage: " << string(argv[0]) << " [--verbose|-v] [-s storeDir] [-d stateDir] [-l socketPath]" << std::endl;
        exit(1);
    };
    static struct option long_options[] = {
        { "verbose", no_argument, 0, 'v' },
        { "socket_path", required_argument, 0, 'l' },
        { "store_dir", required_argument, 0, 's' },
        { "state_dir", required_argument, 0, 'd' },
        { "help", no_argument, 0, 'h' },
        { 0, 0, 0, 0 },
    };

    int option_index = 0;
    int opt_char;
    while ((opt_char = getopt_long(argc, argv, "vd:s:l:h",
                    long_options, &option_index)) != -1) {
        switch (opt_char) {
            case 0:
                break;
            case '?':
            case 'h':
                usage();
                break;
            case 'v':
                debug = logStderr;
                break;
            case 's':
                storeDir = fs::path(optarg);
                break;
            case 'd':
                stateDir = fs::path(optarg);
                break;
            case 'l':
                socketPath = fs::path(optarg);
                break;
            default:
                // Every value getopt_long can return is handled above.
                std::cerr << "Got invalid char: " << static_cast<char>(opt_char) << std::endl;
                abort();
        }
    }

    return TracerConfig {
        .storeDir = storeDir,
        .stateDir = stateDir,
        .socketPath = socketPath,
        // Without this the configuration falls back to its silent default
        // and the regular log messages are lost.
        .log = log,
        .debug = debug,
    };
}
/**
 * Return `original` with every newline character replaced by the two
 * characters `\n` and every tab character replaced by the two characters
 * `\t`, so that the result can be sent as one tab-separated field of a
 * newline-terminated record.
 *
 * Every other character, backslashes included, is copied as is.
 */
std::string escape(std::string original)
{
    std::string escaped;
    escaped.reserve(original.size());

    for (const char c : original) {
        switch (c) {
            case '\n':
                escaped += "\\n";
                break;
            case '\t':
                escaped += "\\t";
                break;
            default:
                escaped += c;
                break;
        }
    }

    return escaped;
}
#define SD_LISTEN_FDS_START 3 // Like in systemd
int main(int argc, char * * argv)
{
const TracerConfig opts = parseCmdLine(argc, argv);
const set<fs::path> standardRoots = {
opts.stateDir / fs::path("profiles"),
opts.stateDir / fs::path("gcroots"),
};
int mySock;
// Handle socket-based activation by systemd.
auto rawListenFds = std::getenv("LISTEN_FDS");
if (rawListenFds) {
auto listenFds = std::string(rawListenFds);
if (std::getenv("LISTEN_PID") != std::to_string(getpid()) || listenFds != "1")
throw Error("unexpected systemd environment variables");
mySock = SD_LISTEN_FDS_START;
} else {
mySock = socket(PF_UNIX, SOCK_STREAM, 0);
if (mySock == 0) {
throw Error("Cannot create Unix domain socket");
}
struct sockaddr_un addr;
addr.sun_family = AF_UNIX;
unlink(opts.socketPath.c_str());
strcpy(addr.sun_path, opts.socketPath.c_str());
if (bind(mySock, (struct sockaddr*) &addr, sizeof(addr)) == -1) {
throw Error("Cannot bind to socket");
}
if (listen(mySock, 5) == -1)
throw Error("cannot listen on socket " + opts.socketPath.string());
}
// Ignore SIGPIPE so that an interrupted connection doesnt stop the daemon
signal(SIGPIPE, SIG_IGN);
while (1) {
struct sockaddr_un remoteAddr;
socklen_t remoteAddrLen = sizeof(remoteAddr);
int remoteSocket = accept(
mySock,
(struct sockaddr*) & remoteAddr,
&remoteAddrLen
);
if (remoteSocket == -1) {
if (errno == EINTR) continue;
throw Error("Error accepting the connection");
}
opts.log("accepted connection");
auto printToSocket = [&](std::string_view s) {
send(remoteSocket, s.data(), s.size(), 0);
};
auto traceResult = traceStaticRoots(opts, standardRoots);
auto runtimeRoots = getRuntimeRoots(opts);
traceResult.storeRoots.insert(runtimeRoots.begin(), runtimeRoots.end());
for (auto & [rootInStore, externalRoots] : traceResult.storeRoots) {
for (auto & externalRoot : externalRoots) {
printToSocket(escape(rootInStore.string()));
printToSocket("\t");
printToSocket(escape(externalRoot.string()));
printToSocket("\n");
}
}
printToSocket("\n");
for (auto & deadLink : traceResult.deadLinks) {
printToSocket(escape(deadLink.string()));
printToSocket("\n");
}
close(remoteSocket);
}
}