add stack sampling profiler for flamegraphs

This code is a bit rough in the sense that I have taken shortcuts to
iterate quickly.
This is not ready for merging yet, but I think it's already useful for
people that want to generate flamegraphs for things as large as NixOS.

Usage:

NIX_PROFILE_FILE=/tmp/nixos-trace nix eval -v --no-eval-cache
.#nixosConfigurations.turingmachine.config.system.build.toplevel

The result can be imported into tools that support folded stacks, e.g.
https://www.speedscope.app/ or the original flamegraph script
(https://github.com/brendangregg/FlameGraph).

The profiler records a stack trace of the Nix evaluation every 10ms (100Hz).
The resulting file is 1.9GB uncompressed and 5.4MB compressed with zstd.

Change-Id: I3484d3bd832e612747d02c251f2763e0c133a5dc
This commit is contained in:
Jörg Thalheim 2024-08-26 10:36:40 +02:00
parent 9ae7140beb
commit 794007cab7
2 changed files with 58 additions and 1 deletions

View File

@ -364,6 +364,36 @@ EvalState::EvalState(
/**
 * Dump the collected stack samples when the evaluator is torn down.
 *
 * If the environment variable NIX_PROFILE_FILE is set and the file it names
 * can be opened for writing, one line is written per distinct sampled stack:
 * the positions of the stack frames joined by ';', followed by a space and
 * the number of times that stack was sampled. If the variable is unset or
 * the file cannot be opened, nothing is written.
 */
EvalState::~EvalState()
{
    const auto profileFile = getEnv("NIX_PROFILE_FILE");
    if (!profileFile.has_value()) {
        return;
    }
    std::ofstream profileStream(profileFile.value());
    if (!profileStream) {
        return;
    }

    // The same position occurs in many stacks; format each one only once.
    std::map<PosIdx, std::string> cachedPositions;

    for (const auto & [stack, count] : callCount) {
        auto first = true;
        for (const auto & pos : stack) {
            if (first) {
                first = false;
            } else {
                profileStream << ";";
            }
            // Single lookup: either finds the cached string or reserves the
            // slot that is filled in right below.
            auto [it, inserted] = cachedPositions.try_emplace(pos);
            if (inserted) {
                std::stringstream posStr;
                posStr << positions[pos];
                it->second = posStr.str();
            }
            profileStream << it->second;
        }
        // '\n' instead of std::endl: flushing after every line is needlessly
        // slow for large profiles; the stream is flushed when it is closed.
        profileStream << " " << count << '\n';
    }
}
@ -1484,15 +1514,36 @@ void ExprOpHasAttr::eval(EvalState & state, Env & env, Value & v)
v.mkBool(true);
}
// Evaluating a lambda expression does not run its body here: the result `v`
// is set up via mkLambda() from the address of the current environment and
// this expression node.
void ExprLambda::eval(EvalState & state, Env & env, Value & v)
{
v.mkLambda(&env, this);
}
// Minimum time between two recorded stack samples: 10 ms, i.e. at most
// 100 samples per second (100 Hz).
static constexpr std::chrono::milliseconds SAMPLE_INTERVAL{10};
namespace {

/**
 * Scope guard that maintains the profiler's call stack for one function call.
 *
 * Construction pushes `pos` onto `state.stack`. Destruction records a sample
 * of the current stack in `state.callCount` if more than SAMPLE_INTERVAL has
 * elapsed since `state.lastStackSample`, and then pops the entry again.
 */
class SampleStack
{
    EvalState & state;

public:
    SampleStack(EvalState & state, const PosIdx pos)
        : state(state)
    {
        state.stack.push_back(pos);
    }

    // A copy would run the destructor a second time for a single push and
    // thereby pop a frame that belongs to an enclosing call.
    SampleStack(const SampleStack &) = delete;
    SampleStack & operator=(const SampleStack &) = delete;

    ~SampleStack()
    {
        const auto now = std::chrono::high_resolution_clock::now();
        if (now - state.lastStackSample > SAMPLE_INTERVAL) {
            // Sample before popping so that this call's own frame is included.
            state.callCount[state.stack] += 1;
            state.lastStackSample = now;
        }
        if (!state.stack.empty()) {
            state.stack.pop_back();
        }
    }
};

} // namespace
void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & vRes, const PosIdx pos)
{
auto _level = addCallDepth(pos);
SampleStack _sample(*this, pos);
auto trace = settings.traceFunctionCalls
? std::make_unique<FunctionCallTrace>(positions[pos])

View File

@ -15,6 +15,7 @@
#include "search-path.hh"
#include "repl-exit-status.hh"
#include "ref.hh"
#include "pos-idx.hh"
#include <map>
#include <optional>
@ -375,6 +376,10 @@ private:
public:
// Profiler state: positions of the function calls currently in progress,
// innermost call last (SampleStack pushes on entry and pops on exit).
std::vector<PosIdx> stack = {};
// Profiler state: number of samples recorded for each distinct stack;
// written out by ~EvalState() when NIX_PROFILE_FILE is set.
std::map<std::vector<PosIdx>, int> callCount = {};
// Profiler state: time at which the most recent sample was recorded;
// starts out as the time at which this member is initialised.
std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample = std::chrono::high_resolution_clock::now();
EvalState(
const LookupPath & _lookupPath,
ref<Store> store,
@ -850,6 +855,7 @@ private:
bool countCalls;
typedef std::map<std::string, size_t> PrimOpCalls;
PrimOpCalls primOpCalls;