From 794007cab725bb4ff55ed54e854b11a37a9166dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Mon, 26 Aug 2024 10:36:40 +0200 Subject: [PATCH] add stack sampling profiler for flamegraphs This is code is a bit rough in the sense that I have taken shortcuts to quickly iterate. This is not ready for merging yet, but I think it's already useful for people that want to generate flamegraphs for things as large as NixOS. Usage: NIX_PROFILE_FILE=/tmp/nixos-trace nix eval -v --no-eval-cache .#nixosConfigurations.turingmachine.config.system.build.toplevel The result can be imported in tools that support folded stacks i.e. https://www.speedscope.app/ or the original flamegraph script (https://github.com/brendangregg/FlameGraph) The profiler records stack trace of the nix evaluation every 10ms (100Hz). The resulting file is 1.9GB uncompressed and 5.4MB compressed with zstd. Change-Id: I3484d3bd832e612747d02c251f2763e0c133a5dc --- src/libexpr/eval.cc | 53 ++++++++++++++++++++++++++++++++++++++++++++- src/libexpr/eval.hh | 6 +++++ 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 2420f15c1..c5a5fa5cc 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -364,6 +364,36 @@ EvalState::EvalState( EvalState::~EvalState() { + auto profileFile = getEnv("NIX_PROFILE_FILE"); + + std::map cachedPositions; + + if (!profileFile.has_value()) { + return; + } + std::ofstream profileStream(profileFile.value()); + if (!profileStream) { + return; + } + for (auto & [stack, count] : callCount) { + auto first = true; + for (auto & pos : stack) { + if (first) { + first = false; + } else { + profileStream << ";"; + } + if (auto it = cachedPositions.find(pos); it != cachedPositions.end()) { + profileStream << it->second; + } else { + std::stringstream posStr; + posStr << positions[pos]; + cachedPositions[pos] = posStr.str(); + profileStream << posStr.str(); + } + } + profileStream << " " << count << std::endl; + } } @@ -1484,15 +1514,36 @@ void ExprOpHasAttr::eval(EvalState & state, Env & env, Value & v) v.mkBool(true); } - void ExprLambda::eval(EvalState & state, Env & env, Value & v) { v.mkLambda(&env, this); } +static const std::chrono::duration SAMPLE_INTERVAL = std::chrono::microseconds(10); +namespace { +class SampleStack { + EvalState & state; +public: + SampleStack(EvalState & state, const PosIdx pos) : state(state) { + state.stack.push_back(pos); + } + ~SampleStack() { + auto now = std::chrono::high_resolution_clock::now(); + if (now - state.lastStackSample > SAMPLE_INTERVAL) { + state.callCount[state.stack] += 1; + state.lastStackSample = now; + } + if (state.stack.size() > 0) { + state.stack.pop_back(); + } + } +}; +}; + void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & vRes, const PosIdx pos) { auto _level = addCallDepth(pos); + SampleStack _sample(*this, pos); auto trace = settings.traceFunctionCalls ? std::make_unique(positions[pos]) diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index da9dd2087..87002390d 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -15,6 +15,7 @@ #include "search-path.hh" #include "repl-exit-status.hh" #include "ref.hh" +#include "pos-idx.hh" #include #include @@ -375,6 +376,10 @@ private: public: + std::vector stack = {}; + std::map, int> callCount = {}; + std::chrono::time_point lastStackSample = std::chrono::high_resolution_clock::now(); + EvalState( const LookupPath & _lookupPath, ref store, @@ -850,6 +855,7 @@ private: bool countCalls; + typedef std::map PrimOpCalls; PrimOpCalls primOpCalls;