mirror of
https://github.com/NixOS/nix.git
synced 2024-11-21 22:32:26 +00:00
WIP: accumulate attrset updates, use k-way merge
This seems to be slower as of yet. TODO - [ ] remove merge pass 2 into 1: - pass 1: fill UpdateQueue - pass 2: stats (like max output size) - pass 3: k-way merge - [ ] maybe tracking sortedness is useful and cheap? in pass 1, building the UpdateQueue - [ ] make it actually fast; different algorithm depending on size distribution and number of update ops? - currently solution is the min-heap or priority queue approach - divide and conquer (see also Nixpkgs which manually implements this for pkgs/by-name) binary tree-shaped sorted merge operations - can we take the size of attrsets into account to balance the work? 1000 + (1 + (1 + 1)) is better than ((1000+1)+1)+1 (where number refers to attrset size) - iterative pairwise merging is more or less what we had, the tree of updates is flattened. Probably worse. - perhaps some combination of solutions, depending on a heuristic - [ ] split callFunction into two parts, effectively - call2Thunk (create the new scope's Env etc) - eval that thunk (evaluate the body) - [ ] implement ExprApply::evalForUpdate - easy after the split - [ ] make foldl' (//) work? - [ ] optimize listToAttrs and other primops?
This commit is contained in:
parent
59def6c23b
commit
2cc8387cb3
@ -999,10 +999,6 @@ void EvalState::mkSingleDerivedPathString(
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Create a thunk for the delayed computation of the given expression
|
|
||||||
in the given environment. But if the expression is a variable,
|
|
||||||
then look it up right away. This significantly reduces the number
|
|
||||||
of thunks allocated. */
|
|
||||||
Value * Expr::maybeThunk(EvalState & state, Env & env)
|
Value * Expr::maybeThunk(EvalState & state, Env & env)
|
||||||
{
|
{
|
||||||
Value * v = state.allocValue();
|
Value * v = state.allocValue();
|
||||||
@ -1899,6 +1895,112 @@ void ExprOpImpl::eval(EvalState & state, Env & env, Value & v)
|
|||||||
|
|
||||||
void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
|
void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
|
||||||
{
|
{
|
||||||
|
|
||||||
|
UpdateQueue q;
|
||||||
|
|
||||||
|
e1->evalForUpdate(state, env, q);
|
||||||
|
e2->evalForUpdate(state, env, q);
|
||||||
|
|
||||||
|
// k-way merge
|
||||||
|
|
||||||
|
size_t capacity = 0;
|
||||||
|
|
||||||
|
struct BindingsCursor {
|
||||||
|
const Bindings * bindings;
|
||||||
|
Bindings::size_t offset;
|
||||||
|
|
||||||
|
const Attr & front() {
|
||||||
|
return (*bindings)[offset];
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* @return false if there are no more elements
|
||||||
|
*/
|
||||||
|
bool pop_front() {
|
||||||
|
offset++;
|
||||||
|
return offset < bindings->size();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// a nested queue where the key is equal to the first and lowest name
|
||||||
|
std::map<std::pair<Symbol, int>, BindingsCursor> next;
|
||||||
|
|
||||||
|
// first fill next
|
||||||
|
int bias = 0;
|
||||||
|
for (auto b : q) {
|
||||||
|
if (b->size() == 0) continue;
|
||||||
|
capacity += b->size();
|
||||||
|
next.insert_or_assign({b->begin()->name, bias}, BindingsCursor {b, 0});
|
||||||
|
|
||||||
|
// The `//` operator is "right-biased", which means `{ a = "L"; } // { a = "R"; }`
|
||||||
|
// will result in `{ a = "R"; }`.
|
||||||
|
// This means that we want to find the right most value first, so that
|
||||||
|
// we can then ignore the earlier attributes of the same name.
|
||||||
|
// Decreasing the bias integer achieves this.
|
||||||
|
bias--;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto r = state.allocBindings(capacity);
|
||||||
|
|
||||||
|
{
|
||||||
|
auto it = state.nrMultiConcats.find(capacity);
|
||||||
|
if (it != state.nrMultiConcats.end()) {
|
||||||
|
it->second++;
|
||||||
|
} else {
|
||||||
|
state.nrMultiConcats[capacity] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// std::cerr << "capacity: " << r->capacity() << std::endl;
|
||||||
|
|
||||||
|
// Consume the lowest item until empty.
|
||||||
|
// This changes the order of next, so we need to reinsert it each time.
|
||||||
|
|
||||||
|
// The last inserted name, so that we can easily ignore all the values that aren't right-most in the sequence of `//` operations.
|
||||||
|
// Symbol() is a _null object_ that doesn't match any real symbols.
|
||||||
|
Symbol lastInsertedName;
|
||||||
|
while (true) {
|
||||||
|
auto cur = next.begin();
|
||||||
|
if (cur == next.end()) break;
|
||||||
|
|
||||||
|
// std::cerr << "size: " << r->size() << std::endl;
|
||||||
|
|
||||||
|
// std::cerr << "processing attr: " << state.symbols[cur->first] << std::endl;
|
||||||
|
|
||||||
|
// Grow `r`
|
||||||
|
if (lastInsertedName != cur->first.first) {
|
||||||
|
r->push_back(cur->second.front());
|
||||||
|
lastInsertedName = cur->first.first;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shrink `next`
|
||||||
|
int bias = cur->first.second;
|
||||||
|
|
||||||
|
if (cur->second.pop_front()) {
|
||||||
|
std::pair<Symbol, int> key = {cur->second.front().name, bias};
|
||||||
|
next.emplace(key, std::move(cur->second));
|
||||||
|
}
|
||||||
|
next.erase(cur);
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO check the size of the bindings. If significantly below capacity, shrink it.
|
||||||
|
|
||||||
|
if (capacity > 0) {
|
||||||
|
auto capacityFraction = (100 * r->size()) / capacity;
|
||||||
|
auto it = state.nrMultiConcatCapacityPercent.find(capacityFraction);
|
||||||
|
auto wastedAttrs = capacity - r->size();
|
||||||
|
if (it != state.nrMultiConcatCapacityPercent.end()) {
|
||||||
|
it->second += wastedAttrs;
|
||||||
|
} else {
|
||||||
|
state.nrMultiConcatCapacityPercent[capacityFraction] = wastedAttrs;
|
||||||
|
}
|
||||||
|
state.nrMultiConcatCapacityTotalWaste += wastedAttrs;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
v.mkAttrs(r);
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
Value v1, v2;
|
Value v1, v2;
|
||||||
state.evalAttrs(env, e1, v1, pos, "in the left operand of the update (//) operator");
|
state.evalAttrs(env, e1, v1, pos, "in the left operand of the update (//) operator");
|
||||||
state.evalAttrs(env, e2, v2, pos, "in the right operand of the update (//) operator");
|
state.evalAttrs(env, e2, v2, pos, "in the right operand of the update (//) operator");
|
||||||
@ -1932,6 +2034,22 @@ void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
|
|||||||
v.mkAttrs(attrs.alreadySorted());
|
v.mkAttrs(attrs.alreadySorted());
|
||||||
|
|
||||||
state.nrOpUpdateValuesCopied += v.attrs()->size();
|
state.nrOpUpdateValuesCopied += v.attrs()->size();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void Expr::evalForUpdate(EvalState & state, Env & env, UpdateQueue & q)
|
||||||
|
{
|
||||||
|
Value vTmp;
|
||||||
|
eval(state, env, vTmp);
|
||||||
|
// TODO add pos and errorCtx params
|
||||||
|
state.forceAttrs(vTmp, noPos, "while evaluating an attribute set merge operand");
|
||||||
|
q.push_back(vTmp.attrs());
|
||||||
|
}
|
||||||
|
|
||||||
|
void ExprOpUpdate::evalForUpdate(EvalState &state, Env &env, UpdateQueue &q)
|
||||||
|
{
|
||||||
|
e1->evalForUpdate(state, env, q);
|
||||||
|
e2->evalForUpdate(state, env, q);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -2905,6 +3023,26 @@ void EvalState::printStatistics()
|
|||||||
};
|
};
|
||||||
topObj["nrOpUpdates"] = nrOpUpdates;
|
topObj["nrOpUpdates"] = nrOpUpdates;
|
||||||
topObj["nrOpUpdateValuesCopied"] = nrOpUpdateValuesCopied;
|
topObj["nrOpUpdateValuesCopied"] = nrOpUpdateValuesCopied;
|
||||||
|
topObj["nrOpMultiUpdate"] = {};
|
||||||
|
for (auto [k, n] : nrMultiConcats)
|
||||||
|
topObj["nrsKWayUpdate"][fmt("%05i", k)] = n;
|
||||||
|
topObj["nrMultiConcatCapacityPercent"] = {};
|
||||||
|
for (auto [k, n] : nrMultiConcatCapacityPercent)
|
||||||
|
topObj["nrMultiConcatCapacityPercent"][fmt("%03i", k)] = n;
|
||||||
|
|
||||||
|
topObj["nrMultiConcatCapacityCumulativePercent"] = {};
|
||||||
|
{
|
||||||
|
auto c = 0;
|
||||||
|
for (auto [k, n] : nrMultiConcatCapacityPercent) {
|
||||||
|
c += n;
|
||||||
|
topObj["nrMultiConcatCapacityCumulativePercent"][fmt("%03i", k)] = c;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
topObj["nrMultiConcatCapacityTotalWaste"] = nrMultiConcatCapacityTotalWaste;
|
||||||
|
#if HAVE_BOEHMGC
|
||||||
|
topObj["nrMultiConcatCapacityTotalWastePercent"] = nrMultiConcatCapacityTotalWaste * sizeof(Attr) * 100.0 / totalBytes;
|
||||||
|
#endif
|
||||||
|
|
||||||
topObj["nrThunks"] = nrThunks;
|
topObj["nrThunks"] = nrThunks;
|
||||||
topObj["nrAvoided"] = nrAvoided;
|
topObj["nrAvoided"] = nrAvoided;
|
||||||
topObj["nrLookups"] = nrLookups;
|
topObj["nrLookups"] = nrLookups;
|
||||||
|
@ -819,6 +819,9 @@ private:
|
|||||||
unsigned long nrOpUpdates = 0;
|
unsigned long nrOpUpdates = 0;
|
||||||
unsigned long nrOpUpdateValuesCopied = 0;
|
unsigned long nrOpUpdateValuesCopied = 0;
|
||||||
unsigned long nrListConcats = 0;
|
unsigned long nrListConcats = 0;
|
||||||
|
std::map<int, int> nrMultiConcats;
|
||||||
|
std::map<int, int> nrMultiConcatCapacityPercent;
|
||||||
|
unsigned long nrMultiConcatCapacityTotalWaste = 0;
|
||||||
unsigned long nrPrimOpCalls = 0;
|
unsigned long nrPrimOpCalls = 0;
|
||||||
unsigned long nrFunctionCalls = 0;
|
unsigned long nrFunctionCalls = 0;
|
||||||
|
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <map>
|
#include <map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "gc-small-vector.hh"
|
||||||
#include "value.hh"
|
#include "value.hh"
|
||||||
#include "symbol-table.hh"
|
#include "symbol-table.hh"
|
||||||
#include "eval-error.hh"
|
#include "eval-error.hh"
|
||||||
@ -74,6 +75,7 @@ typedef std::vector<AttrName> AttrPath;
|
|||||||
|
|
||||||
std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath);
|
std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath);
|
||||||
|
|
||||||
|
using UpdateQueue = SmallVector<const Bindings *, 64>;
|
||||||
|
|
||||||
/* Abstract syntax of Nix expressions. */
|
/* Abstract syntax of Nix expressions. */
|
||||||
|
|
||||||
@ -91,8 +93,25 @@ struct Expr
|
|||||||
virtual ~Expr() { };
|
virtual ~Expr() { };
|
||||||
virtual void show(const SymbolTable & symbols, std::ostream & str) const;
|
virtual void show(const SymbolTable & symbols, std::ostream & str) const;
|
||||||
virtual void bindVars(EvalState & es, const std::shared_ptr<const StaticEnv> & env);
|
virtual void bindVars(EvalState & es, const std::shared_ptr<const StaticEnv> & env);
|
||||||
|
|
||||||
|
/** Normal evaluation, implemented directly by all subclasses. */
|
||||||
virtual void eval(EvalState & state, Env & env, Value & v);
|
virtual void eval(EvalState & state, Env & env, Value & v);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a thunk for the delayed computation of the given expression
|
||||||
|
* in the given environment. But if the expression is a variable,
|
||||||
|
* then look it up right away. This significantly reduces the number
|
||||||
|
* of thunks allocated.
|
||||||
|
*/
|
||||||
virtual Value * maybeThunk(EvalState & state, Env & env);
|
virtual Value * maybeThunk(EvalState & state, Env & env);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only called when performing an attrset update: `//` or similar.
|
||||||
|
* Instead of writing to a Value &, this function writes to an UpdateQueue.
|
||||||
|
* This allows the expression to perform multiple updates in a delayed manner, gathering up all the updates before applying them.
|
||||||
|
*/
|
||||||
|
virtual void evalForUpdate(EvalState & state, Env & env, UpdateQueue & q);
|
||||||
|
|
||||||
virtual void setName(Symbol name);
|
virtual void setName(Symbol name);
|
||||||
virtual void setDocComment(DocComment docComment) { };
|
virtual void setDocComment(DocComment docComment) { };
|
||||||
virtual PosIdx getPos() const { return noPos; }
|
virtual PosIdx getPos() const { return noPos; }
|
||||||
@ -430,9 +449,45 @@ MakeBinOp(ExprOpNEq, "!=")
|
|||||||
MakeBinOp(ExprOpAnd, "&&")
|
MakeBinOp(ExprOpAnd, "&&")
|
||||||
MakeBinOp(ExprOpOr, "||")
|
MakeBinOp(ExprOpOr, "||")
|
||||||
MakeBinOp(ExprOpImpl, "->")
|
MakeBinOp(ExprOpImpl, "->")
|
||||||
MakeBinOp(ExprOpUpdate, "//")
|
|
||||||
MakeBinOp(ExprOpConcatLists, "++")
|
MakeBinOp(ExprOpConcatLists, "++")
|
||||||
|
|
||||||
|
struct ExprOpUpdate : Expr
|
||||||
|
{
|
||||||
|
PosIdx pos;
|
||||||
|
Expr *e1, *e2;
|
||||||
|
ExprOpUpdate(Expr * e1, Expr * e2)
|
||||||
|
: e1(e1)
|
||||||
|
, e2(e2){};
|
||||||
|
ExprOpUpdate(const PosIdx & pos, Expr * e1, Expr * e2)
|
||||||
|
: pos(pos)
|
||||||
|
, e1(e1)
|
||||||
|
, e2(e2){};
|
||||||
|
void show(const SymbolTable & symbols, std ::ostream & str) const override
|
||||||
|
{
|
||||||
|
str << "(";
|
||||||
|
e1->show(symbols, str);
|
||||||
|
str << " "
|
||||||
|
"//"
|
||||||
|
" ";
|
||||||
|
e2->show(symbols, str);
|
||||||
|
str << ")";
|
||||||
|
}
|
||||||
|
void bindVars(EvalState & es, const std ::shared_ptr<const StaticEnv> & env) override
|
||||||
|
{
|
||||||
|
e1->bindVars(es, env);
|
||||||
|
e2->bindVars(es, env);
|
||||||
|
}
|
||||||
|
void eval(EvalState & state, Env & env, Value & v) override;
|
||||||
|
PosIdx getPos() const override
|
||||||
|
{
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
virtual void evalForUpdate(EvalState & state, Env & env, UpdateQueue & q) override;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
struct ExprConcatStrings : Expr
|
struct ExprConcatStrings : Expr
|
||||||
{
|
{
|
||||||
PosIdx pos;
|
PosIdx pos;
|
||||||
|
Loading…
Reference in New Issue
Block a user