mirror of
https://github.com/NixOS/nix.git
synced 2024-11-21 22:32:26 +00:00
WIP: accumulate attrset updates, use k-way merge
This seems to be slower as of yet. TODO - [ ] remove merge pass 2 into 1: - pass 1: fill UpdateQueue - pass 2: stats (like max output size) - pass 3: k-way merge - [ ] maybe tracking sortedness is useful and cheap? in pass 1, building the UpdateQueue - [ ] make it actually fast; different algorithm depending on size distribution and number of update ops? - currently solution is the min-heap or priority queue approach - divide and conquer (see also Nixpkgs which manually implements this for pkgs/by-name) binary tree-shaped sorted merge operations - can we take the size of attrsets into account to balance the work? 1000 + (1 + (1 + 1)) is better than ((1000+1)+1)+1 (where number refers to attrset size) - iterative pairwise merging is more or less what we had, the tree of updates is flattened. Probably worse. - perhaps some combination of solutions, depending on a heuristic - [ ] split callFunction into two parts, effectively - call2Thunk (create the new scope's Env etc) - eval that thunk (evaluate the body) - [ ] implement ExprApply::evalForUpdate - easy after the split - [ ] make foldl' (//) work? - [ ] optimize listToAttrs and other primops?
This commit is contained in:
parent
59def6c23b
commit
2cc8387cb3
@ -999,10 +999,6 @@ void EvalState::mkSingleDerivedPathString(
|
||||
}
|
||||
|
||||
|
||||
/* Create a thunk for the delayed computation of the given expression
|
||||
in the given environment. But if the expression is a variable,
|
||||
then look it up right away. This significantly reduces the number
|
||||
of thunks allocated. */
|
||||
Value * Expr::maybeThunk(EvalState & state, Env & env)
|
||||
{
|
||||
Value * v = state.allocValue();
|
||||
@ -1899,6 +1895,112 @@ void ExprOpImpl::eval(EvalState & state, Env & env, Value & v)
|
||||
|
||||
void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
|
||||
{
|
||||
|
||||
UpdateQueue q;
|
||||
|
||||
e1->evalForUpdate(state, env, q);
|
||||
e2->evalForUpdate(state, env, q);
|
||||
|
||||
// k-way merge
|
||||
|
||||
size_t capacity = 0;
|
||||
|
||||
struct BindingsCursor {
|
||||
const Bindings * bindings;
|
||||
Bindings::size_t offset;
|
||||
|
||||
const Attr & front() {
|
||||
return (*bindings)[offset];
|
||||
}
|
||||
/**
|
||||
* @return false if there are no more elements
|
||||
*/
|
||||
bool pop_front() {
|
||||
offset++;
|
||||
return offset < bindings->size();
|
||||
}
|
||||
};
|
||||
|
||||
// a nested queue where the key is equal to the first and lowest name
|
||||
std::map<std::pair<Symbol, int>, BindingsCursor> next;
|
||||
|
||||
// first fill next
|
||||
int bias = 0;
|
||||
for (auto b : q) {
|
||||
if (b->size() == 0) continue;
|
||||
capacity += b->size();
|
||||
next.insert_or_assign({b->begin()->name, bias}, BindingsCursor {b, 0});
|
||||
|
||||
// The `//` operator is "right-biased", which means `{ a = "L"; } // { a = "R"; }`
|
||||
// will result in `{ a = "R"; }`.
|
||||
// This means that we want to find the right most value first, so that
|
||||
// we can then ignore the earlier attributes of the same name.
|
||||
// Decreasing the bias integer achieves this.
|
||||
bias--;
|
||||
}
|
||||
|
||||
auto r = state.allocBindings(capacity);
|
||||
|
||||
{
|
||||
auto it = state.nrMultiConcats.find(capacity);
|
||||
if (it != state.nrMultiConcats.end()) {
|
||||
it->second++;
|
||||
} else {
|
||||
state.nrMultiConcats[capacity] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// std::cerr << "capacity: " << r->capacity() << std::endl;
|
||||
|
||||
// Consume the lowest item until empty.
|
||||
// This changes the order of next, so we need to reinsert it each time.
|
||||
|
||||
// The last inserted name, so that we can easily ignore all the values that aren't right-most in the sequence of `//` operations.
|
||||
// Symbol() is a _null object_ that doesn't match any real symbols.
|
||||
Symbol lastInsertedName;
|
||||
while (true) {
|
||||
auto cur = next.begin();
|
||||
if (cur == next.end()) break;
|
||||
|
||||
// std::cerr << "size: " << r->size() << std::endl;
|
||||
|
||||
// std::cerr << "processing attr: " << state.symbols[cur->first] << std::endl;
|
||||
|
||||
// Grow `r`
|
||||
if (lastInsertedName != cur->first.first) {
|
||||
r->push_back(cur->second.front());
|
||||
lastInsertedName = cur->first.first;
|
||||
}
|
||||
|
||||
// Shrink `next`
|
||||
int bias = cur->first.second;
|
||||
|
||||
if (cur->second.pop_front()) {
|
||||
std::pair<Symbol, int> key = {cur->second.front().name, bias};
|
||||
next.emplace(key, std::move(cur->second));
|
||||
}
|
||||
next.erase(cur);
|
||||
}
|
||||
|
||||
// TODO check the size of the bindings. If significantly below capacity, shrink it.
|
||||
|
||||
if (capacity > 0) {
|
||||
auto capacityFraction = (100 * r->size()) / capacity;
|
||||
auto it = state.nrMultiConcatCapacityPercent.find(capacityFraction);
|
||||
auto wastedAttrs = capacity - r->size();
|
||||
if (it != state.nrMultiConcatCapacityPercent.end()) {
|
||||
it->second += wastedAttrs;
|
||||
} else {
|
||||
state.nrMultiConcatCapacityPercent[capacityFraction] = wastedAttrs;
|
||||
}
|
||||
state.nrMultiConcatCapacityTotalWaste += wastedAttrs;
|
||||
}
|
||||
|
||||
|
||||
v.mkAttrs(r);
|
||||
|
||||
|
||||
#if 0
|
||||
Value v1, v2;
|
||||
state.evalAttrs(env, e1, v1, pos, "in the left operand of the update (//) operator");
|
||||
state.evalAttrs(env, e2, v2, pos, "in the right operand of the update (//) operator");
|
||||
@ -1932,6 +2034,22 @@ void ExprOpUpdate::eval(EvalState & state, Env & env, Value & v)
|
||||
v.mkAttrs(attrs.alreadySorted());
|
||||
|
||||
state.nrOpUpdateValuesCopied += v.attrs()->size();
|
||||
#endif
|
||||
}
|
||||
|
||||
void Expr::evalForUpdate(EvalState & state, Env & env, UpdateQueue & q)
|
||||
{
|
||||
Value vTmp;
|
||||
eval(state, env, vTmp);
|
||||
// TODO add pos and errorCtx params
|
||||
state.forceAttrs(vTmp, noPos, "while evaluating an attribute set merge operand");
|
||||
q.push_back(vTmp.attrs());
|
||||
}
|
||||
|
||||
void ExprOpUpdate::evalForUpdate(EvalState &state, Env &env, UpdateQueue &q)
|
||||
{
|
||||
e1->evalForUpdate(state, env, q);
|
||||
e2->evalForUpdate(state, env, q);
|
||||
}
|
||||
|
||||
|
||||
@ -2905,6 +3023,26 @@ void EvalState::printStatistics()
|
||||
};
|
||||
topObj["nrOpUpdates"] = nrOpUpdates;
|
||||
topObj["nrOpUpdateValuesCopied"] = nrOpUpdateValuesCopied;
|
||||
topObj["nrOpMultiUpdate"] = {};
|
||||
for (auto [k, n] : nrMultiConcats)
|
||||
topObj["nrsKWayUpdate"][fmt("%05i", k)] = n;
|
||||
topObj["nrMultiConcatCapacityPercent"] = {};
|
||||
for (auto [k, n] : nrMultiConcatCapacityPercent)
|
||||
topObj["nrMultiConcatCapacityPercent"][fmt("%03i", k)] = n;
|
||||
|
||||
topObj["nrMultiConcatCapacityCumulativePercent"] = {};
|
||||
{
|
||||
auto c = 0;
|
||||
for (auto [k, n] : nrMultiConcatCapacityPercent) {
|
||||
c += n;
|
||||
topObj["nrMultiConcatCapacityCumulativePercent"][fmt("%03i", k)] = c;
|
||||
}
|
||||
}
|
||||
topObj["nrMultiConcatCapacityTotalWaste"] = nrMultiConcatCapacityTotalWaste;
|
||||
#if HAVE_BOEHMGC
|
||||
topObj["nrMultiConcatCapacityTotalWastePercent"] = nrMultiConcatCapacityTotalWaste * sizeof(Attr) * 100.0 / totalBytes;
|
||||
#endif
|
||||
|
||||
topObj["nrThunks"] = nrThunks;
|
||||
topObj["nrAvoided"] = nrAvoided;
|
||||
topObj["nrLookups"] = nrLookups;
|
||||
|
@ -819,6 +819,9 @@ private:
|
||||
unsigned long nrOpUpdates = 0;
|
||||
unsigned long nrOpUpdateValuesCopied = 0;
|
||||
unsigned long nrListConcats = 0;
|
||||
std::map<int, int> nrMultiConcats;
|
||||
std::map<int, int> nrMultiConcatCapacityPercent;
|
||||
unsigned long nrMultiConcatCapacityTotalWaste = 0;
|
||||
unsigned long nrPrimOpCalls = 0;
|
||||
unsigned long nrFunctionCalls = 0;
|
||||
|
||||
|
@ -4,6 +4,7 @@
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include "gc-small-vector.hh"
|
||||
#include "value.hh"
|
||||
#include "symbol-table.hh"
|
||||
#include "eval-error.hh"
|
||||
@ -74,6 +75,7 @@ typedef std::vector<AttrName> AttrPath;
|
||||
|
||||
std::string showAttrPath(const SymbolTable & symbols, const AttrPath & attrPath);
|
||||
|
||||
using UpdateQueue = SmallVector<const Bindings *, 64>;
|
||||
|
||||
/* Abstract syntax of Nix expressions. */
|
||||
|
||||
@ -91,8 +93,25 @@ struct Expr
|
||||
virtual ~Expr() { };
|
||||
virtual void show(const SymbolTable & symbols, std::ostream & str) const;
|
||||
virtual void bindVars(EvalState & es, const std::shared_ptr<const StaticEnv> & env);
|
||||
|
||||
/** Normal evaluation, implemented directly by all subclasses. */
|
||||
virtual void eval(EvalState & state, Env & env, Value & v);
|
||||
|
||||
/**
|
||||
* Create a thunk for the delayed computation of the given expression
|
||||
* in the given environment. But if the expression is a variable,
|
||||
* then look it up right away. This significantly reduces the number
|
||||
* of thunks allocated.
|
||||
*/
|
||||
virtual Value * maybeThunk(EvalState & state, Env & env);
|
||||
|
||||
/**
|
||||
* Only called when performing an attrset update: `//` or similar.
|
||||
* Instead of writing to a Value &, this function writes to an UpdateQueue.
|
||||
* This allows the expression to perform multiple updates in a delayed manner, gathering up all the updates before applying them.
|
||||
*/
|
||||
virtual void evalForUpdate(EvalState & state, Env & env, UpdateQueue & q);
|
||||
|
||||
virtual void setName(Symbol name);
|
||||
virtual void setDocComment(DocComment docComment) { };
|
||||
virtual PosIdx getPos() const { return noPos; }
|
||||
@ -430,9 +449,45 @@ MakeBinOp(ExprOpNEq, "!=")
|
||||
MakeBinOp(ExprOpAnd, "&&")
|
||||
MakeBinOp(ExprOpOr, "||")
|
||||
MakeBinOp(ExprOpImpl, "->")
|
||||
MakeBinOp(ExprOpUpdate, "//")
|
||||
MakeBinOp(ExprOpConcatLists, "++")
|
||||
|
||||
struct ExprOpUpdate : Expr
|
||||
{
|
||||
PosIdx pos;
|
||||
Expr *e1, *e2;
|
||||
ExprOpUpdate(Expr * e1, Expr * e2)
|
||||
: e1(e1)
|
||||
, e2(e2){};
|
||||
ExprOpUpdate(const PosIdx & pos, Expr * e1, Expr * e2)
|
||||
: pos(pos)
|
||||
, e1(e1)
|
||||
, e2(e2){};
|
||||
void show(const SymbolTable & symbols, std ::ostream & str) const override
|
||||
{
|
||||
str << "(";
|
||||
e1->show(symbols, str);
|
||||
str << " "
|
||||
"//"
|
||||
" ";
|
||||
e2->show(symbols, str);
|
||||
str << ")";
|
||||
}
|
||||
void bindVars(EvalState & es, const std ::shared_ptr<const StaticEnv> & env) override
|
||||
{
|
||||
e1->bindVars(es, env);
|
||||
e2->bindVars(es, env);
|
||||
}
|
||||
void eval(EvalState & state, Env & env, Value & v) override;
|
||||
PosIdx getPos() const override
|
||||
{
|
||||
return pos;
|
||||
}
|
||||
|
||||
virtual void evalForUpdate(EvalState & state, Env & env, UpdateQueue & q) override;
|
||||
|
||||
};
|
||||
|
||||
|
||||
struct ExprConcatStrings : Expr
|
||||
{
|
||||
PosIdx pos;
|
||||
|
Loading…
Reference in New Issue
Block a user