Store::queryMissing(): Use a thread pool

For one particular NixOS configuration, this cut the runtime of
"nix-store -r --dry-run" from 6m51s to 3.4s. It also fixes a bug in
the size calculation that was causing certain paths to be counted
twice, e.g. before:

  these paths will be fetched (1249.98 MiB download, 2995.74 MiB unpacked):

and after:

  these paths will be fetched (1219.56 MiB download, 2862.17 MiB unpacked):
This commit is contained in:
Eelco Dolstra 2016-07-21 17:40:40 +02:00
parent 6f70fcd1c5
commit d57981bac4

View file

@@ -72,48 +72,87 @@ void Store::computeFSClosure(const Path & path,
void Store::queryMissing(const PathSet & targets, void Store::queryMissing(const PathSet & targets,
PathSet & willBuild, PathSet & willSubstitute, PathSet & unknown, PathSet & willBuild_, PathSet & willSubstitute_, PathSet & unknown_,
unsigned long long & downloadSize, unsigned long long & narSize) unsigned long long & downloadSize_, unsigned long long & narSize_)
{ {
downloadSize = narSize = 0; downloadSize_ = narSize_ = 0;
PathSet todo(targets.begin(), targets.end()), done; ThreadPool pool;
/* Getting substitute info has high latency when using the binary struct State
cache substituter. Thus it's essential to do substitute {
queries in parallel as much as possible. To accomplish this PathSet done;
we do the following: PathSet & unknown, & willSubstitute, & willBuild;
unsigned long long & downloadSize;
unsigned long long & narSize;
};
- For all paths still to be processed (todo), we add all struct DrvState
paths for which we need info to the set query. For an {
unbuilt derivation this is the output paths; otherwise, it's size_t left;
the path itself. bool done = false;
PathSet outPaths;
DrvState(size_t left) : left(left) { }
};
- We get info about all paths in query in parallel. Sync<State> state_(State{PathSet(), unknown_, willSubstitute_, willBuild_, downloadSize_, narSize_});
- We process the results and add new items to todo if std::function<void(Path)> doPath;
necessary. E.g. if a path is substitutable, then we need to
get info on its references.
- Repeat until todo is empty. auto mustBuildDrv = [&](const Path & drvPath, const Derivation & drv) {
*/ {
auto state(state_.lock());
state->willBuild.insert(drvPath);
}
while (!todo.empty()) { for (auto & i : drv.inputDrvs)
pool.enqueue(std::bind(doPath, makeDrvPathWithOutputs(i.first, i.second)));
};
PathSet query, todoDrv, todoNonDrv; auto checkOutput = [&](
const Path & drvPath, ref<Derivation> drv, const Path & outPath, ref<Sync<DrvState>> drvState_)
{
if (drvState_->lock()->done) return;
for (auto & i : todo) { SubstitutablePathInfos infos;
if (done.find(i) != done.end()) continue; querySubstitutablePathInfos({outPath}, infos);
done.insert(i);
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i); if (infos.empty()) {
drvState_->lock()->done = true;
mustBuildDrv(drvPath, *drv);
} else {
{
auto drvState(drvState_->lock());
if (drvState->done) return;
assert(drvState->left);
drvState->left--;
drvState->outPaths.insert(outPath);
if (!drvState->left) {
for (auto & path : drvState->outPaths)
pool.enqueue(std::bind(doPath, path));
}
}
}
};
doPath = [&](const Path & path) {
{
auto state(state_.lock());
if (state->done.count(path)) return;
state->done.insert(path);
}
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(path);
if (isDerivation(i2.first)) { if (isDerivation(i2.first)) {
if (!isValidPath(i2.first)) { if (!isValidPath(i2.first)) {
// FIXME: we could try to substitute p. // FIXME: we could try to substitute the derivation.
unknown.insert(i); auto state(state_.lock());
continue; state->unknown.insert(path);
return;
} }
Derivation drv = derivationFromPath(i2.first); Derivation drv = derivationFromPath(i2.first);
PathSet invalid; PathSet invalid;
@@ -121,67 +160,47 @@ void Store::queryMissing(const PathSet & targets,
if (wantOutput(j.first, i2.second) if (wantOutput(j.first, i2.second)
&& !isValidPath(j.second.path)) && !isValidPath(j.second.path))
invalid.insert(j.second.path); invalid.insert(j.second.path);
if (invalid.empty()) continue; if (invalid.empty()) return;
todoDrv.insert(i); if (settings.useSubstitutes && drv.substitutesAllowed()) {
if (settings.useSubstitutes && drv.substitutesAllowed()) auto drvState = make_ref<Sync<DrvState>>(DrvState(invalid.size()));
query.insert(invalid.begin(), invalid.end()); for (auto & output : invalid)
} pool.enqueue(std::bind(checkOutput, i2.first, make_ref<Derivation>(drv), output, drvState));
} else
mustBuildDrv(i2.first, drv);
else { } else {
if (isValidPath(i)) continue;
query.insert(i);
todoNonDrv.insert(i);
}
}
todo.clear(); if (isValidPath(path)) return;
SubstitutablePathInfos infos; SubstitutablePathInfos infos;
querySubstitutablePathInfos(query, infos); querySubstitutablePathInfos({path}, infos);
for (auto & i : todoDrv) { if (infos.empty()) {
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i); auto state(state_.lock());
state->unknown.insert(path);
// FIXME: cache this return;
Derivation drv = derivationFromPath(i2.first);
PathSet outputs;
bool mustBuild = false;
if (settings.useSubstitutes && drv.substitutesAllowed()) {
for (auto & j : drv.outputs) {
if (!wantOutput(j.first, i2.second)) continue;
if (!isValidPath(j.second.path)) {
if (infos.find(j.second.path) == infos.end())
mustBuild = true;
else
outputs.insert(j.second.path);
}
}
} else
mustBuild = true;
if (mustBuild) {
willBuild.insert(i2.first);
todo.insert(drv.inputSrcs.begin(), drv.inputSrcs.end());
for (auto & j : drv.inputDrvs)
todo.insert(makeDrvPathWithOutputs(j.first, j.second));
} else
todoNonDrv.insert(outputs.begin(), outputs.end());
} }
for (auto & i : todoNonDrv) { auto info = infos.find(path);
done.insert(i); assert(info != infos.end());
SubstitutablePathInfos::iterator info = infos.find(i);
if (info != infos.end()) { {
willSubstitute.insert(i); auto state(state_.lock());
downloadSize += info->second.downloadSize; state->willSubstitute.insert(path);
narSize += info->second.narSize; state->downloadSize += info->second.downloadSize;
todo.insert(info->second.references.begin(), info->second.references.end()); state->narSize += info->second.narSize;
} else
unknown.insert(i);
} }
for (auto & ref : info->second.references)
pool.enqueue(std::bind(doPath, ref));
} }
};
for (auto & path : targets)
pool.enqueue(std::bind(doPath, path));
pool.process();
} }