Store::queryMissing(): Use a thread pool

For one particular NixOS configuration, this cut the runtime of
"nix-store -r --dry-run" from 6m51s to 3.4s. It also fixes a bug in
the size calculation that was causing certain paths to be counted
twice, e.g. before:

  these paths will be fetched (1249.98 MiB download, 2995.74 MiB unpacked):

and after:

  these paths will be fetched (1219.56 MiB download, 2862.17 MiB unpacked):
This commit is contained in:
Eelco Dolstra 2016-07-21 17:40:40 +02:00
parent 6f70fcd1c5
commit d57981bac4

View file

@@ -72,116 +72,135 @@ void Store::computeFSClosure(const Path & path,
void Store::queryMissing(const PathSet & targets, void Store::queryMissing(const PathSet & targets,
PathSet & willBuild, PathSet & willSubstitute, PathSet & unknown, PathSet & willBuild_, PathSet & willSubstitute_, PathSet & unknown_,
unsigned long long & downloadSize, unsigned long long & narSize) unsigned long long & downloadSize_, unsigned long long & narSize_)
{ {
downloadSize = narSize = 0; downloadSize_ = narSize_ = 0;
PathSet todo(targets.begin(), targets.end()), done; ThreadPool pool;
/* Getting substitute info has high latency when using the binary struct State
cache substituter. Thus it's essential to do substitute {
queries in parallel as much as possible. To accomplish this PathSet done;
we do the following: PathSet & unknown, & willSubstitute, & willBuild;
unsigned long long & downloadSize;
unsigned long long & narSize;
};
- For all paths still to be processed (todo), we add all struct DrvState
paths for which we need info to the set query. For an {
unbuilt derivation this is the output paths; otherwise, it's size_t left;
the path itself. bool done = false;
PathSet outPaths;
DrvState(size_t left) : left(left) { }
};
- We get info about all paths in query in parallel. Sync<State> state_(State{PathSet(), unknown_, willSubstitute_, willBuild_, downloadSize_, narSize_});
- We process the results and add new items to todo if std::function<void(Path)> doPath;
necessary. E.g. if a path is substitutable, then we need to
get info on its references.
- Repeat until todo is empty. auto mustBuildDrv = [&](const Path & drvPath, const Derivation & drv) {
*/ {
auto state(state_.lock());
while (!todo.empty()) { state->willBuild.insert(drvPath);
PathSet query, todoDrv, todoNonDrv;
for (auto & i : todo) {
if (done.find(i) != done.end()) continue;
done.insert(i);
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i);
if (isDerivation(i2.first)) {
if (!isValidPath(i2.first)) {
// FIXME: we could try to substitute p.
unknown.insert(i);
continue;
}
Derivation drv = derivationFromPath(i2.first);
PathSet invalid;
for (auto & j : drv.outputs)
if (wantOutput(j.first, i2.second)
&& !isValidPath(j.second.path))
invalid.insert(j.second.path);
if (invalid.empty()) continue;
todoDrv.insert(i);
if (settings.useSubstitutes && drv.substitutesAllowed())
query.insert(invalid.begin(), invalid.end());
}
else {
if (isValidPath(i)) continue;
query.insert(i);
todoNonDrv.insert(i);
}
} }
todo.clear(); for (auto & i : drv.inputDrvs)
pool.enqueue(std::bind(doPath, makeDrvPathWithOutputs(i.first, i.second)));
};
auto checkOutput = [&](
const Path & drvPath, ref<Derivation> drv, const Path & outPath, ref<Sync<DrvState>> drvState_)
{
if (drvState_->lock()->done) return;
SubstitutablePathInfos infos; SubstitutablePathInfos infos;
querySubstitutablePathInfos(query, infos); querySubstitutablePathInfos({outPath}, infos);
for (auto & i : todoDrv) { if (infos.empty()) {
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(i); drvState_->lock()->done = true;
mustBuildDrv(drvPath, *drv);
} else {
{
auto drvState(drvState_->lock());
if (drvState->done) return;
assert(drvState->left);
drvState->left--;
drvState->outPaths.insert(outPath);
if (!drvState->left) {
for (auto & path : drvState->outPaths)
pool.enqueue(std::bind(doPath, path));
}
}
}
};
doPath = [&](const Path & path) {
{
auto state(state_.lock());
if (state->done.count(path)) return;
state->done.insert(path);
}
DrvPathWithOutputs i2 = parseDrvPathWithOutputs(path);
if (isDerivation(i2.first)) {
if (!isValidPath(i2.first)) {
// FIXME: we could try to substitute the derivation.
auto state(state_.lock());
state->unknown.insert(path);
return;
}
// FIXME: cache this
Derivation drv = derivationFromPath(i2.first); Derivation drv = derivationFromPath(i2.first);
PathSet outputs; PathSet invalid;
bool mustBuild = false; for (auto & j : drv.outputs)
if (wantOutput(j.first, i2.second)
&& !isValidPath(j.second.path))
invalid.insert(j.second.path);
if (invalid.empty()) return;
if (settings.useSubstitutes && drv.substitutesAllowed()) { if (settings.useSubstitutes && drv.substitutesAllowed()) {
for (auto & j : drv.outputs) { auto drvState = make_ref<Sync<DrvState>>(DrvState(invalid.size()));
if (!wantOutput(j.first, i2.second)) continue; for (auto & output : invalid)
if (!isValidPath(j.second.path)) { pool.enqueue(std::bind(checkOutput, i2.first, make_ref<Derivation>(drv), output, drvState));
if (infos.find(j.second.path) == infos.end())
mustBuild = true;
else
outputs.insert(j.second.path);
}
}
} else } else
mustBuild = true; mustBuildDrv(i2.first, drv);
if (mustBuild) { } else {
willBuild.insert(i2.first);
todo.insert(drv.inputSrcs.begin(), drv.inputSrcs.end());
for (auto & j : drv.inputDrvs)
todo.insert(makeDrvPathWithOutputs(j.first, j.second));
} else
todoNonDrv.insert(outputs.begin(), outputs.end());
}
for (auto & i : todoNonDrv) { if (isValidPath(path)) return;
done.insert(i);
SubstitutablePathInfos::iterator info = infos.find(i); SubstitutablePathInfos infos;
if (info != infos.end()) { querySubstitutablePathInfos({path}, infos);
willSubstitute.insert(i);
downloadSize += info->second.downloadSize; if (infos.empty()) {
narSize += info->second.narSize; auto state(state_.lock());
todo.insert(info->second.references.begin(), info->second.references.end()); state->unknown.insert(path);
} else return;
unknown.insert(i); }
auto info = infos.find(path);
assert(info != infos.end());
{
auto state(state_.lock());
state->willSubstitute.insert(path);
state->downloadSize += info->second.downloadSize;
state->narSize += info->second.narSize;
}
for (auto & ref : info->second.references)
pool.enqueue(std::bind(doPath, ref));
} }
} };
for (auto & path : targets)
pool.enqueue(std::bind(doPath, path));
pool.process();
} }