Finish separating concerns with tarball cache

There is no longer an `importTarball` method. Instead, there is a
`unpackTarfileToSink` function (back in libutil). The caller can use
this with the `getParseSink` method we added in the last commit easily
enough.

In addition, tarball cache functionality is separated from `git-utils`
and moved into `tarball-cache`. This ensures we are separating mechanism
and policy.
This commit is contained in:
John Ericson 2023-12-21 04:28:06 -05:00
parent ba6a5f06ee
commit ed24baaec4
7 changed files with 107 additions and 99 deletions

View file

@ -315,8 +315,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return std::nullopt;
}
TarballInfo importTarball(Source & source) override;
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override;
std::string resolveSubmoduleUrl(
@ -947,88 +945,4 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
return result;
}
/**
 * Return the Git repository that backs the tarball cache.
 *
 * The repository lives at `<cache dir>/nix/tarball-cache`, where
 * `<cache dir>` is whatever `getCacheDir()` reports. The location is
 * computed once, on the first call, and reused afterwards.
 */
ref<GitRepo> getTarballCache()
{
    static auto cacheRepoPath = [] {
        std::filesystem::path dir(getCacheDir());
        dir /= "nix";
        dir /= "tarball-cache";
        return dir;
    }();

    // NOTE(review): the two `true` flags are presumably "create" and
    // "bare" -- confirm against the GitRepoImpl constructor.
    return make_ref<GitRepoImpl>(cacheRepoPath, true, true);
}
}
#include "tarfile.hh"
#include <archive_entry.h>
namespace nix {
/**
 * Import the members of a tarball into this repository.
 *
 * The tarball is read from `source` and every member is forwarded to
 * the sink obtained from `getFileSystemObjectSink()`: directories,
 * regular files and symlinks are supported, anything else is rejected.
 *
 * @param source Stream containing the tarball.
 * @return A `TarballInfo` holding the hash returned by the sink's
 *         `sync()` and the largest modification time found among the
 *         members (0 if the tarball has no members).
 * @throws Error if a member has no name, a symlink has no target, a
 *         member's data cannot be read, or a member has an unsupported
 *         file type.
 */
GitRepo::TarballInfo GitRepoImpl::importTarball(Source & source)
{
    TarArchive archive { source };

    auto parseSink = getFileSystemObjectSink();

    time_t lastModified = 0;

    /* Allocate the read buffer once for the whole tarball instead of
       once per chunk. It is kept on the heap because 128 KiB is a lot
       to put on the stack. */
    std::vector<unsigned char> buf(128 * 1024);

    for (;;) {
        // FIXME: merge with extract_archive
        struct archive_entry * entry;
        int r = archive_read_next_header(archive.archive, &entry);
        if (r == ARCHIVE_EOF) break;
        auto path = archive_entry_pathname(entry);
        if (!path)
            throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
        // Warnings are reported but do not abort the import; every
        // other status is handed to archive.check().
        if (r == ARCHIVE_WARN)
            warn(archive_error_string(archive.archive));
        else
            archive.check(r);

        lastModified = std::max(lastModified, archive_entry_mtime(entry));

        switch (archive_entry_filetype(entry)) {

        case AE_IFDIR:
            parseSink->createDirectory(path);
            break;

        case AE_IFREG: {
            parseSink->createRegularFile(path, [&](auto & crf) {
                // Only the owner-execute bit is consulted.
                if (archive_entry_mode(entry) & S_IXUSR)
                    crf.isExecutable();

                while (true) {
                    auto n = archive_read_data(archive.archive, buf.data(), buf.size());
                    if (n < 0)
                        throw Error("cannot read file '%s' from tarball", path);
                    if (n == 0) break;
                    crf(std::string_view {
                        reinterpret_cast<const char *>(buf.data()),
                        static_cast<size_t>(n),
                    });
                }
            });
            break;
        }

        case AE_IFLNK: {
            auto target = archive_entry_symlink(entry);
            // libarchive returns NULL when no target is available;
            // do not hand a null pointer on to the sink.
            if (!target)
                throw Error("cannot get symlink target of '%s' in tarball", path);
            parseSink->createSymlink(path, target);
            break;
        }

        default:
            throw Error("file '%s' in tarball has unsupported file type", path);
        }
    }

    return TarballInfo {
        .treeHash = parseSink->sync(),
        .lastModified = lastModified
    };
}
}

View file

@ -73,12 +73,6 @@ struct GitRepo
const std::string & url,
const std::string & base) = 0;
/**
 * Result of importing a tarball with `importTarball()`.
 */
struct TarballInfo
{
/** Hash returned by the file system object sink's `sync()` once all members were imported. */
Hash treeHash;
/** Largest modification time among the tarball's members (0 if there were none). */
time_t lastModified;
};
virtual bool hasObject(const Hash & oid) = 0;
virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;
@ -99,10 +93,6 @@ struct GitRepo
virtual void verifyCommit(
const Hash & rev,
const std::vector<fetchers::PublicKey> & publicKeys) = 0;
virtual TarballInfo importTarball(Source & source) = 0;
};
/**
 * Return the Git repository used as the tarball cache, located at
 * `<cache dir>/nix/tarball-cache`.
 */
ref<GitRepo> getTarballCache();
}

View file

@ -8,7 +8,9 @@
#include "fetchers.hh"
#include "fetch-settings.hh"
#include "tarball.hh"
#include "tarfile.hh"
#include "git-utils.hh"
#include "tarball-cache.hh"
#include <optional>
#include <nlohmann/json.hpp>
@ -191,7 +193,7 @@ struct GitArchiveInputScheme : InputScheme
virtual DownloadUrl getDownloadUrl(const Input & input) const = 0;
std::pair<Input, GitRepo::TarballInfo> downloadArchive(ref<Store> store, Input input) const
std::pair<Input, TarballInfo> downloadArchive(ref<Store> store, Input input) const
{
if (!maybeGetStrAttr(input.attrs, "ref")) input.attrs.insert_or_assign("ref", "HEAD");
@ -218,7 +220,7 @@ struct GitArchiveInputScheme : InputScheme
auto treeHash = getRevAttr(*treeHashAttrs, "treeHash");
auto lastModified = getIntAttr(*lastModifiedAttrs, "lastModified");
if (getTarballCache()->hasObject(treeHash))
return {std::move(input), GitRepo::TarballInfo { .treeHash = treeHash, .lastModified = (time_t) lastModified }};
return {std::move(input), TarballInfo { .treeHash = treeHash, .lastModified = (time_t) lastModified }};
else
debug("Git tree with hash '%s' has disappeared from the cache, refetching...", treeHash.gitRev());
}
@ -233,7 +235,14 @@ struct GitArchiveInputScheme : InputScheme
getFileTransfer()->download(std::move(req), sink);
});
auto tarballInfo = getTarballCache()->importTarball(*source);
TarArchive archive { *source };
auto parseSink = getTarballCache()->getFileSystemObjectSink();
auto lastModified = unpackTarfileToSink(archive, *parseSink);
TarballInfo tarballInfo {
.treeHash = parseSink->sync(),
.lastModified = lastModified
};
cache->upsert(treeHashKey, Attrs{{"treeHash", tarballInfo.treeHash.gitRev()}});
cache->upsert(lastModifiedKey, Attrs{{"lastModified", (uint64_t) tarballInfo.lastModified}});

View file

@ -0,0 +1,13 @@
#include "tarball-cache.hh"
#include "users.hh"
namespace nix::fetchers {
/**
 * Open the Git repository that backs the tarball cache.
 *
 * The repository lives at `<cache dir>/nix/tarball-cache`, where
 * `<cache dir>` is whatever `getCacheDir()` reports. The location is
 * computed once, on the first call, and reused afterwards.
 */
ref<GitRepo> getTarballCache()
{
    static auto cacheRepoPath = [] {
        std::filesystem::path dir(getCacheDir());
        dir /= "nix";
        dir /= "tarball-cache";
        return dir;
    }();

    // NOTE(review): the two `true` flags are presumably "create" and
    // "bare" -- confirm against GitRepo::openRepo.
    return GitRepo::openRepo(cacheRepoPath, true, true);
}
}

View file

@ -0,0 +1,17 @@
#pragma once
///@file
#include "ref.hh"
#include "git-utils.hh"
namespace nix::fetchers {
/**
 * Summary of a tarball that was unpacked into the tarball cache.
 */
struct TarballInfo
{
/** Hash returned by the file system object sink's `sync()` after unpacking. */
Hash treeHash;
/** Largest modification time among the tarball's members, as returned by `unpackTarfileToSink()`. */
time_t lastModified;
};
/**
 * Return the Git repository used as the tarball cache, located at
 * `<cache dir>/nix/tarball-cache`.
 */
ref<GitRepo> getTarballCache();
}

View file

@ -132,4 +132,66 @@ void unpackTarfile(const Path & tarFile, const Path & destDir)
extract_archive(archive, destDir);
}
/**
 * Unpack every member of `archive` into `parseSink`.
 *
 * Directories, regular files and symlinks are forwarded to the sink;
 * a member of any other type aborts the operation.
 *
 * @param archive   Archive to read members from.
 * @param parseSink Sink that receives the unpacked file system objects.
 * @return The largest modification time found among the members, or 0
 *         if the archive has no members.
 * @throws Error if a member has no name, a symlink has no target, a
 *         member's data cannot be read, or a member has an unsupported
 *         file type.
 */
time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink)
{
    time_t lastModified = 0;

    /* Allocate the read buffer once for the whole archive instead of
       once per chunk. It is kept on the heap because 128 KiB is a lot
       to put on the stack. */
    std::vector<unsigned char> buf(128 * 1024);

    for (;;) {
        // FIXME: merge with extract_archive
        struct archive_entry * entry;
        int r = archive_read_next_header(archive.archive, &entry);
        if (r == ARCHIVE_EOF) break;
        auto path = archive_entry_pathname(entry);
        if (!path)
            throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
        // Warnings are reported but do not abort unpacking; every
        // other status is handed to archive.check().
        if (r == ARCHIVE_WARN)
            warn(archive_error_string(archive.archive));
        else
            archive.check(r);

        lastModified = std::max(lastModified, archive_entry_mtime(entry));

        switch (archive_entry_filetype(entry)) {

        case AE_IFDIR:
            parseSink.createDirectory(path);
            break;

        case AE_IFREG: {
            parseSink.createRegularFile(path, [&](auto & crf) {
                // Only the owner-execute bit is consulted.
                if (archive_entry_mode(entry) & S_IXUSR)
                    crf.isExecutable();

                while (true) {
                    auto n = archive_read_data(archive.archive, buf.data(), buf.size());
                    if (n < 0)
                        throw Error("cannot read file '%s' from tarball", path);
                    if (n == 0) break;
                    crf(std::string_view {
                        reinterpret_cast<const char *>(buf.data()),
                        static_cast<size_t>(n),
                    });
                }
            });
            break;
        }

        case AE_IFLNK: {
            auto target = archive_entry_symlink(entry);
            // libarchive returns NULL when no target is available;
            // do not hand a null pointer on to the sink.
            if (!target)
                throw Error("cannot get symlink target of '%s' in tarball", path);
            parseSink.createSymlink(path, target);
            break;
        }

        default:
            throw Error("file '%s' in tarball has unsupported file type", path);
        }
    }

    return lastModified;
}
}

View file

@ -2,6 +2,7 @@
///@file
#include "serialise.hh"
#include "fs-sink.hh"
#include <archive.h>
namespace nix {
@ -29,4 +30,6 @@ void unpackTarfile(Source & source, const Path & destDir);
void unpackTarfile(const Path & tarFile, const Path & destDir);
/**
 * Unpack the members of `archive` (directories, regular files and
 * symlinks) into `parseSink`.
 *
 * @return The largest modification time found among the members, or 0
 *         if the archive has no members.
 */
time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink);
}