mirror of
https://github.com/privatevoid-net/nix-super.git
synced 2024-11-10 08:16:15 +02:00
Split GitRepoImpl::importTarball
There is now a separation of: 1. A `FileSystemObjectSink` for writing to git repos 2. Adapting libarchive to use that parse sink. The prepares a proper separation of concerns.
This commit is contained in:
parent
54354eaecf
commit
ba6a5f06ee
3 changed files with 249 additions and 156 deletions
|
@ -7,6 +7,7 @@
|
|||
#include "processes.hh"
|
||||
#include "signals.hh"
|
||||
#include "users.hh"
|
||||
#include "fs-sink.hh"
|
||||
|
||||
#include <git2/attr.h>
|
||||
#include <git2/blob.h>
|
||||
|
@ -23,9 +24,6 @@
|
|||
#include <git2/submodule.h>
|
||||
#include <git2/tree.h>
|
||||
|
||||
#include "tarfile.hh"
|
||||
#include <archive_entry.h>
|
||||
|
||||
#include <iostream>
|
||||
#include <unordered_set>
|
||||
#include <queue>
|
||||
|
@ -317,157 +315,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
TarballInfo importTarball(Source & source) override
|
||||
{
|
||||
TarArchive archive(source);
|
||||
|
||||
struct PendingDir
|
||||
{
|
||||
std::string name;
|
||||
TreeBuilder builder;
|
||||
};
|
||||
|
||||
std::vector<PendingDir> pendingDirs;
|
||||
|
||||
auto pushBuilder = [&](std::string name)
|
||||
{
|
||||
git_treebuilder * b;
|
||||
if (git_treebuilder_new(&b, *this, nullptr))
|
||||
throw Error("creating a tree builder: %s", git_error_last()->message);
|
||||
pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) });
|
||||
};
|
||||
|
||||
auto popBuilder = [&]() -> std::pair<git_oid, std::string>
|
||||
{
|
||||
assert(!pendingDirs.empty());
|
||||
auto pending = std::move(pendingDirs.back());
|
||||
git_oid oid;
|
||||
if (git_treebuilder_write(&oid, pending.builder.get()))
|
||||
throw Error("creating a tree object: %s", git_error_last()->message);
|
||||
pendingDirs.pop_back();
|
||||
return {oid, pending.name};
|
||||
};
|
||||
|
||||
auto addToTree = [&](const std::string & name, const git_oid & oid, git_filemode_t mode)
|
||||
{
|
||||
assert(!pendingDirs.empty());
|
||||
auto & pending = pendingDirs.back();
|
||||
if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode))
|
||||
throw Error("adding a file to a tree builder: %s", git_error_last()->message);
|
||||
};
|
||||
|
||||
auto updateBuilders = [&](std::span<const std::string> names)
|
||||
{
|
||||
// Find the common prefix of pendingDirs and names.
|
||||
size_t prefixLen = 0;
|
||||
for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen)
|
||||
if (names[prefixLen] != pendingDirs[prefixLen + 1].name)
|
||||
break;
|
||||
|
||||
// Finish the builders that are not part of the common prefix.
|
||||
for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) {
|
||||
auto [oid, name] = popBuilder();
|
||||
addToTree(name, oid, GIT_FILEMODE_TREE);
|
||||
}
|
||||
|
||||
// Create builders for the new directories.
|
||||
for (auto n = prefixLen; n < names.size(); ++n)
|
||||
pushBuilder(names[n]);
|
||||
};
|
||||
|
||||
pushBuilder("");
|
||||
|
||||
size_t componentsToStrip = 1;
|
||||
|
||||
time_t lastModified = 0;
|
||||
|
||||
for (;;) {
|
||||
// FIXME: merge with extract_archive
|
||||
struct archive_entry * entry;
|
||||
int r = archive_read_next_header(archive.archive, &entry);
|
||||
if (r == ARCHIVE_EOF) break;
|
||||
auto path = archive_entry_pathname(entry);
|
||||
if (!path)
|
||||
throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
|
||||
if (r == ARCHIVE_WARN)
|
||||
warn(archive_error_string(archive.archive));
|
||||
else
|
||||
archive.check(r);
|
||||
|
||||
lastModified = std::max(lastModified, archive_entry_mtime(entry));
|
||||
|
||||
auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
|
||||
|
||||
std::span<const std::string> pathComponents2{pathComponents};
|
||||
|
||||
if (pathComponents2.size() <= componentsToStrip) continue;
|
||||
pathComponents2 = pathComponents2.subspan(componentsToStrip);
|
||||
|
||||
updateBuilders(
|
||||
archive_entry_filetype(entry) == AE_IFDIR
|
||||
? pathComponents2
|
||||
: pathComponents2.first(pathComponents2.size() - 1));
|
||||
|
||||
switch (archive_entry_filetype(entry)) {
|
||||
|
||||
case AE_IFDIR:
|
||||
// Nothing to do right now.
|
||||
break;
|
||||
|
||||
case AE_IFREG: {
|
||||
|
||||
git_writestream * stream = nullptr;
|
||||
if (git_blob_create_from_stream(&stream, *this, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
|
||||
while (true) {
|
||||
std::vector<unsigned char> buf(128 * 1024);
|
||||
auto n = archive_read_data(archive.archive, buf.data(), buf.size());
|
||||
if (n < 0)
|
||||
throw Error("cannot read file '%s' from tarball", path);
|
||||
if (n == 0) break;
|
||||
if (stream->write(stream, (const char *) buf.data(), n))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
}
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_stream_commit(&oid, stream))
|
||||
throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid,
|
||||
archive_entry_mode(entry) & S_IXUSR
|
||||
? GIT_FILEMODE_BLOB_EXECUTABLE
|
||||
: GIT_FILEMODE_BLOB);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case AE_IFLNK: {
|
||||
auto target = archive_entry_symlink(entry);
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_buffer(&oid, *this, target, strlen(target)))
|
||||
throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message);
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw Error("file '%s' in tarball has unsupported file type", path);
|
||||
}
|
||||
}
|
||||
|
||||
updateBuilders({});
|
||||
|
||||
auto [oid, _name] = popBuilder();
|
||||
|
||||
return TarballInfo {
|
||||
.treeHash = toHash(oid),
|
||||
.lastModified = lastModified
|
||||
};
|
||||
}
|
||||
TarballInfo importTarball(Source & source) override;
|
||||
|
||||
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override;
|
||||
|
||||
|
@ -511,6 +359,8 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
|
||||
ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override;
|
||||
|
||||
ref<GitFileSystemObjectSink> getFileSystemObjectSink() override;
|
||||
|
||||
static int sidebandProgressCallback(const char * str, int len, void * payload)
|
||||
{
|
||||
auto act = (Activity *) payload;
|
||||
|
@ -884,6 +734,154 @@ struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor {
|
|||
|
||||
};
|
||||
|
||||
struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink
|
||||
{
|
||||
ref<GitRepoImpl> repo;
|
||||
|
||||
struct PendingDir
|
||||
{
|
||||
std::string name;
|
||||
TreeBuilder builder;
|
||||
};
|
||||
|
||||
std::vector<PendingDir> pendingDirs;
|
||||
|
||||
size_t componentsToStrip = 1;
|
||||
|
||||
void pushBuilder(std::string name)
|
||||
{
|
||||
git_treebuilder * b;
|
||||
if (git_treebuilder_new(&b, *repo, nullptr))
|
||||
throw Error("creating a tree builder: %s", git_error_last()->message);
|
||||
pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) });
|
||||
};
|
||||
|
||||
GitFileSystemObjectSinkImpl(ref<GitRepoImpl> repo) : repo(repo)
|
||||
{
|
||||
pushBuilder("");
|
||||
}
|
||||
|
||||
std::pair<git_oid, std::string> popBuilder()
|
||||
{
|
||||
assert(!pendingDirs.empty());
|
||||
auto pending = std::move(pendingDirs.back());
|
||||
git_oid oid;
|
||||
if (git_treebuilder_write(&oid, pending.builder.get()))
|
||||
throw Error("creating a tree object: %s", git_error_last()->message);
|
||||
pendingDirs.pop_back();
|
||||
return {oid, pending.name};
|
||||
};
|
||||
|
||||
void addToTree(const std::string & name, const git_oid & oid, git_filemode_t mode)
|
||||
{
|
||||
assert(!pendingDirs.empty());
|
||||
auto & pending = pendingDirs.back();
|
||||
if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode))
|
||||
throw Error("adding a file to a tree builder: %s", git_error_last()->message);
|
||||
};
|
||||
|
||||
void updateBuilders(std::span<const std::string> names)
|
||||
{
|
||||
// Find the common prefix of pendingDirs and names.
|
||||
size_t prefixLen = 0;
|
||||
for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen)
|
||||
if (names[prefixLen] != pendingDirs[prefixLen + 1].name)
|
||||
break;
|
||||
|
||||
// Finish the builders that are not part of the common prefix.
|
||||
for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) {
|
||||
auto [oid, name] = popBuilder();
|
||||
addToTree(name, oid, GIT_FILEMODE_TREE);
|
||||
}
|
||||
|
||||
// Create builders for the new directories.
|
||||
for (auto n = prefixLen; n < names.size(); ++n)
|
||||
pushBuilder(names[n]);
|
||||
};
|
||||
|
||||
bool prepareDirs(const std::vector<std::string> & pathComponents, bool isDir)
|
||||
{
|
||||
std::span<const std::string> pathComponents2{pathComponents};
|
||||
|
||||
if (pathComponents2.size() <= componentsToStrip) return false;
|
||||
pathComponents2 = pathComponents2.subspan(componentsToStrip);
|
||||
|
||||
updateBuilders(
|
||||
isDir
|
||||
? pathComponents2
|
||||
: pathComponents2.first(pathComponents2.size() - 1));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void createRegularFile(
|
||||
const Path & path,
|
||||
std::function<void(CreateRegularFileSink &)> func) override
|
||||
{
|
||||
auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
|
||||
if (!prepareDirs(pathComponents, false)) return;
|
||||
|
||||
git_writestream * stream = nullptr;
|
||||
if (git_blob_create_from_stream(&stream, *repo, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
|
||||
struct CRF : CreateRegularFileSink {
|
||||
const Path & path;
|
||||
GitFileSystemObjectSinkImpl & back;
|
||||
git_writestream * stream;
|
||||
bool executable = false;
|
||||
CRF(const Path & path, GitFileSystemObjectSinkImpl & back, git_writestream * stream)
|
||||
: path(path), back(back), stream(stream)
|
||||
{}
|
||||
void operator () (std::string_view data) override
|
||||
{
|
||||
if (stream->write(stream, data.data(), data.size()))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
}
|
||||
void isExecutable() override
|
||||
{
|
||||
executable = true;
|
||||
}
|
||||
} crf { path, *this, stream };
|
||||
func(crf);
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_stream_commit(&oid, stream))
|
||||
throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid,
|
||||
crf.executable
|
||||
? GIT_FILEMODE_BLOB_EXECUTABLE
|
||||
: GIT_FILEMODE_BLOB);
|
||||
}
|
||||
|
||||
void createDirectory(const Path & path) override
|
||||
{
|
||||
auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
|
||||
(void) prepareDirs(pathComponents, true);
|
||||
}
|
||||
|
||||
void createSymlink(const Path & path, const std::string & target) override
|
||||
{
|
||||
auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
|
||||
if (!prepareDirs(pathComponents, false)) return;
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_buffer(&oid, *repo, target.c_str(), target.size()))
|
||||
throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message);
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
|
||||
}
|
||||
|
||||
Hash sync() override {
|
||||
updateBuilders({});
|
||||
|
||||
auto [oid, _name] = popBuilder();
|
||||
|
||||
return toHash(oid);
|
||||
}
|
||||
};
|
||||
|
||||
ref<GitInputAccessor> GitRepoImpl::getRawAccessor(const Hash & rev)
|
||||
{
|
||||
auto self = ref<GitRepoImpl>(shared_from_this());
|
||||
|
@ -918,6 +916,11 @@ ref<InputAccessor> GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportI
|
|||
}
|
||||
}
|
||||
|
||||
ref<GitFileSystemObjectSink> GitRepoImpl::getFileSystemObjectSink()
|
||||
{
|
||||
return make_ref<GitFileSystemObjectSinkImpl>(ref<GitRepoImpl>(shared_from_this()));
|
||||
}
|
||||
|
||||
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore)
|
||||
{
|
||||
/* Read the .gitmodules files from this revision. */
|
||||
|
@ -951,4 +954,81 @@ ref<GitRepo> getTarballCache()
|
|||
return make_ref<GitRepoImpl>(repoDir, true, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#include "tarfile.hh"
|
||||
#include <archive_entry.h>
|
||||
|
||||
namespace nix {
|
||||
|
||||
GitRepo::TarballInfo GitRepoImpl::importTarball(Source & source)
|
||||
{
|
||||
TarArchive archive { source };
|
||||
|
||||
auto parseSink = getFileSystemObjectSink();
|
||||
|
||||
time_t lastModified = 0;
|
||||
|
||||
for (;;) {
|
||||
// FIXME: merge with extract_archive
|
||||
struct archive_entry * entry;
|
||||
int r = archive_read_next_header(archive.archive, &entry);
|
||||
if (r == ARCHIVE_EOF) break;
|
||||
auto path = archive_entry_pathname(entry);
|
||||
if (!path)
|
||||
throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
|
||||
if (r == ARCHIVE_WARN)
|
||||
warn(archive_error_string(archive.archive));
|
||||
else
|
||||
archive.check(r);
|
||||
|
||||
lastModified = std::max(lastModified, archive_entry_mtime(entry));
|
||||
|
||||
switch (archive_entry_filetype(entry)) {
|
||||
|
||||
case AE_IFDIR:
|
||||
parseSink->createDirectory(path);
|
||||
break;
|
||||
|
||||
case AE_IFREG: {
|
||||
parseSink->createRegularFile(path, [&](auto & crf) {
|
||||
if (archive_entry_mode(entry) & S_IXUSR)
|
||||
crf.isExecutable();
|
||||
|
||||
while (true) {
|
||||
std::vector<unsigned char> buf(128 * 1024);
|
||||
auto n = archive_read_data(archive.archive, buf.data(), buf.size());
|
||||
if (n < 0)
|
||||
throw Error("cannot read file '%s' from tarball", path);
|
||||
if (n == 0) break;
|
||||
crf(std::string_view {
|
||||
(const char *) buf.data(),
|
||||
(size_t) n,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case AE_IFLNK: {
|
||||
auto target = archive_entry_symlink(entry);
|
||||
|
||||
parseSink->createSymlink(path, target);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw Error("file '%s' in tarball has unsupported file type", path);
|
||||
}
|
||||
}
|
||||
|
||||
return TarballInfo {
|
||||
.treeHash = parseSink->sync(),
|
||||
.lastModified = lastModified
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
|
@ -2,11 +2,20 @@
|
|||
|
||||
#include "filtering-input-accessor.hh"
|
||||
#include "input-accessor.hh"
|
||||
#include "fs-sink.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
namespace fetchers { struct PublicKey; }
|
||||
|
||||
struct GitFileSystemObjectSink : FileSystemObjectSink
|
||||
{
|
||||
/**
|
||||
* Flush builder and return a final Git hash.
|
||||
*/
|
||||
virtual Hash sync() = 0;
|
||||
};
|
||||
|
||||
struct GitRepo
|
||||
{
|
||||
virtual ~GitRepo()
|
||||
|
@ -70,14 +79,14 @@ struct GitRepo
|
|||
time_t lastModified;
|
||||
};
|
||||
|
||||
virtual TarballInfo importTarball(Source & source) = 0;
|
||||
|
||||
virtual bool hasObject(const Hash & oid) = 0;
|
||||
|
||||
virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;
|
||||
|
||||
virtual ref<InputAccessor> getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0;
|
||||
|
||||
virtual ref<GitFileSystemObjectSink> getFileSystemObjectSink() = 0;
|
||||
|
||||
virtual void fetch(
|
||||
const std::string & url,
|
||||
const std::string & refspec,
|
||||
|
@ -90,6 +99,8 @@ struct GitRepo
|
|||
virtual void verifyCommit(
|
||||
const Hash & rev,
|
||||
const std::vector<fetchers::PublicKey> & publicKeys) = 0;
|
||||
|
||||
virtual TarballInfo importTarball(Source & source) = 0;
|
||||
};
|
||||
|
||||
ref<GitRepo> getTarballCache();
|
||||
|
|
|
@ -26,6 +26,8 @@ struct CreateRegularFileSink : Sink
|
|||
|
||||
struct FileSystemObjectSink
|
||||
{
|
||||
virtual ~FileSystemObjectSink() = default;
|
||||
|
||||
virtual void createDirectory(const Path & path) = 0;
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue