diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 1255f75b2..32f665aa0 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -7,6 +7,7 @@ #include "processes.hh" #include "signals.hh" #include "users.hh" +#include "fs-sink.hh" #include #include @@ -23,9 +24,6 @@ #include #include -#include "tarfile.hh" -#include - #include #include #include @@ -317,158 +315,6 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return std::nullopt; } - TarballInfo importTarball(Source & source) override - { - TarArchive archive(source); - - struct PendingDir - { - std::string name; - TreeBuilder builder; - }; - - std::vector pendingDirs; - - auto pushBuilder = [&](std::string name) - { - git_treebuilder * b; - if (git_treebuilder_new(&b, *this, nullptr)) - throw Error("creating a tree builder: %s", git_error_last()->message); - pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) }); - }; - - auto popBuilder = [&]() -> std::pair - { - assert(!pendingDirs.empty()); - auto pending = std::move(pendingDirs.back()); - git_oid oid; - if (git_treebuilder_write(&oid, pending.builder.get())) - throw Error("creating a tree object: %s", git_error_last()->message); - pendingDirs.pop_back(); - return {oid, pending.name}; - }; - - auto addToTree = [&](const std::string & name, const git_oid & oid, git_filemode_t mode) - { - assert(!pendingDirs.empty()); - auto & pending = pendingDirs.back(); - if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode)) - throw Error("adding a file to a tree builder: %s", git_error_last()->message); - }; - - auto updateBuilders = [&](std::span names) - { - // Find the common prefix of pendingDirs and names. - size_t prefixLen = 0; - for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen) - if (names[prefixLen] != pendingDirs[prefixLen + 1].name) - break; - - // Finish the builders that are not part of the common prefix. - for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) { - auto [oid, name] = popBuilder(); - addToTree(name, oid, GIT_FILEMODE_TREE); - } - - // Create builders for the new directories. - for (auto n = prefixLen; n < names.size(); ++n) - pushBuilder(names[n]); - }; - - pushBuilder(""); - - size_t componentsToStrip = 1; - - time_t lastModified = 0; - - for (;;) { - // FIXME: merge with extract_archive - struct archive_entry * entry; - int r = archive_read_next_header(archive.archive, &entry); - if (r == ARCHIVE_EOF) break; - auto path = archive_entry_pathname(entry); - if (!path) - throw Error("cannot get archive member name: %s", archive_error_string(archive.archive)); - if (r == ARCHIVE_WARN) - warn(archive_error_string(archive.archive)); - else - archive.check(r); - - lastModified = std::max(lastModified, archive_entry_mtime(entry)); - - auto pathComponents = tokenizeString>(path, "/"); - - std::span pathComponents2{pathComponents}; - - if (pathComponents2.size() <= componentsToStrip) continue; - pathComponents2 = pathComponents2.subspan(componentsToStrip); - - updateBuilders( - archive_entry_filetype(entry) == AE_IFDIR - ? pathComponents2 - : pathComponents2.first(pathComponents2.size() - 1)); - - switch (archive_entry_filetype(entry)) { - - case AE_IFDIR: - // Nothing to do right now. - break; - - case AE_IFREG: { - - git_writestream * stream = nullptr; - if (git_blob_create_from_stream(&stream, *this, nullptr)) - throw Error("creating a blob stream object: %s", git_error_last()->message); - - while (true) { - std::vector buf(128 * 1024); - auto n = archive_read_data(archive.archive, buf.data(), buf.size()); - if (n < 0) - throw Error("cannot read file '%s' from tarball", path); - if (n == 0) break; - if (stream->write(stream, (const char *) buf.data(), n)) - throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message); - } - - git_oid oid; - if (git_blob_create_from_stream_commit(&oid, stream)) - throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message); - - addToTree(*pathComponents.rbegin(), oid, - archive_entry_mode(entry) & S_IXUSR - ? GIT_FILEMODE_BLOB_EXECUTABLE - : GIT_FILEMODE_BLOB); - - break; - } - - case AE_IFLNK: { - auto target = archive_entry_symlink(entry); - - git_oid oid; - if (git_blob_create_from_buffer(&oid, *this, target, strlen(target))) - throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message); - - addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK); - - break; - } - - default: - throw Error("file '%s' in tarball has unsupported file type", path); - } - } - - updateBuilders({}); - - auto [oid, _name] = popBuilder(); - - return TarballInfo { - .treeHash = toHash(oid), - .lastModified = lastModified - }; - } - std::vector> getSubmodules(const Hash & rev, bool exportIgnore) override; std::string resolveSubmoduleUrl( @@ -511,6 +357,8 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override; + ref getFileSystemObjectSink() override; + static int sidebandProgressCallback(const char * str, int len, void * payload) { auto act = (Activity *) payload; @@ -925,6 +773,154 @@ struct GitExportIgnoreInputAccessor : CachingFilteringInputAccessor { }; +struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink +{ + ref repo; + + struct PendingDir + { + std::string name; + TreeBuilder builder; + }; + + std::vector pendingDirs; + + size_t componentsToStrip = 1; + + void pushBuilder(std::string name) + { + git_treebuilder * b; + if (git_treebuilder_new(&b, *repo, nullptr)) + throw Error("creating a tree builder: %s", git_error_last()->message); + pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) }); + }; + + GitFileSystemObjectSinkImpl(ref repo) : repo(repo) + { + pushBuilder(""); + } + + std::pair popBuilder() + { + assert(!pendingDirs.empty()); + auto pending = std::move(pendingDirs.back()); + git_oid oid; + if (git_treebuilder_write(&oid, pending.builder.get())) + throw Error("creating a tree object: %s", git_error_last()->message); + pendingDirs.pop_back(); + return {oid, pending.name}; + }; + + void addToTree(const std::string & name, const git_oid & oid, git_filemode_t mode) + { + assert(!pendingDirs.empty()); + auto & pending = pendingDirs.back(); + if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode)) + throw Error("adding a file to a tree builder: %s", git_error_last()->message); + }; + + void updateBuilders(std::span names) + { + // Find the common prefix of pendingDirs and names. + size_t prefixLen = 0; + for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen) + if (names[prefixLen] != pendingDirs[prefixLen + 1].name) + break; + + // Finish the builders that are not part of the common prefix. + for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) { + auto [oid, name] = popBuilder(); + addToTree(name, oid, GIT_FILEMODE_TREE); + } + + // Create builders for the new directories. + for (auto n = prefixLen; n < names.size(); ++n) + pushBuilder(names[n]); + }; + + bool prepareDirs(const std::vector & pathComponents, bool isDir) + { + std::span pathComponents2{pathComponents}; + + if (pathComponents2.size() <= componentsToStrip) return false; + pathComponents2 = pathComponents2.subspan(componentsToStrip); + + updateBuilders( + isDir + ? pathComponents2 + : pathComponents2.first(pathComponents2.size() - 1)); + + return true; + } + + void createRegularFile( + const Path & path, + std::function func) override + { + auto pathComponents = tokenizeString>(path, "/"); + if (!prepareDirs(pathComponents, false)) return; + + git_writestream * stream = nullptr; + if (git_blob_create_from_stream(&stream, *repo, nullptr)) + throw Error("creating a blob stream object: %s", git_error_last()->message); + + struct CRF : CreateRegularFileSink { + const Path & path; + GitFileSystemObjectSinkImpl & back; + git_writestream * stream; + bool executable = false; + CRF(const Path & path, GitFileSystemObjectSinkImpl & back, git_writestream * stream) + : path(path), back(back), stream(stream) + {} + void operator () (std::string_view data) override + { + if (stream->write(stream, data.data(), data.size())) + throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message); + } + void isExecutable() override + { + executable = true; + } + } crf { path, *this, stream }; + func(crf); + + git_oid oid; + if (git_blob_create_from_stream_commit(&oid, stream)) + throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message); + + addToTree(*pathComponents.rbegin(), oid, + crf.executable + ? GIT_FILEMODE_BLOB_EXECUTABLE + : GIT_FILEMODE_BLOB); + } + + void createDirectory(const Path & path) override + { + auto pathComponents = tokenizeString>(path, "/"); + (void) prepareDirs(pathComponents, true); + } + + void createSymlink(const Path & path, const std::string & target) override + { + auto pathComponents = tokenizeString>(path, "/"); + if (!prepareDirs(pathComponents, false)) return; + + git_oid oid; + if (git_blob_create_from_buffer(&oid, *repo, target.c_str(), target.size())) + throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message); + + addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK); + } + + Hash sync() override { + updateBuilders({}); + + auto [oid, _name] = popBuilder(); + + return toHash(oid); + } +}; + ref GitRepoImpl::getRawAccessor(const Hash & rev) { auto self = ref(shared_from_this()); @@ -959,6 +955,11 @@ ref GitRepoImpl::getAccessor(const WorkdirInfo & wd, bool exportI } } +ref GitRepoImpl::getFileSystemObjectSink() +{ + return make_ref(ref(shared_from_this())); +} + std::vector> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore) { /* Read the .gitmodules files from this revision. */ @@ -985,11 +986,4 @@ std::vector> GitRepoImpl::getSubmodules return result; } -ref getTarballCache() -{ - static auto repoDir = std::filesystem::path(getCacheDir()) / "nix" / "tarball-cache"; - - return make_ref(repoDir, true, true); -} - } diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index b54559def..029d39741 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -2,11 +2,20 @@ #include "filtering-input-accessor.hh" #include "input-accessor.hh" +#include "fs-sink.hh" namespace nix { namespace fetchers { struct PublicKey; } +struct GitFileSystemObjectSink : FileSystemObjectSink +{ + /** + * Flush builder and return a final Git hash. + */ + virtual Hash sync() = 0; +}; + struct GitRepo { virtual ~GitRepo() @@ -64,20 +73,14 @@ struct GitRepo const std::string & url, const std::string & base) = 0; - struct TarballInfo - { - Hash treeHash; - time_t lastModified; - }; - - virtual TarballInfo importTarball(Source & source) = 0; - virtual bool hasObject(const Hash & oid) = 0; virtual ref getAccessor(const Hash & rev, bool exportIgnore) = 0; virtual ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0; + virtual ref getFileSystemObjectSink() = 0; + virtual void fetch( const std::string & url, const std::string & refspec, @@ -92,6 +95,4 @@ struct GitRepo const std::vector & publicKeys) = 0; }; -ref getTarballCache(); - } diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 1cfc142a5..8b3e6ff20 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -8,7 +8,9 @@ #include "fetchers.hh" #include "fetch-settings.hh" #include "tarball.hh" +#include "tarfile.hh" #include "git-utils.hh" +#include "tarball-cache.hh" #include #include @@ -191,7 +193,7 @@ struct GitArchiveInputScheme : InputScheme virtual DownloadUrl getDownloadUrl(const Input & input) const = 0; - std::pair downloadArchive(ref store, Input input) const + std::pair downloadArchive(ref store, Input input) const { if (!maybeGetStrAttr(input.attrs, "ref")) input.attrs.insert_or_assign("ref", "HEAD"); @@ -218,7 +220,7 @@ struct GitArchiveInputScheme : InputScheme auto treeHash = getRevAttr(*treeHashAttrs, "treeHash"); auto lastModified = getIntAttr(*lastModifiedAttrs, "lastModified"); if (getTarballCache()->hasObject(treeHash)) - return {std::move(input), GitRepo::TarballInfo { .treeHash = treeHash, .lastModified = (time_t) lastModified }}; + return {std::move(input), TarballInfo { .treeHash = treeHash, .lastModified = (time_t) lastModified }}; else debug("Git tree with hash '%s' has disappeared from the cache, refetching...", treeHash.gitRev()); } @@ -233,7 +235,14 @@ struct GitArchiveInputScheme : InputScheme getFileTransfer()->download(std::move(req), sink); }); - auto tarballInfo = getTarballCache()->importTarball(*source); + TarArchive archive { *source }; + auto parseSink = getTarballCache()->getFileSystemObjectSink(); + auto lastModified = unpackTarfileToSink(archive, *parseSink); + + TarballInfo tarballInfo { + .treeHash = parseSink->sync(), + .lastModified = lastModified + }; cache->upsert(treeHashKey, Attrs{{"treeHash", tarballInfo.treeHash.gitRev()}}); cache->upsert(lastModifiedKey, Attrs{{"lastModified", (uint64_t) tarballInfo.lastModified}}); diff --git a/src/libfetchers/tarball-cache.cc b/src/libfetchers/tarball-cache.cc new file mode 100644 index 000000000..bb2c51973 --- /dev/null +++ b/src/libfetchers/tarball-cache.cc @@ -0,0 +1,13 @@ +#include "tarball-cache.hh" +#include "users.hh" + +namespace nix::fetchers { + +ref getTarballCache() +{ + static auto repoDir = std::filesystem::path(getCacheDir()) / "nix" / "tarball-cache"; + + return GitRepo::openRepo(repoDir, true, true); +} + +} diff --git a/src/libfetchers/tarball-cache.hh b/src/libfetchers/tarball-cache.hh new file mode 100644 index 000000000..e1517038b --- /dev/null +++ b/src/libfetchers/tarball-cache.hh @@ -0,0 +1,17 @@ +#pragma once +///@file + +#include "ref.hh" +#include "git-utils.hh" + +namespace nix::fetchers { + +struct TarballInfo +{ + Hash treeHash; + time_t lastModified; +}; + +ref getTarballCache(); + +} diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index 4dfb5b329..ae577819a 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -26,6 +26,8 @@ struct CreateRegularFileSink : Sink struct FileSystemObjectSink { + virtual ~FileSystemObjectSink() = default; + virtual void createDirectory(const Path & path) = 0; /** diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc index 187b3e948..3bb6694f8 100644 --- a/src/libutil/tarfile.cc +++ b/src/libutil/tarfile.cc @@ -132,4 +132,66 @@ void unpackTarfile(const Path & tarFile, const Path & destDir) extract_archive(archive, destDir); } +time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink) +{ + time_t lastModified = 0; + + for (;;) { + // FIXME: merge with extract_archive + struct archive_entry * entry; + int r = archive_read_next_header(archive.archive, &entry); + if (r == ARCHIVE_EOF) break; + auto path = archive_entry_pathname(entry); + if (!path) + throw Error("cannot get archive member name: %s", archive_error_string(archive.archive)); + if (r == ARCHIVE_WARN) + warn(archive_error_string(archive.archive)); + else + archive.check(r); + + lastModified = std::max(lastModified, archive_entry_mtime(entry)); + + switch (archive_entry_filetype(entry)) { + + case AE_IFDIR: + parseSink.createDirectory(path); + break; + + case AE_IFREG: { + parseSink.createRegularFile(path, [&](auto & crf) { + if (archive_entry_mode(entry) & S_IXUSR) + crf.isExecutable(); + + while (true) { + std::vector buf(128 * 1024); + auto n = archive_read_data(archive.archive, buf.data(), buf.size()); + if (n < 0) + throw Error("cannot read file '%s' from tarball", path); + if (n == 0) break; + crf(std::string_view { + (const char *) buf.data(), + (size_t) n, + }); + } + }); + + break; + } + + case AE_IFLNK: { + auto target = archive_entry_symlink(entry); + + parseSink.createSymlink(path, target); + + break; + } + + default: + throw Error("file '%s' in tarball has unsupported file type", path); + } + } + + return lastModified; +} + } diff --git a/src/libutil/tarfile.hh b/src/libutil/tarfile.hh index 237d18c31..6a9c42149 100644 --- a/src/libutil/tarfile.hh +++ b/src/libutil/tarfile.hh @@ -2,6 +2,7 @@ ///@file #include "serialise.hh" +#include "fs-sink.hh" #include namespace nix { @@ -29,4 +30,6 @@ void unpackTarfile(Source & source, const Path & destDir); void unpackTarfile(const Path & tarFile, const Path & destDir); +time_t unpackTarfileToSink(TarArchive & archive, FileSystemObjectSink & parseSink); + }