From 006d862d303aa871ab53cbccaba7118fbcb433ef Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Thu, 17 Feb 2022 00:07:20 +0100 Subject: [PATCH] GitArchiveInputScheme: Use zip files to avoid unpacking to disk --- configure.ac | 5 +- flake.nix | 1 + src/libfetchers/github.cc | 36 ++++-- src/libfetchers/input-accessor.hh | 2 + src/libfetchers/local.mk | 2 +- src/libfetchers/zip-input-accessor.cc | 161 ++++++++++++++++++++++++++ 6 files changed, 193 insertions(+), 14 deletions(-) create mode 100644 src/libfetchers/zip-input-accessor.cc diff --git a/configure.ac b/configure.ac index 8a01c33ec..eb64c9463 100644 --- a/configure.ac +++ b/configure.ac @@ -163,13 +163,16 @@ fi PKG_CHECK_MODULES([OPENSSL], [libcrypto], [CXXFLAGS="$OPENSSL_CFLAGS $CXXFLAGS"]) -# Checks for libarchive +# Look for libarchive. PKG_CHECK_MODULES([LIBARCHIVE], [libarchive >= 3.1.2], [CXXFLAGS="$LIBARCHIVE_CFLAGS $CXXFLAGS"]) # Workaround until https://github.com/libarchive/libarchive/issues/1446 is fixed if test "$shared" != yes; then LIBARCHIVE_LIBS+=' -lz' fi +# Look for libzip. +PKG_CHECK_MODULES([LIBZIP], [libzip]) + # Look for SQLite, a required dependency. 
PKG_CHECK_MODULES([SQLITE3], [sqlite3 >= 3.6.19], [CXXFLAGS="$SQLITE3_CFLAGS $CXXFLAGS"]) diff --git a/flake.nix b/flake.nix index 87b00edf4..5b9eb7132 100644 --- a/flake.nix +++ b/flake.nix @@ -111,6 +111,7 @@ bzip2 xz brotli editline openssl sqlite libarchive + libzip boost lowdown-nix gtest diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index 58b6e7c04..422d14142 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -183,10 +183,8 @@ struct GitArchiveInputScheme : InputScheme virtual DownloadUrl getDownloadUrl(const Input & input) const = 0; - std::pair fetch(ref store, const Input & _input) override + std::pair downloadArchive(ref store, Input input) { - Input input(_input); - if (!maybeGetStrAttr(input.attrs, "ref")) input.attrs.insert_or_assign("ref", "HEAD"); auto rev = input.getRev(); @@ -196,32 +194,46 @@ struct GitArchiveInputScheme : InputScheme input.attrs.insert_or_assign("rev", rev->gitRev()); Attrs lockedAttrs({ - {"type", "git-tarball"}, + {"type", "git-zipball"}, {"rev", rev->gitRev()}, }); if (auto res = getCache()->lookup(store, lockedAttrs)) { - input.attrs.insert_or_assign("lastModified", getIntAttr(res->first, "lastModified")); + // FIXME + //input.attrs.insert_or_assign("lastModified", getIntAttr(res->first, "lastModified")); return {std::move(res->second), input}; } auto url = getDownloadUrl(input); - auto [tree, lastModified] = downloadTarball(store, url.url, input.getName(), true, url.headers); + auto res = downloadFile(store, url.url, input.getName(), true, url.headers); - input.attrs.insert_or_assign("lastModified", uint64_t(lastModified)); + //input.attrs.insert_or_assign("lastModified", uint64_t(lastModified)); getCache()->add( store, lockedAttrs, { {"rev", rev->gitRev()}, - {"lastModified", uint64_t(lastModified)} + // FIXME: get lastModified + //{"lastModified", uint64_t(lastModified)} }, - tree.storePath, + res.storePath, true); - return {std::move(tree.storePath), input}; + return 
{res.storePath, input}; + } + + std::pair fetch(ref store, const Input & _input) override + { + throw UnimplementedError("GitArchive::fetch()"); + } + + std::pair, Input> lazyFetch(ref store, const Input & input) override + { + auto [storePath, input2] = downloadArchive(store, input); + + return {makeZipInputAccessor(store->toRealPath(storePath)), input2}; } }; @@ -262,7 +274,7 @@ struct GitHubInputScheme : GitArchiveInputScheme // FIXME: use regular /archive URLs instead? api.github.com // might have stricter rate limits. auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com"); - auto url = fmt("https://api.%s/repos/%s/%s/tarball/%s", // FIXME: check if this is correct for self hosted instances + auto url = fmt("https://api.%s/repos/%s/%s/zipball/%s", // FIXME: check if this is correct for self hosted instances host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), input.getRev()->to_string(Base16, false)); @@ -329,7 +341,7 @@ struct GitLabInputScheme : GitArchiveInputScheme // is 10 reqs/sec/ip-addr. 
See // https://docs.gitlab.com/ee/user/gitlab_com/index.html#gitlabcom-specific-rate-limits auto host = maybeGetStrAttr(input.attrs, "host").value_or("gitlab.com"); - auto url = fmt("https://%s/api/v4/projects/%s%%2F%s/repository/archive.tar.gz?sha=%s", + auto url = fmt("https://%s/api/v4/projects/%s%%2F%s/repository/archive.zip?sha=%s", host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"), input.getRev()->to_string(Base16, false)); diff --git a/src/libfetchers/input-accessor.hh b/src/libfetchers/input-accessor.hh index 1f0009a61..7a4dd08a6 100644 --- a/src/libfetchers/input-accessor.hh +++ b/src/libfetchers/input-accessor.hh @@ -55,6 +55,8 @@ struct MemoryInputAccessor : InputAccessor ref makeMemoryInputAccessor(); +ref makeZipInputAccessor(PathView path); + struct SourcePath { ref accessor; diff --git a/src/libfetchers/local.mk b/src/libfetchers/local.mk index 2e8869d83..1b91f8d16 100644 --- a/src/libfetchers/local.mk +++ b/src/libfetchers/local.mk @@ -8,6 +8,6 @@ libfetchers_SOURCES := $(wildcard $(d)/*.cc) libfetchers_CXXFLAGS += -I src/libutil -I src/libstore -libfetchers_LDFLAGS += -pthread +libfetchers_LDFLAGS += -pthread -lzip libfetchers_LIBS = libutil libstore diff --git a/src/libfetchers/zip-input-accessor.cc b/src/libfetchers/zip-input-accessor.cc new file mode 100644 index 000000000..3d59f902a --- /dev/null +++ b/src/libfetchers/zip-input-accessor.cc @@ -0,0 +1,161 @@ +#include "input-accessor.hh" + +#include + +namespace nix { + +struct cmp_str +{ + bool operator ()(const char * a, const char * b) const + { + return std::strcmp(a, b) < 0; + } +}; + +struct ZipMember +{ + struct zip_file * p = nullptr; + ZipMember(struct zip_file * p) : p(p) { } + ~ZipMember() { if (p) zip_fclose(p); } + operator zip_file *() { return p; } +}; + +struct ZipInputAccessor : InputAccessor +{ + Path zipPath; + struct zip * zipFile = nullptr; + + typedef std::map Members; + Members members; + + ZipInputAccessor(PathView _zipPath) + : zipPath(_zipPath) + { 
+        int error;
+        zipFile = zip_open(zipPath.c_str(), 0, &error);
+        if (!zipFile) {
+            char errorMsg[1024];
+            zip_error_to_str(errorMsg, sizeof errorMsg, error, errno);
+            throw Error("couldn't open '%s': %s", zipPath, errorMsg);
+        }
+
+        /* Read the index of the zip file and put it in a map. This
+           is unfortunately necessary because libzip's lookup
+           functions are O(n) time. */
+        struct zip_stat sb;
+        zip_uint64_t nrEntries = zip_get_num_entries(zipFile, 0);
+        for (zip_uint64_t n = 0; n < nrEntries; ++n) {
+            if (zip_stat_index(zipFile, n, 0, &sb))
+                throw Error("couldn't stat archive member #%d in '%s': %s", n, zipPath, zip_strerror(zipFile));
+            auto slash = strchr(sb.name, '/'); // index key = member name from its first '/' onwards
+            if (!slash) continue; // names containing no '/' are not indexed
+            members.emplace(slash, sb);
+        }
+    }
+
+    ~ZipInputAccessor() // Closes the archive, if one was opened.
+    {
+        if (zipFile) zip_close(zipFile);
+    }
+
+    std::string readFile(PathView _path) override // Whole contents of member `_path`; throws Error if it is not indexed, cannot be opened, or is read short.
+    {
+        auto path = canonPath(_path);
+
+        auto i = members.find(((std::string) path).c_str());
+        if (i == members.end())
+            throw Error("file '%s' does not exist", path);
+
+        ZipMember member(zip_fopen_index(zipFile, i->second.index, 0)); // ZipMember's destructor calls zip_fclose()
+        if (!member)
+            throw Error("couldn't open archive member '%s' in '%s': %s",
+                path, zipPath, zip_strerror(zipFile));
+
+        std::string buf(i->second.size, 0); // sized from the stat record captured in the constructor
+        if (zip_fread(member, buf.data(), i->second.size) != (zip_int64_t) i->second.size)
+            throw Error("couldn't read archive member '%s' in '%s'", path, zipPath);
+
+        return buf;
+    }
+
+    bool pathExists(PathView _path) override // True only when the canonicalised path exactly matches an index key.
+    {
+        auto path = canonPath(_path);
+        return members.find(((std::string) path).c_str()) != members.end();
+    }
+
+    Stat lstat(PathView _path) override // Type and executable bit of `_path`; throws Error if neither `path` nor `path/` is indexed.
+    {
+        auto path = canonPath(_path);
+
+        Type type = tRegular; // defaults, kept when the entry's attributes are not Unix ones
+        bool isExecutable = false;
+
+        auto i = members.find(((std::string) path).c_str());
+        if (i == members.end()) {
+            i = members.find(((std::string) path + "/").c_str()); // retry with a trailing '/', i.e. as a directory entry
+            type = tDirectory;
+        }
+        if (i == members.end())
+            throw Error("file '%s' does not exist", path);
+
+        zip_uint8_t opsys;
+        zip_uint32_t attributes;
+        if (zip_file_get_external_attributes(zipFile, i->second.index, ZIP_FL_UNCHANGED, &opsys, &attributes) == -1)
+            throw Error("couldn't get external attributes of '%s' in '%s': %s",
+                path, zipPath, zip_strerror(zipFile));
+
+        switch (opsys) {
+        case ZIP_OPSYS_UNIX:
+            auto fileType = (attributes >> 16) & 0770000; // own name: must not shadow the `type` that is returned below
+            switch (fileType) {
+            case 0040000: type = tDirectory; break;
+            case 0100000:
+                type = tRegular;
+                isExecutable = (attributes >> 16) & 0000100; // owner-execute bit of the stored mode
+                break;
+            case 0120000: type = tSymlink; break;
+            default:
+                throw Error("file '%s' in '%s' has unsupported type %o", path, zipPath, fileType);
+            }
+            break;
+        }
+
+        return Stat { .type = type, .isExecutable = isExecutable };
+    }
+
+    DirEntries readDirectory(PathView _path) override // Immediate children of `_path`; throws Error unless `path/` itself is indexed.
+    {
+        auto path = canonPath(_path) + "/";
+
+        auto i = members.find(((std::string) path).c_str());
+        if (i == members.end())
+            throw Error("directory '%s' does not exist", path);
+
+        ++i; // step past the directory's own entry
+
+        DirEntries entries;
+
+        for (; i != members.end() && strncmp(i->first, path.c_str(), path.size()) == 0; ++i) { // walk following keys while they share the `path/` prefix
+            auto start = i->first + path.size();
+            auto slash = strchr(start, '/');
+            if (slash && strcmp(slash, "/") != 0) continue; // a '/' that is not the final character: deeper descendant, skip
+            auto name = slash ? std::string(start, slash - start) : std::string(start); // drop a subdirectory's trailing '/'
+            entries.emplace(name, std::nullopt);
+        }
+
+        return entries;
+    }
+
+    std::string readLink(PathView path) override // Not implemented: always throws UnimplementedError.
+    {
+        throw UnimplementedError("ZipInputAccessor::readLink");
+    }
+};
+
+ref<InputAccessor> makeZipInputAccessor(PathView path) // Opens the zip file at `path` and wraps it in a ZipInputAccessor.
+{
+    return make_ref<ZipInputAccessor>(path);
+}
+
+}