GitArchiveInputScheme: Use zip files to avoid unpacking to disk

This commit is contained in:
Eelco Dolstra 2022-02-17 00:07:20 +01:00
parent 631ae8df6d
commit 006d862d30
6 changed files with 193 additions and 14 deletions

View file

@ -163,13 +163,16 @@ fi
PKG_CHECK_MODULES([OPENSSL], [libcrypto], [CXXFLAGS="$OPENSSL_CFLAGS $CXXFLAGS"])
# Checks for libarchive
# Look for libarchive.
PKG_CHECK_MODULES([LIBARCHIVE], [libarchive >= 3.1.2], [CXXFLAGS="$LIBARCHIVE_CFLAGS $CXXFLAGS"])
# Workaround until https://github.com/libarchive/libarchive/issues/1446 is fixed
if test "$shared" != yes; then
LIBARCHIVE_LIBS+=' -lz'
fi
# Look for libzip.
PKG_CHECK_MODULES([LIBZIP], [libzip])
# Look for SQLite, a required dependency.
PKG_CHECK_MODULES([SQLITE3], [sqlite3 >= 3.6.19], [CXXFLAGS="$SQLITE3_CFLAGS $CXXFLAGS"])

View file

@ -111,6 +111,7 @@
bzip2 xz brotli editline
openssl sqlite
libarchive
libzip
boost
lowdown-nix
gtest

View file

@ -183,10 +183,8 @@ struct GitArchiveInputScheme : InputScheme
virtual DownloadUrl getDownloadUrl(const Input & input) const = 0;
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
std::pair<StorePath, Input> downloadArchive(ref<Store> store, Input input)
{
Input input(_input);
if (!maybeGetStrAttr(input.attrs, "ref")) input.attrs.insert_or_assign("ref", "HEAD");
auto rev = input.getRev();
@ -196,32 +194,46 @@ struct GitArchiveInputScheme : InputScheme
input.attrs.insert_or_assign("rev", rev->gitRev());
Attrs lockedAttrs({
{"type", "git-tarball"},
{"type", "git-zipball"},
{"rev", rev->gitRev()},
});
if (auto res = getCache()->lookup(store, lockedAttrs)) {
input.attrs.insert_or_assign("lastModified", getIntAttr(res->first, "lastModified"));
// FIXME
//input.attrs.insert_or_assign("lastModified", getIntAttr(res->first, "lastModified"));
return {std::move(res->second), input};
}
auto url = getDownloadUrl(input);
auto [tree, lastModified] = downloadTarball(store, url.url, input.getName(), true, url.headers);
auto res = downloadFile(store, url.url, input.getName(), true, url.headers);
input.attrs.insert_or_assign("lastModified", uint64_t(lastModified));
//input.attrs.insert_or_assign("lastModified", uint64_t(lastModified));
getCache()->add(
store,
lockedAttrs,
{
{"rev", rev->gitRev()},
{"lastModified", uint64_t(lastModified)}
// FIXME: get lastModified
//{"lastModified", uint64_t(lastModified)}
},
tree.storePath,
res.storePath,
true);
return {std::move(tree.storePath), input};
return {res.storePath, input};
}
std::pair<StorePath, Input> fetch(ref<Store> store, const Input & _input) override
{
throw UnimplementedError("GitArchive::fetch()");
}
std::pair<ref<InputAccessor>, Input> lazyFetch(ref<Store> store, const Input & input) override
{
auto [storePath, input2] = downloadArchive(store, input);
return {makeZipInputAccessor(store->toRealPath(storePath)), input2};
}
};
@ -262,7 +274,7 @@ struct GitHubInputScheme : GitArchiveInputScheme
// FIXME: use regular /archive URLs instead? api.github.com
// might have stricter rate limits.
auto host = maybeGetStrAttr(input.attrs, "host").value_or("github.com");
auto url = fmt("https://api.%s/repos/%s/%s/tarball/%s", // FIXME: check if this is correct for self hosted instances
auto url = fmt("https://api.%s/repos/%s/%s/zipball/%s", // FIXME: check if this is correct for self hosted instances
host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"),
input.getRev()->to_string(Base16, false));
@ -329,7 +341,7 @@ struct GitLabInputScheme : GitArchiveInputScheme
// is 10 reqs/sec/ip-addr. See
// https://docs.gitlab.com/ee/user/gitlab_com/index.html#gitlabcom-specific-rate-limits
auto host = maybeGetStrAttr(input.attrs, "host").value_or("gitlab.com");
auto url = fmt("https://%s/api/v4/projects/%s%%2F%s/repository/archive.tar.gz?sha=%s",
auto url = fmt("https://%s/api/v4/projects/%s%%2F%s/repository/archive.zip?sha=%s",
host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"),
input.getRev()->to_string(Base16, false));

View file

@ -55,6 +55,8 @@ struct MemoryInputAccessor : InputAccessor
ref<MemoryInputAccessor> makeMemoryInputAccessor();
ref<InputAccessor> makeZipInputAccessor(PathView path);
struct SourcePath
{
ref<InputAccessor> accessor;

View file

@ -8,6 +8,6 @@ libfetchers_SOURCES := $(wildcard $(d)/*.cc)
libfetchers_CXXFLAGS += -I src/libutil -I src/libstore
libfetchers_LDFLAGS += -pthread
libfetchers_LDFLAGS += -pthread -lzip
libfetchers_LIBS = libutil libstore

View file

@ -0,0 +1,161 @@
#include "input-accessor.hh"
#include <zip.h>
namespace nix {
struct cmp_str
{
bool operator ()(const char * a, const char * b) const
{
return std::strcmp(a, b) < 0;
}
};
struct ZipMember
{
struct zip_file * p = nullptr;
ZipMember(struct zip_file * p) : p(p) { }
~ZipMember() { if (p) zip_fclose(p); }
operator zip_file *() { return p; }
};
struct ZipInputAccessor : InputAccessor
{
Path zipPath;
struct zip * zipFile = nullptr;
typedef std::map<const char *, struct zip_stat, cmp_str> Members;
Members members;
ZipInputAccessor(PathView _zipPath)
: zipPath(_zipPath)
{
int error;
zipFile = zip_open(zipPath.c_str(), 0, &error);
if (!zipFile) {
char errorMsg[1024];
zip_error_to_str(errorMsg, sizeof errorMsg, error, errno);
throw Error("couldn't open '%s': %s", zipPath, errorMsg);
}
/* Read the index of the zip file and put it in a map. This
is unfortunately necessary because libzip's lookup
functions are O(n) time. */
struct zip_stat sb;
zip_uint64_t nrEntries = zip_get_num_entries(zipFile, 0);
for (zip_uint64_t n = 0; n < nrEntries; ++n) {
if (zip_stat_index(zipFile, n, 0, &sb))
throw Error("couldn't stat archive member #%d in '%s': %s", n, zipPath, zip_strerror(zipFile));
auto slash = strchr(sb.name, '/');
if (!slash) continue;
members.emplace(slash, sb);
}
}
~ZipInputAccessor()
{
if (zipFile) zip_close(zipFile);
}
std::string readFile(PathView _path) override
{
auto path = canonPath(_path);
auto i = members.find(((std::string) path).c_str());
if (i == members.end())
throw Error("file '%s' does not exist", path);
ZipMember member(zip_fopen_index(zipFile, i->second.index, 0));
if (!member)
throw Error("couldn't open archive member '%s' in '%s': %s",
path, zipPath, zip_strerror(zipFile));
std::string buf(i->second.size, 0);
if (zip_fread(member, buf.data(), i->second.size) != (zip_int64_t) i->second.size)
throw Error("couldn't read archive member '%s' in '%s'", path, zipPath);
return buf;
}
bool pathExists(PathView _path) override
{
auto path = canonPath(_path);
return members.find(((std::string) path).c_str()) != members.end();
}
Stat lstat(PathView _path) override
{
auto path = canonPath(_path);
Type type = tRegular;
bool isExecutable = false;
auto i = members.find(((std::string) path).c_str());
if (i == members.end()) {
i = members.find(((std::string) path + "/").c_str());
type = tDirectory;
}
if (i == members.end())
throw Error("file '%s' does not exist", path);
zip_uint8_t opsys;
zip_uint32_t attributes;
if (zip_file_get_external_attributes(zipFile, i->second.index, ZIP_FL_UNCHANGED, &opsys, &attributes) == -1)
throw Error("couldn't get external attributes of '%s' in '%s': %s",
path, zipPath, zip_strerror(zipFile));
switch (opsys) {
case ZIP_OPSYS_UNIX:
auto type = (attributes >> 16) & 0770000;
switch (type) {
case 0040000: type = tDirectory; break;
case 0100000:
type = tRegular;
isExecutable = (attributes >> 16) & 0000100;
break;
case 0120000: type = tSymlink; break;
default:
throw Error("file '%s' in '%s' has unsupported type %o", path, zipPath, type);
}
break;
}
return Stat { .type = type, .isExecutable = isExecutable };
}
DirEntries readDirectory(PathView _path) override
{
auto path = canonPath(_path) + "/";
auto i = members.find(((std::string) path).c_str());
if (i == members.end())
throw Error("directory '%s' does not exist", path);
++i;
DirEntries entries;
for (; i != members.end() && strncmp(i->first, path.c_str(), path.size()) == 0; ++i) {
auto start = i->first + path.size();
auto slash = strchr(start, '/');
if (slash && strcmp(slash, "/") != 0) continue;
auto name = slash ? std::string(start, slash - start) : std::string(start);
entries.emplace(name, std::nullopt);
}
return entries;
}
std::string readLink(PathView path) override
{
throw UnimplementedError("ZipInputAccessor::readLink");
}
};
ref<InputAccessor> makeZipInputAccessor(PathView path)
{
return make_ref<ZipInputAccessor>(path);
}
}