diff --git a/Makefile.config.in b/Makefile.config.in index 1482db81f..c85e028c2 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -8,15 +8,19 @@ CXX = @CXX@ CXXFLAGS = @CXXFLAGS@ CXXLTO = @CXXLTO@ EDITLINE_LIBS = @EDITLINE_LIBS@ +ENABLE_BUILD = @ENABLE_BUILD@ ENABLE_S3 = @ENABLE_S3@ +ENABLE_TESTS = @ENABLE_TESTS@ GTEST_LIBS = @GTEST_LIBS@ HAVE_LIBCPUID = @HAVE_LIBCPUID@ HAVE_SECCOMP = @HAVE_SECCOMP@ HOST_OS = @host_os@ +INSTALL_UNIT_TESTS = @INSTALL_UNIT_TESTS@ LDFLAGS = @LDFLAGS@ LIBARCHIVE_LIBS = @LIBARCHIVE_LIBS@ LIBBROTLI_LIBS = @LIBBROTLI_LIBS@ LIBCURL_LIBS = @LIBCURL_LIBS@ +LIBGIT2_LIBS = @LIBGIT2_LIBS@ LIBSECCOMP_LIBS = @LIBSECCOMP_LIBS@ LOWDOWN_LIBS = @LOWDOWN_LIBS@ OPENSSL_LIBS = @OPENSSL_LIBS@ @@ -37,6 +41,7 @@ docdir = @docdir@ embedded_sandbox_shell = @embedded_sandbox_shell@ exec_prefix = @exec_prefix@ includedir = @includedir@ +internal_api_docs = @internal_api_docs@ libdir = @libdir@ libexecdir = @libexecdir@ localstatedir = @localstatedir@ @@ -48,7 +53,3 @@ sandbox_shell = @sandbox_shell@ storedir = @storedir@ sysconfdir = @sysconfdir@ system = @system@ -ENABLE_BUILD = @ENABLE_BUILD@ -ENABLE_TESTS = @ENABLE_TESTS@ -INSTALL_UNIT_TESTS = @INSTALL_UNIT_TESTS@ -internal_api_docs = @internal_api_docs@ diff --git a/configure.ac b/configure.ac index 281ba2c32..71e93feaa 100644 --- a/configure.ac +++ b/configure.ac @@ -347,9 +347,15 @@ AC_ARG_ENABLE(doc-gen, AS_HELP_STRING([--disable-doc-gen],[disable documentation doc_generate=$enableval, doc_generate=yes) AC_SUBST(doc_generate) + # Look for lowdown library. PKG_CHECK_MODULES([LOWDOWN], [lowdown >= 0.9.0], [CXXFLAGS="$LOWDOWN_CFLAGS $CXXFLAGS"]) + +# Look for libgit2. +PKG_CHECK_MODULES([LIBGIT2], [libgit2]) + + # Setuid installations. 
AC_CHECK_FUNCS([setresuid setreuid lchown]) diff --git a/flake.lock b/flake.lock index 991cef1ee..2b1d96e4e 100644 --- a/flake.lock +++ b/flake.lock @@ -16,6 +16,22 @@ "type": "github" } }, + "libgit2": { + "flake": false, + "locked": { + "lastModified": 1697646580, + "narHash": "sha256-oX4Z3S9WtJlwvj0uH9HlYcWv+x1hqp8mhXl7HsLu2f0=", + "owner": "libgit2", + "repo": "libgit2", + "rev": "45fd9ed7ae1a9b74b957ef4f337bc3c8b3df01b5", + "type": "github" + }, + "original": { + "owner": "libgit2", + "repo": "libgit2", + "type": "github" + } + }, "lowdown-src": { "flake": false, "locked": { @@ -67,6 +83,7 @@ "root": { "inputs": { "flake-compat": "flake-compat", + "libgit2": "libgit2", "lowdown-src": "lowdown-src", "nixpkgs": "nixpkgs", "nixpkgs-regression": "nixpkgs-regression" diff --git a/flake.nix b/flake.nix index 05ab7b06d..06a6fe8ea 100644 --- a/flake.nix +++ b/flake.nix @@ -7,8 +7,9 @@ inputs.nixpkgs-regression.url = "github:NixOS/nixpkgs/215d4d0fd80ca5163643b03a33fde804a29cc1e2"; inputs.lowdown-src = { url = "github:kristapsdz/lowdown"; flake = false; }; inputs.flake-compat = { url = "github:edolstra/flake-compat"; flake = false; }; + inputs.libgit2 = { url = "github:libgit2/libgit2"; flake = false; }; - outputs = { self, nixpkgs, nixpkgs-regression, lowdown-src, flake-compat }: + outputs = { self, nixpkgs, nixpkgs-regression, lowdown-src, flake-compat, libgit2 }: let inherit (nixpkgs) lib; @@ -198,6 +199,11 @@ bzip2 xz brotli editline openssl sqlite libarchive + (pkgs.libgit2.overrideAttrs (attrs: { + src = libgit2; + version = libgit2.lastModifiedDate; + cmakeFlags = (attrs.cmakeFlags or []) ++ ["-DUSE_SSH=exec"]; + })) boost lowdown-nix libsodium diff --git a/src/libfetchers/cache.cc b/src/libfetchers/cache.cc index b72a464e8..63b05bdab 100644 --- a/src/libfetchers/cache.cc +++ b/src/libfetchers/cache.cc @@ -20,6 +20,9 @@ create table if not exists Cache ( ); )sql"; +// FIXME: we should periodically purge/nuke this cache to prevent it +// from growing too big. 
+ struct CacheImpl : Cache { struct State @@ -48,6 +51,60 @@ struct CacheImpl : Cache "select info, path, immutable, timestamp from Cache where input = ?"); } + /* Store the inAttrs -> infoAttrs mapping (both serialised as JSON) together with an empty path, `false` and the current time. */ void upsert( + const Attrs & inAttrs, + const Attrs & infoAttrs) override + { + _state.lock()->add.use() + (attrsToJSON(inAttrs).dump()) + (attrsToJSON(infoAttrs).dump()) + ("") // no path + (false) + (time(0)).exec(); + } + + /* Return the cached infoAttrs for inAttrs whether or not the entry is expired. */ std::optional lookup(const Attrs & inAttrs) override + { + if (auto res = lookupExpired(inAttrs)) + return std::move(res->infoAttrs); + return {}; + } + + /* Return the cached infoAttrs only if lookupExpired() does not flag the entry as expired; expired hits are logged and dropped. */ std::optional lookupWithTTL(const Attrs & inAttrs) override + { + if (auto res = lookupExpired(inAttrs)) { + if (!res->expired) + return std::move(res->infoAttrs); + debug("ignoring expired cache entry '%s'", + attrsToJSON(inAttrs).dump()); + } + return {}; + } + + /* Read the row for inAttrs. The entry is flagged expired when result column 2 is zero and either tarballTtl is 0 or timestamp + tarballTtl lies in the past. */ std::optional lookupExpired(const Attrs & inAttrs) override + { + auto state(_state.lock()); + + auto inAttrsJSON = attrsToJSON(inAttrs).dump(); + + auto stmt(state->lookup.use()(inAttrsJSON)); + if (!stmt.next()) { + debug("did not find cache entry for '%s'", inAttrsJSON); + return {}; + } + + auto infoJSON = stmt.getStr(0); + auto locked = stmt.getInt(2) != 0; + auto timestamp = stmt.getInt(3); + + debug("using cache entry '%s' -> '%s'", inAttrsJSON, infoJSON); + + return Result2 { + .expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)), + .infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)), + }; + } + void add( ref store, const Attrs & inAttrs, diff --git a/src/libfetchers/cache.hh b/src/libfetchers/cache.hh index af34e66ce..f70589267 100644 --- a/src/libfetchers/cache.hh +++ b/src/libfetchers/cache.hh @@ -6,10 +6,49 @@ namespace nix::fetchers { +/** + * A cache for arbitrary `Attrs` -> `Attrs` mappings with a timestamp + * for expiration. + */ struct Cache { virtual ~Cache() { } + /** + * Add a value to the cache. The cache is an arbitrary mapping of + * Attrs to Attrs.
+ */ + virtual void upsert( + const Attrs & inAttrs, + const Attrs & infoAttrs) = 0; + + /** + * Look up a key with infinite TTL. + */ + virtual std::optional lookup( + const Attrs & inAttrs) = 0; + + /** + * Look up a key. Return nothing if its TTL has exceeded + * `settings.tarballTtl`. + */ + virtual std::optional lookupWithTTL( + const Attrs & inAttrs) = 0; + + /* Result of lookupExpired(): the stored attributes plus a flag saying whether the entry is past its TTL. */ struct Result2 + { + bool expired = false; + Attrs infoAttrs; + }; + + /** + * Look up a key. Return a bool denoting whether its TTL has + * exceeded `settings.tarballTtl`. + */ + virtual std::optional lookupExpired( + const Attrs & inAttrs) = 0; + + /* Old cache for things that have a store path. */ virtual void add( ref store, const Attrs & inAttrs, diff --git a/src/libfetchers/fetchers.cc b/src/libfetchers/fetchers.cc index 895515327..c2513e076 100644 --- a/src/libfetchers/fetchers.cc +++ b/src/libfetchers/fetchers.cc @@ -1,5 +1,6 @@ #include "fetchers.hh" #include "store-api.hh" +#include "input-accessor.hh" #include @@ -219,6 +220,16 @@ std::pair Input::fetch(ref store) const return {std::move(storePath), input}; } +/* Ask this input's scheme for an accessor; any Error gets a trace naming the input before it is rethrown. */ std::pair, Input> Input::getAccessor(ref store) const +{ + try { + return scheme->getAccessor(store, *this); + } catch (Error & e) { + e.addTrace({}, "while fetching the input '%s'", to_string()); + throw; + } +} + Input Input::applyOverrides( std::optional ref, std::optional rev) const @@ -355,6 +366,18 @@ void InputScheme::clone(const Input & input, const Path & destDir) const throw Error("do not know how to clone input '%s'", input.to_string()); } +/* Default fetch(): obtain an accessor via getAccessor() and copy its root into the store under the resulting input's name. */ std::pair InputScheme::fetch(ref store, const Input & input) +{ + auto [accessor, input2] = getAccessor(store, input); + auto storePath = accessor->root().fetchToStore(store, input2.getName()); + return {storePath, input2}; +} + +/* Default getAccessor(): always throws UnimplementedError. */ std::pair, Input> InputScheme::getAccessor(ref store, const Input & input) const +{ + throw UnimplementedError("InputScheme must implement fetch() or getAccessor()"); +} + std::optional
InputScheme::experimentalFeature() const { return {}; diff --git a/src/libfetchers/fetchers.hh b/src/libfetchers/fetchers.hh index a056c8939..ce5aa4c69 100644 --- a/src/libfetchers/fetchers.hh +++ b/src/libfetchers/fetchers.hh @@ -10,7 +10,7 @@ #include #include -namespace nix { class Store; class StorePath; } +namespace nix { class Store; class StorePath; struct InputAccessor; } namespace nix::fetchers { @@ -83,6 +83,8 @@ public: */ std::pair fetch(ref store) const; + std::pair, Input> getAccessor(ref store) const; + Input applyOverrides( std::optional ref, std::optional rev) const; @@ -167,7 +169,9 @@ struct InputScheme std::string_view contents, std::optional commitMsg) const; - virtual std::pair fetch(ref store, const Input & input) = 0; + virtual std::pair fetch(ref store, const Input & input); + + virtual std::pair, Input> getAccessor(ref store, const Input & input) const; /** * Is this `InputScheme` part of an experimental feature? diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc new file mode 100644 index 000000000..f554dcc5f --- /dev/null +++ b/src/libfetchers/git-utils.cc @@ -0,0 +1,675 @@ +#include "git-utils.hh" +#include "input-accessor.hh" +#include "cache.hh" +#include "finally.hh" +#include "processes.hh" +#include "signals.hh" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace std { + +template<> struct hash +{ + /* Use the leading bytes of the OID as the hash value. */ size_t operator()(const git_oid & oid) const + { + size_t h; memcpy(&h, oid.id, sizeof(h)); return h; /* memcpy instead of a size_t* cast: oid.id is a byte array, so a cast would be a misaligned, aliasing-violating load */ + } +}; + +} + +/* Print an OID via git_oid_tostr_s(). */ std::ostream & operator << (std::ostream & str, const git_oid & oid) +{ + str << git_oid_tostr_s(&oid); + return str; +} + +/* Two OIDs compare equal when git_oid_equal() reports so. */ bool operator == (const git_oid & oid1, const git_oid & oid2) +{ + return git_oid_equal(&oid1, &oid2); +} + +namespace nix { + +// Some wrapper types that ensure that the git_*_free functions get called.
+template +struct Deleter +{ + template + void operator()(T * p) const { del(p); }; +}; + +typedef std::unique_ptr> Repository; +typedef std::unique_ptr> TreeEntry; +typedef std::unique_ptr> Tree; +typedef std::unique_ptr> TreeBuilder; +typedef std::unique_ptr> Blob; +typedef std::unique_ptr> Object; +typedef std::unique_ptr> Commit; +typedef std::unique_ptr> Reference; +typedef std::unique_ptr> DescribeResult; +typedef std::unique_ptr> StatusList; +typedef std::unique_ptr> Remote; +typedef std::unique_ptr> GitConfig; +typedef std::unique_ptr> ConfigIterator; + +// A helper to ensure that we don't leak objects returned by libgit2. +template +struct Setter +{ + T & t; + typename T::pointer p = nullptr; + + Setter(T & t) : t(t) { } + + ~Setter() { if (p) t = T(p); } + + operator typename T::pointer * () { return &p; } +}; + +Hash toHash(const git_oid & oid) +{ + #ifdef GIT_EXPERIMENTAL_SHA256 + assert(oid.type == GIT_OID_SHA1); + #endif + Hash hash(htSHA1); + memcpy(hash.hash, oid.id, hash.hashSize); + return hash; +} + +static void initLibGit2() +{ + if (git_libgit2_init() < 0) + throw Error("initialising libgit2: %s", git_error_last()->message); +} + +git_oid hashToOID(const Hash & hash) +{ + git_oid oid; + if (git_oid_fromstr(&oid, hash.gitRev().c_str())) + throw Error("cannot convert '%s' to a Git OID", hash.gitRev()); + return oid; +} + +Object lookupObject(git_repository * repo, const git_oid & oid) +{ + Object obj; + if (git_object_lookup(Setter(obj), repo, &oid, GIT_OBJECT_ANY)) { + auto err = git_error_last(); + throw Error("getting Git object '%s': %s", oid, err->message); + } + return obj; +} + +template +T peelObject(git_repository * repo, git_object * obj, git_object_t type) +{ + T obj2; + if (git_object_peel((git_object * *) (typename T::pointer *) Setter(obj2), obj, type)) { + auto err = git_error_last(); + throw Error("peeling Git object '%s': %s", git_object_id(obj), err->message); + } + return obj2; +} + +struct GitRepoImpl : GitRepo, 
std::enable_shared_from_this +{ + CanonPath path; + Repository repo; + + /* Open the repository at _path when that path exists, otherwise initialise a new one there (bare if requested). NOTE(review): `create` is not read in this constructor - confirm that is intended. */ GitRepoImpl(CanonPath _path, bool create, bool bare) + : path(std::move(_path)) + { + initLibGit2(); + + if (pathExists(path.abs())) { + if (git_repository_open(Setter(repo), path.c_str())) + throw Error("opening Git repository '%s': %s", path, git_error_last()->message); + } else { + if (git_repository_init(Setter(repo), path.c_str(), bare)) + throw Error("creating Git repository '%s': %s", path, git_error_last()->message); + } + } + + /* Lets `*this` be passed wherever libgit2 expects a git_repository pointer. */ operator git_repository * () + { + return repo.get(); + } + + /* Count the distinct commits reachable from rev by following parent links through a work queue. */ uint64_t getRevCount(const Hash & rev) override + { + std::unordered_set done; + std::queue todo; + + todo.push(peelObject(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT)); + + while (auto commit = pop(todo)) { + if (!done.insert(*git_commit_id(commit->get())).second) continue; + + for (size_t n = 0; n < git_commit_parentcount(commit->get()); ++n) { + git_commit * parent; + if (git_commit_parent(&parent, commit->get(), n)) + throw Error("getting parent of Git commit '%s': %s", *git_commit_id(commit->get()), git_error_last()->message); + todo.push(Commit(parent)); + } + } + + return done.size(); + } + + /* Return git_commit_time() of the commit that rev peels to. */ uint64_t getLastModified(const Hash & rev) override + { + auto commit = peelObject(*this, lookupObject(*this, hashToOID(rev)).get(), GIT_OBJECT_COMMIT); + + return git_commit_time(commit.get()); + } + + bool isShallow() override + { + return git_repository_is_shallow(*this); + } + + /* Turn a revision string, a short name or a full reference name into the hash it designates. */ Hash resolveRef(std::string ref) override + { + // Handle revisions used as refs. + { + git_oid oid; + if (git_oid_fromstr(&oid, ref.c_str()) == 0) + return toHash(oid); + } + + // Resolve short names like 'master'. + Reference ref2; + if (!git_reference_dwim(Setter(ref2), *this, ref.c_str())) + ref = git_reference_name(ref2.get()); + + // Resolve full references like 'refs/heads/master'.
+ Reference ref3; + if (git_reference_lookup(Setter(ref3), *this, ref.c_str())) + throw Error("resolving Git reference '%s': %s", ref, git_error_last()->message); + + auto oid = git_reference_target(ref3.get()); + if (!oid) + throw Error("cannot get OID for Git reference '%s'", git_reference_name(ref3.get())); + + return toHash(*oid); + } + + /* Parse a .gitmodules-style config file and return one Submodule per `submodule.NAME.path` key; url and branch come from the sibling keys and are empty strings when absent. */ std::vector parseSubmodules(const CanonPath & configFile) + { + GitConfig config; + if (git_config_open_ondisk(Setter(config), configFile.abs().c_str())) + throw Error("parsing .gitmodules file: %s", git_error_last()->message); + + ConfigIterator it; + if (git_config_iterator_glob_new(Setter(it), config.get(), "^submodule\\..*\\.(path|url|branch)$")) + throw Error("iterating over .gitmodules: %s", git_error_last()->message); + + std::map entries; + + while (true) { + git_config_entry * entry = nullptr; + if (auto err = git_config_next(&entry, it.get())) { + if (err == GIT_ITEROVER) break; + throw Error("iterating over .gitmodules: %s", git_error_last()->message); + } + /* + 10 skips the leading "submodule." that the glob above requires. */ entries.emplace(entry->name + 10, entry->value); + } + + std::vector result; + + for (auto & [key, value] : entries) { + if (!hasSuffix(key, ".path")) continue; + /* Drop the 5-character ".path" suffix to obtain the submodule name. */ std::string key2(key, 0, key.size() - 5); + auto path = CanonPath(value); + result.push_back(Submodule { + .path = path, + .url = entries[key2 + ".url"], + .branch = entries[key2 + ".branch"], + }); + } + + return result; + } + + // Helper for statusCallback below. + static int statusCallbackTrampoline(const char * path, unsigned int statusFlags, void * payload) + { + return (*((std::function *) payload))(path, statusFlags); + } + + WorkdirInfo getWorkdirInfo() override + { + WorkdirInfo info; + + /* Get the head revision, if any.
*/ + git_oid headRev; + if (auto err = git_reference_name_to_id(&headRev, *this, "HEAD")) { + if (err != GIT_ENOTFOUND) + throw Error("resolving HEAD: %s", git_error_last()->message); + } else + info.headRev = toHash(headRev); + + /* Get all tracked files and determine whether the working + directory is dirty. */ + std::function statusCallback = [&](const char * path, unsigned int statusFlags) + { + if (!(statusFlags & GIT_STATUS_INDEX_DELETED) && + !(statusFlags & GIT_STATUS_WT_DELETED)) + info.files.insert(CanonPath(path)); + if (statusFlags != GIT_STATUS_CURRENT) + info.isDirty = true; + return 0; + }; + + git_status_options options = GIT_STATUS_OPTIONS_INIT; + options.flags |= GIT_STATUS_OPT_INCLUDE_UNMODIFIED; + options.flags |= GIT_STATUS_OPT_EXCLUDE_SUBMODULES; + if (git_status_foreach_ext(*this, &options, &statusCallbackTrampoline, &statusCallback)) + throw Error("getting working directory status: %s", git_error_last()->message); + + /* Get submodule info. */ + auto modulesFile = path + ".gitmodules"; + if (pathExists(modulesFile.abs())) + info.submodules = parseSubmodules(modulesFile); + + return info; + } + + /* Return the symbolic target of HEAD, or nullopt when git_reference_symbolic_target() yields none. */ std::optional getWorkdirRef() override + { + Reference ref; + if (git_reference_lookup(Setter(ref), *this, "HEAD")) + throw Error("looking up HEAD: %s", git_error_last()->message); + + if (auto target = git_reference_symbolic_target(ref.get())) + return target; + + return std::nullopt; + } + + std::vector> getSubmodules(const Hash & rev) override; + + /* Resolve a submodule URL through libgit2; a result that neither starts with "/" nor contains "://" is appended to `base` and canonicalised. */ std::string resolveSubmoduleUrl( + const std::string & url, + const std::string & base) override + { + git_buf buf = GIT_BUF_INIT; + if (git_submodule_resolve_url(&buf, *this, url.c_str())) + throw Error("resolving Git submodule URL '%s': %s", url, git_error_last()->message); /* report the libgit2 reason, like every other failure path in this file */ + Finally cleanup = [&]() { git_buf_dispose(&buf); }; + + std::string res(buf.ptr); + + if (!hasPrefix(res, "/") && res.find("://") == res.npos) + res = parseURL(base + "/" + res).canonicalise().to_string(); + + return res; + } + + bool hasObject(const Hash &
oid_) override + { + /* True if the object exists in this repository, false when libgit2 reports GIT_ENOTFOUND; any other lookup error is thrown. */ auto oid = hashToOID(oid_); + + Object obj; + if (auto errCode = git_object_lookup(Setter(obj), *this, &oid, GIT_OBJECT_ANY)) { + if (errCode == GIT_ENOTFOUND) return false; + auto err = git_error_last(); + throw Error("getting Git object '%s': %s", oid, err->message); + } + + return true; + } + + ref getAccessor(const Hash & rev) override; + + /* Progress callback: forwards the trimmed text to the Activity passed as payload; returns -1 when _isInterrupted is set, 0 otherwise. */ static int sidebandProgressCallback(const char * str, int len, void * payload) + { + auto act = (Activity *) payload; + act->result(resFetchStatus, trim(std::string_view(str, len))); + return _isInterrupted ? -1 : 0; + } + + /* Progress callback: reports object/delta counters and received MiB to the Activity passed as payload; returns -1 when _isInterrupted is set, 0 otherwise. */ static int transferProgressCallback(const git_indexer_progress * stats, void * payload) + { + auto act = (Activity *) payload; + act->result(resFetchStatus, + fmt("%d/%d objects received, %d/%d deltas indexed, %.1f MiB", + stats->received_objects, + stats->total_objects, + stats->indexed_deltas, + stats->total_deltas, + stats->received_bytes / (1024.0 * 1024.0))); + return _isInterrupted ? -1 : 0; + } + + /* Fetch `refspec` from `url` through an anonymous remote; `shallow` selects depth 1 instead of GIT_FETCH_DEPTH_FULL. */ void fetch( + const std::string & url, + const std::string & refspec, + bool shallow) override + { + Activity act(*logger, lvlTalkative, actFetchTree, fmt("fetching Git repository '%s'", url)); + + Remote remote; + + if (git_remote_create_anonymous(Setter(remote), *this, url.c_str())) + throw Error("cannot create Git remote '%s': %s", url, git_error_last()->message); + + char * refspecs[] = {(char *) refspec.c_str()}; + git_strarray refspecs2 { + .strings = refspecs, + .count = 1 + }; + + git_fetch_options opts = GIT_FETCH_OPTIONS_INIT; + opts.depth = shallow ?
1 : GIT_FETCH_DEPTH_FULL; + opts.callbacks.payload = &act; + opts.callbacks.sideband_progress = sidebandProgressCallback; + opts.callbacks.transfer_progress = transferProgressCallback; + + if (git_remote_fetch(remote.get(), &refspecs2, &opts, nullptr)) + throw Error("fetching '%s' from '%s': %s", refspec, url, git_error_last()->message); + } + + void verifyCommit( + const Hash & rev, + const std::vector & publicKeys) override + { + // Create ad-hoc allowedSignersFile and populate it with publicKeys + auto allowedSignersFile = createTempFile().second; + std::string allowedSigners; + for (const fetchers::PublicKey & k : publicKeys) { + if (k.type != "ssh-dsa" + && k.type != "ssh-ecdsa" + && k.type != "ssh-ecdsa-sk" + && k.type != "ssh-ed25519" + && k.type != "ssh-ed25519-sk" + && k.type != "ssh-rsa") + throw Error("Unknown key type '%s'.\n" + "Please use one of\n" + "- ssh-dsa\n" + " ssh-ecdsa\n" + " ssh-ecdsa-sk\n" + " ssh-ed25519\n" + " ssh-ed25519-sk\n" + " ssh-rsa", k.type); + allowedSigners += "* " + k.type + " " + k.key + "\n"; + } + writeFile(allowedSignersFile, allowedSigners); + + // Run verification command + auto [status, output] = runProgram(RunOptions { + .program = "git", + .args = { + "-c", + "gpg.ssh.allowedSignersFile=" + allowedSignersFile, + "-C", path.abs(), + "verify-commit", + rev.gitRev() + }, + .mergeStderrToStdout = true, + }); + + /* Evaluate result through status code and checking if public + key fingerprints appear on stderr. This is neccessary + because the git command might also succeed due to the + commit being signed by gpg keys that are present in the + users key agent. 
*/ + std::string re = R"(Good "git" signature for \* with .* key SHA256:[)"; + for (const fetchers::PublicKey & k : publicKeys){ + // Calculate sha256 fingerprint from public key and escape the regex symbol '+' to match the key literally + auto fingerprint = trim(hashString(htSHA256, base64Decode(k.key)).to_string(nix::HashFormat::Base64, false), "="); + auto escaped_fingerprint = std::regex_replace(fingerprint, std::regex("\\+"), "\\+" ); + re += "(" + escaped_fingerprint + ")"; + } + re += "]"; + if (status == 0 && std::regex_search(output, std::regex(re))) + printTalkative("Signature verification on commit %s succeeded.", rev.gitRev()); + else + throw Error("Commit signature verification on commit %s failed: %s", rev.gitRev(), output); + } +}; + +ref GitRepo::openRepo(const CanonPath & path, bool create, bool bare) +{ + return make_ref(path, create, bare); +} + +struct GitInputAccessor : InputAccessor +{ + ref repo; + Tree root; + + GitInputAccessor(ref repo_, const Hash & rev) + : repo(repo_) + , root(peelObject(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE)) + { + } + + std::string readBlob(const CanonPath & path, bool symlink) + { + auto blob = getBlob(path, symlink); + + auto data = std::string_view((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); + + return std::string(data); + } + + std::string readFile(const CanonPath & path) override + { + return readBlob(path, false); + } + + bool pathExists(const CanonPath & path) override + { + return path.isRoot() ? 
true : (bool) lookup(path); + } + + std::optional maybeLstat(const CanonPath & path) override + { + if (path.isRoot()) + return Stat { .type = tDirectory }; + + auto entry = lookup(path); + if (!entry) + return std::nullopt; + + auto mode = git_tree_entry_filemode(entry); + + if (mode == GIT_FILEMODE_TREE) + return Stat { .type = tDirectory }; + + else if (mode == GIT_FILEMODE_BLOB) + return Stat { .type = tRegular }; + + else if (mode == GIT_FILEMODE_BLOB_EXECUTABLE) + return Stat { .type = tRegular, .isExecutable = true }; + + else if (mode == GIT_FILEMODE_LINK) + return Stat { .type = tSymlink }; + + else if (mode == GIT_FILEMODE_COMMIT) + // Treat submodules as an empty directory. + return Stat { .type = tDirectory }; + + else + throw Error("file '%s' has an unsupported Git file type"); + } + + DirEntries readDirectory(const CanonPath & path) override + { + return std::visit(overloaded { + [&](Tree tree) { + DirEntries res; + + auto count = git_tree_entrycount(tree.get()); + + for (size_t n = 0; n < count; ++n) { + auto entry = git_tree_entry_byindex(tree.get(), n); + // FIXME: add to cache + res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{}); + } + + return res; + }, + [&](Submodule) { + return DirEntries(); + } + }, getTree(path)); + } + + std::string readLink(const CanonPath & path) override + { + return readBlob(path, true); + } + + Hash getSubmoduleRev(const CanonPath & path) + { + auto entry = need(path); + + if (git_tree_entry_type(entry) != GIT_OBJECT_COMMIT) + throw Error("'%s' is not a submodule", showPath(path)); + + return toHash(*git_tree_entry_id(entry)); + } + + std::map lookupCache; + + /* Recursively look up 'path' relative to the root. 
*/ + git_tree_entry * lookup(const CanonPath & path) + { + if (path.isRoot()) return nullptr; + + auto i = lookupCache.find(path); + if (i == lookupCache.end()) { + TreeEntry entry; + if (auto err = git_tree_entry_bypath(Setter(entry), root.get(), std::string(path.rel()).c_str())) { + if (err != GIT_ENOTFOUND) + throw Error("looking up '%s': %s", showPath(path), git_error_last()->message); + } + + i = lookupCache.emplace(path, std::move(entry)).first; + } + + return i->second.get(); /* get(), not &*: a path that was not found is cached as a null entry, and dereferencing a null unique_ptr is undefined */ + } + + /* Like lookup(), but throws if the path does not exist. */ git_tree_entry * need(const CanonPath & path) + { + auto entry = lookup(path); + if (!entry) + throw Error("'%s' does not exist", showPath(path)); + return entry; + } + + struct Submodule { }; + + /* Return the tree at `path` (a duplicate of `root` for the root path), or Submodule{} when the entry is of type GIT_OBJECT_COMMIT; throws for any other entry type. */ std::variant getTree(const CanonPath & path) + { + if (path.isRoot()) { + Tree tree; + if (git_tree_dup(Setter(tree), root.get())) + throw Error("duplicating directory '%s': %s", showPath(path), git_error_last()->message); + return tree; + } + + auto entry = need(path); + + if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT) + return Submodule(); + + if (git_tree_entry_type(entry) != GIT_OBJECT_TREE) + throw Error("'%s' is not a directory", showPath(path)); + + Tree tree; + if (git_tree_entry_to_object((git_object * *) (git_tree * *) Setter(tree), *repo, entry)) + throw Error("looking up directory '%s': %s", showPath(path), git_error_last()->message); + + return tree; + } + + /* Return the blob at `path`; throws unless it is a symlink (when expectSymlink is set) or a regular/executable file (otherwise). */ Blob getBlob(const CanonPath & path, bool expectSymlink) + { + auto notExpected = [&]() + { + throw Error( + expectSymlink + ?
"'%s' is not a symlink" + : "'%s' is not a regular file", + showPath(path)); + }; + + if (path.isRoot()) notExpected(); + + auto entry = need(path); + + if (git_tree_entry_type(entry) != GIT_OBJECT_BLOB) + notExpected(); + + auto mode = git_tree_entry_filemode(entry); + if (expectSymlink) { + if (mode != GIT_FILEMODE_LINK) + notExpected(); + } else { + if (mode != GIT_FILEMODE_BLOB && mode != GIT_FILEMODE_BLOB_EXECUTABLE) + notExpected(); + } + + Blob blob; + if (git_tree_entry_to_object((git_object * *) (git_blob * *) Setter(blob), *repo, entry)) + throw Error("looking up file '%s': %s", showPath(path), git_error_last()->message); + + return blob; + } +}; + +/* Build an accessor for `rev` that shares ownership of this repository object. */ ref GitRepoImpl::getAccessor(const Hash & rev) +{ + return make_ref(ref(shared_from_this()), rev); +} + +/* Return the submodules declared in .gitmodules at `rev`, each paired with the revision recorded for its path; empty if the file is absent. */ std::vector> GitRepoImpl::getSubmodules(const Hash & rev) +{ + /* Read the .gitmodules files from this revision. */ + CanonPath modulesFile(".gitmodules"); + + auto accessor = getAccessor(rev); + if (!accessor->pathExists(modulesFile)) return {}; + + /* Parse it and get the revision of each submodule.
*/ + auto configS = accessor->readFile(modulesFile); + + /* parseSubmodules() reads from a path, so the contents are written to a temporary file first. NOTE(review): nothing visible here removes that file afterwards. */ auto [fdTemp, pathTemp] = createTempFile("nix-git-submodules"); + writeFull(fdTemp.get(), configS); + + std::vector> result; + + for (auto & submodule : parseSubmodules(CanonPath(pathTemp))) { + auto rev = accessor.dynamic_pointer_cast()->getSubmoduleRev(submodule.path); + result.push_back({std::move(submodule), rev}); + } + + return result; +} + + +} diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh new file mode 100644 index 000000000..1def82071 --- /dev/null +++ b/src/libfetchers/git-utils.hh @@ -0,0 +1,90 @@ +#pragma once + +#include "input-accessor.hh" + +namespace nix { + +namespace fetchers { struct PublicKey; } + +struct GitRepo +{ + virtual ~GitRepo() + { } + + static ref openRepo(const CanonPath & path, bool create = false, bool bare = false); + + /* Number of distinct commits reachable from `rev`. */ virtual uint64_t getRevCount(const Hash & rev) = 0; + + /* Commit time of `rev`. */ virtual uint64_t getLastModified(const Hash & rev) = 0; + + virtual bool isShallow() = 0; + + /* Return the commit hash to which a ref points. */ + virtual Hash resolveRef(std::string ref) = 0; + + /** + * Info about a submodule. + */ + struct Submodule + { + CanonPath path; + std::string url; + std::string branch; + }; + + struct WorkdirInfo + { + bool isDirty = false; + + /* The checked out commit, or nullopt if there are no commits + in the repo yet. */ + std::optional headRev; + + /* All files in the working directory that are unchanged, + modified or added, but excluding deleted files. */ + std::set files; + + /* The submodules listed in .gitmodules of this workdir. */ + std::vector submodules; + }; + + virtual WorkdirInfo getWorkdirInfo() = 0; + + /* Get the ref that HEAD points to. */ + virtual std::optional getWorkdirRef() = 0; + + /** + * Return the submodules of this repo at the indicated revision, + * along with the revision of each submodule.
+ */ + virtual std::vector> getSubmodules(const Hash & rev) = 0; + + /* Resolve a submodule URL; relative results are interpreted against `base`. */ virtual std::string resolveSubmoduleUrl( + const std::string & url, + const std::string & base) = 0; + + /* NOTE(review): not referenced anywhere in the visible part of this patch. */ struct TarballInfo + { + Hash treeHash; + time_t lastModified; + }; + + /* True if the object exists in this repository. */ virtual bool hasObject(const Hash & oid) = 0; + + /* Return an accessor for the contents of `rev`. */ virtual ref getAccessor(const Hash & rev) = 0; + + /* Fetch `refspec` from `url`; `shallow` requests a depth-1 fetch. */ virtual void fetch( + const std::string & url, + const std::string & refspec, + bool shallow) = 0; + + /** + * Verify that commit `rev` is signed by one of the keys in + * `publicKeys`. Throw an error if it isn't. + */ + virtual void verifyCommit( + const Hash & rev, + const std::vector & publicKeys) = 0; +}; + +} diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index cc735996b..7208a0b6d 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -8,6 +8,9 @@ #include "pathlocks.hh" #include "processes.hh" #include "git.hh" +#include "fs-input-accessor.hh" +#include "mounted-input-accessor.hh" +#include "git-utils.hh" #include "fetch-settings.hh" @@ -133,189 +136,19 @@ std::optional readHeadCached(const std::string & actualUrl) return std::nullopt; } -bool isNotDotGitDirectory(const Path & path) +/* Collect keys from the JSON array attribute "publicKeys" plus the single "publicKey"/"keytype" pair (keytype defaults to "ssh-ed25519"). */ std::vector getPublicKeys(const Attrs & attrs) { - return baseNameOf(path) != ".git"; -} - -struct WorkdirInfo -{ - bool clean = false; - bool hasHead = false; -}; - -std::vector getPublicKeys(const Attrs & attrs) { std::vector publicKeys; if (attrs.contains("publicKeys")) { nlohmann::json publicKeysJson = nlohmann::json::parse(getStrAttr(attrs, "publicKeys")); ensureType(publicKeysJson, nlohmann::json::value_t::array); publicKeys = publicKeysJson.get>(); } - else { - publicKeys = {}; - } if (attrs.contains("publicKey")) publicKeys.push_back(PublicKey{maybeGetStrAttr(attrs, "keytype").value_or("ssh-ed25519"),getStrAttr(attrs, "publicKey")}); return publicKeys; } -void doCommitVerification(const Path repoDir, const Path gitDir, const std::string rev, const std::vector& publicKeys) { - // Create ad-hoc
allowedSignersFile and populate it with publicKeys - auto allowedSignersFile = createTempFile().second; - std::string allowedSigners; - for (const PublicKey& k : publicKeys) { - if (k.type != "ssh-dsa" - && k.type != "ssh-ecdsa" - && k.type != "ssh-ecdsa-sk" - && k.type != "ssh-ed25519" - && k.type != "ssh-ed25519-sk" - && k.type != "ssh-rsa") - warn("Unknown keytype: %s\n" - "Please use one of\n" - "- ssh-dsa\n" - " ssh-ecdsa\n" - " ssh-ecdsa-sk\n" - " ssh-ed25519\n" - " ssh-ed25519-sk\n" - " ssh-rsa", k.type); - allowedSigners += "* " + k.type + " " + k.key + "\n"; - } - writeFile(allowedSignersFile, allowedSigners); - - // Run verification command - auto [status, output] = runProgram(RunOptions { - .program = "git", - .args = {"-c", "gpg.ssh.allowedSignersFile=" + allowedSignersFile, "-C", repoDir, - "--git-dir", gitDir, "verify-commit", rev}, - .mergeStderrToStdout = true, - }); - - /* Evaluate result through status code and checking if public key fingerprints appear on stderr - * This is neccessary because the git command might also succeed due to the commit being signed by gpg keys - * that are present in the users key agent. */ - std::string re = R"(Good "git" signature for \* with .* key SHA256:[)"; - for (const PublicKey& k : publicKeys){ - // Calculate sha256 fingerprint from public key and escape the regex symbol '+' to match the key literally - auto fingerprint = trim(hashString(htSHA256, base64Decode(k.key)).to_string(nix::HashFormat::Base64, false), "="); - auto escaped_fingerprint = std::regex_replace(fingerprint, std::regex("\\+"), "\\+" ); - re += "(" + escaped_fingerprint + ")"; - } - re += "]"; - if (status == 0 && std::regex_search(output, std::regex(re))) - printTalkative("Signature verification on commit %s succeeded", rev); - else - throw Error("Commit signature verification on commit %s failed: \n%s", rev, output); -} - -// Returns whether a git workdir is clean and has commits. 
-WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir) -{ - const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); - std::string gitDir(".git"); - - auto env = getEnv(); - // Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong - // that way unknown errors can lead to a failure instead of continuing through the wrong code path - env["LC_ALL"] = "C"; - - /* Check whether HEAD points to something that looks like a commit, - since that is the refrence we want to use later on. */ - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" }, - .environment = env, - .mergeStderrToStdout = true - }); - auto exitCode = WEXITSTATUS(result.first); - auto errorMessage = result.second; - - if (errorMessage.find("fatal: not a git repository") != std::string::npos) { - throw Error("'%s' is not a Git repository", workdir); - } else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) { - // indicates that the repo does not have any commits - // we want to proceed and will consider it dirty later - } else if (exitCode != 0) { - // any other errors should lead to a failure - throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage); - } - - bool clean = false; - bool hasHead = exitCode == 0; - - try { - if (hasHead) { - // Using git diff is preferrable over lower-level operations here, - // because its conceptually simpler and we only need the exit code anyways. - auto gitDiffOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "diff", "HEAD", "--quiet"}); - if (!submodules) { - // Changes in submodules should only make the tree dirty - // when those submodules will be copied as well. 
- gitDiffOpts.emplace_back("--ignore-submodules"); - } - gitDiffOpts.emplace_back("--"); - runProgram("git", true, gitDiffOpts); - - clean = true; - } - } catch (ExecError & e) { - if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw; - } - - return WorkdirInfo { .clean = clean, .hasHead = hasHead }; -} - -std::pair fetchFromWorkdir(ref store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo) -{ - const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); - auto gitDir = ".git"; - - if (!fetchSettings.allowDirty) - throw Error("Git tree '%s' is dirty", workdir); - - if (fetchSettings.warnDirty) - warn("Git tree '%s' is dirty", workdir); - - auto gitOpts = Strings({ "-C", workdir, "--git-dir", gitDir, "ls-files", "-z" }); - if (submodules) - gitOpts.emplace_back("--recurse-submodules"); - - auto files = tokenizeString>( - runProgram("git", true, gitOpts), "\0"s); - - Path actualPath(absPath(workdir)); - - PathFilter filter = [&](const Path & p) -> bool { - assert(hasPrefix(p, actualPath)); - std::string file(p, actualPath.size() + 1); - - auto st = lstat(p); - - if (S_ISDIR(st.st_mode)) { - auto prefix = file + "/"; - auto i = files.lower_bound(prefix); - return i != files.end() && hasPrefix(*i, prefix); - } - - return files.count(file); - }; - - auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter); - - // FIXME: maybe we should use the timestamp of the last - // modified dirty file? - input.attrs.insert_or_assign( - "lastModified", - workdirInfo.hasHead ? 
std::stoull(runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0); - - if (workdirInfo.hasHead) { - input.attrs.insert_or_assign("dirtyRev", chomp( - runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "rev-parse", "--verify", "HEAD" })) + "-dirty"); - input.attrs.insert_or_assign("dirtyShortRev", chomp( - runProgram("git", true, { "-C", actualPath, "--git-dir", gitDir, "rev-parse", "--verify", "--short", "HEAD" })) + "-dirty"); - } - - return {std::move(storePath), input}; -} } // end namespace struct GitInputScheme : InputScheme @@ -386,9 +219,6 @@ struct GitInputScheme : InputScheme || name == "publicKeys") experimentalFeatureSettings.require(Xp::VerifiedFetches); - maybeGetBoolAttr(attrs, "shallow"); - maybeGetBoolAttr(attrs, "submodules"); - maybeGetBoolAttr(attrs, "allRefs"); maybeGetBoolAttr(attrs, "verifyCommit"); if (auto ref = maybeGetStrAttr(attrs, "ref")) { @@ -401,6 +231,9 @@ struct GitInputScheme : InputScheme auto url = fixGitURL(getStrAttr(attrs, "url")); parseURL(url); input.attrs["url"] = url; + getShallowAttr(input); + getSubmodulesAttr(input); + getAllRefsAttr(input); return input; } @@ -410,8 +243,10 @@ struct GitInputScheme : InputScheme if (url.scheme != "git") url.scheme = "git+" + url.scheme; if (auto rev = input.getRev()) url.query.insert_or_assign("rev", rev->gitRev()); if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref); - if (maybeGetBoolAttr(input.attrs, "shallow").value_or(false)) + if (getShallowAttr(input)) url.query.insert_or_assign("shallow", "1"); + if (getSubmodulesAttr(input)) + url.query.insert_or_assign("submodules", "1"); if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false)) url.query.insert_or_assign("verifyCommit", "1"); auto publicKeys = getPublicKeys(input.attrs); @@ -439,11 +274,11 @@ struct GitInputScheme : InputScheme void clone(const Input & input, const Path & destDir) const override { - auto 
[isLocal, actualUrl] = getActualUrl(input); + auto repoInfo = getRepoInfo(input); Strings args = {"clone"}; - args.push_back(actualUrl); + args.push_back(repoInfo.url); if (auto ref = input.getRef()) { args.push_back("--branch"); @@ -459,10 +294,9 @@ struct GitInputScheme : InputScheme std::optional getSourcePath(const Input & input) const override { - auto url = parseURL(getStrAttr(input.attrs, "url")); - if (url.scheme == "file" && !input.getRef() && !input.getRev()) - return url.path; - return {}; + auto repoInfo = getRepoInfo(input); + if (repoInfo.isLocal) return repoInfo.url; + return std::nullopt; } void putFile( @@ -471,24 +305,74 @@ struct GitInputScheme : InputScheme std::string_view contents, std::optional commitMsg) const override { - auto root = getSourcePath(input); - if (!root) + auto repoInfo = getRepoInfo(input); + if (!repoInfo.isLocal) throw Error("cannot commit '%s' to Git repository '%s' because it's not a working tree", path, input.to_string()); - writeFile((CanonPath(*root) + path).abs(), contents); - - auto gitDir = ".git"; + writeFile((CanonPath(repoInfo.url) + path).abs(), contents); runProgram("git", true, - { "-C", *root, "--git-dir", gitDir, "add", "--intent-to-add", "--", std::string(path.rel()) }); + { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "add", "--intent-to-add", "--", std::string(path.rel()) }); if (commitMsg) runProgram("git", true, - { "-C", *root, "--git-dir", gitDir, "commit", std::string(path.rel()), "-m", *commitMsg }); + { "-C", repoInfo.url, "--git-dir", repoInfo.gitDir, "commit", std::string(path.rel()), "-m", *commitMsg }); } - std::pair getActualUrl(const Input & input) const + struct RepoInfo { + /* Whether this is a local, non-bare repository. */ + bool isLocal = false; + + /* Working directory info: the complete list of files, and + whether the working directory is dirty compared to HEAD. */ + GitRepo::WorkdirInfo workdirInfo; + + /* URL of the repo, or its path if isLocal. Never a `file` URL. 
*/ + std::string url; + + void warnDirty() const + { + if (workdirInfo.isDirty) { + if (!fetchSettings.allowDirty) + throw Error("Git tree '%s' is dirty", url); + + if (fetchSettings.warnDirty) + warn("Git tree '%s' is dirty", url); + } + } + + std::string gitDir = ".git"; + }; + + bool getShallowAttr(const Input & input) const + { + return maybeGetBoolAttr(input.attrs, "shallow").value_or(false); + } + + bool getSubmodulesAttr(const Input & input) const + { + return maybeGetBoolAttr(input.attrs, "submodules").value_or(false); + } + + bool getAllRefsAttr(const Input & input) const + { + return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false); + } + + RepoInfo getRepoInfo(const Input & input) const + { + auto checkHashType = [&](const std::optional & hash) + { + if (hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256)) + throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", hash->to_string(HashFormat::Base16, true)); + }; + + if (auto rev = input.getRev()) + checkHashType(rev); + + RepoInfo repoInfo; + // file:// URIs are normally not cloned (but otherwise treated the // same as remote URIs, i.e. we don't use the working tree or // HEAD). Exception: If _NIX_FORCE_HTTP is set, or the repo is a bare git @@ -496,162 +380,132 @@ struct GitInputScheme : InputScheme static bool forceHttp = getEnv("_NIX_FORCE_HTTP") == "1"; // for testing auto url = parseURL(getStrAttr(input.attrs, "url")); bool isBareRepository = url.scheme == "file" && !pathExists(url.path + "/.git"); - bool isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; - return {isLocal, isLocal ? url.path : url.base}; + repoInfo.isLocal = url.scheme == "file" && !forceHttp && !isBareRepository; + repoInfo.url = repoInfo.isLocal ? url.path : url.base; + + // If this is a local directory and no ref or revision is + // given, then allow the use of an unclean working tree. 
+ if (!input.getRef() && !input.getRev() && repoInfo.isLocal) + repoInfo.workdirInfo = GitRepo::openRepo(CanonPath(repoInfo.url))->getWorkdirInfo(); + + return repoInfo; } - std::pair fetch(ref store, const Input & _input) override + uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const { - Input input(_input); - auto gitDir = ".git"; + Attrs key{{"_what", "gitLastModified"}, {"rev", rev.gitRev()}}; + + auto cache = getCache(); + + if (auto res = cache->lookup(key)) + return getIntAttr(*res, "lastModified"); + + auto lastModified = GitRepo::openRepo(CanonPath(repoDir))->getLastModified(rev); + + cache->upsert(key, Attrs{{"lastModified", lastModified}}); + + return lastModified; + } + + uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const + { + Attrs key{{"_what", "gitRevCount"}, {"rev", rev.gitRev()}}; + + auto cache = getCache(); + + if (auto revCountAttrs = cache->lookup(key)) + return getIntAttr(*revCountAttrs, "revCount"); + + Activity act(*logger, lvlChatty, actUnknown, fmt("getting Git revision count of '%s'", repoInfo.url)); + + auto revCount = GitRepo::openRepo(CanonPath(repoDir))->getRevCount(rev); + + cache->upsert(key, Attrs{{"revCount", revCount}}); + + return revCount; + } + + std::string getDefaultRef(const RepoInfo & repoInfo) const + { + auto head = repoInfo.isLocal + ? 
GitRepo::openRepo(CanonPath(repoInfo.url))->getWorkdirRef() + : readHeadCached(repoInfo.url); + if (!head) { + warn("could not read HEAD ref from repo at '%s', using 'master'", repoInfo.url); + return "master"; + } + return *head; + } + + static MakeNotAllowedError makeNotAllowedError(std::string url) + { + return [url{std::move(url)}](const CanonPath & path) -> RestrictedPathError + { + if (nix::pathExists(path.abs())) + return RestrictedPathError("access to path '%s' is forbidden because it is not under Git control; maybe you should 'git add' it to the repository '%s'?", path, url); + else + return RestrictedPathError("path '%s' does not exist in Git repository '%s'", path, url); + }; + } + + void verifyCommit(const Input & input, std::shared_ptr repo) const + { + auto publicKeys = getPublicKeys(input.attrs); + auto verifyCommit = maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(!publicKeys.empty()); + + if (verifyCommit) { + if (input.getRev() && repo) + repo->verifyCommit(*input.getRev(), publicKeys); + else + throw Error("commit verification is required for Git repository '%s', but it's dirty", input.to_string()); + } + } + + std::pair, Input> getAccessorFromCommit( + ref store, + RepoInfo & repoInfo, + Input && input) const + { + assert(!repoInfo.workdirInfo.isDirty); + + auto origRev = input.getRev(); std::string name = input.getName(); - bool shallow = maybeGetBoolAttr(input.attrs, "shallow").value_or(false); - bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false); - bool allRefs = maybeGetBoolAttr(input.attrs, "allRefs").value_or(false); - std::vector publicKeys = getPublicKeys(input.attrs); - bool verifyCommit = maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(!publicKeys.empty()); - - std::string cacheType = "git"; - if (shallow) cacheType += "-shallow"; - if (submodules) cacheType += "-submodules"; - if (allRefs) cacheType += "-all-refs"; - - auto checkHashType = [&](const std::optional & hash) - { - if 
(hash.has_value() && !(hash->type == htSHA1 || hash->type == htSHA256)) - throw Error("Hash '%s' is not supported by Git. Supported types are sha1 and sha256.", hash->to_string(HashFormat::Base16, true)); - }; - - auto getLockedAttrs = [&]() - { - checkHashType(input.getRev()); - - return Attrs({ - {"type", cacheType}, - {"name", name}, - {"rev", input.getRev()->gitRev()}, - {"verifyCommit", verifyCommit}, - {"publicKeys", publicKeys_to_string(publicKeys)}, - }); - }; - - auto makeResult = [&](const Attrs & infoAttrs, StorePath && storePath) - -> std::pair - { - assert(input.getRev()); - assert(!_input.getRev() || _input.getRev() == input.getRev()); - if (!shallow) - input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); - input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); - return {std::move(storePath), input}; - }; - - if (input.getRev()) { - if (auto res = getCache()->lookup(store, getLockedAttrs())) - return makeResult(res->first, std::move(res->second)); - } - - auto [isLocal, actualUrl_] = getActualUrl(input); - auto actualUrl = actualUrl_; // work around clang bug - - /* If this is a local directory, no ref or revision is given and no signature verification is needed, - allow fetching directly from a dirty workdir. */ - if (!input.getRef() && !input.getRev() && isLocal) { - auto workdirInfo = getWorkdirInfo(input, actualUrl); - if (!workdirInfo.clean) { - if (verifyCommit) - throw Error("Can't fetch from a dirty workdir with commit signature verification enabled."); - else - return fetchFromWorkdir(store, input, actualUrl, workdirInfo); - } - } - - Attrs unlockedAttrs({ - {"type", cacheType}, - {"name", name}, - {"url", actualUrl}, - {"verifyCommit", verifyCommit}, - {"publicKeys", publicKeys_to_string(publicKeys)}, - }); + auto originalRef = input.getRef(); + auto ref = originalRef ? 
*originalRef : getDefaultRef(repoInfo); + input.attrs.insert_or_assign("ref", ref); Path repoDir; - if (isLocal) { - if (!input.getRef()) { - auto head = readHead(actualUrl); - if (!head) { - warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); - head = "master"; - } - input.attrs.insert_or_assign("ref", *head); - unlockedAttrs.insert_or_assign("ref", *head); - } - + if (repoInfo.isLocal) { + repoDir = repoInfo.url; if (!input.getRev()) - input.attrs.insert_or_assign("rev", - Hash::parseAny(chomp(runProgram("git", true, { "-C", actualUrl, "--git-dir", gitDir, "rev-parse", *input.getRef() })), htSHA1).gitRev()); - - repoDir = actualUrl; + input.attrs.insert_or_assign("rev", GitRepo::openRepo(CanonPath(repoDir))->resolveRef(ref).gitRev()); } else { - const bool useHeadRef = !input.getRef(); - if (useHeadRef) { - auto head = readHeadCached(actualUrl); - if (!head) { - warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl); - head = "master"; - } - input.attrs.insert_or_assign("ref", *head); - unlockedAttrs.insert_or_assign("ref", *head); - } else { - if (!input.getRev()) { - unlockedAttrs.insert_or_assign("ref", input.getRef().value()); - } - } - - if (auto res = getCache()->lookup(store, unlockedAttrs)) { - auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1); - if (!input.getRev() || input.getRev() == rev2) { - input.attrs.insert_or_assign("rev", rev2.gitRev()); - return makeResult(res->first, std::move(res->second)); - } - } - - Path cacheDir = getCachePath(actualUrl); + Path cacheDir = getCachePath(repoInfo.url); repoDir = cacheDir; - gitDir = "."; + repoInfo.gitDir = "."; createDirs(dirOf(cacheDir)); - PathLocks cacheDirLock({cacheDir + ".lock"}); + PathLocks cacheDirLock({cacheDir}); - if (!pathExists(cacheDir)) { - runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", "--bare", repoDir }); - } + auto repo = GitRepo::openRepo(CanonPath(cacheDir), true, true); Path 
localRefFile = - input.getRef()->compare(0, 5, "refs/") == 0 - ? cacheDir + "/" + *input.getRef() - : cacheDir + "/refs/heads/" + *input.getRef(); + ref.compare(0, 5, "refs/") == 0 + ? cacheDir + "/" + ref + : cacheDir + "/refs/heads/" + ref; bool doFetch; time_t now = time(0); /* If a rev was specified, we need to fetch if it's not in the repo. */ - if (input.getRev()) { - try { - runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() }); - doFetch = false; - } catch (ExecError & e) { - if (WIFEXITED(e.status)) { - doFetch = true; - } else { - throw; - } - } + if (auto rev = input.getRev()) { + doFetch = !repo->hasObject(*rev); } else { - if (allRefs) { + if (getAllRefsAttr(input)) { doFetch = true; } else { /* If the local ref is older than ‘tarball-ttl’ seconds, do a @@ -663,163 +517,193 @@ struct GitInputScheme : InputScheme } if (doFetch) { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching Git repository '%s'", actualUrl)); - - // FIXME: git stderr messes up our progress indicator, so - // we're using --quiet for now. Should process its stderr. try { - auto ref = input.getRef(); - auto fetchRef = allRefs + auto fetchRef = getAllRefsAttr(input) ? "refs/*" - : ref->compare(0, 5, "refs/") == 0 - ? *ref - : ref == "HEAD" - ? *ref - : "refs/heads/" + *ref; - runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "fetch", "--quiet", "--force", "--", actualUrl, fmt("%s:%s", fetchRef, fetchRef) }, {}, true); + : ref.compare(0, 5, "refs/") == 0 + ? ref + : ref == "HEAD" + ? 
ref + : "refs/heads/" + ref; + + repo->fetch(repoInfo.url, fmt("%s:%s", fetchRef, fetchRef), getShallowAttr(input)); } catch (Error & e) { if (!pathExists(localRefFile)) throw; - warn("could not update local clone of Git repository '%s'; continuing with the most recent version", actualUrl); + logError(e.info()); + warn("could not update local clone of Git repository '%s'; continuing with the most recent version", repoInfo.url); } if (!touchCacheFile(localRefFile, now)) warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno)); - if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef())) - warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl); + if (!originalRef && !storeCachedHead(repoInfo.url, ref)) + warn("could not update cached head '%s' for '%s'", ref, repoInfo.url); } - if (!input.getRev()) + if (auto rev = input.getRev()) { + if (!repo->hasObject(*rev)) + throw Error( + "Cannot find Git revision '%s' in ref '%s' of repository '%s'! " + "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " + ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD + "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", + rev->gitRev(), + ref, + repoInfo.url + ); + } else input.attrs.insert_or_assign("rev", Hash::parseAny(chomp(readFile(localRefFile)), htSHA1).gitRev()); // cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder } - bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true"; + auto repo = GitRepo::openRepo(CanonPath(repoDir)); - if (isShallow && !shallow) - throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified.", actualUrl); + auto isShallow = repo->isShallow(); - // FIXME: check whether rev is an ancestor of ref. 
+ if (isShallow && !getShallowAttr(input)) + throw Error("'%s' is a shallow Git repository, but shallow repositories are only allowed when `shallow = true;` is specified", repoInfo.url); - printTalkative("using revision %s of repo '%s'", input.getRev()->gitRev(), actualUrl); + // FIXME: check whether rev is an ancestor of ref? - /* Now that we know the ref, check again whether we have it in - the store. */ - if (auto res = getCache()->lookup(store, getLockedAttrs())) - return makeResult(res->first, std::move(res->second)); - - Path tmpDir = createTempDir(); - AutoDelete delTmpDir(tmpDir, true); - PathFilter filter = defaultPathFilter; - - auto result = runProgram(RunOptions { - .program = "git", - .args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() }, - .mergeStderrToStdout = true - }); - if (WEXITSTATUS(result.first) == 128 - && result.second.find("bad file") != std::string::npos) - { - throw Error( - "Cannot find Git revision '%s' in ref '%s' of repository '%s'! " - "Please make sure that the " ANSI_BOLD "rev" ANSI_NORMAL " exists on the " - ANSI_BOLD "ref" ANSI_NORMAL " you've specified or add " ANSI_BOLD - "allRefs = true;" ANSI_NORMAL " to " ANSI_BOLD "fetchGit" ANSI_NORMAL ".", - input.getRev()->gitRev(), - *input.getRef(), - actualUrl - ); - } - - if (verifyCommit) - doCommitVerification(repoDir, gitDir, input.getRev()->gitRev(), publicKeys); - - if (submodules) { - Path tmpGitDir = createTempDir(); - AutoDelete delTmpGitDir(tmpGitDir, true); - - runProgram("git", true, { "-c", "init.defaultBranch=" + gitInitialBranch, "init", tmpDir, "--separate-git-dir", tmpGitDir }); - - { - // TODO: repoDir might lack the ref (it only checks if rev - // exists, see FIXME above) so use a big hammer and fetch - // everything to ensure we get the rev. 
- Activity act(*logger, lvlTalkative, actUnknown, fmt("making temporary clone of '%s'", repoDir)); - runProgram("git", true, { "-C", tmpDir, "fetch", "--quiet", "--force", - "--update-head-ok", "--", repoDir, "refs/*:refs/*" }, {}, true); - } - - runProgram("git", true, { "-C", tmpDir, "checkout", "--quiet", input.getRev()->gitRev() }); - - /* Ensure that we use the correct origin for fetching - submodules. This matters for submodules with relative - URLs. */ - if (isLocal) { - writeFile(tmpGitDir + "/config", readFile(repoDir + "/" + gitDir + "/config")); - - /* Restore the config.bare setting we may have just - copied erroneously from the user's repo. */ - runProgram("git", true, { "-C", tmpDir, "config", "core.bare", "false" }); - } else - runProgram("git", true, { "-C", tmpDir, "config", "remote.origin.url", actualUrl }); - - /* As an optimisation, copy the modules directory of the - source repo if it exists. */ - auto modulesPath = repoDir + "/" + gitDir + "/modules"; - if (pathExists(modulesPath)) { - Activity act(*logger, lvlTalkative, actUnknown, fmt("copying submodules of '%s'", actualUrl)); - runProgram("cp", true, { "-R", "--", modulesPath, tmpGitDir + "/modules" }); - } - - { - Activity act(*logger, lvlTalkative, actUnknown, fmt("fetching submodules of '%s'", actualUrl)); - runProgram("git", true, { "-C", tmpDir, "submodule", "--quiet", "update", "--init", "--recursive" }, {}, true); - } - - filter = isNotDotGitDirectory; - } else { - // FIXME: should pipe this, or find some better way to extract a - // revision. 
- auto source = sinkToSource([&](Sink & sink) { - runProgram2({ - .program = "git", - .args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() }, - .standardOut = &sink - }); - }); - - unpackTarfile(*source, tmpDir); - } - - auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter); - - auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() })); + auto rev = *input.getRev(); Attrs infoAttrs({ - {"rev", input.getRev()->gitRev()}, - {"lastModified", lastModified}, + {"rev", rev.gitRev()}, + {"lastModified", getLastModified(repoInfo, repoDir, rev)}, }); - if (!shallow) + if (!getShallowAttr(input)) infoAttrs.insert_or_assign("revCount", - std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() }))); + getRevCount(repoInfo, repoDir, rev)); - if (!_input.getRev()) - getCache()->add( - store, - unlockedAttrs, - infoAttrs, - storePath, - false); + printTalkative("using revision %s of repo '%s'", rev.gitRev(), repoInfo.url); - getCache()->add( - store, - getLockedAttrs(), - infoAttrs, - storePath, - true); + verifyCommit(input, repo); - return makeResult(infoAttrs, std::move(storePath)); + auto accessor = repo->getAccessor(rev); + + /* If the repo has submodules, fetch them and return a mounted + input accessor consisting of the accessor for the top-level + repo and the accessors for the submodules. 
*/ + if (getSubmodulesAttr(input)) { + std::map> mounts; + + for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev)) { + auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url); + debug("Git submodule %s: %s %s %s -> %s", + submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved); + fetchers::Attrs attrs; + attrs.insert_or_assign("type", "git"); + attrs.insert_or_assign("url", resolved); + if (submodule.branch != "") + attrs.insert_or_assign("ref", submodule.branch); + attrs.insert_or_assign("rev", submoduleRev.gitRev()); + auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs)); + auto [submoduleAccessor, submoduleInput2] = + submoduleInput.getAccessor(store); + mounts.insert_or_assign(submodule.path, submoduleAccessor); + } + + if (!mounts.empty()) { + mounts.insert_or_assign(CanonPath::root, accessor); + accessor = makeMountedInputAccessor(std::move(mounts)); + } + } + + assert(!origRev || origRev == rev); + if (!getShallowAttr(input)) + input.attrs.insert_or_assign("revCount", getIntAttr(infoAttrs, "revCount")); + input.attrs.insert_or_assign("lastModified", getIntAttr(infoAttrs, "lastModified")); + + return {accessor, std::move(input)}; + } + + std::pair, Input> getAccessorFromWorkdir( + ref store, + RepoInfo & repoInfo, + Input && input) const + { + if (getSubmodulesAttr(input)) + /* Create mountpoints for the submodules. */ + for (auto & submodule : repoInfo.workdirInfo.submodules) + repoInfo.workdirInfo.files.insert(submodule.path); + + ref accessor = + makeFSInputAccessor(CanonPath(repoInfo.url), repoInfo.workdirInfo.files, makeNotAllowedError(repoInfo.url)); + + /* If the repo has submodules, return a mounted input accessor + consisting of the accessor for the top-level repo and the + accessors for the submodule workdirs. 
*/ + if (getSubmodulesAttr(input) && !repoInfo.workdirInfo.submodules.empty()) { + std::map> mounts; + + for (auto & submodule : repoInfo.workdirInfo.submodules) { + auto submodulePath = CanonPath(repoInfo.url) + submodule.path; + fetchers::Attrs attrs; + attrs.insert_or_assign("type", "git"); + attrs.insert_or_assign("url", submodulePath.abs()); + auto submoduleInput = fetchers::Input::fromAttrs(std::move(attrs)); + auto [submoduleAccessor, submoduleInput2] = + submoduleInput.getAccessor(store); + + /* If the submodule is dirty, mark this repo dirty as + well. */ + if (!submoduleInput2.getRev()) + repoInfo.workdirInfo.isDirty = true; + + mounts.insert_or_assign(submodule.path, submoduleAccessor); + } + + mounts.insert_or_assign(CanonPath::root, accessor); + accessor = makeMountedInputAccessor(std::move(mounts)); + } + + if (!repoInfo.workdirInfo.isDirty) { + auto repo = GitRepo::openRepo(CanonPath(repoInfo.url)); + + if (auto ref = repo->getWorkdirRef()) + input.attrs.insert_or_assign("ref", *ref); + + auto rev = repoInfo.workdirInfo.headRev.value(); + + input.attrs.insert_or_assign("rev", rev.gitRev()); + input.attrs.insert_or_assign("revCount", getRevCount(repoInfo, repoInfo.url, rev)); + + verifyCommit(input, repo); + } else { + repoInfo.warnDirty(); + + if (repoInfo.workdirInfo.headRev) { + input.attrs.insert_or_assign("dirtyRev", + repoInfo.workdirInfo.headRev->gitRev() + "-dirty"); + input.attrs.insert_or_assign("dirtyShortRev", + repoInfo.workdirInfo.headRev->gitShortRev() + "-dirty"); + } + + verifyCommit(input, nullptr); + } + + input.attrs.insert_or_assign( + "lastModified", + repoInfo.workdirInfo.headRev + ? 
getLastModified(repoInfo, repoInfo.url, *repoInfo.workdirInfo.headRev) + : 0); + + input.locked = true; // FIXME + + return {accessor, std::move(input)}; + } + + std::pair, Input> getAccessor(ref store, const Input & _input) const override + { + Input input(_input); + + auto repoInfo = getRepoInfo(input); + + return + input.getRef() || input.getRev() || !repoInfo.isLocal + ? getAccessorFromCommit(store, repoInfo, std::move(input)) + : getAccessorFromWorkdir(store, repoInfo, std::move(input)); } }; diff --git a/src/libfetchers/local.mk b/src/libfetchers/local.mk index 2e8869d83..266e7a211 100644 --- a/src/libfetchers/local.mk +++ b/src/libfetchers/local.mk @@ -8,6 +8,6 @@ libfetchers_SOURCES := $(wildcard $(d)/*.cc) libfetchers_CXXFLAGS += -I src/libutil -I src/libstore -libfetchers_LDFLAGS += -pthread +libfetchers_LDFLAGS += -pthread $(LIBGIT2_LIBS) -larchive libfetchers_LIBS = libutil libstore diff --git a/src/libfetchers/mounted-input-accessor.cc b/src/libfetchers/mounted-input-accessor.cc new file mode 100644 index 000000000..6f397eb17 --- /dev/null +++ b/src/libfetchers/mounted-input-accessor.cc @@ -0,0 +1,77 @@ +#include "mounted-input-accessor.hh" + +namespace nix { + +struct MountedInputAccessor : InputAccessor +{ + std::map> mounts; + + MountedInputAccessor(std::map> _mounts) + : mounts(std::move(_mounts)) + { + // Currently we require a root filesystem. This could be relaxed. + assert(mounts.contains(CanonPath::root)); + + // FIXME: return dummy parent directories automatically? 
+ } + + std::string readFile(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->readFile(subpath); + } + + bool pathExists(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->pathExists(subpath); + } + + std::optional maybeLstat(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->maybeLstat(subpath); + } + + DirEntries readDirectory(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->readDirectory(subpath); + } + + std::string readLink(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->readLink(subpath); + } + + std::string showPath(const CanonPath & path) override + { + auto [accessor, subpath] = resolve(path); + return accessor->showPath(subpath); + } + + std::pair, CanonPath> resolve(CanonPath path) + { + // Find the nearest parent of `path` that is a mount point. 
+ std::vector subpath; + while (true) { + auto i = mounts.find(path); + if (i != mounts.end()) { + std::reverse(subpath.begin(), subpath.end()); + return {i->second, CanonPath(subpath)}; + } + + assert(!path.isRoot()); + subpath.push_back(std::string(*path.baseName())); + path.pop(); + } + } +}; + +ref makeMountedInputAccessor(std::map> mounts) +{ + return make_ref(std::move(mounts)); +} + +} diff --git a/src/libfetchers/mounted-input-accessor.hh b/src/libfetchers/mounted-input-accessor.hh new file mode 100644 index 000000000..b557c5dad --- /dev/null +++ b/src/libfetchers/mounted-input-accessor.hh @@ -0,0 +1,9 @@ +#pragma once + +#include "input-accessor.hh" + +namespace nix { + +ref makeMountedInputAccessor(std::map> mounts); + +} diff --git a/src/libmain/progress-bar.cc b/src/libmain/progress-bar.cc index a7aee47c3..3aa012ee1 100644 --- a/src/libmain/progress-bar.cc +++ b/src/libmain/progress-bar.cc @@ -340,6 +340,14 @@ public: state->activitiesByType[type].expected += j; update(*state); } + + else if (type == resFetchStatus) { + auto i = state->its.find(act); + assert(i != state->its.end()); + ActInfo & actInfo = *i->second; + actInfo.lastLine = getS(fields, 0); + update(*state); + } } void update(State & state) diff --git a/src/libutil/canon-path.cc b/src/libutil/canon-path.cc index f678fae94..1e465f1f6 100644 --- a/src/libutil/canon-path.cc +++ b/src/libutil/canon-path.cc @@ -13,6 +13,13 @@ CanonPath::CanonPath(std::string_view raw, const CanonPath & root) : path(absPath((Path) raw, root.abs())) { } +CanonPath::CanonPath(const std::vector & elems) + : path("/") +{ + for (auto & s : elems) + push(s); +} + CanonPath CanonPath::fromCwd(std::string_view path) { return CanonPath(unchecked_t(), absPath((Path) path)); diff --git a/src/libutil/canon-path.hh b/src/libutil/canon-path.hh index eefe05ed5..6d0519f4f 100644 --- a/src/libutil/canon-path.hh +++ b/src/libutil/canon-path.hh @@ -6,6 +6,7 @@ #include #include #include +#include namespace nix { @@ -46,6 +47,11 @@ 
public: : path(std::move(path)) { } + /** + * Construct a canon path from a vector of elements. + */ + CanonPath(const std::vector & elems); + static CanonPath fromCwd(std::string_view path = "."); static CanonPath root; diff --git a/src/libutil/logging.hh b/src/libutil/logging.hh index 5aa6bee95..183f2d8e1 100644 --- a/src/libutil/logging.hh +++ b/src/libutil/logging.hh @@ -23,6 +23,7 @@ typedef enum { actQueryPathInfo = 109, actPostBuildHook = 110, actBuildWaiting = 111, + actFetchTree = 112, } ActivityType; typedef enum { @@ -34,6 +35,7 @@ typedef enum { resProgress = 105, resSetExpected = 106, resPostBuildLogLine = 107, + resFetchStatus = 108, } ResultType; typedef uint64_t ActivityId; diff --git a/src/libutil/url.cc b/src/libutil/url.cc index 9b438e6cd..57b64d607 100644 --- a/src/libutil/url.cc +++ b/src/libutil/url.cc @@ -2,6 +2,7 @@ #include "url-parts.hh" #include "util.hh" #include "split.hh" +#include "canon-path.hh" namespace nix { @@ -141,6 +142,13 @@ bool ParsedURL::operator ==(const ParsedURL & other) const && fragment == other.fragment; } +ParsedURL ParsedURL::canonicalise() +{ + ParsedURL res(*this); + res.path = CanonPath(res.path).abs(); + return res; +} + /** * Parse a URL scheme of the form '(applicationScheme\+)?transportScheme' * into a tuple '(applicationScheme, transportScheme)' diff --git a/src/libutil/url.hh b/src/libutil/url.hh index 26c2dcc28..833f54678 100644 --- a/src/libutil/url.hh +++ b/src/libutil/url.hh @@ -19,6 +19,11 @@ struct ParsedURL std::string to_string() const; bool operator ==(const ParsedURL & other) const; + + /** + * Remove `.` and `..` path elements. 
+ */ + ParsedURL canonicalise(); }; MakeError(BadURL, Error); diff --git a/tests/functional/fetchGit.sh b/tests/functional/fetchGit.sh index fc89f2040..c38cd27eb 100644 --- a/tests/functional/fetchGit.sh +++ b/tests/functional/fetchGit.sh @@ -185,11 +185,7 @@ path5=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = # Nuke the cache rm -rf $TEST_HOME/.cache/nix -# Try again, but without 'git' on PATH. This should fail. -NIX=$(command -v nix) -(! PATH= $NIX eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath" ) - -# Try again, with 'git' available. This should work. +# Try again. This should work. path5=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = $repo; ref = \"dev\"; }).outPath") [[ $path3 = $path5 ]] @@ -241,6 +237,7 @@ rm -rf $repo/.git # should succeed for a repo without commits git init $repo +git -C $repo add hello # need to add at least one file to cause the root of the repo to be visible path10=$(nix eval --impure --raw --expr "(builtins.fetchGit \"file://$repo\").outPath") # should succeed for a path with a space diff --git a/tests/functional/fetchGitSubmodules.sh b/tests/functional/fetchGitSubmodules.sh index df81232e5..369cdc5db 100644 --- a/tests/functional/fetchGitSubmodules.sh +++ b/tests/functional/fetchGitSubmodules.sh @@ -118,11 +118,3 @@ cloneRepo=$TEST_ROOT/a/b/gitSubmodulesClone # NB /a/b to make the relative path git clone $rootRepo $cloneRepo pathIndirect=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath") [[ $pathIndirect = $pathWithRelative ]] - -# Test that if the clone has the submodule already, we're not fetching -# it again. 
-git -C $cloneRepo submodule update --init -rm $TEST_HOME/.cache/nix/fetcher-cache* -rm -rf $subRepo -pathSubmoduleGone=$(nix eval --raw --expr "(builtins.fetchGit { url = file://$cloneRepo; rev = \"$rev2\"; submodules = true; }).outPath") -[[ $pathSubmoduleGone = $pathWithRelative ]] diff --git a/tests/functional/fetchGitVerification.sh b/tests/functional/fetchGitVerification.sh index 4d9209498..b80e061b5 100644 --- a/tests/functional/fetchGitVerification.sh +++ b/tests/functional/fetchGitVerification.sh @@ -34,6 +34,12 @@ out=$(nix eval --impure --raw --expr "builtins.fetchGit { url = \"file://$repo\" [[ $(nix eval --impure --raw --expr "builtins.readFile (builtins.fetchGit { url = \"file://$repo\"; publicKey = \"$publicKey1\"; } + \"/text\")") = 'hello' ]] echo 'hello world' > $repo/text + +# Verification on a dirty repo should fail. +out=$(nix eval --impure --raw --expr "builtins.fetchGit { url = \"file://$repo\"; keytype = \"ssh-rsa\"; publicKey = \"$publicKey2\"; }" 2>&1) || status=$? +[[ $status == 1 ]] +[[ $out =~ 'dirty' ]] + git -C $repo add text git -C $repo -c "user.signingkey=$key2File" commit -S -m 'second commit' @@ -73,4 +79,4 @@ cat > "$flakeDir/flake.nix" <&1) || status=$? [[ $status == 1 ]] -[[ $out =~ 'No principal matched.' ]] \ No newline at end of file +[[ $out =~ 'No principal matched.' 
]] diff --git a/tests/functional/flakes/flake-in-submodule.sh b/tests/functional/flakes/flake-in-submodule.sh index 21a4b52de..85a4d3389 100644 --- a/tests/functional/flakes/flake-in-submodule.sh +++ b/tests/functional/flakes/flake-in-submodule.sh @@ -46,7 +46,16 @@ echo '"expression in root repo"' > $rootRepo/root.nix git -C $rootRepo add root.nix git -C $rootRepo commit -m "Add root.nix" +flakeref=git+file://$rootRepo\?submodules=1\&dir=submodule + # Flake can live inside a submodule and can be accessed via ?dir=submodule -[[ $(nix eval --json git+file://$rootRepo\?submodules=1\&dir=submodule#sub ) = '"expression in submodule"' ]] +[[ $(nix eval --json $flakeref#sub ) = '"expression in submodule"' ]] + # The flake can access content outside of the submodule -[[ $(nix eval --json git+file://$rootRepo\?submodules=1\&dir=submodule#root ) = '"expression in root repo"' ]] +[[ $(nix eval --json $flakeref#root ) = '"expression in root repo"' ]] + +# Check that dirtying a submodule makes the entire thing dirty. +[[ $(nix flake metadata --json $flakeref | jq -r .locked.rev) != null ]] +echo '"foo"' > $rootRepo/submodule/sub.nix +[[ $(nix eval --json $flakeref#sub ) = '"foo"' ]] +[[ $(nix flake metadata --json $flakeref | jq -r .locked.rev) = null ]]