From ce6d58a97cf6f975a0b930605605fab153445b22 Mon Sep 17 00:00:00 2001 From: Robert Hensing Date: Mon, 27 Nov 2023 22:34:41 +0100 Subject: [PATCH] git fetcher: Add exportIgnore parameter Enabled for fetchGit, which historically had this behavior, among other behaviors we do not want in fetchGit. fetchTree disables this parameter by default. It can choose the simpler behavior, as it is still experimental. I am not confident that the filtering implementation is future proof. It should reuse a source filtering wrapper, which I believe Eelco has already written, but not merged yet. --- src/libexpr/primops/fetchTree.cc | 14 ++++++++ src/libfetchers/git-utils.cc | 57 +++++++++++++++++++++++++++----- src/libfetchers/git-utils.hh | 4 +-- src/libfetchers/git.cc | 15 +++++++-- tests/functional/fetchGit.sh | 5 ++- 5 files changed, 81 insertions(+), 14 deletions(-) diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index eb2df8626..e00c4f190 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -39,6 +39,10 @@ void emitTreeAttrs( attrs.alloc("submodules").mkBool( fetchers::maybeGetBoolAttr(input.attrs, "submodules").value_or(false)); + if (input.getType() == "git") + attrs.alloc("exportIgnore").mkBool( + fetchers::maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false)); + if (!forceDirty) { if (auto rev = input.getRev()) { @@ -112,6 +116,11 @@ static void fetchTree( attrs.emplace("type", type.value()); + if (params.isFetchGit) { + // Default value; user attrs are assigned later. + attrs.emplace("exportIgnore", Explicit{true}); + } + for (auto & attr : *args[0]->attrs) { if (attr.name == state.sType) continue; state.forceValue(*attr.value, attr.pos); @@ -593,6 +602,11 @@ static RegisterPrimOp primop_fetchGit({ A Boolean parameter that specifies whether submodules should be checked out. + - `exportIgnore` (default: `true`) + + A Boolean parameter that specifies whether `export-ignore` from `.gitattributes` should be applied. + This approximates part of the `git archive` behavior. + - `shallow` (default: `false`) A Boolean parameter that specifies whether fetching from a shallow remote repository is allowed. diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 65f7b45ef..4dc749504 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -7,6 +7,7 @@ #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include +#include #include #include #include @@ -307,7 +309,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return std::nullopt; } - std::vector> getSubmodules(const Hash & rev) override; + std::vector> getSubmodules(const Hash & rev, bool exportIgnore) override; std::string resolveSubmoduleUrl( const std::string & url, @@ -340,7 +342,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this return true; } - ref getAccessor(const Hash & rev) override; + ref getAccessor(const Hash & rev, bool exportIgnore) override; static int sidebandProgressCallback(const char * str, int len, void * payload) { @@ -460,10 +462,12 @@ struct GitInputAccessor : InputAccessor { ref repo; Tree root; + bool exportIgnore; - GitInputAccessor(ref repo_, const Hash & rev) + GitInputAccessor(ref repo_, const Hash & rev, bool exportIgnore) : repo(repo_) , root(peelObject(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE)) + , exportIgnore(exportIgnore) { } @@ -492,7 +496,7 @@ struct GitInputAccessor : InputAccessor return Stat { .type = tDirectory }; auto entry = lookup(path); - if (!entry) + if (!entry || isExportIgnored(path)) return std::nullopt; auto mode = git_tree_entry_filemode(entry); @@ -527,6 +531,12 @@ struct GitInputAccessor : InputAccessor for (size_t n = 0; n < count; ++n) { auto entry = git_tree_entry_byindex(tree.get(), n); + if (exportIgnore) { + if (isExportIgnored(path + git_tree_entry_name(entry))) { + continue; + } + } + // FIXME: add to cache res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{}); } @@ -556,6 +566,33 @@ struct GitInputAccessor : InputAccessor std::unordered_map lookupCache; + bool isExportIgnored(const CanonPath & path) { + if (!exportIgnore) + return false; + + const char *exportIgnoreEntry = nullptr; + + // GIT_ATTR_CHECK_INDEX_ONLY: + // > It will use index only for creating archives or for a bare repo + // > (if an index has been specified for the bare repo). + // -- https://github.com/libgit2/libgit2/blob/HEAD/include/git2/attr.h#L113C62-L115C48 + if (git_attr_get(&exportIgnoreEntry, + *repo, + GIT_ATTR_CHECK_INDEX_ONLY, + std::string(path.rel()).c_str(), + "export-ignore")) { + if (git_error_last()->klass == GIT_ENOTFOUND) + return false; + else + throw Error("looking up '%s': %s", showPath(path), git_error_last()->message); + } + else { + // Official git will silently reject export-ignore lines that have + // values. We do the same. + return GIT_ATTR_IS_TRUE(exportIgnoreEntry); + } + } + /* Recursively look up 'path' relative to the root. */ git_tree_entry * lookup(const CanonPath & path) { @@ -569,6 +606,10 @@ struct GitInputAccessor : InputAccessor throw Error("looking up '%s': %s", showPath(path), git_error_last()->message); } + if (entry && isExportIgnored(path)) { + entry.reset(); + } + i = lookupCache.emplace(path, std::move(entry)).first; } @@ -644,17 +685,17 @@ struct GitInputAccessor : InputAccessor } }; -ref GitRepoImpl::getAccessor(const Hash & rev) +ref GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore) { - return make_ref(ref(shared_from_this()), rev); + return make_ref(ref(shared_from_this()), rev, exportIgnore); } -std::vector> GitRepoImpl::getSubmodules(const Hash & rev) +std::vector> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore) { /* Read the .gitmodules files from this revision. */ CanonPath modulesFile(".gitmodules"); - auto accessor = getAccessor(rev); + auto accessor = getAccessor(rev, exportIgnore); if (!accessor->pathExists(modulesFile)) return {}; /* Parse it and get the revision of each submodule. */ diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 1def82071..f1cb48065 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -57,7 +57,7 @@ struct GitRepo * Return the submodules of this repo at the indicated revision, * along with the revision of each submodule. */ - virtual std::vector> getSubmodules(const Hash & rev) = 0; + virtual std::vector> getSubmodules(const Hash & rev, bool exportIgnore) = 0; virtual std::string resolveSubmoduleUrl( const std::string & url, @@ -71,7 +71,7 @@ struct GitRepo virtual bool hasObject(const Hash & oid) = 0; - virtual ref getAccessor(const Hash & rev) = 0; + virtual ref getAccessor(const Hash & rev, bool exportIgnore) = 0; virtual void fetch( const std::string & url, diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 79270c317..fb8bf5bf4 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -174,7 +174,7 @@ struct GitInputScheme : InputScheme for (auto & [name, value] : url.query) { if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys") attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "allRefs" || name == "verifyCommit") + else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit") attrs.emplace(name, Explicit { value == "1" }); else url2.query.emplace(name, value); @@ -199,6 +199,7 @@ struct GitInputScheme : InputScheme "rev", "shallow", "submodules", + "exportIgnore", "lastModified", "revCount", "narHash", @@ -250,6 +251,8 @@ struct GitInputScheme : InputScheme url.query.insert_or_assign("shallow", "1"); if (getSubmodulesAttr(input)) url.query.insert_or_assign("submodules", "1"); + if (maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false)) + url.query.insert_or_assign("exportIgnore", "1"); if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false)) url.query.insert_or_assign("verifyCommit", "1"); auto publicKeys = getPublicKeys(input.attrs); @@ -372,6 +375,11 @@ struct GitInputScheme : InputScheme return maybeGetBoolAttr(input.attrs, "submodules").value_or(false); } + bool getExportIgnoreAttr(const Input & input) const + { + return maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false); + } + bool getAllRefsAttr(const Input & input) const { return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false); @@ -600,7 +608,8 @@ struct GitInputScheme : InputScheme verifyCommit(input, repo); - auto accessor = repo->getAccessor(rev); + bool exportIgnore = getExportIgnoreAttr(input); + auto accessor = repo->getAccessor(rev, exportIgnore); accessor->setPathDisplay("«" + input.to_string() + "»"); @@ -610,7 +619,7 @@ struct GitInputScheme : InputScheme if (getSubmodulesAttr(input)) { std::map> mounts; - for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev)) { + for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) { auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url); debug("Git submodule %s: %s %s %s -> %s", submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved); diff --git a/tests/functional/fetchGit.sh b/tests/functional/fetchGit.sh index f0438f548..46532c025 100644 --- a/tests/functional/fetchGit.sh +++ b/tests/functional/fetchGit.sh @@ -231,12 +231,15 @@ unset _NIX_FORCE_HTTP # Ensure .gitattributes is respected touch $repo/not-exported-file +touch $repo/exported-wonky echo "/not-exported-file export-ignore" >> $repo/.gitattributes -git -C $repo add not-exported-file .gitattributes +echo "/exported-wonky export-ignore=wonk" >> $repo/.gitattributes +git -C $repo add not-exported-file exported-wonky .gitattributes git -C $repo commit -m 'Bla6' rev5=$(git -C $repo rev-parse HEAD) path12=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$repo; rev = \"$rev5\"; }).outPath") [[ ! -e $path12/not-exported-file ]] +[[ -e $path12/exported-wonky ]] # should fail if there is no repo rm -rf $repo/.git