git fetcher: Add exportIgnore parameter

Enabled for fetchGit, which historically had this behavior,
among other behaviors we do not want in fetchGit.

fetchTree disables this parameter by default. It can choose the
simpler behavior, as it is still experimental.

I am not confident that the filtering implementation is
future-proof. It should reuse a source filtering wrapper, which I
believe Eelco has already written but has not merged yet.
This commit is contained in:
Robert Hensing 2023-11-27 22:34:41 +01:00
parent 4d0ecda33e
commit ce6d58a97c
5 changed files with 81 additions and 14 deletions

View file

@ -39,6 +39,10 @@ void emitTreeAttrs(
attrs.alloc("submodules").mkBool( attrs.alloc("submodules").mkBool(
fetchers::maybeGetBoolAttr(input.attrs, "submodules").value_or(false)); fetchers::maybeGetBoolAttr(input.attrs, "submodules").value_or(false));
if (input.getType() == "git")
attrs.alloc("exportIgnore").mkBool(
fetchers::maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false));
if (!forceDirty) { if (!forceDirty) {
if (auto rev = input.getRev()) { if (auto rev = input.getRev()) {
@ -112,6 +116,11 @@ static void fetchTree(
attrs.emplace("type", type.value()); attrs.emplace("type", type.value());
if (params.isFetchGit) {
// Default value; user attrs are assigned later.
attrs.emplace("exportIgnore", Explicit<bool>{true});
}
for (auto & attr : *args[0]->attrs) { for (auto & attr : *args[0]->attrs) {
if (attr.name == state.sType) continue; if (attr.name == state.sType) continue;
state.forceValue(*attr.value, attr.pos); state.forceValue(*attr.value, attr.pos);
@ -593,6 +602,11 @@ static RegisterPrimOp primop_fetchGit({
A Boolean parameter that specifies whether submodules should be checked out. A Boolean parameter that specifies whether submodules should be checked out.
- `exportIgnore` (default: `true`)
A Boolean parameter that specifies whether `export-ignore` from `.gitattributes` should be applied.
This approximates part of the `git archive` behavior.
- `shallow` (default: `false`) - `shallow` (default: `false`)
A Boolean parameter that specifies whether fetching from a shallow remote repository is allowed. A Boolean parameter that specifies whether fetching from a shallow remote repository is allowed.

View file

@ -7,6 +7,7 @@
#include <boost/core/span.hpp> #include <boost/core/span.hpp>
#include <git2/attr.h>
#include <git2/blob.h> #include <git2/blob.h>
#include <git2/commit.h> #include <git2/commit.h>
#include <git2/config.h> #include <git2/config.h>
@ -21,6 +22,7 @@
#include <git2/submodule.h> #include <git2/submodule.h>
#include <git2/tree.h> #include <git2/tree.h>
#include <iostream>
#include <unordered_set> #include <unordered_set>
#include <queue> #include <queue>
#include <regex> #include <regex>
@ -307,7 +309,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return std::nullopt; return std::nullopt;
} }
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) override; std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) override;
std::string resolveSubmoduleUrl( std::string resolveSubmoduleUrl(
const std::string & url, const std::string & url,
@ -340,7 +342,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
return true; return true;
} }
ref<InputAccessor> getAccessor(const Hash & rev) override; ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) override;
static int sidebandProgressCallback(const char * str, int len, void * payload) static int sidebandProgressCallback(const char * str, int len, void * payload)
{ {
@ -460,10 +462,12 @@ struct GitInputAccessor : InputAccessor
{ {
ref<GitRepoImpl> repo; ref<GitRepoImpl> repo;
Tree root; Tree root;
bool exportIgnore;
GitInputAccessor(ref<GitRepoImpl> repo_, const Hash & rev) GitInputAccessor(ref<GitRepoImpl> repo_, const Hash & rev, bool exportIgnore)
: repo(repo_) : repo(repo_)
, root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE)) , root(peelObject<Tree>(*repo, lookupObject(*repo, hashToOID(rev)).get(), GIT_OBJECT_TREE))
, exportIgnore(exportIgnore)
{ {
} }
@ -492,7 +496,7 @@ struct GitInputAccessor : InputAccessor
return Stat { .type = tDirectory }; return Stat { .type = tDirectory };
auto entry = lookup(path); auto entry = lookup(path);
if (!entry) if (!entry || isExportIgnored(path))
return std::nullopt; return std::nullopt;
auto mode = git_tree_entry_filemode(entry); auto mode = git_tree_entry_filemode(entry);
@ -527,6 +531,12 @@ struct GitInputAccessor : InputAccessor
for (size_t n = 0; n < count; ++n) { for (size_t n = 0; n < count; ++n) {
auto entry = git_tree_entry_byindex(tree.get(), n); auto entry = git_tree_entry_byindex(tree.get(), n);
if (exportIgnore) {
if (isExportIgnored(path + git_tree_entry_name(entry))) {
continue;
}
}
// FIXME: add to cache // FIXME: add to cache
res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{}); res.emplace(std::string(git_tree_entry_name(entry)), DirEntry{});
} }
@ -556,6 +566,33 @@ struct GitInputAccessor : InputAccessor
std::unordered_map<CanonPath, TreeEntry> lookupCache; std::unordered_map<CanonPath, TreeEntry> lookupCache;
/* Return true if `path` carries the `export-ignore` attribute in
   `.gitattributes` and this accessor was created with `exportIgnore`
   enabled. Always returns false when `exportIgnore` is disabled.

   Throws `Error` if libgit2 fails to look up the attribute for any
   reason other than "not found". */
bool isExportIgnored(const CanonPath & path) {
    if (!exportIgnore)
        return false;

    const char * exportIgnoreEntry = nullptr;

    // Keep the relative path alive in a named variable for the
    // duration of the libgit2 call.
    const std::string relPath(path.rel());

    // GIT_ATTR_CHECK_INDEX_ONLY:
    // > It will use index only for creating archives or for a bare repo
    // > (if an index has been specified for the bare repo).
    // -- https://github.com/libgit2/libgit2/blob/HEAD/include/git2/attr.h#L113C62-L115C48
    // NOTE(review): this consults the repository's attribute sources, not
    // necessarily the `.gitattributes` of the revision being accessed --
    // confirm this is the intended behavior.
    const int rc = git_attr_get(
        &exportIgnoreEntry,
        *repo,
        GIT_ATTR_CHECK_INDEX_ONLY,
        relPath.c_str(),
        "export-ignore");

    // GIT_ENOTFOUND is a libgit2 *return code*. It must be compared with
    // the function's return value, not with `git_error_last()->klass`,
    // which holds an error *class* (GIT_ERROR_*) and never equals it.
    if (rc == GIT_ENOTFOUND)
        return false;

    if (rc) {
        // git_error_last() may return null on some libgit2 versions if no
        // error details were recorded; avoid dereferencing it blindly.
        auto err = git_error_last();
        throw Error("looking up '%s': %s", showPath(path), err ? err->message : "unknown error");
    }

    // Official git will silently reject export-ignore lines that have
    // values. We do the same.
    return GIT_ATTR_IS_TRUE(exportIgnoreEntry);
}
/* Recursively look up 'path' relative to the root. */ /* Recursively look up 'path' relative to the root. */
git_tree_entry * lookup(const CanonPath & path) git_tree_entry * lookup(const CanonPath & path)
{ {
@ -569,6 +606,10 @@ struct GitInputAccessor : InputAccessor
throw Error("looking up '%s': %s", showPath(path), git_error_last()->message); throw Error("looking up '%s': %s", showPath(path), git_error_last()->message);
} }
if (entry && isExportIgnored(path)) {
entry.reset();
}
i = lookupCache.emplace(path, std::move(entry)).first; i = lookupCache.emplace(path, std::move(entry)).first;
} }
@ -644,17 +685,17 @@ struct GitInputAccessor : InputAccessor
} }
}; };
ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev) ref<InputAccessor> GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore)
{ {
return make_ref<GitInputAccessor>(ref<GitRepoImpl>(shared_from_this()), rev); return make_ref<GitInputAccessor>(ref<GitRepoImpl>(shared_from_this()), rev, exportIgnore);
} }
std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev) std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules(const Hash & rev, bool exportIgnore)
{ {
/* Read the .gitmodules files from this revision. */ /* Read the .gitmodules files from this revision. */
CanonPath modulesFile(".gitmodules"); CanonPath modulesFile(".gitmodules");
auto accessor = getAccessor(rev); auto accessor = getAccessor(rev, exportIgnore);
if (!accessor->pathExists(modulesFile)) return {}; if (!accessor->pathExists(modulesFile)) return {};
/* Parse it and get the revision of each submodule. */ /* Parse it and get the revision of each submodule. */

View file

@ -57,7 +57,7 @@ struct GitRepo
* Return the submodules of this repo at the indicated revision, * Return the submodules of this repo at the indicated revision,
* along with the revision of each submodule. * along with the revision of each submodule.
*/ */
virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) = 0; virtual std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev, bool exportIgnore) = 0;
virtual std::string resolveSubmoduleUrl( virtual std::string resolveSubmoduleUrl(
const std::string & url, const std::string & url,
@ -71,7 +71,7 @@ struct GitRepo
virtual bool hasObject(const Hash & oid) = 0; virtual bool hasObject(const Hash & oid) = 0;
virtual ref<InputAccessor> getAccessor(const Hash & rev) = 0; virtual ref<InputAccessor> getAccessor(const Hash & rev, bool exportIgnore) = 0;
virtual void fetch( virtual void fetch(
const std::string & url, const std::string & url,

View file

@ -174,7 +174,7 @@ struct GitInputScheme : InputScheme
for (auto & [name, value] : url.query) { for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys") if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys")
attrs.emplace(name, value); attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules" || name == "allRefs" || name == "verifyCommit") else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit")
attrs.emplace(name, Explicit<bool> { value == "1" }); attrs.emplace(name, Explicit<bool> { value == "1" });
else else
url2.query.emplace(name, value); url2.query.emplace(name, value);
@ -199,6 +199,7 @@ struct GitInputScheme : InputScheme
"rev", "rev",
"shallow", "shallow",
"submodules", "submodules",
"exportIgnore",
"lastModified", "lastModified",
"revCount", "revCount",
"narHash", "narHash",
@ -250,6 +251,8 @@ struct GitInputScheme : InputScheme
url.query.insert_or_assign("shallow", "1"); url.query.insert_or_assign("shallow", "1");
if (getSubmodulesAttr(input)) if (getSubmodulesAttr(input))
url.query.insert_or_assign("submodules", "1"); url.query.insert_or_assign("submodules", "1");
if (maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false))
url.query.insert_or_assign("exportIgnore", "1");
if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false)) if (maybeGetBoolAttr(input.attrs, "verifyCommit").value_or(false))
url.query.insert_or_assign("verifyCommit", "1"); url.query.insert_or_assign("verifyCommit", "1");
auto publicKeys = getPublicKeys(input.attrs); auto publicKeys = getPublicKeys(input.attrs);
@ -372,6 +375,11 @@ struct GitInputScheme : InputScheme
return maybeGetBoolAttr(input.attrs, "submodules").value_or(false); return maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
} }
/* Read the boolean `exportIgnore` attribute of `input`.
   An input that does not set the attribute is treated as `false`. */
bool getExportIgnoreAttr(const Input & input) const
{
    const auto exportIgnoreAttr = maybeGetBoolAttr(input.attrs, "exportIgnore");
    return exportIgnoreAttr.value_or(false);
}
bool getAllRefsAttr(const Input & input) const bool getAllRefsAttr(const Input & input) const
{ {
return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false); return maybeGetBoolAttr(input.attrs, "allRefs").value_or(false);
@ -600,7 +608,8 @@ struct GitInputScheme : InputScheme
verifyCommit(input, repo); verifyCommit(input, repo);
auto accessor = repo->getAccessor(rev); bool exportIgnore = getExportIgnoreAttr(input);
auto accessor = repo->getAccessor(rev, exportIgnore);
accessor->setPathDisplay("«" + input.to_string() + "»"); accessor->setPathDisplay("«" + input.to_string() + "»");
@ -610,7 +619,7 @@ struct GitInputScheme : InputScheme
if (getSubmodulesAttr(input)) { if (getSubmodulesAttr(input)) {
std::map<CanonPath, nix::ref<InputAccessor>> mounts; std::map<CanonPath, nix::ref<InputAccessor>> mounts;
for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev)) { for (auto & [submodule, submoduleRev] : repo->getSubmodules(rev, exportIgnore)) {
auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url); auto resolved = repo->resolveSubmoduleUrl(submodule.url, repoInfo.url);
debug("Git submodule %s: %s %s %s -> %s", debug("Git submodule %s: %s %s %s -> %s",
submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved); submodule.path, submodule.url, submodule.branch, submoduleRev.gitRev(), resolved);

View file

@ -231,12 +231,15 @@ unset _NIX_FORCE_HTTP
# Ensure .gitattributes is respected # Ensure .gitattributes is respected
touch $repo/not-exported-file touch $repo/not-exported-file
touch $repo/exported-wonky
echo "/not-exported-file export-ignore" >> $repo/.gitattributes echo "/not-exported-file export-ignore" >> $repo/.gitattributes
git -C $repo add not-exported-file .gitattributes echo "/exported-wonky export-ignore=wonk" >> $repo/.gitattributes
git -C $repo add not-exported-file exported-wonky .gitattributes
git -C $repo commit -m 'Bla6' git -C $repo commit -m 'Bla6'
rev5=$(git -C $repo rev-parse HEAD) rev5=$(git -C $repo rev-parse HEAD)
path12=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$repo; rev = \"$rev5\"; }).outPath") path12=$(nix eval --impure --raw --expr "(builtins.fetchGit { url = file://$repo; rev = \"$rev5\"; }).outPath")
[[ ! -e $path12/not-exported-file ]] [[ ! -e $path12/not-exported-file ]]
[[ -e $path12/exported-wonky ]]
# should fail if there is no repo # should fail if there is no repo
rm -rf $repo/.git rm -rf $repo/.git