Improve the `FileSystemObjectSink` interface

More invariants are enforced in the type, and less state needs to be
stored in the main sink itself. The method here is roughly that known as
"session types".

Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
This commit is contained in:
John Ericson 2023-12-20 14:47:05 -05:00
parent 966d6fcd01
commit 6365bbfa81
9 changed files with 357 additions and 219 deletions

View File

@ -19,6 +19,35 @@ struct NarMember
std::map<std::string, NarMember> children;
};
struct NarMemberConstructor : CreateRegularFileSink
{
private:
NarMember & narMember;
uint64_t & pos;
public:
NarMemberConstructor(NarMember & nm, uint64_t & pos)
: narMember(nm), pos(pos)
{ }
void isExecutable() override
{
narMember.stat.isExecutable = true;
}
void preallocateContents(uint64_t size) override
{
narMember.stat.fileSize = size;
narMember.stat.narOffset = pos;
}
void operator () (std::string_view data) override
{ }
};
struct NarAccessor : public SourceAccessor
{
std::optional<const std::string> nar;
@ -42,7 +71,7 @@ struct NarAccessor : public SourceAccessor
: acc(acc), source(source)
{ }
void createMember(const Path & path, NarMember member)
NarMember & createMember(const Path & path, NarMember member)
{
size_t level = std::count(path.begin(), path.end(), '/');
while (parents.size() > level) parents.pop();
@ -50,11 +79,14 @@ struct NarAccessor : public SourceAccessor
if (parents.empty()) {
acc.root = std::move(member);
parents.push(&acc.root);
return acc.root;
} else {
if (parents.top()->stat.type != Type::tDirectory)
throw Error("NAR file missing parent directory of path '%s'", path);
auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member));
parents.push(&result.first->second);
auto & ref = result.first->second;
parents.push(&ref);
return ref;
}
}
@ -68,34 +100,18 @@ struct NarAccessor : public SourceAccessor
} });
}
void createRegularFile(const Path & path) override
void createRegularFile(const Path & path, std::function<void(CreateRegularFileSink &)> func) override
{
createMember(path, NarMember{ .stat = {
auto & nm = createMember(path, NarMember{ .stat = {
.type = Type::tRegular,
.fileSize = 0,
.isExecutable = false,
.narOffset = 0
} });
NarMemberConstructor nmc { nm, pos };
func(nmc);
}
void closeRegularFile() override
{ }
void isExecutable() override
{
parents.top()->stat.isExecutable = true;
}
void preallocateContents(uint64_t size) override
{
auto & st = parents.top()->stat;
st.fileSize = size;
st.narOffset = pos;
}
void receiveContents(std::string_view data) override
{ }
void createSymlink(const Path & path, const std::string & target) override
{
createMember(path,

View File

@ -133,7 +133,7 @@ static SerialisationError badArchive(const std::string & s)
}
static void parseContents(FileSystemObjectSink & sink, Source & source, const Path & path)
static void parseContents(CreateRegularFileSink & sink, Source & source)
{
uint64_t size = readLongLong(source);
@ -147,7 +147,7 @@ static void parseContents(FileSystemObjectSink & sink, Source & source, const Pa
auto n = buf.size();
if ((uint64_t)n > left) n = left;
source(buf.data(), n);
sink.receiveContents({buf.data(), n});
sink({buf.data(), n});
left -= n;
}
@ -171,95 +171,107 @@ static void parse(FileSystemObjectSink & sink, Source & source, const Path & pat
s = readString(source);
if (s != "(") throw badArchive("expected open tag");
enum { tpUnknown, tpRegular, tpDirectory, tpSymlink } type = tpUnknown;
std::map<Path, int, CaseInsensitiveCompare> names;
while (1) {
auto getString = [&]() {
checkInterrupt();
return readString(source);
};
s = readString(source);
// For first iteration
s = getString();
while (1) {
if (s == ")") {
break;
}
else if (s == "type") {
if (type != tpUnknown)
throw badArchive("multiple type fields");
std::string t = readString(source);
std::string t = getString();
if (t == "regular") {
type = tpRegular;
sink.createRegularFile(path);
sink.createRegularFile(path, [&](auto & crf) {
while (1) {
s = getString();
if (s == "contents") {
parseContents(crf, source);
}
else if (s == "executable") {
auto s2 = getString();
if (s2 != "") throw badArchive("executable marker has non-empty value");
crf.isExecutable();
}
else break;
}
});
}
else if (t == "directory") {
sink.createDirectory(path);
type = tpDirectory;
while (1) {
s = getString();
if (s == "entry") {
std::string name, prevName;
s = getString();
if (s != "(") throw badArchive("expected open tag");
while (1) {
s = getString();
if (s == ")") {
break;
} else if (s == "name") {
name = getString();
if (name.empty() || name == "." || name == ".." || name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos)
throw Error("NAR contains invalid file name '%1%'", name);
if (name <= prevName)
throw Error("NAR directory is not sorted");
prevName = name;
if (archiveSettings.useCaseHack) {
auto i = names.find(name);
if (i != names.end()) {
debug("case collision between '%1%' and '%2%'", i->first, name);
name += caseHackSuffix;
name += std::to_string(++i->second);
} else
names[name] = 0;
}
} else if (s == "node") {
if (name.empty()) throw badArchive("entry name missing");
parse(sink, source, path + "/" + name);
} else
throw badArchive("unknown field " + s);
}
}
else break;
}
}
else if (t == "symlink") {
type = tpSymlink;
s = getString();
if (s != "target")
throw badArchive("expected 'target' got " + s);
std::string target = getString();
sink.createSymlink(path, target);
// for the next iteration
s = getString();
}
else throw badArchive("unknown file type " + t);
}
else if (s == "contents" && type == tpRegular) {
parseContents(sink, source, path);
sink.closeRegularFile();
}
else if (s == "executable" && type == tpRegular) {
auto s = readString(source);
if (s != "") throw badArchive("executable marker has non-empty value");
sink.isExecutable();
}
else if (s == "entry" && type == tpDirectory) {
std::string name, prevName;
s = readString(source);
if (s != "(") throw badArchive("expected open tag");
while (1) {
checkInterrupt();
s = readString(source);
if (s == ")") {
break;
} else if (s == "name") {
name = readString(source);
if (name.empty() || name == "." || name == ".." || name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos)
throw Error("NAR contains invalid file name '%1%'", name);
if (name <= prevName)
throw Error("NAR directory is not sorted");
prevName = name;
if (archiveSettings.useCaseHack) {
auto i = names.find(name);
if (i != names.end()) {
debug("case collision between '%1%' and '%2%'", i->first, name);
name += caseHackSuffix;
name += std::to_string(++i->second);
} else
names[name] = 0;
}
} else if (s == "node") {
if (name.empty()) throw badArchive("entry name missing");
parse(sink, source, path + "/" + name);
} else
throw badArchive("unknown field " + s);
}
}
else if (s == "target" && type == tpSymlink) {
std::string target = readString(source);
sink.createSymlink(path, target);
}
else
throw badArchive("unknown field " + s);
}

View File

@ -19,16 +19,12 @@ void copyRecursive(
case SourceAccessor::tRegular:
{
sink.createRegularFile(to);
if (stat.isExecutable)
sink.isExecutable();
LambdaSink sink2 {
[&](auto d) {
sink.receiveContents(d);
}
};
accessor.readFile(from, sink2, [&](uint64_t size) {
sink.preallocateContents(size);
sink.createRegularFile(to, [&](CreateRegularFileSink & crf) {
if (stat.isExecutable)
crf.isExecutable();
accessor.readFile(from, crf, [&](uint64_t size) {
crf.preallocateContents(size);
});
});
break;
}
@ -71,20 +67,24 @@ void RestoreSink::createDirectory(const Path & path)
throw SysError("creating directory '%1%'", p);
};
void RestoreSink::createRegularFile(const Path & path)
struct RestoreRegularFile : CreateRegularFileSink {
AutoCloseFD fd;
void operator () (std::string_view data) override;
void isExecutable() override;
void preallocateContents(uint64_t size) override;
};
void RestoreSink::createRegularFile(const Path & path, std::function<void(CreateRegularFileSink &)> func)
{
Path p = dstPath + path;
fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666);
if (!fd) throw SysError("creating file '%1%'", p);
RestoreRegularFile crf;
crf.fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666);
if (!crf.fd) throw SysError("creating file '%1%'", p);
func(crf);
}
void RestoreSink::closeRegularFile()
{
/* Call close explicitly to make sure the error is checked */
fd.close();
}
void RestoreSink::isExecutable()
void RestoreRegularFile::isExecutable()
{
struct stat st;
if (fstat(fd.get(), &st) == -1)
@ -93,7 +93,7 @@ void RestoreSink::isExecutable()
throw SysError("fchmod");
}
void RestoreSink::preallocateContents(uint64_t len)
void RestoreRegularFile::preallocateContents(uint64_t len)
{
if (!restoreSinkSettings.preallocateContents)
return;
@ -111,7 +111,7 @@ void RestoreSink::preallocateContents(uint64_t len)
#endif
}
void RestoreSink::receiveContents(std::string_view data)
void RestoreRegularFile::operator () (std::string_view data)
{
writeFull(fd.get(), data);
}
@ -122,4 +122,32 @@ void RestoreSink::createSymlink(const Path & path, const std::string & target)
nix::createSymlink(target, p);
}
void RegularFileSink::createRegularFile(const Path & path, std::function<void(CreateRegularFileSink &)> func)
{
struct CRF : CreateRegularFileSink {
RegularFileSink & back;
CRF(RegularFileSink & back) : back(back) {}
void operator () (std::string_view data) override
{
back.sink(data);
}
void isExecutable() override {}
} crf { *this };
func(crf);
}
void NullFileSystemObjectSink::createRegularFile(const Path & path, std::function<void(CreateRegularFileSink &)> func)
{
struct : CreateRegularFileSink {
void operator () (std::string_view data) override {}
void isExecutable() override {}
} crf;
// Even though `NullFileSystemObjectSink` doesn't do anything, it's important
// that we call the function, to e.g. advance the parser using this
// sink.
func(crf);
}
}

View File

@ -9,18 +9,13 @@
namespace nix {
/**
* \todo Fix this API, it sucks.
* Actions on an open regular file in the process of creating it.
*
* See `FileSystemObjectSink::createRegularFile`.
*/
struct FileSystemObjectSink
struct CreateRegularFileSink : Sink
{
virtual void createDirectory(const Path & path) = 0;
virtual void createRegularFile(const Path & path) = 0;
virtual void receiveContents(std::string_view data) = 0;
virtual void isExecutable() = 0;
virtual void closeRegularFile() = 0;
virtual void createSymlink(const Path & path, const std::string & target) = 0;
/**
* An optimization. By default, do nothing.
@ -28,8 +23,24 @@ struct FileSystemObjectSink
virtual void preallocateContents(uint64_t size) { };
};
struct FileSystemObjectSink
{
virtual void createDirectory(const Path & path) = 0;
/**
* This function in general is no re-entrant. Only one file can be
* written at a time.
*/
virtual void createRegularFile(
const Path & path,
std::function<void(CreateRegularFileSink &)>) = 0;
virtual void createSymlink(const Path & path, const std::string & target) = 0;
};
/**
* Recusively copy file system objects from the source into the sink.
* Recursively copy file system objects from the source into the sink.
*/
void copyRecursive(
SourceAccessor & accessor, const CanonPath & sourcePath,
@ -41,11 +52,10 @@ void copyRecursive(
struct NullFileSystemObjectSink : FileSystemObjectSink
{
void createDirectory(const Path & path) override { }
void receiveContents(std::string_view data) override { }
void createSymlink(const Path & path, const std::string & target) override { }
void createRegularFile(const Path & path) override { }
void closeRegularFile() override { }
void isExecutable() override { }
void createRegularFile(
const Path & path,
std::function<void(CreateRegularFileSink &)>) override;
};
/**
@ -57,17 +67,11 @@ struct RestoreSink : FileSystemObjectSink
void createDirectory(const Path & path) override;
void createRegularFile(const Path & path) override;
void receiveContents(std::string_view data) override;
void isExecutable() override;
void closeRegularFile() override;
void createRegularFile(
const Path & path,
std::function<void(CreateRegularFileSink &)>) override;
void createSymlink(const Path & path, const std::string & target) override;
void preallocateContents(uint64_t size) override;
private:
AutoCloseFD fd;
};
/**
@ -87,19 +91,14 @@ struct RegularFileSink : FileSystemObjectSink
regular = false;
}
void receiveContents(std::string_view data) override
{
sink(data);
}
void createSymlink(const Path & path, const std::string & target) override
{
regular = false;
}
void createRegularFile(const Path & path) override { }
void closeRegularFile() override { }
void isExecutable() override { }
void createRegularFile(
const Path & path,
std::function<void(CreateRegularFileSink &)>) override;
};
}

View File

@ -52,24 +52,22 @@ static std::string getString(Source & source, int n)
return v;
}
void parse(
void parseBlob(
FileSystemObjectSink & sink,
const Path & sinkPath,
Source & source,
std::function<SinkHook> hook,
bool executable,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto type = getString(source, 5);
if (type == "blob ") {
sink.createRegularFile(sinkPath);
sink.createRegularFile(sinkPath, [&](auto & crf) {
if (executable)
crf.isExecutable();
unsigned long long size = std::stoi(getStringUntil(source, 0));
sink.preallocateContents(size);
crf.preallocateContents(size);
unsigned long long left = size;
std::string buf;
@ -79,47 +77,91 @@ void parse(
checkInterrupt();
buf.resize(std::min((unsigned long long)buf.capacity(), left));
source(buf);
sink.receiveContents(buf);
crf(buf);
left -= buf.size();
}
});
}
void parseTree(
FileSystemObjectSink & sink,
const Path & sinkPath,
Source & source,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
unsigned long long size = std::stoi(getStringUntil(source, 0));
unsigned long long left = size;
sink.createDirectory(sinkPath);
while (left) {
std::string perms = getStringUntil(source, ' ');
left -= perms.size();
left -= 1;
RawMode rawMode = std::stoi(perms, 0, 8);
auto modeOpt = decodeMode(rawMode);
if (!modeOpt)
throw Error("Unknown Git permission: %o", perms);
auto mode = std::move(*modeOpt);
std::string name = getStringUntil(source, '\0');
left -= name.size();
left -= 1;
std::string hashs = getString(source, 20);
left -= 20;
Hash hash(HashAlgorithm::SHA1);
std::copy(hashs.begin(), hashs.end(), hash.hash);
hook(name, TreeEntry {
.mode = mode,
.hash = hash,
});
}
}
ObjectType parseObjectType(
Source & source,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto type = getString(source, 5);
if (type == "blob ") {
return ObjectType::Blob;
} else if (type == "tree ") {
unsigned long long size = std::stoi(getStringUntil(source, 0));
unsigned long long left = size;
sink.createDirectory(sinkPath);
while (left) {
std::string perms = getStringUntil(source, ' ');
left -= perms.size();
left -= 1;
RawMode rawMode = std::stoi(perms, 0, 8);
auto modeOpt = decodeMode(rawMode);
if (!modeOpt)
throw Error("Unknown Git permission: %o", perms);
auto mode = std::move(*modeOpt);
std::string name = getStringUntil(source, '\0');
left -= name.size();
left -= 1;
std::string hashs = getString(source, 20);
left -= 20;
Hash hash(HashAlgorithm::SHA1);
std::copy(hashs.begin(), hashs.end(), hash.hash);
hook(name, TreeEntry {
.mode = mode,
.hash = hash,
});
if (mode == Mode::Executable)
sink.isExecutable();
}
return ObjectType::Tree;
} else throw Error("input doesn't look like a Git object");
}
void parse(
FileSystemObjectSink & sink,
const Path & sinkPath,
Source & source,
bool executable,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings)
{
xpSettings.require(Xp::GitHashing);
auto type = parseObjectType(source, xpSettings);
switch (type) {
case ObjectType::Blob:
parseBlob(sink, sinkPath, source, executable, xpSettings);
break;
case ObjectType::Tree:
parseTree(sink, sinkPath, source, hook, xpSettings);
break;
default:
assert(false);
};
}
std::optional<Mode> convertMode(SourceAccessor::Type type)
{
@ -135,7 +177,7 @@ std::optional<Mode> convertMode(SourceAccessor::Type type)
void restore(FileSystemObjectSink & sink, Source & source, std::function<RestoreHook> hook)
{
parse(sink, "", source, [&](Path name, TreeEntry entry) {
parse(sink, "", source, false, [&](Path name, TreeEntry entry) {
auto [accessor, from] = hook(entry.hash);
auto stat = accessor->lstat(from);
auto gotOpt = convertMode(stat.type);

View File

@ -13,12 +13,19 @@
namespace nix::git {
enum struct ObjectType {
Blob,
Tree,
//Commit,
//Tag,
};
using RawMode = uint32_t;
enum struct Mode : RawMode {
Directory = 0040000,
Executable = 0100755,
Regular = 0100644,
Executable = 0100755,
Symlink = 0120000,
};
@ -59,9 +66,34 @@ using Tree = std::map<std::string, TreeEntry>;
*/
using SinkHook = void(const Path & name, TreeEntry entry);
/**
* Parse the "blob " or "tree " prefix.
*
* @throws if prefix not recognized
*/
ObjectType parseObjectType(
Source & source,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
void parseBlob(
FileSystemObjectSink & sink, const Path & sinkPath,
Source & source,
bool executable,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
void parseTree(
FileSystemObjectSink & sink, const Path & sinkPath,
Source & source,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);
/**
* Helper putting the previous three `parse*` functions together.
*/
void parse(
FileSystemObjectSink & sink, const Path & sinkPath,
Source & source,
bool executable,
std::function<SinkHook> hook,
const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings);

View File

@ -134,36 +134,43 @@ void MemorySink::createDirectory(const Path & path)
throw Error("file '%s' is not a directory", path);
};
void MemorySink::createRegularFile(const Path & path)
struct CreateMemoryRegularFile : CreateRegularFileSink {
File::Regular & regularFile;
CreateMemoryRegularFile(File::Regular & r)
: regularFile(r)
{ }
void operator () (std::string_view data) override;
void isExecutable() override;
void preallocateContents(uint64_t size) override;
};
void MemorySink::createRegularFile(const Path & path, std::function<void(CreateRegularFileSink &)> func)
{
auto * f = dst.open(CanonPath{path}, File { File::Regular {} });
if (!f)
throw Error("file '%s' cannot be made because some parent file is not a directory", path);
if (!(r = std::get_if<File::Regular>(&f->raw)))
if (auto * rp = std::get_if<File::Regular>(&f->raw)) {
CreateMemoryRegularFile crf { *rp };
func(crf);
} else
throw Error("file '%s' is not a regular file", path);
}
void MemorySink::closeRegularFile()
void CreateMemoryRegularFile::isExecutable()
{
r = nullptr;
regularFile.executable = true;
}
void MemorySink::isExecutable()
void CreateMemoryRegularFile::preallocateContents(uint64_t len)
{
assert(r);
r->executable = true;
regularFile.contents.reserve(len);
}
void MemorySink::preallocateContents(uint64_t len)
void CreateMemoryRegularFile::operator () (std::string_view data)
{
assert(r);
r->contents.reserve(len);
}
void MemorySink::receiveContents(std::string_view data)
{
assert(r);
r->contents += data;
regularFile.contents += data;
}
void MemorySink::createSymlink(const Path & path, const std::string & target)

View File

@ -83,17 +83,11 @@ struct MemorySink : FileSystemObjectSink
void createDirectory(const Path & path) override;
void createRegularFile(const Path & path) override;
void receiveContents(std::string_view data) override;
void isExecutable() override;
void closeRegularFile() override;
void createRegularFile(
const Path & path,
std::function<void(CreateRegularFileSink &)>) override;
void createSymlink(const Path & path, const std::string & target) override;
void preallocateContents(uint64_t size) override;
private:
MemorySourceAccessor::File::Regular * r;
};
}

View File

@ -66,7 +66,8 @@ TEST_F(GitTest, blob_read) {
StringSource in { encoded };
StringSink out;
RegularFileSink out2 { out };
parse(out2, "", in, [](auto &, auto) {}, mockXpSettings);
ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Blob);
parseBlob(out2, "", in, false, mockXpSettings);
auto expected = readFile(goldenMaster("hello-world.bin"));
@ -121,7 +122,8 @@ TEST_F(GitTest, tree_read) {
StringSource in { encoded };
NullFileSystemObjectSink out;
Tree got;
parse(out, "", in, [&](auto & name, auto entry) {
ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Tree);
parseTree(out, "", in, [&](auto & name, auto entry) {
auto name2 = name;
if (entry.mode == Mode::Directory)
name2 += '/';
@ -193,15 +195,21 @@ TEST_F(GitTest, both_roundrip) {
MemorySink sinkFiles2 { files2 };
std::function<void(const Path, const Hash &)> mkSinkHook;
mkSinkHook = [&](const Path prefix, const Hash & hash) {
std::function<void(const Path, const Hash &, bool)> mkSinkHook;
mkSinkHook = [&](auto prefix, auto & hash, auto executable) {
StringSource in { cas[hash] };
parse(sinkFiles2, prefix, in, [&](const Path & name, const auto & entry) {
mkSinkHook(prefix + "/" + name, entry.hash);
}, mockXpSettings);
parse(
sinkFiles2, prefix, in, executable,
[&](const Path & name, const auto & entry) {
mkSinkHook(
prefix + "/" + name,
entry.hash,
entry.mode == Mode::Executable);
},
mockXpSettings);
};
mkSinkHook("", root.hash);
mkSinkHook("", root.hash, false);
ASSERT_EQ(files, files2);
}