From 6365bbfa8120007719156b45482568aca6c74f26 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 20 Dec 2023 14:47:05 -0500 Subject: [PATCH] Improve the `FileSystemObjectSink` interface More invariants are enforced in the type, and less state needs to be stored in the main sink itself. The method here is roughly that known as "session types". Co-authored-by: Robert Hensing --- src/libstore/nar-accessor.cc | 60 +++++++---- src/libutil/archive.cc | 144 ++++++++++++++------------ src/libutil/fs-sink.cc | 72 +++++++++---- src/libutil/fs-sink.hh | 61 ++++++----- src/libutil/git.cc | 130 +++++++++++++++-------- src/libutil/git.hh | 34 +++++- src/libutil/memory-source-accessor.cc | 39 ++++--- src/libutil/memory-source-accessor.hh | 12 +-- tests/unit/libutil/git.cc | 24 +++-- 9 files changed, 357 insertions(+), 219 deletions(-) diff --git a/src/libstore/nar-accessor.cc b/src/libstore/nar-accessor.cc index 4bc68a5ae..b13e4c52c 100644 --- a/src/libstore/nar-accessor.cc +++ b/src/libstore/nar-accessor.cc @@ -19,6 +19,35 @@ struct NarMember std::map children; }; +struct NarMemberConstructor : CreateRegularFileSink +{ +private: + + NarMember & narMember; + + uint64_t & pos; + +public: + + NarMemberConstructor(NarMember & nm, uint64_t & pos) + : narMember(nm), pos(pos) + { } + + void isExecutable() override + { + narMember.stat.isExecutable = true; + } + + void preallocateContents(uint64_t size) override + { + narMember.stat.fileSize = size; + narMember.stat.narOffset = pos; + } + + void operator () (std::string_view data) override + { } +}; + struct NarAccessor : public SourceAccessor { std::optional nar; @@ -42,7 +71,7 @@ struct NarAccessor : public SourceAccessor : acc(acc), source(source) { } - void createMember(const Path & path, NarMember member) + NarMember & createMember(const Path & path, NarMember member) { size_t level = std::count(path.begin(), path.end(), '/'); while (parents.size() > level) parents.pop(); @@ -50,11 +79,14 @@ struct NarAccessor : public 
SourceAccessor if (parents.empty()) { acc.root = std::move(member); parents.push(&acc.root); + return acc.root; } else { if (parents.top()->stat.type != Type::tDirectory) throw Error("NAR file missing parent directory of path '%s'", path); auto result = parents.top()->children.emplace(baseNameOf(path), std::move(member)); - parents.push(&result.first->second); + auto & ref = result.first->second; + parents.push(&ref); + return ref; } } @@ -68,34 +100,18 @@ struct NarAccessor : public SourceAccessor } }); } - void createRegularFile(const Path & path) override + void createRegularFile(const Path & path, std::function func) override { - createMember(path, NarMember{ .stat = { + auto & nm = createMember(path, NarMember{ .stat = { .type = Type::tRegular, .fileSize = 0, .isExecutable = false, .narOffset = 0 } }); + NarMemberConstructor nmc { nm, pos }; + func(nmc); } - void closeRegularFile() override - { } - - void isExecutable() override - { - parents.top()->stat.isExecutable = true; - } - - void preallocateContents(uint64_t size) override - { - auto & st = parents.top()->stat; - st.fileSize = size; - st.narOffset = pos; - } - - void receiveContents(std::string_view data) override - { } - void createSymlink(const Path & path, const std::string & target) override { createMember(path, diff --git a/src/libutil/archive.cc b/src/libutil/archive.cc index 17886dd19..6062392cd 100644 --- a/src/libutil/archive.cc +++ b/src/libutil/archive.cc @@ -133,7 +133,7 @@ static SerialisationError badArchive(const std::string & s) } -static void parseContents(FileSystemObjectSink & sink, Source & source, const Path & path) +static void parseContents(CreateRegularFileSink & sink, Source & source) { uint64_t size = readLongLong(source); @@ -147,7 +147,7 @@ static void parseContents(FileSystemObjectSink & sink, Source & source, const Pa auto n = buf.size(); if ((uint64_t)n > left) n = left; source(buf.data(), n); - sink.receiveContents({buf.data(), n}); + sink({buf.data(), n}); left -= n; } 
@@ -171,95 +171,107 @@ static void parse(FileSystemObjectSink & sink, Source & source, const Path & pat s = readString(source); if (s != "(") throw badArchive("expected open tag"); - enum { tpUnknown, tpRegular, tpDirectory, tpSymlink } type = tpUnknown; - std::map names; - while (1) { + auto getString = [&]() { checkInterrupt(); + return readString(source); + }; - s = readString(source); + // For first iteration + s = getString(); + + while (1) { if (s == ")") { break; } else if (s == "type") { - if (type != tpUnknown) - throw badArchive("multiple type fields"); - std::string t = readString(source); + std::string t = getString(); if (t == "regular") { - type = tpRegular; - sink.createRegularFile(path); + sink.createRegularFile(path, [&](auto & crf) { + while (1) { + s = getString(); + + if (s == "contents") { + parseContents(crf, source); + } + + else if (s == "executable") { + auto s2 = getString(); + if (s2 != "") throw badArchive("executable marker has non-empty value"); + crf.isExecutable(); + } + + else break; + } + }); } else if (t == "directory") { sink.createDirectory(path); - type = tpDirectory; + + while (1) { + s = getString(); + + if (s == "entry") { + std::string name, prevName; + + s = getString(); + if (s != "(") throw badArchive("expected open tag"); + + while (1) { + s = getString(); + + if (s == ")") { + break; + } else if (s == "name") { + name = getString(); + if (name.empty() || name == "." || name == ".." 
|| name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos) + throw Error("NAR contains invalid file name '%1%'", name); + if (name <= prevName) + throw Error("NAR directory is not sorted"); + prevName = name; + if (archiveSettings.useCaseHack) { + auto i = names.find(name); + if (i != names.end()) { + debug("case collision between '%1%' and '%2%'", i->first, name); + name += caseHackSuffix; + name += std::to_string(++i->second); + } else + names[name] = 0; + } + } else if (s == "node") { + if (name.empty()) throw badArchive("entry name missing"); + parse(sink, source, path + "/" + name); + } else + throw badArchive("unknown field " + s); + } + } + + else break; + } } else if (t == "symlink") { - type = tpSymlink; + s = getString(); + + if (s != "target") + throw badArchive("expected 'target' got " + s); + + std::string target = getString(); + sink.createSymlink(path, target); + + // for the next iteration + s = getString(); } else throw badArchive("unknown file type " + t); } - else if (s == "contents" && type == tpRegular) { - parseContents(sink, source, path); - sink.closeRegularFile(); - } - - else if (s == "executable" && type == tpRegular) { - auto s = readString(source); - if (s != "") throw badArchive("executable marker has non-empty value"); - sink.isExecutable(); - } - - else if (s == "entry" && type == tpDirectory) { - std::string name, prevName; - - s = readString(source); - if (s != "(") throw badArchive("expected open tag"); - - while (1) { - checkInterrupt(); - - s = readString(source); - - if (s == ")") { - break; - } else if (s == "name") { - name = readString(source); - if (name.empty() || name == "." || name == ".." 
|| name.find('/') != std::string::npos || name.find((char) 0) != std::string::npos) - throw Error("NAR contains invalid file name '%1%'", name); - if (name <= prevName) - throw Error("NAR directory is not sorted"); - prevName = name; - if (archiveSettings.useCaseHack) { - auto i = names.find(name); - if (i != names.end()) { - debug("case collision between '%1%' and '%2%'", i->first, name); - name += caseHackSuffix; - name += std::to_string(++i->second); - } else - names[name] = 0; - } - } else if (s == "node") { - if (name.empty()) throw badArchive("entry name missing"); - parse(sink, source, path + "/" + name); - } else - throw badArchive("unknown field " + s); - } - } - - else if (s == "target" && type == tpSymlink) { - std::string target = readString(source); - sink.createSymlink(path, target); - } - else throw badArchive("unknown field " + s); } diff --git a/src/libutil/fs-sink.cc b/src/libutil/fs-sink.cc index bf44de92d..b6f8db592 100644 --- a/src/libutil/fs-sink.cc +++ b/src/libutil/fs-sink.cc @@ -19,16 +19,12 @@ void copyRecursive( case SourceAccessor::tRegular: { - sink.createRegularFile(to); - if (stat.isExecutable) - sink.isExecutable(); - LambdaSink sink2 { - [&](auto d) { - sink.receiveContents(d); - } - }; - accessor.readFile(from, sink2, [&](uint64_t size) { - sink.preallocateContents(size); + sink.createRegularFile(to, [&](CreateRegularFileSink & crf) { + if (stat.isExecutable) + crf.isExecutable(); + accessor.readFile(from, crf, [&](uint64_t size) { + crf.preallocateContents(size); + }); }); break; } @@ -71,20 +67,24 @@ void RestoreSink::createDirectory(const Path & path) throw SysError("creating directory '%1%'", p); }; -void RestoreSink::createRegularFile(const Path & path) +struct RestoreRegularFile : CreateRegularFileSink { + AutoCloseFD fd; + + void operator () (std::string_view data) override; + void isExecutable() override; + void preallocateContents(uint64_t size) override; +}; + +void RestoreSink::createRegularFile(const Path & path, 
std::function func) { Path p = dstPath + path; - fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666); - if (!fd) throw SysError("creating file '%1%'", p); + RestoreRegularFile crf; + crf.fd = open(p.c_str(), O_CREAT | O_EXCL | O_WRONLY | O_CLOEXEC, 0666); + if (!crf.fd) throw SysError("creating file '%1%'", p); + func(crf); } -void RestoreSink::closeRegularFile() -{ - /* Call close explicitly to make sure the error is checked */ - fd.close(); -} - -void RestoreSink::isExecutable() +void RestoreRegularFile::isExecutable() { struct stat st; if (fstat(fd.get(), &st) == -1) @@ -93,7 +93,7 @@ void RestoreSink::isExecutable() throw SysError("fchmod"); } -void RestoreSink::preallocateContents(uint64_t len) +void RestoreRegularFile::preallocateContents(uint64_t len) { if (!restoreSinkSettings.preallocateContents) return; @@ -111,7 +111,7 @@ void RestoreSink::preallocateContents(uint64_t len) #endif } -void RestoreSink::receiveContents(std::string_view data) +void RestoreRegularFile::operator () (std::string_view data) { writeFull(fd.get(), data); } @@ -122,4 +122,32 @@ void RestoreSink::createSymlink(const Path & path, const std::string & target) nix::createSymlink(target, p); } + +void RegularFileSink::createRegularFile(const Path & path, std::function func) +{ + struct CRF : CreateRegularFileSink { + RegularFileSink & back; + CRF(RegularFileSink & back) : back(back) {} + void operator () (std::string_view data) override + { + back.sink(data); + } + void isExecutable() override {} + } crf { *this }; + func(crf); +} + + +void NullFileSystemObjectSink::createRegularFile(const Path & path, std::function func) +{ + struct : CreateRegularFileSink { + void operator () (std::string_view data) override {} + void isExecutable() override {} + } crf; + // Even though `NullFileSystemObjectSink` doesn't do anything, it's important + // that we call the function, to e.g. advance the parser using this + // sink. 
+ func(crf); +} + } diff --git a/src/libutil/fs-sink.hh b/src/libutil/fs-sink.hh index f4c4e92f1..4dfb5b329 100644 --- a/src/libutil/fs-sink.hh +++ b/src/libutil/fs-sink.hh @@ -9,18 +9,13 @@ namespace nix { /** - * \todo Fix this API, it sucks. + * Actions on an open regular file in the process of creating it. + * + * See `FileSystemObjectSink::createRegularFile`. */ -struct FileSystemObjectSink +struct CreateRegularFileSink : Sink { - virtual void createDirectory(const Path & path) = 0; - - virtual void createRegularFile(const Path & path) = 0; - virtual void receiveContents(std::string_view data) = 0; virtual void isExecutable() = 0; - virtual void closeRegularFile() = 0; - - virtual void createSymlink(const Path & path, const std::string & target) = 0; /** * An optimization. By default, do nothing. @@ -28,8 +23,24 @@ struct FileSystemObjectSink virtual void preallocateContents(uint64_t size) { }; }; + +struct FileSystemObjectSink +{ + virtual void createDirectory(const Path & path) = 0; + + /** + * This function in general is not re-entrant. Only one file can be + * written at a time. + */ + virtual void createRegularFile( + const Path & path, + std::function) = 0; + + virtual void createSymlink(const Path & path, const std::string & target) = 0; +}; + /** - * Recusively copy file system objects from the source into the sink. + * Recursively copy file system objects from the source into the sink. 
*/ void copyRecursive( SourceAccessor & accessor, const CanonPath & sourcePath, @@ -41,11 +52,10 @@ void copyRecursive( struct NullFileSystemObjectSink : FileSystemObjectSink { void createDirectory(const Path & path) override { } - void receiveContents(std::string_view data) override { } void createSymlink(const Path & path, const std::string & target) override { } - void createRegularFile(const Path & path) override { } - void closeRegularFile() override { } - void isExecutable() override { } + void createRegularFile( + const Path & path, + std::function) override; }; /** @@ -57,17 +67,11 @@ struct RestoreSink : FileSystemObjectSink void createDirectory(const Path & path) override; - void createRegularFile(const Path & path) override; - void receiveContents(std::string_view data) override; - void isExecutable() override; - void closeRegularFile() override; + void createRegularFile( + const Path & path, + std::function) override; void createSymlink(const Path & path, const std::string & target) override; - - void preallocateContents(uint64_t size) override; - -private: - AutoCloseFD fd; }; /** @@ -87,19 +91,14 @@ struct RegularFileSink : FileSystemObjectSink regular = false; } - void receiveContents(std::string_view data) override - { - sink(data); - } - void createSymlink(const Path & path, const std::string & target) override { regular = false; } - void createRegularFile(const Path & path) override { } - void closeRegularFile() override { } - void isExecutable() override { } + void createRegularFile( + const Path & path, + std::function) override; }; } diff --git a/src/libutil/git.cc b/src/libutil/git.cc index 058384db0..3b8c3ebac 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -52,24 +52,22 @@ static std::string getString(Source & source, int n) return v; } - -void parse( +void parseBlob( FileSystemObjectSink & sink, const Path & sinkPath, Source & source, - std::function hook, + bool executable, const ExperimentalFeatureSettings & xpSettings) { 
xpSettings.require(Xp::GitHashing); - auto type = getString(source, 5); - - if (type == "blob ") { - sink.createRegularFile(sinkPath); + sink.createRegularFile(sinkPath, [&](auto & crf) { + if (executable) + crf.isExecutable(); unsigned long long size = std::stoi(getStringUntil(source, 0)); - sink.preallocateContents(size); + crf.preallocateContents(size); unsigned long long left = size; std::string buf; @@ -79,47 +77,91 @@ void parse( checkInterrupt(); buf.resize(std::min((unsigned long long)buf.capacity(), left)); source(buf); - sink.receiveContents(buf); + crf(buf); left -= buf.size(); } + }); +} + +void parseTree( + FileSystemObjectSink & sink, + const Path & sinkPath, + Source & source, + std::function hook, + const ExperimentalFeatureSettings & xpSettings) +{ + unsigned long long size = std::stoi(getStringUntil(source, 0)); + unsigned long long left = size; + + sink.createDirectory(sinkPath); + + while (left) { + std::string perms = getStringUntil(source, ' '); + left -= perms.size(); + left -= 1; + + RawMode rawMode = std::stoi(perms, 0, 8); + auto modeOpt = decodeMode(rawMode); + if (!modeOpt) + throw Error("Unknown Git permission: %o", perms); + auto mode = std::move(*modeOpt); + + std::string name = getStringUntil(source, '\0'); + left -= name.size(); + left -= 1; + + std::string hashs = getString(source, 20); + left -= 20; + + Hash hash(HashAlgorithm::SHA1); + std::copy(hashs.begin(), hashs.end(), hash.hash); + + hook(name, TreeEntry { + .mode = mode, + .hash = hash, + }); + } +} + +ObjectType parseObjectType( + Source & source, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + auto type = getString(source, 5); + + if (type == "blob ") { + return ObjectType::Blob; } else if (type == "tree ") { - unsigned long long size = std::stoi(getStringUntil(source, 0)); - unsigned long long left = size; - - sink.createDirectory(sinkPath); - - while (left) { - std::string perms = getStringUntil(source, ' '); - left -= 
perms.size(); - left -= 1; - - RawMode rawMode = std::stoi(perms, 0, 8); - auto modeOpt = decodeMode(rawMode); - if (!modeOpt) - throw Error("Unknown Git permission: %o", perms); - auto mode = std::move(*modeOpt); - - std::string name = getStringUntil(source, '\0'); - left -= name.size(); - left -= 1; - - std::string hashs = getString(source, 20); - left -= 20; - - Hash hash(HashAlgorithm::SHA1); - std::copy(hashs.begin(), hashs.end(), hash.hash); - - hook(name, TreeEntry { - .mode = mode, - .hash = hash, - }); - - if (mode == Mode::Executable) - sink.isExecutable(); - } + return ObjectType::Tree; } else throw Error("input doesn't look like a Git object"); } +void parse( + FileSystemObjectSink & sink, + const Path & sinkPath, + Source & source, + bool executable, + std::function hook, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + auto type = parseObjectType(source, xpSettings); + + switch (type) { + case ObjectType::Blob: + parseBlob(sink, sinkPath, source, executable, xpSettings); + break; + case ObjectType::Tree: + parseTree(sink, sinkPath, source, hook, xpSettings); + break; + default: + assert(false); + }; +} + std::optional convertMode(SourceAccessor::Type type) { @@ -135,7 +177,7 @@ std::optional convertMode(SourceAccessor::Type type) void restore(FileSystemObjectSink & sink, Source & source, std::function hook) { - parse(sink, "", source, [&](Path name, TreeEntry entry) { + parse(sink, "", source, false, [&](Path name, TreeEntry entry) { auto [accessor, from] = hook(entry.hash); auto stat = accessor->lstat(from); auto gotOpt = convertMode(stat.type); diff --git a/src/libutil/git.hh b/src/libutil/git.hh index e2fe20509..d9eb138e1 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -13,12 +13,19 @@ namespace nix::git { +enum struct ObjectType { + Blob, + Tree, + //Commit, + //Tag, +}; + using RawMode = uint32_t; enum struct Mode : RawMode { Directory = 0040000, - Executable = 0100755, Regular = 0100644, 
+ Executable = 0100755, Symlink = 0120000, }; @@ -59,9 +66,34 @@ using Tree = std::map; */ using SinkHook = void(const Path & name, TreeEntry entry); +/** + * Parse the "blob " or "tree " prefix. + * + * @throws if prefix not recognized + */ +ObjectType parseObjectType( + Source & source, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +void parseBlob( + FileSystemObjectSink & sink, const Path & sinkPath, + Source & source, + bool executable, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +void parseTree( + FileSystemObjectSink & sink, const Path & sinkPath, + Source & source, + std::function hook, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Helper putting the previous three `parse*` functions together. + */ void parse( FileSystemObjectSink & sink, const Path & sinkPath, Source & source, + bool executable, std::function hook, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); diff --git a/src/libutil/memory-source-accessor.cc b/src/libutil/memory-source-accessor.cc index 78a4dd298..880fa61b7 100644 --- a/src/libutil/memory-source-accessor.cc +++ b/src/libutil/memory-source-accessor.cc @@ -134,36 +134,43 @@ void MemorySink::createDirectory(const Path & path) throw Error("file '%s' is not a directory", path); }; -void MemorySink::createRegularFile(const Path & path) +struct CreateMemoryRegularFile : CreateRegularFileSink { + File::Regular & regularFile; + + CreateMemoryRegularFile(File::Regular & r) + : regularFile(r) + { } + + void operator () (std::string_view data) override; + void isExecutable() override; + void preallocateContents(uint64_t size) override; +}; + +void MemorySink::createRegularFile(const Path & path, std::function func) { auto * f = dst.open(CanonPath{path}, File { File::Regular {} }); if (!f) throw Error("file '%s' cannot be made because some parent file is not a directory", path); - if (!(r = 
std::get_if(&f->raw))) + if (auto * rp = std::get_if(&f->raw)) { + CreateMemoryRegularFile crf { *rp }; + func(crf); + } else throw Error("file '%s' is not a regular file", path); } -void MemorySink::closeRegularFile() +void CreateMemoryRegularFile::isExecutable() { - r = nullptr; + regularFile.executable = true; } -void MemorySink::isExecutable() +void CreateMemoryRegularFile::preallocateContents(uint64_t len) { - assert(r); - r->executable = true; + regularFile.contents.reserve(len); } -void MemorySink::preallocateContents(uint64_t len) +void CreateMemoryRegularFile::operator () (std::string_view data) { - assert(r); - r->contents.reserve(len); -} - -void MemorySink::receiveContents(std::string_view data) -{ - assert(r); - r->contents += data; + regularFile.contents += data; } void MemorySink::createSymlink(const Path & path, const std::string & target) diff --git a/src/libutil/memory-source-accessor.hh b/src/libutil/memory-source-accessor.hh index b46c61e54..7a1990d2f 100644 --- a/src/libutil/memory-source-accessor.hh +++ b/src/libutil/memory-source-accessor.hh @@ -83,17 +83,11 @@ struct MemorySink : FileSystemObjectSink void createDirectory(const Path & path) override; - void createRegularFile(const Path & path) override; - void receiveContents(std::string_view data) override; - void isExecutable() override; - void closeRegularFile() override; + void createRegularFile( + const Path & path, + std::function) override; void createSymlink(const Path & path, const std::string & target) override; - - void preallocateContents(uint64_t size) override; - -private: - MemorySourceAccessor::File::Regular * r; }; } diff --git a/tests/unit/libutil/git.cc b/tests/unit/libutil/git.cc index 6bbcd161b..76ef86bcf 100644 --- a/tests/unit/libutil/git.cc +++ b/tests/unit/libutil/git.cc @@ -66,7 +66,8 @@ TEST_F(GitTest, blob_read) { StringSource in { encoded }; StringSink out; RegularFileSink out2 { out }; - parse(out2, "", in, [](auto &, auto) {}, mockXpSettings); + 
ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Blob); + parseBlob(out2, "", in, false, mockXpSettings); auto expected = readFile(goldenMaster("hello-world.bin")); @@ -121,7 +122,8 @@ TEST_F(GitTest, tree_read) { StringSource in { encoded }; NullFileSystemObjectSink out; Tree got; - parse(out, "", in, [&](auto & name, auto entry) { + ASSERT_EQ(parseObjectType(in, mockXpSettings), ObjectType::Tree); + parseTree(out, "", in, [&](auto & name, auto entry) { auto name2 = name; if (entry.mode == Mode::Directory) name2 += '/'; @@ -193,15 +195,21 @@ TEST_F(GitTest, both_roundrip) { MemorySink sinkFiles2 { files2 }; - std::function mkSinkHook; - mkSinkHook = [&](const Path prefix, const Hash & hash) { + std::function mkSinkHook; + mkSinkHook = [&](auto prefix, auto & hash, auto executable) { StringSource in { cas[hash] }; - parse(sinkFiles2, prefix, in, [&](const Path & name, const auto & entry) { - mkSinkHook(prefix + "/" + name, entry.hash); - }, mockXpSettings); + parse( + sinkFiles2, prefix, in, executable, + [&](const Path & name, const auto & entry) { + mkSinkHook( + prefix + "/" + name, + entry.hash, + entry.mode == Mode::Executable); + }, + mockXpSettings); }; - mkSinkHook("", root.hash); + mkSinkHook("", root.hash, false); ASSERT_EQ(files, files2); }