From 9ff5f6492f46b7f3342d47f138b590f09e939865 Mon Sep 17 00:00:00 2001 From: Yorick van Pelt Date: Sat, 7 Dec 2019 22:35:14 +0700 Subject: [PATCH] libarchive proof of concept --- Makefile.config.in | 1 + configure.ac | 2 + release-common.nix | 1 + src/libstore/download.cc | 2 +- src/libutil/local.mk | 2 +- src/libutil/tarfile.cc | 135 +++++++++++++++++++---- src/libutil/tarfile.hh | 3 +- src/nix-prefetch-url/nix-prefetch-url.cc | 5 +- 8 files changed, 124 insertions(+), 27 deletions(-) diff --git a/Makefile.config.in b/Makefile.config.in index 7e3b35b98..fe609ce06 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -18,6 +18,7 @@ SODIUM_LIBS = @SODIUM_LIBS@ LIBLZMA_LIBS = @LIBLZMA_LIBS@ SQLITE3_LIBS = @SQLITE3_LIBS@ LIBBROTLI_LIBS = @LIBBROTLI_LIBS@ +LIBARCHIVE_LIBS = @LIBARCHIVE_LIBS@ EDITLINE_LIBS = @EDITLINE_LIBS@ bash = @bash@ bindir = @bindir@ diff --git a/configure.ac b/configure.ac index 9dd0acd86..29835195f 100644 --- a/configure.ac +++ b/configure.ac @@ -178,6 +178,8 @@ AC_CHECK_LIB([bz2], [BZ2_bzWriteOpen], [true], [AC_MSG_ERROR([Nix requires libbz2, which is part of bzip2. See https://web.archive.org/web/20180624184756/http://www.bzip.org/.])]) AC_CHECK_HEADERS([bzlib.h], [true], [AC_MSG_ERROR([Nix requires libbz2, which is part of bzip2. See https://web.archive.org/web/20180624184756/http://www.bzip.org/.])]) +# Checks for libarchive +PKG_CHECK_MODULES([LIBARCHIVE], [libarchive >= 3.4.0], [CXXFLAGS="$LIBARCHIVE_CFLAGS $CXXFLAGS"]) # Look for SQLite, a required dependency. 
PKG_CHECK_MODULES([SQLITE3], [sqlite3 >= 3.6.19], [CXXFLAGS="$SQLITE3_CFLAGS $CXXFLAGS"]) diff --git a/release-common.nix b/release-common.nix index dd5f939d9..f8c93f76e 100644 --- a/release-common.nix +++ b/release-common.nix @@ -49,6 +49,7 @@ rec { [ curl bzip2 xz brotli editline openssl pkgconfig sqlite boehmgc + libarchive boost nlohmann_json rustc cargo diff --git a/src/libstore/download.cc b/src/libstore/download.cc index 61e88c5c1..c7c1b93ad 100644 --- a/src/libstore/download.cc +++ b/src/libstore/download.cc @@ -907,7 +907,7 @@ CachedDownloadResult Downloader::downloadCached( printInfo("unpacking '%s'...", url); Path tmpDir = createTempDir(); AutoDelete autoDelete(tmpDir, true); - unpackTarfile(store->toRealPath(storePath), tmpDir, baseNameOf(url)); + unpackTarfile(store->toRealPath(storePath), tmpDir); auto members = readDirectory(tmpDir); if (members.size() != 1) throw nix::Error("tarball '%s' contains an unexpected number of top-level files", url); diff --git a/src/libutil/local.mk b/src/libutil/local.mk index 35c1f6c13..16c1fa03f 100644 --- a/src/libutil/local.mk +++ b/src/libutil/local.mk @@ -6,6 +6,6 @@ libutil_DIR := $(d) libutil_SOURCES := $(wildcard $(d)/*.cc) -libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) $(BOOST_LDFLAGS) -lboost_context +libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) $(LIBARCHIVE_LIBS) $(BOOST_LDFLAGS) -lboost_context libutil_LIBS = libnixrust diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc index 2cc7793fd..ab30002dd 100644 --- a/src/libutil/tarfile.cc +++ b/src/libutil/tarfile.cc @@ -1,5 +1,8 @@ #include "rust-ffi.hh" #include "compression.hh" +#include <archive.h> +#include <archive_entry.h> +#include "finally.hh" extern "C" { rust::Result> * @@ -8,29 +11,123 @@ extern "C" { namespace nix { +std::shared_ptr<struct archive> archive_read_ptr() { + return std::shared_ptr<struct archive>(archive_read_new(), + [](auto p) { + archive_read_close(p); + archive_read_free(p); + }); +} +void
archive_read_open_source(std::shared_ptr<struct archive> a, Source& s, unsigned int bufsize = 1024) { + std::shared_ptr<unsigned char> buffer((unsigned char*)malloc(bufsize), [](auto p) { free(p); }); + typedef struct { + decltype(buffer) buf; + Source& src; + unsigned int bs; + } St; + St* state = new St({buffer, s, bufsize}); + if (archive_read_open(a.get(), state, + NULL /* open */, + ([] (struct archive*, void* sptr, const void** buf) -> long int { + St& s = *(static_cast<St*>(sptr)); + *buf = s.buf.get(); + try { + return s.src.read(s.buf.get(), s.bs); + } catch (EndOfFile &) { + return 0; + } + /* TODO: I don't know what happens if anything else is thrown here */ + }), [] (struct archive*, void* sptr) { + delete static_cast<St*>(sptr); + return ARCHIVE_OK; + })) { + throw Error("archive is corrupt (%s)", archive_error_string(a.get())); + } +} +std::shared_ptr<struct archive> archive_write_ptr() { + return std::shared_ptr<struct archive>(archive_write_disk_new(), + [](auto p) { + archive_write_close(p); + archive_write_free(p); + }); +} +static void copy_data(std::shared_ptr<struct archive> ar, std::shared_ptr<struct archive> aw) +{ + int r; + const void *buff; + size_t size; + la_int64_t offset; + + for (;;) { + r = archive_read_data_block(ar.get(), &buff, &size, &offset); + if (r == ARCHIVE_EOF) return; + if (r < ARCHIVE_OK) { + throw Error("archive is corrupt (%s)", archive_error_string(ar.get())); + } + r = archive_write_data_block(aw.get(), buff, size, offset); + if (r < ARCHIVE_OK) { + throw Error("could not write archive output (%s)", archive_error_string(aw.get())); + } + } +} + +static void extract_archive(std::shared_ptr<struct archive> a, const Path & destDir) { + char * cwd = getcwd(0, 0); + if (!cwd) throw SysError("getting current directory"); + Finally freeCwd([&]() { free(cwd); }); + int r = chdir(destDir.c_str()); + if (r != 0) throw SysError("setting directory to tar output path"); + struct archive_entry *entry; + r = archive_read_next_header(a.get(), &entry); + if (r != ARCHIVE_OK) { + throw Error("archive is corrupt (%s)", archive_error_string(a.get())); + }
+ int flags = 0; + auto ext = archive_write_ptr(); + flags |= ARCHIVE_EXTRACT_PERM; + flags |= ARCHIVE_EXTRACT_FFLAGS; + archive_write_disk_set_options(ext.get(), flags); + archive_write_disk_set_standard_lookup(ext.get()); + for(;;) { + r = archive_read_next_header(a.get(), &entry); + if (r == ARCHIVE_EOF) break; + if (r == ARCHIVE_WARN) { + std::cerr << "warning: " << archive_error_string(a.get()); + } else if (r < ARCHIVE_WARN) { + throw Error("archive is corrupt (%s)", archive_error_string(a.get())); + } + r = archive_write_header(ext.get(), entry); + if (r != ARCHIVE_OK) { + throw Error("could not write archive output (%s)", archive_error_string(ext.get())); + } + if (archive_entry_size(entry) > 0) { + copy_data(a, ext); + } + archive_write_finish_entry(ext.get()); + } + r = chdir(cwd); + if (r != 0) throw SysError("resetting directory after archive extraction"); +} void unpackTarfile(Source & source, const Path & destDir) { - rust::Source source2(source); - rust::CBox(unpack_tarfile(source2, destDir))->unwrap(); + auto a = archive_read_ptr(); + archive_read_support_filter_all(a.get()); + archive_read_support_format_all(a.get()); + archive_read_open_source(a, source); + createDirs(destDir); + extract_archive(a, destDir); } - -void unpackTarfile(const Path & tarFile, const Path & destDir, - std::optional<std::string> baseName) +void unpackTarfile(const Path & tarFile, const Path & destDir) { - if (!baseName) baseName = baseNameOf(tarFile); - - auto source = sinkToSource([&](Sink & sink) { - // FIXME: look at first few bytes to determine compression type. - auto decompressor = - // FIXME: add .gz support - hasSuffix(*baseName, ".bz2") ? makeDecompressionSink("bzip2", sink) : - hasSuffix(*baseName, ".xz") ?
makeDecompressionSink("xz", sink) : - makeDecompressionSink("none", sink); - readFile(tarFile, *decompressor); - decompressor->finish(); - }); - - unpackTarfile(*source, destDir); + auto a = archive_read_ptr(); + archive_read_support_filter_all(a.get()); + archive_read_support_format_all(a.get()); + int r = archive_read_open_filename(a.get(), tarFile.c_str(), 16384); + if (r != ARCHIVE_OK) { + throw Error("archive is corrupt (%s)", archive_error_string(a.get())); + } + createDirs(destDir); + extract_archive(a, destDir); } } diff --git a/src/libutil/tarfile.hh b/src/libutil/tarfile.hh index ce0911e2a..89a024f1d 100644 --- a/src/libutil/tarfile.hh +++ b/src/libutil/tarfile.hh @@ -4,7 +4,6 @@ namespace nix { void unpackTarfile(Source & source, const Path & destDir); -void unpackTarfile(const Path & tarFile, const Path & destDir, - std::optional<std::string> baseName = {}); +void unpackTarfile(const Path & tarFile, const Path & destDir); } diff --git a/src/nix-prefetch-url/nix-prefetch-url.cc b/src/nix-prefetch-url/nix-prefetch-url.cc index 78c883833..48714446b 100644 --- a/src/nix-prefetch-url/nix-prefetch-url.cc +++ b/src/nix-prefetch-url/nix-prefetch-url.cc @@ -190,10 +190,7 @@ static int _main(int argc, char * * argv) printInfo("unpacking..."); Path unpacked = (Path) tmpDir + "/unpacked"; createDirs(unpacked); - if (hasSuffix(baseNameOf(uri), ".zip")) - runProgram("unzip", true, {"-qq", tmpFile, "-d", unpacked}); - else - unpackTarfile(tmpFile, unpacked, baseNameOf(uri)); + unpackTarfile(tmpFile, unpacked); /* If the archive unpacks to a single file/directory, then use that as the top-level. */