From 9dd2b8ac7b8d82df8c1f3f36efb683175fd6ecee Mon Sep 17 00:00:00 2001 From: Will Dietz Date: Fri, 29 Dec 2017 14:42:14 -0600 Subject: [PATCH] use libbrotli directly when available * Look for both 'brotli' and 'bro' as external command, since upstream has renamed it in newer versions. If neither are found, current runtime behavior is preserved: try to find 'bro' on PATH. * Limit amount handed to BrotliEncoderCompressStream to ensure interrupts are processed in a timely manner. Testing shows negligible performance impact. (Other compression sinks don't seem to require this) --- Makefile.config.in | 4 +- configure.ac | 9 +- src/libutil/compression.cc | 181 ++++++++++++++++++++++++++++++++++--- src/libutil/local.mk | 4 +- tests/brotli.sh | 28 ++++++ tests/common.sh.in | 1 + tests/local.mk | 3 +- 7 files changed, 212 insertions(+), 18 deletions(-) create mode 100644 tests/brotli.sh diff --git a/Makefile.config.in b/Makefile.config.in index 45a70cd6..fab82194 100644 --- a/Makefile.config.in +++ b/Makefile.config.in @@ -6,6 +6,7 @@ CXXFLAGS = @CXXFLAGS@ ENABLE_S3 = @ENABLE_S3@ HAVE_SODIUM = @HAVE_SODIUM@ HAVE_READLINE = @HAVE_READLINE@ +HAVE_BROTLI = @HAVE_BROTLI@ LIBCURL_LIBS = @LIBCURL_LIBS@ OPENSSL_LIBS = @OPENSSL_LIBS@ PACKAGE_NAME = @PACKAGE_NAME@ @@ -13,9 +14,10 @@ PACKAGE_VERSION = @PACKAGE_VERSION@ SODIUM_LIBS = @SODIUM_LIBS@ LIBLZMA_LIBS = @LIBLZMA_LIBS@ SQLITE3_LIBS = @SQLITE3_LIBS@ +LIBBROTLI_LIBS = @LIBBROTLI_LIBS@ bash = @bash@ bindir = @bindir@ -bro = @bro@ +brotli = @brotli@ lsof = @lsof@ datadir = @datadir@ datarootdir = @datarootdir@ diff --git a/configure.ac b/configure.ac index c395b871..9db92ce9 100644 --- a/configure.ac +++ b/configure.ac @@ -127,7 +127,7 @@ NEED_PROG(gzip, gzip) NEED_PROG(xz, xz) AC_PATH_PROG(dot, dot) AC_PATH_PROG(pv, pv, pv) -AC_PATH_PROG(bro, bro, bro) +AC_PATH_PROGS(brotli, brotli bro, bro) AC_PATH_PROG(lsof, lsof, lsof) @@ -176,6 +176,13 @@ AC_SUBST(HAVE_SODIUM, [$have_sodium]) PKG_CHECK_MODULES([LIBLZMA], [liblzma], [CXXFLAGS="$LIBLZMA_CFLAGS $CXXFLAGS"]) +# Look for libbrotli{enc,dec}, optional dependencies +PKG_CHECK_MODULES([LIBBROTLI], [libbrotlienc libbrotlidec], + [AC_DEFINE([HAVE_BROTLI], [1], [Whether to use libbrotli.]) + CXXFLAGS="$LIBBROTLI_CFLAGS $CXXFLAGS"] + have_brotli=1], [have_brotli=]) +AC_SUBST(HAVE_BROTLI, [$have_brotli]) + # Look for libseccomp, required for Linux sandboxing. if test "$sys_name" = linux; then PKG_CHECK_MODULES([LIBSECCOMP], [libseccomp], diff --git a/src/libutil/compression.cc b/src/libutil/compression.cc index 2b3dff3a..5e2631ba 100644 --- a/src/libutil/compression.cc +++ b/src/libutil/compression.cc @@ -7,6 +7,11 @@ #include #include +#if HAVE_BROTLI +#include +#include +#endif // HAVE_BROTLI + #include namespace nix { @@ -94,8 +99,56 @@ static ref decompressBzip2(const std::string & in) static ref decompressBrotli(const std::string & in) { - // FIXME: use libbrotli - return make_ref(runProgram(BRO, true, {"-d"}, {in})); +#if !HAVE_BROTLI + return make_ref(runProgram(BROTLI, true, {"-d"}, {in})); +#else + auto *s = BrotliDecoderCreateInstance(nullptr, nullptr, nullptr); + if (!s) + throw CompressionError("unable to initialize brotli decoder"); + + Finally free([s]() { BrotliDecoderDestroyInstance(s); }); + + uint8_t outbuf[BUFSIZ]; + ref res = make_ref(); + const uint8_t *next_in = (uint8_t *)in.c_str(); + size_t avail_in = in.size(); + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (true) { + checkInterrupt(); + + auto ret = BrotliDecoderDecompressStream(s, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr); + + switch (ret) { + case BROTLI_DECODER_RESULT_ERROR: + throw CompressionError("error while decompressing brotli file"); + case BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT: + throw CompressionError("incomplete or corrupt brotli file"); + case BROTLI_DECODER_RESULT_SUCCESS: + if (avail_in != 0) + throw CompressionError("unexpected input after brotli decompression"); + break; + case BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT: + // I'm not sure if this can happen, but abort if this happens with empty buffer + if (avail_out == sizeof(outbuf)) + throw CompressionError("brotli decompression requires larger buffer"); + break; + } + + // Always ensure we have full buffer for next invocation + if (avail_out < sizeof(outbuf)) { + res->append((char*)outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + + if (ret == BROTLI_DECODER_RESULT_SUCCESS) return res; + } +#endif // HAVE_BROTLI } ref compress(const std::string & method, const std::string & in) @@ -270,25 +323,22 @@ struct BzipSink : CompressionSink } }; -struct BrotliSink : CompressionSink +struct LambdaCompressionSink : CompressionSink { Sink & nextSink; std::string data; - - BrotliSink(Sink & nextSink) : nextSink(nextSink) + using CompressFnTy = std::function; + CompressFnTy compressFn; + LambdaCompressionSink(Sink& nextSink, CompressFnTy compressFn) + : nextSink(nextSink) + , compressFn(std::move(compressFn)) { - } - - ~BrotliSink() - { - } - - // FIXME: use libbrotli + }; void finish() override { flush(); - nextSink(runProgram(BRO, true, {}, data)); + nextSink(compressFn(data)); } void write(const unsigned char * data, size_t len) override @@ -298,6 +348,107 @@ struct BrotliSink : CompressionSink } }; +struct BrotliCmdSink : LambdaCompressionSink +{ + BrotliCmdSink(Sink& nextSink) + : LambdaCompressionSink(nextSink, [](const std::string& data) { + return runProgram(BROTLI, true, {}, data); + }) + { + } +}; + +#if HAVE_BROTLI +struct BrotliSink : CompressionSink +{ + Sink & nextSink; + uint8_t outbuf[BUFSIZ]; + BrotliEncoderState *state; + bool finished = false; + + BrotliSink(Sink & nextSink) : nextSink(nextSink) + { + state = BrotliEncoderCreateInstance(nullptr, nullptr, nullptr); + if (!state) + throw CompressionError("unable to initialise brotli encoder"); + } + + ~BrotliSink() + { + BrotliEncoderDestroyInstance(state); + } + + void finish() override + { + flush(); + assert(!finished); + + const uint8_t *next_in = nullptr; + size_t avail_in = 0; + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + while (!finished) { + checkInterrupt(); + + if (!BrotliEncoderCompressStream(state, + BROTLI_OPERATION_FINISH, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while finishing brotli file"); + + finished = BrotliEncoderIsFinished(state); + if (avail_out == 0 || finished) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + } + } + + void write(const unsigned char * data, size_t len) override + { + assert(!finished); + + // Don't feed brotli too much at once + const size_t CHUNK_SIZE = sizeof(outbuf) << 2; + while (len) { + size_t n = std::min(CHUNK_SIZE, len); + writeInternal(data, n); + data += n; + len -= n; + } + } + private: + void writeInternal(const unsigned char * data, size_t len) + { + assert(!finished); + + const uint8_t *next_in = data; + size_t avail_in = len; + uint8_t *next_out = outbuf; + size_t avail_out = sizeof(outbuf); + + while (avail_in > 0) { + checkInterrupt(); + + if (!BrotliEncoderCompressStream(state, + BROTLI_OPERATION_PROCESS, + &avail_in, &next_in, + &avail_out, &next_out, + nullptr)) + throw CompressionError("error while compressing brotli file"); + + if (avail_out < sizeof(outbuf) || avail_in == 0) { + nextSink(outbuf, sizeof(outbuf) - avail_out); + next_out = outbuf; + avail_out = sizeof(outbuf); + } + } + } +}; +#endif // HAVE_BROTLI + ref makeCompressionSink(const std::string & method, Sink & nextSink) { if (method == "none") @@ -307,7 +458,11 @@ ref makeCompressionSink(const std::string & method, Sink & next else if (method == "bzip2") return make_ref(nextSink); else if (method == "br") +#if HAVE_BROTLI return make_ref(nextSink); +#else + return make_ref(nextSink); +#endif else throw UnknownCompressionMethod(format("unknown compression method '%s'") % method); } diff --git a/src/libutil/local.mk b/src/libutil/local.mk index 0721b21c..5fc2aab5 100644 --- a/src/libutil/local.mk +++ b/src/libutil/local.mk @@ -6,8 +6,8 @@ libutil_DIR := $(d) libutil_SOURCES := $(wildcard $(d)/*.cc) -libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) +libutil_LDFLAGS = $(LIBLZMA_LIBS) -lbz2 -pthread $(OPENSSL_LIBS) $(LIBBROTLI_LIBS) libutil_LIBS = libformat -libutil_CXXFLAGS = -DBRO=\"$(bro)\" +libutil_CXXFLAGS = -DBROTLI=\"$(brotli)\" diff --git a/tests/brotli.sh b/tests/brotli.sh new file mode 100644 index 00000000..645dd421 --- /dev/null +++ b/tests/brotli.sh @@ -0,0 +1,28 @@ +source common.sh + + +# Only test if we found brotli libraries +# (CLI tool is likely unavailable if libraries are missing) +if [ -n "$HAVE_BROTLI" ]; then + +clearStore +clearCache + +cacheURI="file://$cacheDir?compression=br" + +outPath=$(nix-build dependencies.nix --no-out-link) + +nix copy --to $cacheURI $outPath + +HASH=$(nix hash-path $outPath) + +clearStore +clearCacheCache + +nix copy --from $cacheURI $outPath --no-check-sigs + +HASH2=$(nix hash-path $outPath) + +[[ $HASH = $HASH2 ]] + +fi # HAVE_BROTLI diff --git a/tests/common.sh.in b/tests/common.sh.in index 09f29491..83643d8b 100644 --- a/tests/common.sh.in +++ b/tests/common.sh.in @@ -32,6 +32,7 @@ export xmllint="@xmllint@" export SHELL="@bash@" export PAGER=cat export HAVE_SODIUM="@HAVE_SODIUM@" +export HAVE_BROTLI="@HAVE_BROTLI@" export version=@PACKAGE_VERSION@ export system=@system@ diff --git a/tests/local.mk b/tests/local.mk index baf74224..83154228 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -19,7 +19,8 @@ nix_tests = \ fetchGit.sh \ fetchMercurial.sh \ signing.sh \ - run.sh + run.sh \ + brotli.sh # parallel.sh install-tests += $(foreach x, $(nix_tests), tests/$(x))