2011-12-21 19:00:10 +01:00
|
|
|
/***
|
|
|
|
This file is part of systemd.
|
|
|
|
|
|
|
|
Copyright 2011 Lennart Poettering
|
|
|
|
|
|
|
|
systemd is free software; you can redistribute it and/or modify it
|
2012-04-12 00:20:58 +02:00
|
|
|
under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or
|
2011-12-21 19:00:10 +01:00
|
|
|
(at your option) any later version.
|
|
|
|
|
|
|
|
systemd is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
2012-04-12 00:20:58 +02:00
|
|
|
Lesser General Public License for more details.
|
2011-12-21 19:00:10 +01:00
|
|
|
|
2012-04-12 00:20:58 +02:00
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
2011-12-21 19:00:10 +01:00
|
|
|
along with systemd; If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
***/
|
|
|
|
|
2016-02-29 22:42:43 +01:00
|
|
|
#include <inttypes.h>
|
2011-12-21 19:00:10 +01:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
coredump: use lz4frame api to compress coredumps
This converts the stream compression to use the new lz4frame api,
compatible with lz4cat. Previous code used custom headers, so the
compressed file was not compatible with lz4 command line tools.
I considered this the last blocker to using lz4 by default.
Speed seems to be reasonable, although a bit (a few percent) slower
than the lz4 binary, even though compression is the same. I don't
consider this important. It could be caused by the overhead of library
calls, but is probably caused by slightly different buffer sizes or
such. The code in this patch uses mmap, since since this allows the
buffer to be reused while not making the code more complicated at all.
In my testing, this version is noticably faster (~20%) than a naive
single-buffered version. mmap can cause the program to be killed with
SIGBUS, if the underlying file is truncated or a disk error occurs. We
only use this from within coredump and coredumpctl, so I don't
consider this an issue.
Old decompression code is retained and is used if the new code fails
indicating a format error. There have been reports of various smaller
distributions using previous lz4 code, i.e. the old format, and it is
nice to provide backwards compatibility. We can remove the legacy code
in a few versions.
The way that blobs are compressed in the journal is not affected.
2014-12-07 03:33:27 +01:00
|
|
|
#include <sys/mman.h>
|
2015-10-24 22:58:24 +02:00
|
|
|
#include <unistd.h>
|
2014-07-04 04:42:22 +02:00
|
|
|
|
2017-10-03 10:41:51 +02:00
|
|
|
#if HAVE_XZ
|
2015-10-25 13:14:12 +01:00
|
|
|
#include <lzma.h>
|
2014-07-04 04:42:22 +02:00
|
|
|
#endif
|
|
|
|
|
2017-10-03 10:41:51 +02:00
|
|
|
#if HAVE_LZ4
|
2015-10-25 13:14:12 +01:00
|
|
|
#include <lz4.h>
|
|
|
|
#include <lz4frame.h>
|
2014-07-04 04:42:22 +02:00
|
|
|
#endif
|
2011-12-21 19:00:10 +01:00
|
|
|
|
2015-10-27 03:01:06 +01:00
|
|
|
#include "alloc-util.h"
|
2011-12-21 19:00:10 +01:00
|
|
|
#include "compress.h"
|
2015-10-25 13:14:12 +01:00
|
|
|
#include "fd-util.h"
|
2015-10-25 14:08:25 +01:00
|
|
|
#include "io-util.h"
|
2015-10-24 22:58:24 +02:00
|
|
|
#include "journal-def.h"
|
2014-06-25 03:24:46 +02:00
|
|
|
#include "macro.h"
|
2014-07-04 04:42:22 +02:00
|
|
|
#include "sparse-endian.h"
|
2015-10-26 22:31:05 +01:00
|
|
|
#include "string-table.h"
|
2015-10-24 22:58:24 +02:00
|
|
|
#include "string-util.h"
|
|
|
|
#include "util.h"
|
2014-07-04 04:42:22 +02:00
|
|
|
|
2017-10-03 10:41:51 +02:00
|
|
|
#if HAVE_LZ4
|
coredump: use lz4frame api to compress coredumps
This converts the stream compression to use the new lz4frame api,
compatible with lz4cat. Previous code used custom headers, so the
compressed file was not compatible with lz4 command line tools.
I considered this the last blocker to using lz4 by default.
Speed seems to be reasonable, although a bit (a few percent) slower
than the lz4 binary, even though compression is the same. I don't
consider this important. It could be caused by the overhead of library
calls, but is probably caused by slightly different buffer sizes or
such. The code in this patch uses mmap, since since this allows the
buffer to be reused while not making the code more complicated at all.
In my testing, this version is noticably faster (~20%) than a naive
single-buffered version. mmap can cause the program to be killed with
SIGBUS, if the underlying file is truncated or a disk error occurs. We
only use this from within coredump and coredumpctl, so I don't
consider this an issue.
Old decompression code is retained and is used if the new code fails
indicating a format error. There have been reports of various smaller
distributions using previous lz4 code, i.e. the old format, and it is
nice to provide backwards compatibility. We can remove the legacy code
in a few versions.
The way that blobs are compressed in the journal is not affected.
2014-12-07 03:33:27 +01:00
|
|
|
DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_compressionContext_t, LZ4F_freeCompressionContext);
|
|
|
|
DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_decompressionContext_t, LZ4F_freeDecompressionContext);
|
|
|
|
#endif
|
|
|
|
|
2014-07-04 04:42:22 +02:00
|
|
|
/* Round l up to the next multiple of sizeof(size_t). */
#define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t))
|
|
|
|
|
|
|
|
/* Human-readable names for the journal object compression flags, consumed
 * by the lookup functions generated by DEFINE_STRING_TABLE_LOOKUP() below. */
static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = {
        [OBJECT_COMPRESSED_XZ] = "XZ",
        [OBJECT_COMPRESSED_LZ4] = "LZ4",
};

DEFINE_STRING_TABLE_LOOKUP(object_compressed, int);
|
|
|
|
|
2015-12-13 19:39:12 +01:00
|
|
|
/* One-shot XZ compression of src/src_size into the caller-provided buffer
 * dst/dst_alloc_size; on success the compressed length is stored in
 * *dst_size. Returns 0 on success, -ENOBUFS when the data could not be
 * compressed into the available space (or is too small to be worth it),
 * and -EPROTONOSUPPORT when built without XZ support. */
int compress_blob_xz(const void *src, uint64_t src_size,
                     void *dst, size_t dst_alloc_size, size_t *dst_size) {
#if HAVE_XZ
        /* LZMA2 options: a 1 MiB dictionary with fast mode and an HC3
         * match finder, keeping encoder memory use small. */
        static const lzma_options_lzma opt = {
                1u << 20u, NULL, 0, LZMA_LC_DEFAULT, LZMA_LP_DEFAULT,
                LZMA_PB_DEFAULT, LZMA_MODE_FAST, 128, LZMA_MF_HC3, 4
        };
        static const lzma_filter filters[] = {
                { LZMA_FILTER_LZMA2, (lzma_options_lzma*) &opt },
                { LZMA_VLI_UNKNOWN, NULL }
        };
        size_t pos = 0;
        lzma_ret e;

        assert(src);
        assert(src_size > 0);
        assert(dst);
        assert(dst_alloc_size > 0);
        assert(dst_size);

        /* Tiny payloads cannot amortize the XZ container overhead,
         * refuse them right away. */
        if (src_size < 80)
                return -ENOBUFS;

        e = lzma_stream_buffer_encode((lzma_filter*) filters, LZMA_CHECK_NONE, NULL,
                                      src, src_size, dst, &pos, dst_alloc_size);
        if (e != LZMA_OK)
                return -ENOBUFS;

        *dst_size = pos;
        return 0;
#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
2015-12-13 19:39:12 +01:00
|
|
|
/* LZ4-compresses src/src_size into dst/dst_alloc_size. The output format
 * is an 8-byte little-endian header holding the uncompressed size,
 * followed by the LZ4 payload; *dst_size receives the total length.
 * Returns 0 on success, -ENOBUFS when the data could not be compressed
 * into the available space (or is too small to be worth it), and
 * -EPROTONOSUPPORT when built without LZ4 support. */
int compress_blob_lz4(const void *src, uint64_t src_size,
                      void *dst, size_t dst_alloc_size, size_t *dst_size) {
#if HAVE_LZ4
        int r;

        assert(src);
        assert(src_size > 0);
        assert(dst);
        assert(dst_alloc_size > 0);
        assert(dst_size);

        /* Returns < 0 if we couldn't compress the data or the
         * compressed result is longer than the original */

        if (src_size < 9)
                return -ENOBUFS;

        /* Refuse destination buffers that cannot even hold the 8-byte
         * header: otherwise "(int) dst_alloc_size - 8" below would go
         * negative and be handed to LZ4 as an output capacity. */
        if (dst_alloc_size <= 8)
                return -ENOBUFS;

#if LZ4_VERSION_NUMBER >= 10700
        r = LZ4_compress_default(src, (char*)dst + 8, src_size, (int) dst_alloc_size - 8);
#else
        r = LZ4_compress_limitedOutput(src, (char*)dst + 8, src_size, (int) dst_alloc_size - 8);
#endif
        if (r <= 0)
                return -ENOBUFS;

        /* Record the uncompressed size in the header, and account for
         * it in the size reported to the caller. */
        *(le64_t*) dst = htole64(src_size);
        *dst_size = r + 8;

        return 0;
#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Decompresses the XZ blob src/src_size into *dst, growing the buffer with
 * greedy_realloc() as needed; *dst_alloc_size tracks the allocation and
 * *dst_size receives the decompressed length. If dst_max is non-zero the
 * output is capped at dst_max bytes (longer data is truncated and still
 * reported as success). Returns 0 on success; -ENOMEM on allocation
 * failure or when the decoder reports an error (note: decode errors are
 * mapped to -ENOMEM here); -ENOBUFS when the capped buffer cannot grow
 * further; -EPROTONOSUPPORT when built without XZ support. */
int decompress_blob_xz(const void *src, uint64_t src_size,
                       void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {

#if HAVE_XZ
        _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
        lzma_ret ret;
        size_t space;

        assert(src);
        assert(src_size > 0);
        assert(dst);
        assert(dst_alloc_size);
        assert(dst_size);
        assert(*dst_alloc_size == 0 || *dst);

        ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
        if (ret != LZMA_OK)
                return -ENOMEM;

        /* Initial guess: twice the compressed size, clamped to dst_max
         * if a limit was given ((size_t) -1 means "no limit" to MIN). */
        space = MIN(src_size * 2, dst_max ?: (size_t) -1);
        if (!greedy_realloc(dst, dst_alloc_size, space, 1))
                return -ENOMEM;

        s.next_in = src;
        s.avail_in = src_size;

        s.next_out = *dst;
        s.avail_out = space;

        for (;;) {
                size_t used;

                ret = lzma_code(&s, LZMA_FINISH);

                if (ret == LZMA_STREAM_END)
                        break;
                else if (ret != LZMA_OK)
                        return -ENOMEM;

                /* Once dst_max bytes have been produced, stop and return
                 * the truncated output as success. */
                if (dst_max > 0 && (space - s.avail_out) >= dst_max)
                        break;
                else if (dst_max > 0 && space == dst_max)
                        return -ENOBUFS;

                /* Output buffer exhausted: double it (up to dst_max) and
                 * resume decoding where we left off. The buffer may move,
                 * so next_out must be recomputed from the new base. */
                used = space - s.avail_out;
                space = MIN(2 * space, dst_max ?: (size_t) -1);
                if (!greedy_realloc(dst, dst_alloc_size, space, 1))
                        return -ENOMEM;

                s.avail_out = space - used;
                s.next_out = *(uint8_t**)dst + used;
        }

        *dst_size = space - s.avail_out;
        return 0;
#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
2014-07-04 04:42:22 +02:00
|
|
|
/* Decompresses a blob produced by compress_blob_lz4(): an 8-byte
 * little-endian header holding the uncompressed size, followed by the LZ4
 * payload. *dst is reallocated as needed (*dst_alloc_size tracks the
 * allocation) and *dst_size receives the uncompressed length.
 * NOTE(review): dst_max is accepted for signature parity with the XZ
 * variant but is not enforced in this function — confirm this is intended.
 * Returns 0 on success, -EBADMSG on truncated/corrupt input, -EFBIG if
 * the declared size does not fit in an int, -ENOMEM on allocation
 * failure, -EPROTONOSUPPORT when built without LZ4 support. */
int decompress_blob_lz4(const void *src, uint64_t src_size,
                        void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {

#if HAVE_LZ4
        char* out;
        int r, size; /* LZ4 uses int for size */

        assert(src);
        assert(src_size > 0);
        assert(dst);
        assert(dst_alloc_size);
        assert(dst_size);
        assert(*dst_alloc_size == 0 || *dst);

        /* Need at least the 8-byte size header plus some payload. */
        if (src_size <= 8)
                return -EBADMSG;

        size = le64toh( *(le64_t*)src );
        /* Reject declared sizes that do not survive the round-trip into
         * the int that the LZ4 API works with. */
        if (size < 0 || (unsigned) size != le64toh(*(le64_t*)src))
                return -EFBIG;
        if ((size_t) size > *dst_alloc_size) {
                out = realloc(*dst, size);
                if (!out)
                        return -ENOMEM;
                *dst = out;
                *dst_alloc_size = size;
        } else
                out = *dst;

        r = LZ4_decompress_safe((char*)src + 8, out, src_size - 8, size);
        /* Anything other than exactly the declared size means the blob
         * is corrupt. */
        if (r < 0 || r != size)
                return -EBADMSG;

        *dst_size = size;
        return 0;
#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
|
|
|
int decompress_blob(int compression,
|
|
|
|
const void *src, uint64_t src_size,
|
2014-08-04 04:50:00 +02:00
|
|
|
void **dst, size_t *dst_alloc_size, size_t* dst_size, size_t dst_max) {
|
2014-07-04 04:42:22 +02:00
|
|
|
if (compression == OBJECT_COMPRESSED_XZ)
|
|
|
|
return decompress_blob_xz(src, src_size,
|
|
|
|
dst, dst_alloc_size, dst_size, dst_max);
|
|
|
|
else if (compression == OBJECT_COMPRESSED_LZ4)
|
|
|
|
return decompress_blob_lz4(src, src_size,
|
|
|
|
dst, dst_alloc_size, dst_size, dst_max);
|
|
|
|
else
|
|
|
|
return -EBADMSG;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Checks whether the decompressed form of the XZ blob src/src_size starts
 * with the given prefix followed by the byte `extra`, decompressing
 * incrementally into *buffer (grown via greedy_realloc(); *buffer_size
 * tracks the allocation) only as far as needed. Returns 1 on match, 0 on
 * mismatch or when the stream is shorter than prefix_len + 1 bytes,
 * -EBADMSG on decoder failure, -ENOMEM on allocation failure,
 * -EPROTONOSUPPORT when built without XZ support. */
int decompress_startswith_xz(const void *src, uint64_t src_size,
                             void **buffer, size_t *buffer_size,
                             const void *prefix, size_t prefix_len,
                             uint8_t extra) {

#if HAVE_XZ
        _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
        lzma_ret ret;

        /* Checks whether the decompressed blob starts with the
         * mentioned prefix. The byte extra needs to follow the
         * prefix */

        assert(src);
        assert(src_size > 0);
        assert(buffer);
        assert(buffer_size);
        assert(prefix);
        assert(*buffer_size == 0 || *buffer);

        ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
        if (ret != LZMA_OK)
                return -EBADMSG;

        /* Make room for at least the prefix plus the extra byte. */
        if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1)))
                return -ENOMEM;

        s.next_in = src;
        s.avail_in = src_size;

        s.next_out = *buffer;
        s.avail_out = *buffer_size;

        for (;;) {
                ret = lzma_code(&s, LZMA_FINISH);

                if (ret != LZMA_STREAM_END && ret != LZMA_OK)
                        return -EBADMSG;

                /* Enough output to decide: compare the prefix and the
                 * byte that follows it. */
                if (*buffer_size - s.avail_out >= prefix_len + 1)
                        return memcmp(*buffer, prefix, prefix_len) == 0 &&
                                ((const uint8_t*) *buffer)[prefix_len] == extra;

                /* Stream ended before prefix_len + 1 bytes: cannot match. */
                if (ret == LZMA_STREAM_END)
                        return 0;

                s.avail_out += *buffer_size;

                /* Grow the buffer and recompute next_out, since the
                 * allocation may have moved. */
                if (!(greedy_realloc(buffer, buffer_size, *buffer_size * 2, 1)))
                        return -ENOMEM;

                s.next_out = *(uint8_t**)buffer + *buffer_size - s.avail_out;
        }

#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
|
|
|
/* LZ4 counterpart of decompress_startswith_xz(): checks whether the
 * decompressed blob starts with prefix followed by the byte `extra`.
 * Tries a cheap partial decode first and falls back to decompressing the
 * whole field when LZ4 cannot stop early. Returns 1 on match, 0 on
 * mismatch or too-short output, negative error otherwise
 * (-EPROTONOSUPPORT when built without LZ4 support). */
int decompress_startswith_lz4(const void *src, uint64_t src_size,
                              void **buffer, size_t *buffer_size,
                              const void *prefix, size_t prefix_len,
                              uint8_t extra) {
#if HAVE_LZ4
        /* Checks whether the decompressed blob starts with the
         * mentioned prefix. The byte extra needs to follow the
         * prefix */

        int r;
        size_t size;

        assert(src);
        assert(src_size > 0);
        assert(buffer);
        assert(buffer_size);
        assert(prefix);
        assert(*buffer_size == 0 || *buffer);

        /* Need at least the 8-byte uncompressed-size header. */
        if (src_size <= 8)
                return -EBADMSG;

        /* Make room for at least the prefix plus the extra byte. */
        if (!(greedy_realloc(buffer, buffer_size, ALIGN_8(prefix_len + 1), 1)))
                return -ENOMEM;

        r = LZ4_decompress_safe_partial((char*)src + 8, *buffer, src_size - 8,
                                        prefix_len + 1, *buffer_size);
        if (r >= 0)
                size = (unsigned) r;
        else {
                /* lz4 always tries to decode full "sequence", so in
                 * pathological cases might need to decompress the
                 * full field. */
                r = decompress_blob_lz4(src, src_size, buffer, buffer_size, &size, 0);
                if (r < 0)
                        return r;
        }

        if (size >= prefix_len + 1)
                return memcmp(*buffer, prefix, prefix_len) == 0 &&
                        ((const uint8_t*) *buffer)[prefix_len] == extra;
        else
                return 0;

#else
        return -EPROTONOSUPPORT;
#endif
}
|
2014-06-25 03:24:46 +02:00
|
|
|
|
2014-07-04 04:42:22 +02:00
|
|
|
int decompress_startswith(int compression,
|
|
|
|
const void *src, uint64_t src_size,
|
2014-08-04 04:50:00 +02:00
|
|
|
void **buffer, size_t *buffer_size,
|
|
|
|
const void *prefix, size_t prefix_len,
|
2014-07-04 04:42:22 +02:00
|
|
|
uint8_t extra) {
|
|
|
|
if (compression == OBJECT_COMPRESSED_XZ)
|
|
|
|
return decompress_startswith_xz(src, src_size,
|
|
|
|
buffer, buffer_size,
|
|
|
|
prefix, prefix_len,
|
|
|
|
extra);
|
|
|
|
else if (compression == OBJECT_COMPRESSED_LZ4)
|
|
|
|
return decompress_startswith_lz4(src, src_size,
|
|
|
|
buffer, buffer_size,
|
|
|
|
prefix, prefix_len,
|
|
|
|
extra);
|
|
|
|
else
|
|
|
|
return -EBADMSG;
|
|
|
|
}
|
|
|
|
|
2015-09-10 18:16:18 +02:00
|
|
|
/* Reads from fdf (at most max_bytes; (uint64_t) -1 means no limit),
 * XZ-compresses with the default preset and a CRC64 integrity check, and
 * writes the result to fdt via loop_write(). Returns 0 on success,
 * -errno on read failure, -EINVAL if the encoder cannot be initialized,
 * -EBADMSG on encoder failure, a loop_write() error on write failure,
 * -EPROTONOSUPPORT when built without XZ support. */
int compress_stream_xz(int fdf, int fdt, uint64_t max_bytes) {
#if HAVE_XZ
        _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
        lzma_ret ret;
        uint8_t buf[BUFSIZ], out[BUFSIZ];
        lzma_action action = LZMA_RUN;

        assert(fdf >= 0);
        assert(fdt >= 0);

        ret = lzma_easy_encoder(&s, LZMA_PRESET_DEFAULT, LZMA_CHECK_CRC64);
        if (ret != LZMA_OK) {
                log_error("Failed to initialize XZ encoder: code %u", ret);
                return -EINVAL;
        }

        for (;;) {
                /* Refill the input buffer whenever it is drained; EOF
                 * switches us into LZMA_FINISH mode to flush the stream. */
                if (s.avail_in == 0 && action == LZMA_RUN) {
                        size_t m = sizeof(buf);
                        ssize_t n;

                        /* Never read past the caller's byte budget. */
                        if (max_bytes != (uint64_t) -1 && (uint64_t) m > max_bytes)
                                m = (size_t) max_bytes;

                        n = read(fdf, buf, m);
                        if (n < 0)
                                return -errno;
                        if (n == 0)
                                action = LZMA_FINISH;
                        else {
                                s.next_in = buf;
                                s.avail_in = n;

                                if (max_bytes != (uint64_t) -1) {
                                        assert(max_bytes >= (uint64_t) n);
                                        max_bytes -= n;
                                }
                        }
                }

                /* Reset the output window once it has been flushed. */
                if (s.avail_out == 0) {
                        s.next_out = out;
                        s.avail_out = sizeof(out);
                }

                ret = lzma_code(&s, action);
                if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
                        log_error("Compression failed: code %u", ret);
                        return -EBADMSG;
                }

                /* Flush to fdt when the output buffer fills up or the
                 * stream is complete. */
                if (s.avail_out == 0 || ret == LZMA_STREAM_END) {
                        ssize_t n, k;

                        n = sizeof(out) - s.avail_out;

                        k = loop_write(fdt, out, n, false);
                        if (k < 0)
                                return k;

                        if (ret == LZMA_STREAM_END) {
                                log_debug("XZ compression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)",
                                          s.total_in, s.total_out,
                                          (double) s.total_out / s.total_in * 100);

                                return 0;
                        }
                }
        }
#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
coredump: use lz4frame api to compress coredumps
This converts the stream compression to use the new lz4frame api,
compatible with lz4cat. Previous code used custom headers, so the
compressed file was not compatible with lz4 command line tools.
I considered this the last blocker to using lz4 by default.
Speed seems to be reasonable, although a bit (a few percent) slower
than the lz4 binary, even though compression is the same. I don't
consider this important. It could be caused by the overhead of library
calls, but is probably caused by slightly different buffer sizes or
such. The code in this patch uses mmap, since since this allows the
buffer to be reused while not making the code more complicated at all.
In my testing, this version is noticably faster (~20%) than a naive
single-buffered version. mmap can cause the program to be killed with
SIGBUS, if the underlying file is truncated or a disk error occurs. We
only use this from within coredump and coredumpctl, so I don't
consider this an issue.
Old decompression code is retained and is used if the new code fails
indicating a format error. There have been reports of various smaller
distributions using previous lz4 code, i.e. the old format, and it is
nice to provide backwards compatibility. We can remove the legacy code
in a few versions.
The way that blobs are compressed in the journal is not affected.
2014-12-07 03:33:27 +01:00
|
|
|
#define LZ4_BUFSIZE (512*1024u)
|
2014-07-04 04:42:22 +02:00
|
|
|
|
2015-09-10 18:16:18 +02:00
|
|
|
int compress_stream_lz4(int fdf, int fdt, uint64_t max_bytes) {

#if HAVE_LZ4
        /* Compress the whole of fdf into fdt as a standard LZ4 frame
         * (lz4cat-compatible). The source file is mmap()ed and fed to the
         * lz4frame API in LZ4_BUFSIZE chunks; compressed output is collected
         * in "buf" and flushed with loop_write() whenever less than one
         * worst-case frame of space remains.
         *
         * Returns 0 on success, a negative errno-style error otherwise;
         * -EFBIG if the compressed stream would exceed max_bytes
         * (pass (uint64_t) -1 for no limit). */

        LZ4F_errorCode_t c;
        _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL;
        _cleanup_free_ char *buf = NULL;
        char *src = NULL;
        size_t size, n, total_in = 0, total_out, offset = 0, frame_size;
        struct stat st;
        int r;
        static const LZ4F_compressOptions_t options = {
                .stableSrc = 1, /* the mmap()ed source stays valid during each call */
        };
        static const LZ4F_preferences_t preferences = {
                .frameInfo.blockSizeID = 5,
        };

        c = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
        if (LZ4F_isError(c))
                return -ENOMEM;

        if (fstat(fdf, &st) < 0)
                return log_debug_errno(errno, "fstat() failed: %m");

        /* Worst-case compressed size of one input chunk, plus room for the
         * frame header and trailer, so one flush cycle always fits. */
        frame_size = LZ4F_compressBound(LZ4_BUFSIZE, &preferences);
        size = frame_size + 64*1024; /* add some space for header and trailer */
        buf = malloc(size);
        if (!buf)
                return -ENOMEM;

        n = offset = total_out = LZ4F_compressBegin(ctx, buf, size, &preferences);
        if (LZ4F_isError(n))
                return -EINVAL;

        src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fdf, 0);
        if (src == MAP_FAILED)
                return -errno;

        log_debug("Buffer size is %zu bytes, header size %zu bytes.", size, n);

        while (total_in < (size_t) st.st_size) {
                ssize_t k;

                k = MIN(LZ4_BUFSIZE, st.st_size - total_in);
                n = LZ4F_compressUpdate(ctx, buf + offset, size - offset,
                                        src + total_in, k, &options);
                if (LZ4F_isError(n)) {
                        r = -ENOTRECOVERABLE;
                        goto cleanup;
                }

                total_in += k;
                offset += n;
                total_out += n;

                if (max_bytes != (uint64_t) -1 && total_out > (size_t) max_bytes) {
                        log_debug("Compressed stream longer than %"PRIu64" bytes", max_bytes);
                        /* Fix: previously this returned directly, leaking the
                         * mmap()ed source region; unwind through cleanup. */
                        r = -EFBIG;
                        goto cleanup;
                }

                /* Flush once the buffer may no longer hold one more
                 * worst-case compressed chunk (+4 bytes frame end marker). */
                if (size - offset < frame_size + 4) {
                        k = loop_write(fdt, buf, offset, false);
                        if (k < 0) {
                                r = k;
                                goto cleanup;
                        }
                        offset = 0;
                }
        }

        n = LZ4F_compressEnd(ctx, buf + offset, size - offset, &options);
        if (LZ4F_isError(n)) {
                r = -ENOTRECOVERABLE;
                goto cleanup;
        }

        offset += n;
        total_out += n;
        r = loop_write(fdt, buf, offset, false);
        if (r < 0)
                goto cleanup;

        log_debug("LZ4 compression finished (%zu -> %zu bytes, %.1f%%)",
                  total_in, total_out,
                  (double) total_out / total_in * 100);
 cleanup:
        munmap(src, st.st_size);
        return r;
#else
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
2015-09-10 18:16:18 +02:00
|
|
|
int decompress_stream_xz(int fdf, int fdt, uint64_t max_bytes) {

#if HAVE_XZ
        /* Decompress the XZ stream read from fdf and write the plain data to
         * fdt, using the liblzma streaming interface with two BUFSIZ-sized
         * buffers. Returns 0 on success, a negative errno-style error
         * otherwise; -EFBIG if the decompressed output would exceed
         * max_bytes (pass (uint64_t) -1 for no limit). */
        _cleanup_(lzma_end) lzma_stream s = LZMA_STREAM_INIT;
        lzma_ret ret;

        uint8_t buf[BUFSIZ], out[BUFSIZ];
        lzma_action action = LZMA_RUN;

        assert(fdf >= 0);
        assert(fdt >= 0);

        /* UINT64_MAX: no decoder memory-usage limit; 0: no extra flags */
        ret = lzma_stream_decoder(&s, UINT64_MAX, 0);
        if (ret != LZMA_OK) {
                log_debug("Failed to initialize XZ decoder: code %u", ret);
                return -ENOMEM;
        }

        for (;;) {
                /* Refill the input buffer once liblzma has consumed it all,
                 * unless we already reached EOF and switched to LZMA_FINISH. */
                if (s.avail_in == 0 && action == LZMA_RUN) {
                        ssize_t n;

                        n = read(fdf, buf, sizeof(buf));
                        if (n < 0)
                                return -errno;
                        if (n == 0)
                                action = LZMA_FINISH; /* EOF: drain remaining output */
                        else {
                                s.next_in = buf;
                                s.avail_in = n;
                        }
                }

                /* Reset the output buffer whenever it has been fully used
                 * (and on the first iteration, since avail_out starts at 0). */
                if (s.avail_out == 0) {
                        s.next_out = out;
                        s.avail_out = sizeof(out);
                }

                ret = lzma_code(&s, action);
                if (ret != LZMA_OK && ret != LZMA_STREAM_END) {
                        log_debug("Decompression failed: code %u", ret);
                        return -EBADMSG;
                }

                /* Flush output when the buffer is full or the stream ended. */
                if (s.avail_out == 0 || ret == LZMA_STREAM_END) {
                        ssize_t n, k;

                        n = sizeof(out) - s.avail_out;

                        /* Enforce the caller's output size cap. */
                        if (max_bytes != (uint64_t) -1) {
                                if (max_bytes < (uint64_t) n)
                                        return -EFBIG;

                                max_bytes -= n;
                        }

                        k = loop_write(fdt, out, n, false);
                        if (k < 0)
                                return k;

                        if (ret == LZMA_STREAM_END) {
                                log_debug("XZ decompression finished (%"PRIu64" -> %"PRIu64" bytes, %.1f%%)",
                                          s.total_in, s.total_out,
                                          (double) s.total_out / s.total_in * 100);
                                return 0;
                        }
                }
        }
#else
        log_debug("Cannot decompress file. Compiled without XZ support.");
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
2015-10-18 23:27:51 +02:00
|
|
|
/* Decompress the LZ4 frame stream read from fd 'in' into fd 'out'.
 *
 * The input file is mmap()ed in full, so a truncated file or disk error can
 * raise SIGBUS in the caller's process — acceptable here since this is only
 * used by coredump/coredumpctl.
 *
 * max_bytes limits the size of the decompressed output; pass (uint64_t) -1
 * for "no limit". Returns 0 on success, -EFBIG if the limit is exceeded,
 * -EBADMSG on malformed input, other negative errno-style codes on failure,
 * and -EPROTONOSUPPORT when built without LZ4. */
int decompress_stream_lz4(int in, int out, uint64_t max_bytes) {
#if HAVE_LZ4
        size_t c;
        _cleanup_(LZ4F_freeDecompressionContextp) LZ4F_decompressionContext_t ctx = NULL;
        _cleanup_free_ char *buf = NULL;
        char *src;
        struct stat st;
        int r = 0;
        size_t total_in = 0, total_out = 0;

        c = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION);
        if (LZ4F_isError(c))
                return -ENOMEM;

        if (fstat(in, &st) < 0)
                return log_debug_errno(errno, "fstat() failed: %m");

        /* An empty file cannot carry an LZ4 frame header, and mmap() rejects
         * zero-length mappings with EINVAL anyway; report a format error
         * instead of leaking an unrelated -EINVAL to the caller. This also
         * guarantees total_in > 0 at the final log_debug() below. */
        if (st.st_size == 0)
                return -EBADMSG;

        buf = malloc(LZ4_BUFSIZE);
        if (!buf)
                return -ENOMEM;

        src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in, 0);
        if (src == MAP_FAILED)
                return -errno;

        while (total_in < (size_t) st.st_size) {
                /* in/out parameters: LZ4F_decompress() updates 'produced' to
                 * the bytes written to buf and 'used' to the bytes consumed
                 * from the source. */
                size_t produced = LZ4_BUFSIZE;
                size_t used = st.st_size - total_in;

                c = LZ4F_decompress(ctx, buf, &produced, src + total_in, &used, NULL);
                if (LZ4F_isError(c)) {
                        r = -EBADMSG;
                        goto cleanup;
                }

                total_in += used;
                total_out += produced;

                /* Compare in uint64_t, not size_t: casting max_bytes down to
                 * size_t would truncate on 32-bit platforms and could trigger
                 * a spurious -EFBIG. */
                if (max_bytes != (uint64_t) -1 && (uint64_t) total_out > max_bytes) {
                        log_debug("Decompressed stream longer than %"PRIu64" bytes", max_bytes);
                        r = -EFBIG;
                        goto cleanup;
                }

                r = loop_write(out, buf, produced, false);
                if (r < 0)
                        goto cleanup;
        }

        log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)",
                  total_in, total_out,
                  (double) total_out / total_in * 100);
 cleanup:
        munmap(src, st.st_size);
        return r;
#else
        log_debug("Cannot decompress file. Compiled without LZ4 support.");
        return -EPROTONOSUPPORT;
#endif
}
|
|
|
|
|
2015-09-10 18:16:18 +02:00
|
|
|
/* Pick the decompressor matching the file name suffix and run it over the
 * given fd pair. Unknown suffixes yield -EPROTONOSUPPORT. */
int decompress_stream(const char *filename, int fdf, int fdt, uint64_t max_bytes) {

        if (endswith(filename, ".lz4"))
                return decompress_stream_lz4(fdf, fdt, max_bytes);

        if (endswith(filename, ".xz"))
                return decompress_stream_xz(fdf, fdt, max_bytes);

        /* Neither a .lz4 nor a .xz file — we have no handler for it. */
        return -EPROTONOSUPPORT;
}
|