Merge pull request #15557 from poettering/journal-zero-fix

journal: deal better with reading from zeroed out journal mmaps
This commit is contained in:
Anita Zhang 2020-05-20 18:14:51 -07:00 committed by GitHub
commit b10ceb4783
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 193 additions and 100 deletions

View File

@ -585,4 +585,17 @@ static inline int __coverity_check_and_return__(int condition) {
DEFINE_PUBLIC_TRIVIAL_REF_FUNC(type, name); \
DEFINE_PUBLIC_TRIVIAL_UNREF_FUNC(type, name, free_func);
/* A macro to force copying of a variable from memory. This is useful whenever we want to read something from
 * memory and want to make sure the compiler won't optimize away the destination variable for us. It's not
 * supposed to be a full CPU memory barrier, i.e. CPU is still allowed to reorder the reads, but it is not
 * allowed to remove our local copies of the variables. We want this to work for unaligned memory, hence
 * memcpy() is great for our purposes.
 *
 * The argument must be an lvalue, since its address is taken; the macro evaluates to a copy of its value.
 * The alternate keyword spellings (__typeof__, __asm__) are used so that the macro also expands when the
 * compiler runs in a strict ISO mode, where plain typeof/asm are not keywords. The temporary carries a
 * macro-specific name so that an argument that happens to be called "_copy" is not shadowed by it. */
#define READ_NOW(x)                                                              \
        ({                                                                       \
                __typeof__(x) _read_now_copy_;                                   \
                memcpy(&_read_now_copy_, &(x), sizeof(_read_now_copy_));         \
                __asm__ __volatile__ ("" : : : "memory");                        \
                _read_now_copy_;                                                 \
        })
#include "log.h"

View File

@ -12,7 +12,7 @@
size_t page_size(void) _pure_;
#define PAGE_ALIGN(l) ALIGN_TO((l), page_size())
/* Masks off the bits of l selected by page_size() - 1, i.e. rounds l down to a page boundary (presumes
 * page_size() is a power of two). The argument is parenthesized so that an expression built from
 * lower-precedence operators, such as "a | b", is aligned as a whole rather than only its last operand. */
#define PAGE_ALIGN_DOWN(l) ((l) & ~(page_size() - 1))
/* Normal memcpy requires src to be nonnull. We do nothing if n is 0. */
static inline void memcpy_safe(void *dst, const void *src, size_t n) {

View File

@ -533,7 +533,7 @@ static int journal_file_verify_header(JournalFile *f) {
if (f->header->state >= _STATE_MAX)
return -EBADMSG;
header_size = le64toh(f->header->header_size);
header_size = le64toh(READ_NOW(f->header->header_size));
/* The first addition was n_data, so check that we are at least this large */
if (header_size < HEADER_SIZE_MIN)
@ -542,7 +542,7 @@ static int journal_file_verify_header(JournalFile *f) {
if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
return -EBADMSG;
arena_size = le64toh(f->header->arena_size);
arena_size = le64toh(READ_NOW(f->header->arena_size));
if (UINT64_MAX - header_size < arena_size || header_size + arena_size > (uint64_t) f->last_stat.st_size)
return -ENODATA;
@ -625,26 +625,29 @@ int journal_file_fstat(JournalFile *f) {
}
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
uint64_t old_size, new_size;
uint64_t old_size, new_size, old_header_size, old_arena_size;
int r;
assert(f);
assert(f->header);
/* We assume that this file is not sparse, and we know that
* for sure, since we always call posix_fallocate()
* ourselves */
/* We assume that this file is not sparse, and we know that for sure, since we always call
* posix_fallocate() ourselves */
if (size > PAGE_ALIGN_DOWN(UINT64_MAX) - offset)
return -EINVAL;
if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
return -EIO;
old_size =
le64toh(f->header->header_size) +
le64toh(f->header->arena_size);
old_header_size = le64toh(READ_NOW(f->header->header_size));
old_arena_size = le64toh(READ_NOW(f->header->arena_size));
if (old_arena_size > PAGE_ALIGN_DOWN(UINT64_MAX) - old_header_size)
return -EBADMSG;
new_size = PAGE_ALIGN(offset + size);
if (new_size < le64toh(f->header->header_size))
new_size = le64toh(f->header->header_size);
old_size = old_header_size + old_arena_size;
new_size = MAX(PAGE_ALIGN(offset + size), old_header_size);
if (new_size <= old_size) {
@ -690,7 +693,7 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
if (r != 0)
return -r;
f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));
f->header->arena_size = htole64(new_size - old_header_size);
return journal_file_fstat(f);
}
@ -702,7 +705,15 @@ static unsigned type_to_context(ObjectType type) {
return type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX ? type : 0;
}
static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret, size_t *ret_size) {
static int journal_file_move_to(
JournalFile *f,
ObjectType type,
bool keep_always,
uint64_t offset,
uint64_t size,
void **ret,
size_t *ret_size) {
int r;
assert(f);
@ -711,6 +722,9 @@ static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_alway
if (size <= 0)
return -EINVAL;
if (size > UINT64_MAX - offset)
return -EBADMSG;
/* Avoid SIGBUS on invalid accesses */
if (offset + size > (uint64_t) f->last_stat.st_size) {
/* Hmm, out of range? Let's refresh the fstat() data
@ -760,7 +774,7 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
le64toh(o->data.n_entries),
offset);
if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
if (le64toh(o->object.size) <= offsetof(DataObject, payload))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(DataObject, payload),
@ -782,7 +796,7 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
break;
case OBJECT_FIELD:
if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
if (le64toh(o->object.size) <= offsetof(FieldObject, payload))
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(FieldObject, payload),
@ -798,18 +812,22 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
offset);
break;
case OBJECT_ENTRY:
if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
case OBJECT_ENTRY: {
uint64_t sz;
sz = le64toh(READ_NOW(o->object.size));
if (sz < offsetof(EntryObject, items) ||
(sz - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
offsetof(EntryObject, items),
le64toh(o->object.size),
sz,
offset);
if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
if ((sz - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid number items in entry: %" PRIu64 ": %" PRIu64,
(le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem),
(sz - offsetof(EntryObject, items)) / sizeof(EntryItem),
offset);
if (le64toh(o->entry.seqnum) <= 0)
@ -831,25 +849,35 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
offset);
break;
}
case OBJECT_DATA_HASH_TABLE:
case OBJECT_FIELD_HASH_TABLE:
if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
(le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
case OBJECT_FIELD_HASH_TABLE: {
uint64_t sz;
sz = le64toh(READ_NOW(o->object.size));
if (sz < offsetof(HashTableObject, items) ||
(sz - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
(sz - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
le64toh(o->object.size),
sz,
offset);
break;
}
case OBJECT_ENTRY_ARRAY:
if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
(le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
case OBJECT_ENTRY_ARRAY: {
uint64_t sz;
sz = le64toh(READ_NOW(o->object.size));
if (sz < offsetof(EntryArrayObject, items) ||
(sz - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
(sz - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
"Invalid object entry array size: %" PRIu64 ": %" PRIu64,
le64toh(o->object.size),
sz,
offset);
if (!VALID64(le64toh(o->entry_array.next_entry_array_offset)))
@ -859,6 +887,7 @@ static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o)
offset);
break;
}
case OBJECT_TAG:
if (le64toh(o->object.size) != sizeof(TagObject))
@ -905,7 +934,7 @@ int journal_file_move_to_object(JournalFile *f, ObjectType type, uint64_t offset
return r;
o = (Object*) t;
s = le64toh(o->object.size);
s = le64toh(READ_NOW(o->object.size));
if (s == 0)
return log_debug_errno(SYNTHETIC_ERRNO(EBADMSG),
@ -995,11 +1024,21 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
if (p == 0)
p = le64toh(f->header->header_size);
else {
uint64_t sz;
r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &tail);
if (r < 0)
return r;
p += ALIGN64(le64toh(tail->object.size));
sz = le64toh(READ_NOW(tail->object.size));
if (sz > UINT64_MAX - sizeof(uint64_t) + 1)
return -EBADMSG;
sz = ALIGN64(sz);
if (p > UINT64_MAX - sz)
return -EBADMSG;
p += sz;
}
r = journal_file_allocate(f, p, size);
@ -1011,10 +1050,10 @@ int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, O
return r;
o = (Object*) t;
zero(o->object);
o->object.type = type;
o->object.size = htole64(size);
o->object = (ObjectHeader) {
.type = type,
.size = htole64(size),
};
f->header->tail_object_offset = htole64(p);
f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);
@ -1156,7 +1195,7 @@ static int journal_file_link_field(
if (o->object.type != OBJECT_FIELD)
return -EINVAL;
m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
@ -1201,7 +1240,7 @@ static int journal_file_link_data(
if (o->object.type != OBJECT_DATA)
return -EINVAL;
m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
@ -1257,7 +1296,7 @@ int journal_file_find_field_object_with_hash(
osize = offsetof(Object, field.payload) + size;
m = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
m = le64toh(READ_NOW(f->header->field_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
@ -1329,7 +1368,7 @@ int journal_file_find_data_object_with_hash(
osize = offsetof(Object, data.payload) + size;
m = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
m = le64toh(READ_NOW(f->header->data_hash_table_size)) / sizeof(HashItem);
if (m <= 0)
return -EBADMSG;
@ -1351,7 +1390,7 @@ int journal_file_find_data_object_with_hash(
uint64_t l;
size_t rsize = 0;
l = le64toh(o->object.size);
l = le64toh(READ_NOW(o->object.size));
if (l <= offsetof(Object, data.payload))
return -EBADMSG;
@ -1576,30 +1615,47 @@ static int journal_file_append_data(
}
/* Returns how many EntryItem slots fit behind the fixed part of the entry object o. Returns 0 if o is not
 * of type OBJECT_ENTRY, or if its recorded size is smaller than the fixed part. */
uint64_t journal_file_entry_n_items(Object *o) {
        uint64_t sz;

        assert(o);

        if (o->object.type != OBJECT_ENTRY)
                return 0;

        /* Read the size exactly once, so that the bounds check and the subtraction below are guaranteed
         * to operate on the same value. */
        sz = le64toh(READ_NOW(o->object.size));
        if (sz < offsetof(Object, entry.items))
                return 0; /* The unsigned subtraction below would wrap around otherwise */

        return (sz - offsetof(Object, entry.items)) / sizeof(EntryItem);
}
/* Returns how many 64bit offsets fit behind the fixed part of the entry array object o. Returns 0 if o is
 * not of type OBJECT_ENTRY_ARRAY, or if its recorded size is smaller than the fixed part. */
uint64_t journal_file_entry_array_n_items(Object *o) {
        uint64_t sz;

        assert(o);

        if (o->object.type != OBJECT_ENTRY_ARRAY)
                return 0;

        /* Read the size exactly once, so that the bounds check and the subtraction below are guaranteed
         * to operate on the same value. */
        sz = le64toh(READ_NOW(o->object.size));
        if (sz < offsetof(Object, entry_array.items))
                return 0; /* The unsigned subtraction below would wrap around otherwise */

        return (sz - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
}
/* Returns how many HashItem buckets fit behind the fixed part of the hash table object o. Returns 0 if o is
 * neither of type OBJECT_DATA_HASH_TABLE nor OBJECT_FIELD_HASH_TABLE, or if its recorded size is smaller
 * than the fixed part. */
uint64_t journal_file_hash_table_n_items(Object *o) {
        uint64_t sz;

        assert(o);

        if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
                return 0;

        /* Read the size exactly once, so that the bounds check and the subtraction below are guaranteed
         * to operate on the same value. */
        sz = le64toh(READ_NOW(o->object.size));
        if (sz < offsetof(Object, hash_table.items))
                return 0; /* The unsigned subtraction below would wrap around otherwise */

        return (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem);
}
static int link_entry_into_array(JournalFile *f,
@ -1617,7 +1673,7 @@ static int link_entry_into_array(JournalFile *f,
assert(p > 0);
a = le64toh(*first);
i = hidx = le64toh(*idx);
i = hidx = le64toh(READ_NOW(*idx));
while (a > 0) {
r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
@ -1682,6 +1738,7 @@ static int link_entry_into_array_plus_one(JournalFile *f,
le64_t *idx,
uint64_t p) {
uint64_t hidx;
int r;
assert(f);
@ -1690,32 +1747,33 @@ static int link_entry_into_array_plus_one(JournalFile *f,
assert(idx);
assert(p > 0);
if (*idx == 0)
hidx = le64toh(READ_NOW(*idx));
if (hidx == UINT64_MAX)
return -EBADMSG;
if (hidx == 0)
*extra = htole64(p);
else {
le64_t i;
i = htole64(le64toh(*idx) - 1);
i = htole64(hidx - 1);
r = link_entry_into_array(f, first, &i, p);
if (r < 0)
return r;
}
*idx = htole64(le64toh(*idx) + 1);
*idx = htole64(hidx + 1);
return 0;
}
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
uint64_t p;
int r;
assert(f);
assert(o);
assert(offset > 0);
p = le64toh(o->entry.items[i].object_offset);
if (p == 0)
return -EINVAL;
r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
if (r < 0)
return r;
@ -2435,6 +2493,7 @@ _pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle
}
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
uint64_t sq;
Object *o;
int r;
@ -2445,9 +2504,10 @@ static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
if (r < 0)
return r;
if (le64toh(o->entry.seqnum) == needle)
sq = le64toh(READ_NOW(o->entry.seqnum));
if (sq == needle)
return TEST_FOUND;
else if (le64toh(o->entry.seqnum) < needle)
else if (sq < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
@ -2473,6 +2533,7 @@ int journal_file_move_to_entry_by_seqnum(
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
Object *o;
uint64_t rt;
int r;
assert(f);
@ -2482,9 +2543,10 @@ static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
if (r < 0)
return r;
if (le64toh(o->entry.realtime) == needle)
rt = le64toh(READ_NOW(o->entry.realtime));
if (rt == needle)
return TEST_FOUND;
else if (le64toh(o->entry.realtime) < needle)
else if (rt < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
@ -2510,6 +2572,7 @@ int journal_file_move_to_entry_by_realtime(
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
Object *o;
uint64_t m;
int r;
assert(f);
@ -2519,9 +2582,10 @@ static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
if (r < 0)
return r;
if (le64toh(o->entry.monotonic) == needle)
m = le64toh(READ_NOW(o->entry.monotonic));
if (m == needle)
return TEST_FOUND;
else if (le64toh(o->entry.monotonic) < needle)
else if (m < needle)
return TEST_LEFT;
else
return TEST_RIGHT;
@ -2679,7 +2743,7 @@ int journal_file_next_entry(
assert(f);
assert(f->header);
n = le64toh(f->header->n_entries);
n = le64toh(READ_NOW(f->header->n_entries));
if (n <= 0)
return 0;
@ -2752,7 +2816,7 @@ int journal_file_next_entry_for_data(
if (r < 0)
return r;
n = le64toh(d->data.n_entries);
n = le64toh(READ_NOW(d->data.n_entries));
if (n <= 0)
return n;
@ -2981,7 +3045,7 @@ void journal_file_dump(JournalFile *f) {
journal_file_print_header(f);
p = le64toh(f->header->header_size);
p = le64toh(READ_NOW(f->header->header_size));
while (p != 0) {
r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
if (r < 0)
@ -3038,7 +3102,7 @@ void journal_file_dump(JournalFile *f) {
if (p == le64toh(f->header->tail_object_offset))
p = 0;
else
p = p + ALIGN64(le64toh(o->object.size));
p += ALIGN64(le64toh(o->object.size));
}
return;
@ -3659,7 +3723,11 @@ int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint6
if (le_hash != o->data.hash)
return -EBADMSG;
l = le64toh(o->object.size) - offsetof(Object, data.payload);
l = le64toh(READ_NOW(o->object.size));
if (l < offsetof(Object, data.payload))
return -EBADMSG;
l -= offsetof(Object, data.payload);
t = (size_t) l;
/* We hit the limit on 32bit machines */

View File

@ -41,10 +41,10 @@ struct Match {
struct Location {
LocationType type;
bool seqnum_set;
bool realtime_set;
bool monotonic_set;
bool xor_hash_set;
bool seqnum_set:1;
bool realtime_set:1;
bool monotonic_set:1;
bool xor_hash_set:1;
uint64_t seqnum;
sd_id128_t seqnum_id;

View File

@ -115,28 +115,24 @@ static void detach_location(sd_journal *j) {
journal_file_reset_location(f);
}
/* Forgets the position the journal context j currently points at: calls detach_location() on j and then
 * clears j->current_location with zero(). j must not be NULL. */
static void reset_location(sd_journal *j) {
assert(j);
/* Detach first, then wipe the stored location; the order of the two calls is kept as is. */
detach_location(j);
zero(j->current_location);
}
/* Initializes *l from the entry object o that lives in journal file f.
 *
 * type must be LOCATION_DISCRETE or LOCATION_SEEK, and o must be of type OBJECT_ENTRY. The sequence number,
 * realtime and monotonic timestamps and the XOR hash are converted from little endian; the seqnum ID is
 * taken from the file header and the boot ID from the entry. All four "*_set" flags are turned on.
 *
 * The structure is assigned as a whole, hence any Location field not listed below ends up zeroed. */
static void init_location(Location *l, LocationType type, JournalFile *f, Object *o) {
        assert(l);
        assert(IN_SET(type, LOCATION_DISCRETE, LOCATION_SEEK));
        assert(f);
        assert(o->object.type == OBJECT_ENTRY);

        *l = (Location) {
                .type = type,
                .seqnum = le64toh(o->entry.seqnum),
                .seqnum_id = f->header->seqnum_id,
                .realtime = le64toh(o->entry.realtime),
                .monotonic = le64toh(o->entry.monotonic),
                .boot_id = o->entry.boot_id,
                .xor_hash = le64toh(o->entry.xor_hash),
                .seqnum_set = true,
                .realtime_set = true,
                .monotonic_set = true,
                .xor_hash_set = true,
        };
}
static void set_location(sd_journal *j, JournalFile *f, Object *o) {
@ -1014,9 +1010,10 @@ _public_ int sd_journal_seek_cursor(sd_journal *j, const char *cursor) {
!realtime_set)
return -EINVAL;
reset_location(j);
j->current_location.type = LOCATION_SEEK;
detach_location(j);
j->current_location = (Location) {
.type = LOCATION_SEEK,
};
if (realtime_set) {
j->current_location.realtime = (uint64_t) realtime;
@ -1129,11 +1126,14 @@ _public_ int sd_journal_seek_monotonic_usec(sd_journal *j, sd_id128_t boot_id, u
assert_return(j, -EINVAL);
assert_return(!journal_pid_changed(j), -ECHILD);
reset_location(j);
j->current_location.type = LOCATION_SEEK;
j->current_location.boot_id = boot_id;
j->current_location.monotonic = usec;
j->current_location.monotonic_set = true;
detach_location(j);
j->current_location = (Location) {
.type = LOCATION_SEEK,
.boot_id = boot_id,
.monotonic = usec,
.monotonic_set = true,
};
return 0;
}
@ -1142,10 +1142,13 @@ _public_ int sd_journal_seek_realtime_usec(sd_journal *j, uint64_t usec) {
assert_return(j, -EINVAL);
assert_return(!journal_pid_changed(j), -ECHILD);
reset_location(j);
j->current_location.type = LOCATION_SEEK;
j->current_location.realtime = usec;
j->current_location.realtime_set = true;
detach_location(j);
j->current_location = (Location) {
.type = LOCATION_SEEK,
.realtime = usec,
.realtime_set = true,
};
return 0;
}
@ -1154,8 +1157,11 @@ _public_ int sd_journal_seek_head(sd_journal *j) {
assert_return(j, -EINVAL);
assert_return(!journal_pid_changed(j), -ECHILD);
reset_location(j);
j->current_location.type = LOCATION_HEAD;
detach_location(j);
j->current_location = (Location) {
.type = LOCATION_HEAD,
};
return 0;
}
@ -1164,8 +1170,11 @@ _public_ int sd_journal_seek_tail(sd_journal *j) {
assert_return(j, -EINVAL);
assert_return(!journal_pid_changed(j), -ECHILD);
reset_location(j);
j->current_location.type = LOCATION_TAIL;
detach_location(j);
j->current_location = (Location) {
.type = LOCATION_TAIL,
};
return 0;
}
@ -2357,7 +2366,10 @@ static int return_data(sd_journal *j, JournalFile *f, Object *o, const void **da
uint64_t l;
int compression;
l = le64toh(o->object.size) - offsetof(Object, data.payload);
l = le64toh(READ_NOW(o->object.size));
if (l < offsetof(Object, data.payload))
return -EBADMSG;
l -= offsetof(Object, data.payload);
t = (size_t) l;
/* We can't read objects larger than 4G on a 32bit machine */