2017-11-18 17:09:20 +01:00
/* SPDX-License-Identifier: LGPL-2.1+ */
2011-10-08 02:20:44 +02:00
/***
This file is part of systemd .
Copyright 2011 Lennart Poettering
systemd is free software ; you can redistribute it and / or modify it
2012-04-12 00:20:58 +02:00
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation ; either version 2.1 of the License , or
2011-10-08 02:20:44 +02:00
( at your option ) any later version .
systemd is distributed in the hope that it will be useful , but
WITHOUT ANY WARRANTY ; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the GNU
2012-04-12 00:20:58 +02:00
Lesser General Public License for more details .
2011-10-08 02:20:44 +02:00
2012-04-12 00:20:58 +02:00
You should have received a copy of the GNU Lesser General Public License
2011-10-08 02:20:44 +02:00
along with systemd ; If not , see < http : //www.gnu.org/licenses/>.
* * */
# include <errno.h>
# include <fcntl.h>
2015-01-08 01:22:29 +01:00
# include <linux/fs.h>
2016-02-12 13:59:57 +01:00
# include <pthread.h>
2015-10-24 22:58:24 +02:00
# include <stddef.h>
# include <sys/mman.h>
# include <sys/statvfs.h>
# include <sys/uio.h>
# include <unistd.h>
2012-10-16 22:58:07 +02:00
2015-10-27 03:01:06 +01:00
# include "alloc-util.h"
2015-01-06 19:51:03 +01:00
# include "btrfs-util.h"
2015-10-26 20:39:23 +01:00
# include "chattr-util.h"
2015-10-24 22:58:24 +02:00
# include "compress.h"
2015-10-25 13:14:12 +01:00
# include "fd-util.h"
2012-08-16 01:51:54 +02:00
# include "journal-authenticate.h"
2011-10-08 02:20:44 +02:00
# include "journal-def.h"
# include "journal-file.h"
# include "lookup3.h"
2015-10-26 16:18:16 +01:00
# include "parse-util.h"
2016-04-25 00:31:24 +02:00
# include "path-util.h"
2015-04-10 22:27:10 +02:00
# include "random-util.h"
2015-12-11 07:42:22 +01:00
# include "sd-event.h"
2016-02-18 02:37:10 +01:00
# include "set.h"
2018-02-19 18:01:05 +01:00
# include "stat-util.h"
2015-10-24 22:58:24 +02:00
# include "string-util.h"
2017-01-25 01:19:33 +01:00
# include "strv.h"
2015-10-26 20:26:23 +01:00
# include "xattr-util.h"
2011-10-08 02:20:44 +02:00
2012-07-17 00:36:15 +02:00
/* Default sizes, in bytes, of the data and field hash tables */
#define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
#define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))

#define COMPRESSION_SIZE_THRESHOLD (512ULL)

/* The minimum size of a journal file */
#define JOURNAL_FILE_SIZE_MIN (512ULL*1024ULL)                     /* 512 KiB */

/* Lower and upper bounds applied when the max_use value is deduced
 * from the file system size */
#define DEFAULT_MAX_USE_LOWER (1ULL*1024ULL*1024ULL)               /* 1 MiB */
#define DEFAULT_MAX_USE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)       /* 4 GiB */

/* Default minimal use limit: how much we'll use even if keep_free suggests otherwise. */
#define DEFAULT_MIN_USE (1ULL*1024ULL*1024ULL)                     /* 1 MiB */

/* Upper bound applied when max_size is deduced from max_use */
#define DEFAULT_MAX_SIZE_UPPER (128ULL*1024ULL*1024ULL)            /* 128 MiB */

/* Upper bound applied when the keep_free value is deduced from the
 * file system size */
#define DEFAULT_KEEP_FREE_UPPER (4ULL*1024ULL*1024ULL*1024ULL)     /* 4 GiB */

/* The keep_free value used when the file system size cannot be
 * determined */
#define DEFAULT_KEEP_FREE (1024ULL*1024ULL)                        /* 1 MiB */

/* Default maximum number of journal files to keep around. */
#define DEFAULT_N_MAX_FILES (100)

/* n_data was the first entry we added after the initial file format design */
#define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))

/* How many entries to keep in the entry array chain cache at max */
#define CHAIN_CACHE_MAX 20

/* How much to increase the journal file size at once each time we allocate something new. */
#define FILE_SIZE_INCREASE (8ULL*1024ULL*1024ULL)                  /* 8 MiB */

/* Reread fstat() of the file for detecting deletions at least this often */
#define LAST_STAT_REFRESH_USEC (5*USEC_PER_SEC)

/* The mmap context used for the header: we pick the value one above the last defined object type */
#define CONTEXT_HEADER _OBJECT_TYPE_MAX

#ifdef __clang__
#  pragma GCC diagnostic ignored "-Waddress-of-packed-member"
#endif
2016-02-12 13:59:57 +01:00
/* Drives the offlining state machine kept in f->offline_state until it reaches a terminal state.
 *
 * This may be called from a separate thread to prevent blocking the caller for the duration of
 * fsync(). As a result we use atomic operations on f->offline_state for inter-thread communication
 * with journal_file_set_offline() and journal_file_set_online().
 *
 * Every transition is done with a compare-and-swap; if the swap loses against a concurrent update
 * the state is simply re-read and handled again. */
static void journal_file_set_offline_internal(JournalFile *f) {
        assert(f);
        assert(f->fd >= 0);
        assert(f->header);

        while (true) {
                switch (f->offline_state) {

                case OFFLINE_CANCEL:
                        /* Cancellation requested: acknowledge it and stop. */
                        if (__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_DONE))
                                return;
                        break;

                case OFFLINE_AGAIN_FROM_SYNCING:
                        /* Restart requested: go back to the syncing step. */
                        if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_SYNCING))
                                continue;
                        break;

                case OFFLINE_AGAIN_FROM_OFFLINING:
                        /* Restart requested: go back to the syncing step. */
                        if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_SYNCING))
                                continue;
                        break;

                case OFFLINE_SYNCING:
                        (void) fsync(f->fd);

                        if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_OFFLINING))
                                continue;

                        /* Only after winning the transition do we touch the header state,
                         * then sync once more so that the new state is written out too. */
                        f->header->state = f->archive ? STATE_ARCHIVED : STATE_OFFLINE;
                        (void) fsync(f->fd);
                        break;

                case OFFLINE_OFFLINING:
                        if (__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_DONE))
                                return;
                        break;

                case OFFLINE_DONE:
                        return;

                case OFFLINE_JOINED:
                        log_debug(" OFFLINE_JOINED unexpected offline state for journal_file_set_offline_internal() ");
                        return;
                }
        }
}
/* Thread entry point used by journal_file_set_offline(): names the thread and then runs the
 * offlining state machine on the JournalFile passed in arg. Always returns NULL. */
static void *journal_file_set_offline_thread(void *arg) {
        JournalFile *f = arg;

        /* Thread names are limited to 16 bytes including the trailing NUL; anything longer makes
         * pthread_setname_np() fail with ERANGE. The name hence must not carry any padding.
         * Naming is best-effort only, which is why the result is ignored. */
        (void) pthread_setname_np(pthread_self(), "journal-offline");

        journal_file_set_offline_internal(f);

        return NULL;
}
/* Joins the offline thread of f, unless that already happened (OFFLINE_JOINED), in which case this
 * is a NOP. Returns 0 on success, the negated pthread_join() error on failure, or -EIO if the mmap
 * cache recorded a SIGBUS for this file. */
static int journal_file_set_offline_thread_join(JournalFile *f) {
        int err;

        assert(f);

        if (f->offline_state == OFFLINE_JOINED)
                return 0;

        err = pthread_join(f->offline_thread, NULL);
        if (err != 0)
                return -err;

        f->offline_state = OFFLINE_JOINED;

        return mmap_cache_got_sigbus(f->mmap, f->cache_fd) ? -EIO : 0;
}
2013-03-25 17:49:03 +01:00
2016-02-12 13:59:57 +01:00
/* Trigger a restart if the offline thread is mid-flight in a restartable state.
 *
 * Returns true if a restart is pending afterwards (either because one already was, or because we
 * successfully requested one), false if the state is not restartable. A lost compare-and-swap
 * just means the state changed under us, in which case we look at it again. */
static bool journal_file_set_offline_try_restart(JournalFile *f) {
        while (true) {
                switch (f->offline_state) {

                case OFFLINE_AGAIN_FROM_SYNCING:
                case OFFLINE_AGAIN_FROM_OFFLINING:
                        /* A restart is already queued. */
                        return true;

                case OFFLINE_CANCEL:
                        if (__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_CANCEL, OFFLINE_AGAIN_FROM_SYNCING))
                                return true;
                        break;

                case OFFLINE_SYNCING:
                        if (__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_AGAIN_FROM_SYNCING))
                                return true;
                        break;

                case OFFLINE_OFFLINING:
                        if (__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_OFFLINING, OFFLINE_AGAIN_FROM_OFFLINING))
                                return true;
                        break;

                default:
                        return false;
                }
        }
}
2016-02-12 13:59:57 +01:00
/* Sets a journal offline.
 *
 * If wait is false then an offline is dispatched in a separate thread for a
 * subsequent journal_file_set_offline() or journal_file_set_online() of the
 * same journal to synchronize with.
 *
 * If wait is true, then either an existing offline thread will be restarted
 * and joined, or if none exists the offline is simply performed in this
 * context without involving another thread.
 *
 * Returns 0 on success, -EPERM if the file is not writable, -EINVAL if it has
 * no valid fd or header, or a negative errno-style error if joining or
 * spawning the offline thread fails.
 */
int journal_file_set_offline(JournalFile *f, bool wait) {
        bool restarted;
        int r;

        assert(f);

        if (!f->writable)
                return -EPERM;

        if (!(f->fd >= 0 && f->header))
                return -EINVAL;

        /* An offlining journal is implicitly online and may modify f->header->state, hence the
         * header state may only be looked at once journal_file_is_offlining() (which contains a
         * memory barrier) says no offline is in progress. If the journal then turns out not to be
         * online there is nothing to do, but a lingering offline thread may still need joining so
         * that its resources are not leaked; the join helper is idempotent. */
        if (!journal_file_is_offlining(f) && f->header->state != STATE_ONLINE)
                return journal_file_set_offline_thread_join(f);

        /* Restart an in-flight offline thread and wait if needed, or join a lingering done one. */
        restarted = journal_file_set_offline_try_restart(f);
        if (wait || !restarted) {
                r = journal_file_set_offline_thread_join(f);
                if (r < 0)
                        return r;
        }

        if (restarted)
                return 0;

        /* Initiate a new offline. */
        f->offline_state = OFFLINE_SYNCING;

        if (wait) /* Without using a thread if waiting. */
                journal_file_set_offline_internal(f);
        else {
                sigset_t ss, saved_ss;
                int k;

                /* All signals are blocked around pthread_create() and the previous mask is
                 * restored right after.
                 *
                 * On any failure before the thread exists the state must go back to
                 * OFFLINE_JOINED, as otherwise a later join would wait for a thread that was
                 * never started. */
                if (sigfillset(&ss) < 0) {
                        r = -errno;
                        f->offline_state = OFFLINE_JOINED;
                        return r;
                }

                r = pthread_sigmask(SIG_BLOCK, &ss, &saved_ss);
                if (r > 0) {
                        f->offline_state = OFFLINE_JOINED;
                        return -r;
                }

                r = pthread_create(&f->offline_thread, NULL, journal_file_set_offline_thread, f);

                /* Restore the signal mask regardless of whether the thread could be created. */
                k = pthread_sigmask(SIG_SETMASK, &saved_ss, NULL);
                if (r > 0) {
                        f->offline_state = OFFLINE_JOINED;
                        return -r;
                }
                if (k > 0)
                        return -k;
        }

        return 0;
}
/* Brings a journal file back online: first cancels or waits for any offlining in progress, then
 * flips the header state from STATE_OFFLINE to STATE_ONLINE and syncs it.
 *
 * Returns 0 on success (or if already online), -EPERM if the file is not writable, -EINVAL if it has
 * no valid fd/header or the header is in any other state, -EIO if the mmap cache recorded a SIGBUS
 * for this file, or the error of joining the offline thread. */
static int journal_file_set_online(JournalFile *f) {
        bool done = false;
        int r;

        assert(f);

        if (!f->writable)
                return -EPERM;

        if (!(f->fd >= 0 && f->header))
                return -EINVAL;

        /* Every pass re-reads the offline state; a lost compare-and-swap or a successful
         * cancellation both simply lead to another pass. */
        while (!done) {
                switch (f->offline_state) {

                case OFFLINE_JOINED:
                        /* No offline thread, no need to wait. */
                        done = true;
                        break;

                case OFFLINE_SYNCING:
                        if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_SYNCING, OFFLINE_CANCEL))
                                continue;
                        /* Canceled syncing prior to offlining, no need to wait. */
                        break;

                case OFFLINE_AGAIN_FROM_SYNCING:
                        if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_SYNCING, OFFLINE_CANCEL))
                                continue;
                        /* Canceled restart from syncing, no need to wait. */
                        break;

                case OFFLINE_AGAIN_FROM_OFFLINING:
                        if (!__sync_bool_compare_and_swap(&f->offline_state, OFFLINE_AGAIN_FROM_OFFLINING, OFFLINE_CANCEL))
                                continue;
                        /* Canceled restart from offlining, must wait for offlining to complete however. */
                        _fallthrough_;

                default:
                        r = journal_file_set_offline_thread_join(f);
                        if (r < 0)
                                return r;

                        done = true;
                        break;
                }
        }

        if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
                return -EIO;

        switch (f->header->state) {

        case STATE_ONLINE:
                return 0;

        case STATE_OFFLINE:
                f->header->state = STATE_ONLINE;
                (void) fsync(f->fd);
                return 0;

        default:
                return -EINVAL;
        }
}
2016-02-18 02:37:10 +01:00
/* Returns true while an offline operation is still in progress for f, i.e. while the offline state
 * is anything other than OFFLINE_DONE or OFFLINE_JOINED. A full memory barrier is issued before the
 * state is read. */
bool journal_file_is_offlining(JournalFile *f) {
        assert(f);

        __sync_synchronize();

        return !IN_SET(f->offline_state, OFFLINE_DONE, OFFLINE_JOINED);
}
2015-10-02 22:36:33 +02:00
/* Closes a journal file and releases everything attached to it: writes the final seal tag (if
 * sealing is compiled in and enabled), flushes a pending post-change notification, sets the file
 * offline synchronously, and frees all associated resources. Always returns NULL (the result of
 * mfree()). */
JournalFile *journal_file_close(JournalFile *f) {
        assert(f);

#if HAVE_GCRYPT
        /* Write the final tag */
        if (f->seal && f->writable) {
                int r = journal_file_append_tag(f);

                if (r < 0)
                        log_error_errno(r, " Failed to append tag when closing journal: %m ");
        }
#endif

        if (f->post_change_timer) {
                int enabled;

                /* If the timer is still armed, deliver the change notification right away. */
                if (sd_event_source_get_enabled(f->post_change_timer, &enabled) >= 0 &&
                    enabled == SD_EVENT_ONESHOT)
                        journal_file_post_change(f);

                (void) sd_event_source_set_enabled(f->post_change_timer, SD_EVENT_OFF);
                sd_event_source_unref(f->post_change_timer);
        }

        /* Errors are not acted upon here, we are going away anyway. */
        (void) journal_file_set_offline(f, true);

        if (f->mmap && f->cache_fd)
                mmap_cache_free_fd(f->mmap, f->cache_fd);

        if (f->fd >= 0 && f->defrag_on_close) {

                /* Be friendly to btrfs: turn COW back on again now,
                 * and defragment the file. We won't write to the file
                 * ever again, hence remove all fragmentation, and
                 * reenable all the good bits COW usually provides
                 * (such as data checksumming). */

                (void) chattr_fd(f->fd, 0, FS_NOCOW_FL);
                (void) btrfs_defrag_fd(f->fd);
        }

        if (f->close_fd)
                safe_close(f->fd);

        free(f->path);
        mmap_cache_unref(f->mmap);
        ordered_hashmap_free_free(f->chain_cache);

#if HAVE_XZ || HAVE_LZ4
        free(f->compress_buffer);
#endif

#if HAVE_GCRYPT
        /* The FSPRG state is either unmapped together with the mapped fss file, or freed on its own. */
        if (f->fss_file)
                munmap(f->fss_file, PAGE_ALIGN(f->fss_file_size));
        else
                free(f->fsprg_state);

        free(f->fsprg_seed);

        if (f->hmac)
                gcry_md_close(f->hmac);
#endif

        return mfree(f);
}
2011-10-13 05:19:35 +02:00
/* Builds a fresh header for f and writes it to offset 0 of f->fd.
 *
 * A new random file ID is generated. If template is given, the sequence number ID and the tail
 * entry sequence number are taken over from its header; otherwise the sequence number ID is set
 * to the new file ID.
 *
 * Returns 0 on success, a negative errno-style error on failure, -EIO on a short write. */
static int journal_file_init_header(JournalFile *f, JournalFile *template) {
        Header hdr = {};
        ssize_t written;
        int r;

        assert(f);

        memcpy(hdr.signature, HEADER_SIGNATURE, 8);
        hdr.header_size = htole64(ALIGN64(sizeof(hdr)));

        /* Each flag is included only if the matching feature is turned on for this file. */
        hdr.incompatible_flags |= htole32(
                        f->compress_xz * HEADER_INCOMPATIBLE_COMPRESSED_XZ |
                        f->compress_lz4 * HEADER_INCOMPATIBLE_COMPRESSED_LZ4);

        hdr.compatible_flags = htole32(
                        f->seal * HEADER_COMPATIBLE_SEALED);

        r = sd_id128_randomize(&hdr.file_id);
        if (r < 0)
                return r;

        if (!template)
                hdr.seqnum_id = hdr.file_id;
        else {
                hdr.seqnum_id = template->header->seqnum_id;
                hdr.tail_entry_seqnum = template->header->tail_entry_seqnum;
        }

        written = pwrite(f->fd, &hdr, sizeof(hdr), 0);
        if (written < 0)
                return -errno;

        if ((size_t) written != sizeof(hdr))
                return -EIO;

        return 0;
}
2016-04-26 15:47:55 +02:00
/* Syncs the directory that contains the regular file referred to by fd.
 *
 * Returns 0 on success, -EBADFD if fd does not refer to a regular file, -EINVAL if the path
 * determined for fd is not absolute, -ENOMEM on allocation failure, or another negative
 * errno-style error. */
static int fsync_directory_of_file(int fd) {
        _cleanup_free_ char *path = NULL, *dn = NULL;
        _cleanup_close_ int dfd = -1;
        struct stat st;
        int r;

        if (fstat(fd, &st) < 0)
                return -errno;

        if (!S_ISREG(st.st_mode))
                return -EBADFD;

        r = fd_get_path(fd, &path);
        if (r < 0)
                return r;

        if (!path_is_absolute(path))
                return -EINVAL;

        dn = dirname_malloc(path);
        if (!dn)
                return -ENOMEM;

        dfd = open(dn, O_RDONLY|O_CLOEXEC|O_DIRECTORY);
        if (dfd < 0)
                return -errno;

        return fsync(dfd) < 0 ? -errno : 0;
}
2011-10-08 02:20:44 +02:00
/* Stamps the header of f with the current machine and boot ID and sets the file online.
 *
 * If the boot ID already stored in the header equals the current one, the monotonic timestamp of
 * the tail entry is marked valid. The file and its directory are synced afterwards in any case;
 * the value returned is the result of journal_file_set_online(), or an earlier ID lookup error. */
static int journal_file_refresh_header(JournalFile *f) {
        sd_id128_t current_boot;
        int r;

        assert(f);
        assert(f->header);

        r = sd_id128_get_machine(&f->header->machine_id);
        if (r < 0)
                return r;

        r = sd_id128_get_boot(&current_boot);
        if (r < 0)
                return r;

        if (sd_id128_equal(current_boot, f->header->boot_id))
                f->tail_entry_monotonic_valid = true;

        f->header->boot_id = current_boot;

        r = journal_file_set_online(f);

        /* Sync the online state to disk */
        (void) fsync(f->fd);

        /* We likely just created a new file, also sync the directory this file is located in. */
        (void) fsync_directory_of_file(f->fd);

        return r;
}
2017-01-25 01:00:23 +01:00
static bool warn_wrong_flags ( const JournalFile * f , bool compatible ) {
const uint32_t any = compatible ? HEADER_COMPATIBLE_ANY : HEADER_INCOMPATIBLE_ANY ,
supported = compatible ? HEADER_COMPATIBLE_SUPPORTED : HEADER_INCOMPATIBLE_SUPPORTED ;
const char * type = compatible ? " compatible " : " incompatible " ;
2014-07-04 04:42:22 +02:00
uint32_t flags ;
2017-01-25 01:00:23 +01:00
flags = le32toh ( compatible ? f - > header - > compatible_flags : f - > header - > incompatible_flags ) ;
if ( flags & ~ supported ) {
if ( flags & ~ any )
2017-01-25 01:19:33 +01:00
log_debug ( " Journal file %s has unknown %s flags 0x% " PRIx32 ,
2017-01-25 01:00:23 +01:00
f - > path , type , flags & ~ any ) ;
flags = ( flags & any ) & ~ supported ;
2017-01-25 01:19:33 +01:00
if ( flags ) {
const char * strv [ 3 ] ;
unsigned n = 0 ;
_cleanup_free_ char * t = NULL ;
if ( compatible & & ( flags & HEADER_COMPATIBLE_SEALED ) )
strv [ n + + ] = " sealed " ;
if ( ! compatible & & ( flags & HEADER_INCOMPATIBLE_COMPRESSED_XZ ) )
strv [ n + + ] = " xz-compressed " ;
if ( ! compatible & & ( flags & HEADER_INCOMPATIBLE_COMPRESSED_LZ4 ) )
strv [ n + + ] = " lz4-compressed " ;
strv [ n ] = NULL ;
assert ( n < ELEMENTSOF ( strv ) ) ;
t = strv_join ( ( char * * ) strv , " , " ) ;
log_debug ( " Journal file %s uses %s %s %s disabled at compilation time. " ,
f - > path , type , n > 1 ? " flags " : " flag " , strnull ( t ) ) ;
}
2017-01-25 01:00:23 +01:00
return true ;
}
return false ;
}
/* Validates the mapped header of f against the file size recorded in f->last_stat and, for writable
 * files, against the local machine ID, the header state and the current time. On success the
 * compression and sealing settings of f are taken over from the header.
 *
 * Returns 0 if the header is acceptable, a negative errno-style error otherwise. */
static int journal_file_verify_header(JournalFile *f) {
        uint64_t arena, hdr_size, end;

        assert(f);
        assert(f->header);

        if (memcmp(f->header->signature, HEADER_SIGNATURE, 8) != 0)
                return -EBADMSG;

        /* In both read and write mode we refuse to open files with incompatible
         * flags we don't know. */
        if (warn_wrong_flags(f, false))
                return -EPROTONOSUPPORT;

        /* When open for writing we refuse to open files with compatible flags, too. */
        if (f->writable && warn_wrong_flags(f, true))
                return -EPROTONOSUPPORT;

        if (f->header->state >= _STATE_MAX)
                return -EBADMSG;

        /* The first addition was n_data, so check that we are at least this large */
        hdr_size = le64toh(f->header->header_size);
        if (hdr_size < HEADER_SIZE_MIN)
                return -EBADMSG;

        if (JOURNAL_HEADER_SEALED(f->header) && !JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                return -EBADMSG;

        /* Header plus arena must neither overflow nor exceed the size of the file */
        arena = le64toh(f->header->arena_size);
        if (UINT64_MAX - hdr_size < arena)
                return -ENODATA;

        end = hdr_size + arena;
        if (end > (uint64_t) f->last_stat.st_size)
                return -ENODATA;

        if (le64toh(f->header->tail_object_offset) > end)
                return -ENODATA;

        if (!VALID64(le64toh(f->header->data_hash_table_offset)) ||
            !VALID64(le64toh(f->header->field_hash_table_offset)) ||
            !VALID64(le64toh(f->header->tail_object_offset)) ||
            !VALID64(le64toh(f->header->entry_array_offset)))
                return -ENODATA;

        if (f->writable) {
                sd_id128_t machine_id;
                uint8_t state;
                int r;

                r = sd_id128_get_machine(&machine_id);
                if (r < 0)
                        return r;

                if (!sd_id128_equal(machine_id, f->header->machine_id))
                        return -EHOSTDOWN;

                state = f->header->state;

                switch (state) {

                case STATE_ARCHIVED:
                        return -ESHUTDOWN; /* Already archived */

                case STATE_ONLINE:
                        log_debug(" Journal file %s is already online. Assuming unclean closing. ", f->path);
                        return -EBUSY;

                case STATE_OFFLINE:
                        break;

                default:
                        log_debug(" Journal file %s has unknown state %i. ", f->path, state);
                        return -EBUSY;
                }

                if (f->header->field_hash_table_size == 0 || f->header->data_hash_table_size == 0)
                        return -EBADMSG;

                /* Don't permit appending to files from the future. Because otherwise the realtime timestamps wouldn't
                 * be strictly ordered in the entries in the file anymore, and we can't have that since it breaks
                 * bisection. */
                if (le64toh(f->header->tail_entry_realtime) > now(CLOCK_REALTIME)) {
                        log_debug(" Journal file %s is from the future, refusing to append new data to it that'd be older. ", f->path);
                        return -ETXTBSY;
                }
        }

        f->compress_xz = JOURNAL_HEADER_COMPRESSED_XZ(f->header);
        f->compress_lz4 = JOURNAL_HEADER_COMPRESSED_LZ4(f->header);

        f->seal = JOURNAL_HEADER_SEALED(f->header);

        return 0;
}
2015-01-05 02:09:01 +01:00
/* Refreshes f->last_stat via fstat() and records the time of the refresh in f->last_stat_usec.
 *
 * Returns 0 on success, -errno if fstat() fails, the error of stat_verify_regular() if the file is
 * not acceptable to it, or -EIDRM if the file has no links left. */
static int journal_file_fstat(JournalFile *f) {
        int r;

        assert(f);
        assert(f->fd >= 0);

        if (fstat(f->fd, &f->last_stat) < 0)
                return -errno;

        f->last_stat_usec = now(CLOCK_MONOTONIC);

        /* Refuse dealing with files that aren't regular */
        r = stat_verify_regular(&f->last_stat);
        if (r < 0)
                return r;

        /* Refuse appending to files that are already deleted */
        return f->last_stat.st_nlink <= 0 ? -EIDRM : 0;
}
2011-10-08 02:20:44 +02:00
/* Makes sure the file is large enough to hold size bytes at offset, growing it with
 * posix_fallocate() in FILE_SIZE_INCREASE steps if necessary and updating the arena size in the
 * header accordingly.
 *
 * Returns 0 on success, -EIO if the mmap cache recorded a SIGBUS for this file, -EINVAL if
 * offset + size cannot be represented, -E2BIG if the new size would violate the size or free
 * space limits in f->metrics, or another negative errno-style error. */
static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size) {
        uint64_t old_size, new_size;
        int r;

        assert(f);
        assert(f->header);

        /* We assume that this file is not sparse, and we know that
         * for sure, since we always call posix_fallocate()
         * ourselves */

        if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
                return -EIO;

        /* Refuse requests whose end cannot be represented. If the sum wrapped around we would
         * end up with a tiny new_size below and wrongly conclude that enough space is
         * allocated already. */
        if (size > UINT64_MAX - offset)
                return -EINVAL;

        old_size =
                le64toh(f->header->header_size) +
                le64toh(f->header->arena_size);

        new_size = PAGE_ALIGN(offset + size);
        if (new_size < offset + size) /* Rounding up to the page size wrapped around */
                return -EINVAL;

        if (new_size < le64toh(f->header->header_size))
                new_size = le64toh(f->header->header_size);

        if (new_size <= old_size) {

                /* We already pre-allocated enough space, but before
                 * we write to it, let's check with fstat() if the
                 * file got deleted, in order make sure we don't throw
                 * away the data immediately. Don't check fstat() for
                 * all writes though, but at most once per
                 * LAST_STAT_REFRESH_USEC. */

                if (f->last_stat_usec + LAST_STAT_REFRESH_USEC > now(CLOCK_MONOTONIC))
                        return 0;

                return journal_file_fstat(f);
        }

        /* Allocate more space. */

        if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
                return -E2BIG;

        if (new_size > f->metrics.min_size && f->metrics.keep_free > 0) {
                struct statvfs svfs;

                /* If the file system cannot be queried the free space check is skipped. */
                if (fstatvfs(f->fd, &svfs) >= 0) {
                        uint64_t available;

                        available = LESS_BY((uint64_t) svfs.f_bfree * (uint64_t) svfs.f_bsize, f->metrics.keep_free);

                        if (new_size - old_size > available)
                                return -E2BIG;
                }
        }

        /* Increase by larger blocks at once. Make sure the rounding cannot wrap around. */
        if (new_size > UINT64_MAX - (FILE_SIZE_INCREASE - 1))
                return -E2BIG;

        new_size = ((new_size + FILE_SIZE_INCREASE - 1) / FILE_SIZE_INCREASE) * FILE_SIZE_INCREASE;
        if (f->metrics.max_size > 0 && new_size > f->metrics.max_size)
                new_size = f->metrics.max_size;

        /* Note that the glibc fallocate() fallback is very
           inefficient, hence we try to minimize the allocation area
           as we can. */
        r = posix_fallocate(f->fd, old_size, new_size - old_size);
        if (r != 0)
                return -r;

        f->header->arena_size = htole64(new_size - le64toh(f->header->header_size));

        return journal_file_fstat(f);
}
2014-12-10 15:18:49 +01:00
/* Maps an object type to the mmap cache context to use for it: one context for each type, plus
 * context 0 as catch-all for everything outside of the known type range. */
static unsigned type_to_context(ObjectType type) {
        assert_cc(_OBJECT_TYPE_MAX <= MMAP_CACHE_MAX_CONTEXTS);
        assert_cc(CONTEXT_HEADER < MMAP_CACHE_MAX_CONTEXTS);

        if (type <= OBJECT_UNUSED || type >= _OBJECT_TYPE_MAX)
                return 0;

        return type;
}
2017-07-13 07:17:06 +02:00
/* Maps size bytes at offset of the journal file through the mmap cache, using the cache context
 * that belongs to the given object type. The resulting pointer is returned in ret; ret_size is
 * passed through to mmap_cache_get().
 *
 * Returns the result of mmap_cache_get() on success, -EINVAL if size is zero, -EADDRNOTAVAIL if
 * the requested range lies outside of the file, or the error of refreshing the fstat() data. */
static int journal_file_move_to(JournalFile *f, ObjectType type, bool keep_always, uint64_t offset, uint64_t size, void **ret, size_t *ret_size) {
        int r;

        assert(f);
        assert(ret);

        if (size <= 0)
                return -EINVAL;

        /* A range whose end cannot even be represented is certainly outside of the file. Catch
         * this explicitly, since a wrapped-around sum would slip through the bounds check
         * below. */
        if (size > UINT64_MAX - offset)
                return -EADDRNOTAVAIL;

        /* Avoid SIGBUS on invalid accesses */
        if (offset + size > (uint64_t) f->last_stat.st_size) {
                /* Hmm, out of range? Let's refresh the fstat() data
                 * first, before we trust that check. */

                r = journal_file_fstat(f);
                if (r < 0)
                        return r;

                if (offset + size > (uint64_t) f->last_stat.st_size)
                        return -EADDRNOTAVAIL;
        }

        return mmap_cache_get(f->mmap, f->cache_fd, f->prot, type_to_context(type), keep_always, offset, size, &f->last_stat, ret, ret_size);
}
2012-08-14 22:02:24 +02:00
/* Returns the minimum size an object of the type of o must have: the size of the type-specific
 * object structure for the types listed in the table, and sizeof(ObjectHeader) for any other
 * type. */
static uint64_t minimum_header_size(Object *o) {

        static const uint64_t table[] = {
                [OBJECT_DATA]             = sizeof(DataObject),
                [OBJECT_FIELD]            = sizeof(FieldObject),
                [OBJECT_ENTRY]            = sizeof(EntryObject),
                [OBJECT_DATA_HASH_TABLE]  = sizeof(HashTableObject),
                [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
                [OBJECT_ENTRY_ARRAY]      = sizeof(EntryArrayObject),
                [OBJECT_TAG]              = sizeof(TagObject),
        };

        /* Types beyond the table, and gaps within it (which are zero-initialized), fall back
         * to the generic object header. */
        if (o->object.type < ELEMENTSOF(table) && table[o->object.type] > 0)
                return table[o->object.type];

        return sizeof(ObjectHeader);
}
journal: add object sanity check to journal_file_move_to_object()
Introduce journal_file_check_object(), which does lightweight object
sanity checks, and use it in journal_file_move_to_object(), so that we
will catch certain corrupted objects in the journal file.
This fixes #6447, where we had only partially written out OBJECT_ENTRY
(ObjectHeader written, but rest of object zero bytes), causing
"journalctl --list-boots" to fail.
$ builddir.vanilla/journalctl --list-boots -D bug6447/
Failed to determine boots: No data available
$ builddir.patched/journalctl --list-boots -D bug6447/
-52 22633da1c5374a728d6c215e2c301dc2 Mon 2017-07-10 05:29:21 EEST—Mon 2017-07-10 05:31:51 EEST
-51 2253aab9ea7e4a2598f2abda82939eff Mon 2017-07-10 05:32:22 EEST—Mon 2017-07-10 05:36:49 EEST
-50 ef0d85d35c74486fa4104f9d6391b6ba Mon 2017-07-10 05:40:33 EEST—Mon 2017-07-10 05:40:40 EEST
[...]
Note that journal_file_check_object() is similar to
journal_file_object_verify(). The most expensive checks are omitted, as
they would slow down every journal_file_move_to_object() call too much.
With this implementation, the added overhead is small, for example when
dumping some journal content to /dev/null
(built with -Dbuildtype=debugoptimized -Db_ndebug=true):
Performance counter stats for 'builddir.vanilla/journalctl -D 76f4d4c3406945f9a60d3ca8763aa754/':
12542,311634 task-clock:u (msec) # 1,000 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
80 100 page-faults:u # 0,006 M/sec
41 786 963 456 cycles:u # 3,332 GHz
105 453 864 770 instructions:u # 2,52 insn per cycle
24 342 227 334 branches:u # 1940,809 M/sec
105 709 217 branch-misses:u # 0,43% of all branches
12,545199291 seconds time elapsed
Performance counter stats for 'builddir.patched/journalctl -D 76f4d4c3406945f9a60d3ca8763aa754/':
12734,723233 task-clock:u (msec) # 1,000 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
80 693 page-faults:u # 0,006 M/sec
42 661 017 429 cycles:u # 3,350 GHz
107 696 985 865 instructions:u # 2,52 insn per cycle
24 950 526 745 branches:u # 1959,252 M/sec
101 762 806 branch-misses:u # 0,41% of all branches
12,737527327 seconds time elapsed
Fixes #6447.
2017-09-19 10:10:49 +02:00
/* Lightweight object checks. We want this to be fast, so that we won't
 * slow down every journal_file_move_to_object() call too much.
 *
 * Inspects the type-specific fields of the object "o". "offset" is the file
 * offset the object was read from and is only used in log messages. Returns
 * 0 if no check failed, or -EBADMSG after logging a debug message describing
 * the first failed check. Object types without a case below are accepted
 * without further checks.
 *
 * All size checks compare the stored object size against the fixed header
 * size *before* subtracting. The operands are unsigned, so an expression of
 * the form "size - header <= 0" only catches equality, and an undersized
 * object would wrap around to a huge payload length and pass the check. */
static int journal_file_check_object(JournalFile *f, uint64_t offset, Object *o) {
        uint64_t size;

        assert(f);
        assert(o);

        size = le64toh(o->object.size);

        switch (o->object.type) {

        case OBJECT_DATA:
                /* entry_offset and n_entries must either both be zero or both be non-zero */
                if ((le64toh(o->data.entry_offset) == 0) ^ (le64toh(o->data.n_entries) == 0)) {
                        log_debug("Bad n_entries: %" PRIu64 ": %" PRIu64,
                                  le64toh(o->data.n_entries),
                                  offset);
                        return -EBADMSG;
                }

                /* A data object needs at least one byte of payload */
                if (size <= offsetof(DataObject, payload)) {
                        log_debug("Bad object size (<= %zu): %" PRIu64 ": %" PRIu64,
                                  offsetof(DataObject, payload),
                                  size,
                                  offset);
                        return -EBADMSG;
                }

                if (!VALID64(le64toh(o->data.next_hash_offset)) ||
                    !VALID64(le64toh(o->data.next_field_offset)) ||
                    !VALID64(le64toh(o->data.entry_offset)) ||
                    !VALID64(le64toh(o->data.entry_array_offset))) {
                        log_debug("Invalid offset, next_hash_offset=" OFSfmt ", next_field_offset=" OFSfmt
                                  ", entry_offset=" OFSfmt ", entry_array_offset=" OFSfmt ": %" PRIu64,
                                  le64toh(o->data.next_hash_offset),
                                  le64toh(o->data.next_field_offset),
                                  le64toh(o->data.entry_offset),
                                  le64toh(o->data.entry_array_offset),
                                  offset);
                        return -EBADMSG;
                }

                break;

        case OBJECT_FIELD:
                /* A field object needs at least one byte of payload */
                if (size <= offsetof(FieldObject, payload)) {
                        log_debug("Bad field size (<= %zu): %" PRIu64 ": %" PRIu64,
                                  offsetof(FieldObject, payload),
                                  size,
                                  offset);
                        return -EBADMSG;
                }

                if (!VALID64(le64toh(o->field.next_hash_offset)) ||
                    !VALID64(le64toh(o->field.head_data_offset))) {
                        log_debug("Invalid offset, next_hash_offset=" OFSfmt
                                  ", head_data_offset=" OFSfmt ": %" PRIu64,
                                  le64toh(o->field.next_hash_offset),
                                  le64toh(o->field.head_data_offset),
                                  offset);
                        return -EBADMSG;
                }

                break;

        case OBJECT_ENTRY: {
                uint64_t n_items;

                /* The item area must be a whole number of EntryItem structures */
                if (size < offsetof(EntryObject, items) ||
                    (size - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0) {
                        log_debug("Bad entry size (<= %zu): %" PRIu64 ": %" PRIu64,
                                  offsetof(EntryObject, items),
                                  size,
                                  offset);
                        return -EBADMSG;
                }

                n_items = (size - offsetof(EntryObject, items)) / sizeof(EntryItem);
                if (n_items == 0) {
                        log_debug("Invalid number items in entry: %" PRIu64 ": %" PRIu64,
                                  n_items,
                                  offset);
                        return -EBADMSG;
                }

                if (le64toh(o->entry.seqnum) == 0) {
                        log_debug("Invalid entry seqnum: %" PRIx64 ": %" PRIu64,
                                  le64toh(o->entry.seqnum),
                                  offset);
                        return -EBADMSG;
                }

                if (!VALID_REALTIME(le64toh(o->entry.realtime))) {
                        log_debug("Invalid entry realtime timestamp: %" PRIu64 ": %" PRIu64,
                                  le64toh(o->entry.realtime),
                                  offset);
                        return -EBADMSG;
                }

                if (!VALID_MONOTONIC(le64toh(o->entry.monotonic))) {
                        log_debug("Invalid entry monotonic timestamp: %" PRIu64 ": %" PRIu64,
                                  le64toh(o->entry.monotonic),
                                  offset);
                        return -EBADMSG;
                }

                break;
        }

        case OBJECT_DATA_HASH_TABLE:
        case OBJECT_FIELD_HASH_TABLE:
                /* The table must hold a whole, non-zero number of HashItem structures */
                if (size < offsetof(HashTableObject, items) ||
                    (size - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0 ||
                    (size - offsetof(HashTableObject, items)) / sizeof(HashItem) == 0) {
                        log_debug("Invalid %s hash table size: %" PRIu64 ": %" PRIu64,
                                  o->object.type == OBJECT_DATA_HASH_TABLE ? "data" : "field",
                                  size,
                                  offset);
                        return -EBADMSG;
                }

                break;

        case OBJECT_ENTRY_ARRAY:
                /* The array must hold a whole, non-zero number of 64-bit items */
                if (size < offsetof(EntryArrayObject, items) ||
                    (size - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0 ||
                    (size - offsetof(EntryArrayObject, items)) / sizeof(le64_t) == 0) {
                        log_debug("Invalid object entry array size: %" PRIu64 ": %" PRIu64,
                                  size,
                                  offset);
                        return -EBADMSG;
                }

                if (!VALID64(le64toh(o->entry_array.next_entry_array_offset))) {
                        log_debug("Invalid object entry array next_entry_array_offset: " OFSfmt ": %" PRIu64,
                                  le64toh(o->entry_array.next_entry_array_offset),
                                  offset);
                        return -EBADMSG;
                }

                break;

        case OBJECT_TAG:
                /* Tag objects have a fixed size */
                if (size != sizeof(TagObject)) {
                        log_debug("Invalid object tag size: %" PRIu64 ": %" PRIu64,
                                  size,
                                  offset);
                        return -EBADMSG;
                }

                if (!VALID_EPOCH(le64toh(o->tag.epoch))) {
                        log_debug("Invalid object tag epoch: %" PRIu64 ": %" PRIu64,
                                  le64toh(o->tag.epoch),
                                  offset);
                        return -EBADMSG;
                }

                break;
        }

        return 0;
}
2014-12-10 15:18:49 +01:00
/* Locates the object stored at file offset "offset", validates it and hands
 * back a pointer to it in *ret.
 *
 * If "type" is greater than OBJECT_UNUSED the object found must have exactly
 * that type; otherwise any valid type is accepted.
 *
 * Returns 0 on success. Returns -EBADMSG (after a debug log message) if the
 * offset or the object header fails one of the checks below, and otherwise
 * passes on the negative return value of journal_file_move_to() or
 * journal_file_check_object(). *ret is only written on success. */
int journal_file_move_to_object ( JournalFile * f , ObjectType type , uint64_t offset , Object * * ret ) {
2011-10-08 02:20:44 +02:00
int r ;
void * t ;
2017-07-13 07:17:06 +02:00
size_t tsize ;
2011-10-08 02:20:44 +02:00
Object * o ;
uint64_t s ;
assert ( f ) ;
assert ( ret ) ;
2012-08-18 00:37:21 +02:00
/* Objects may only be located at multiple of 64 bit */
2016-10-11 19:12:41 +02:00
if ( ! VALID64 ( offset ) ) {
log_debug ( " Attempt to move to object at non-64bit boundary: % " PRIu64 , offset ) ;
2016-04-26 11:37:22 +02:00
return - EBADMSG ;
2016-10-11 19:12:41 +02:00
}
2012-08-18 00:37:21 +02:00
2016-04-25 21:42:15 +02:00
/* Object may not be located in the file header */
2016-10-11 19:12:41 +02:00
if ( offset < le64toh ( f - > header - > header_size ) ) {
log_debug ( " Attempt to move to object located in file header: % " PRIu64 , offset ) ;
2016-04-25 21:42:15 +02:00
return - EBADMSG ;
2016-10-11 19:12:41 +02:00
}
2016-04-25 21:42:15 +02:00
2017-07-13 07:17:06 +02:00
/* First request only sizeof(ObjectHeader) bytes, enough to read the object's size and type */
r = journal_file_move_to ( f , type , false , offset , sizeof ( ObjectHeader ) , & t , & tsize ) ;
2011-10-08 02:20:44 +02:00
if ( r < 0 )
return r ;
o = ( Object * ) t ;
s = le64toh ( o - > object . size ) ;
2016-10-12 12:22:57 +02:00
/* A size of zero is reported separately from a size that is merely too small */
if ( s = = 0 ) {
log_debug ( " Attempt to move to uninitialized object: % " PRIu64 , offset ) ;
return - EBADMSG ;
}
2016-10-11 19:12:41 +02:00
if ( s < sizeof ( ObjectHeader ) ) {
log_debug ( " Attempt to move to overly short object: % " PRIu64 , offset ) ;
2011-10-08 02:20:44 +02:00
return - EBADMSG ;
2016-10-11 19:12:41 +02:00
}
2011-10-08 02:20:44 +02:00
2016-10-11 19:12:41 +02:00
if ( o - > object . type < = OBJECT_UNUSED ) {
log_debug ( " Attempt to move to object with invalid type: % " PRIu64 , offset ) ;
2012-08-14 22:02:24 +02:00
return - EBADMSG ;
2016-10-11 19:12:41 +02:00
}
2012-08-14 22:02:24 +02:00
2016-10-11 19:12:41 +02:00
/* The stored size must cover at least the fixed part that minimum_header_size() reports for this object */
if ( s < minimum_header_size ( o ) ) {
log_debug ( " Attempt to move to truncated object: % " PRIu64 , offset ) ;
2012-08-14 22:02:24 +02:00
return - EBADMSG ;
2016-10-11 19:12:41 +02:00
}
2012-08-14 22:02:24 +02:00
2016-10-11 19:12:41 +02:00
/* The type is only enforced when the caller asked for a specific one */
if ( type > OBJECT_UNUSED & & o - > object . type ! = type ) {
log_debug ( " Attempt to move to object of unexpected type: % " PRIu64 , offset ) ;
2011-10-08 02:20:44 +02:00
return - EBADMSG ;
2016-10-11 19:12:41 +02:00
}
2011-10-08 02:20:44 +02:00
2017-07-13 07:17:06 +02:00
/* NOTE(review): tsize is an out-parameter of the first journal_file_move_to() call;
 * presumably it reports how many bytes are accessible at "t" - confirm. The object is
 * requested again with its full size only when that size exceeds tsize. */
if ( s > tsize ) {
r = journal_file_move_to ( f , type , false , offset , s , & t , NULL ) ;
2011-10-08 02:20:44 +02:00
if ( r < 0 )
return r ;
o = ( Object * ) t ;
}
journal: add object sanity check to journal_file_move_to_object()
Introduce journal_file_check_object(), which does lightweight object
sanity checks, and use it in journal_file_move_to_object(), so that we
will catch certain corrupted objects in the journal file.
This fixes #6447, where we had only partially written out OBJECT_ENTRY
(ObjectHeader written, but rest of object zero bytes), causing
"journalctl --list-boots" to fail.
$ builddir.vanilla/journalctl --list-boots -D bug6447/
Failed to determine boots: No data available
$ builddir.patched/journalctl --list-boots -D bug6447/
-52 22633da1c5374a728d6c215e2c301dc2 Mon 2017-07-10 05:29:21 EEST—Mon 2017-07-10 05:31:51 EEST
-51 2253aab9ea7e4a2598f2abda82939eff Mon 2017-07-10 05:32:22 EEST—Mon 2017-07-10 05:36:49 EEST
-50 ef0d85d35c74486fa4104f9d6391b6ba Mon 2017-07-10 05:40:33 EEST—Mon 2017-07-10 05:40:40 EEST
[...]
Note that journal_file_check_object() is similar to
journal_file_object_verify(). The most expensive checks are omitted, as
they would slow down every journal_file_move_to_object() call too much.
With this implementation, the added overhead is small, for example when
dumping some journal content to /dev/null
(built with -Dbuildtype=debugoptimized -Db_ndebug=true):
Performance counter stats for 'builddir.vanilla/journalctl -D 76f4d4c3406945f9a60d3ca8763aa754/':
12542,311634 task-clock:u (msec) # 1,000 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
80 100 page-faults:u # 0,006 M/sec
41 786 963 456 cycles:u # 3,332 GHz
105 453 864 770 instructions:u # 2,52 insn per cycle
24 342 227 334 branches:u # 1940,809 M/sec
105 709 217 branch-misses:u # 0,43% of all branches
12,545199291 seconds time elapsed
Performance counter stats for 'builddir.patched/journalctl -D 76f4d4c3406945f9a60d3ca8763aa754/':
12734,723233 task-clock:u (msec) # 1,000 CPUs utilized
0 context-switches:u # 0,000 K/sec
0 cpu-migrations:u # 0,000 K/sec
80 693 page-faults:u # 0,006 M/sec
42 661 017 429 cycles:u # 3,350 GHz
107 696 985 865 instructions:u # 2,52 insn per cycle
24 950 526 745 branches:u # 1959,252 M/sec
101 762 806 branch-misses:u # 0,41% of all branches
12,737527327 seconds time elapsed
Fixes #6447.
2017-09-19 10:10:49 +02:00
/* With the whole object accessible, run the lightweight type-specific sanity checks */
r = journal_file_check_object ( f , offset , o ) ;
if ( r < 0 )
return r ;
2011-10-08 02:20:44 +02:00
* ret = o ;
return 0 ;
}
2012-08-13 21:52:58 +02:00
/* Picks the sequence number for the next entry and records it in the file
 * header.
 *
 * The candidate is the header's tail_entry_seqnum plus one. If "seqnum" is
 * non-NULL it points to an external counter: the larger of the candidate and
 * that counter plus one is used, and the counter is updated to the chosen
 * value. The chosen value is stored as the new tail_entry_seqnum, and also as
 * head_entry_seqnum if that field is still zero. Returns the chosen value. */
static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
        uint64_t next;

        assert(f);
        assert(f->header);

        next = le64toh(f->header->tail_entry_seqnum) + 1;

        if (seqnum) {
                /* Keep the file-local and the external counter in sync by
                 * moving both to the maximum of the two candidates */
                uint64_t external_next = *seqnum + 1;

                next = external_next > next ? external_next : next;
                *seqnum = next;
        }

        f->header->tail_entry_seqnum = htole64(next);

        /* The first sequence number handed out also becomes the head */
        if (f->header->head_entry_seqnum == 0)
                f->header->head_entry_seqnum = htole64(next);

        return next;
}
2014-12-10 15:18:49 +01:00
/* Appends a new object of the given type and total size after the current
 * tail object.
 *
 * The new object's position is the header size if the header records no tail
 * object yet, and otherwise the tail object's offset plus its 64-bit-aligned
 * size. Space is requested with journal_file_allocate(), the new object's
 * header is zeroed and given its type and size, and the file header's
 * tail_object_offset and n_objects are updated.
 *
 * On success returns 0, stores the object pointer in *ret and its file offset
 * in *offset. On failure returns the negative value reported by the helper
 * that failed, and neither output is written. */
int journal_file_append_object(JournalFile *f, ObjectType type, uint64_t size, Object **ret, uint64_t *offset) {
        Object *last, *obj;
        uint64_t pos;
        void *mapped;
        int r;

        assert(f);
        assert(f->header);
        assert(type > OBJECT_UNUSED && type < _OBJECT_TYPE_MAX);
        assert(size >= sizeof(ObjectHeader));
        assert(offset);
        assert(ret);

        r = journal_file_set_online(f);
        if (r < 0)
                return r;

        pos = le64toh(f->header->tail_object_offset);
        if (pos != 0) {
                /* There is a tail object already: the new one goes right behind it */
                r = journal_file_move_to_object(f, OBJECT_UNUSED, pos, &last);
                if (r < 0)
                        return r;

                pos += ALIGN64(le64toh(last->object.size));
        } else
                /* No object recorded yet: start right after the file header */
                pos = le64toh(f->header->header_size);

        r = journal_file_allocate(f, pos, size);
        if (r < 0)
                return r;

        r = journal_file_move_to(f, type, false, pos, size, &mapped, NULL);
        if (r < 0)
                return r;

        obj = (Object*) mapped;

        zero(obj->object);
        obj->object.type = type;
        obj->object.size = htole64(size);

        f->header->tail_object_offset = htole64(pos);
        f->header->n_objects = htole64(le64toh(f->header->n_objects) + 1);

        *ret = obj;
        *offset = pos;

        return 0;
}
2011-11-08 18:20:03 +01:00
/* Creates the data hash table object in the journal file and records its location (offset of the
 * first item) and its size in bytes in the file header. Returns 0 on success, or the negative
 * error code of journal_file_append_object() on failure. */
static int journal_file_setup_data_hash_table(JournalFile *f) {
        uint64_t table_bytes, object_offset;
        Object *table;
        int r;

        assert(f);
        assert(f->header);

        /* The estimate is one hash table entry per 768 bytes of journal file, with the fill
         * level never exceeding 75%. Size the table for the maximum file size on that basis,
         * but never go below the default size. */
        table_bytes = (f->metrics.max_size * 4 / 768 / 3) * sizeof(HashItem);
        if (table_bytes < DEFAULT_DATA_HASH_TABLE_SIZE)
                table_bytes = DEFAULT_DATA_HASH_TABLE_SIZE;

        log_debug("Reserving %"PRIu64" entries in hash table.", table_bytes / sizeof(HashItem));

        r = journal_file_append_object(
                        f,
                        OBJECT_DATA_HASH_TABLE,
                        offsetof(Object, hash_table.items) + table_bytes,
                        &table, &object_offset);
        if (r < 0)
                return r;

        /* All buckets start out empty */
        memzero(table->hash_table.items, table_bytes);

        /* The header points at the items array, not at the object header */
        f->header->data_hash_table_offset = htole64(object_offset + offsetof(Object, hash_table.items));
        f->header->data_hash_table_size = htole64(table_bytes);

        return 0;
}
2011-11-08 18:20:03 +01:00
/* Creates the field hash table object in the journal file and records its location (offset of the
 * first item) and its size in bytes in the file header. Returns 0 on success, or the negative
 * error code of journal_file_append_object() on failure. */
static int journal_file_setup_field_hash_table(JournalFile *f) {
        /* The field hash table has a fixed size, since the number of distinct fields is expected
         * to grow only very slowly. */
        uint64_t table_bytes = DEFAULT_FIELD_HASH_TABLE_SIZE;
        uint64_t object_offset;
        Object *table;
        int r;

        assert(f);
        assert(f->header);

        r = journal_file_append_object(
                        f,
                        OBJECT_FIELD_HASH_TABLE,
                        offsetof(Object, hash_table.items) + table_bytes,
                        &table, &object_offset);
        if (r < 0)
                return r;

        /* All buckets start out empty */
        memzero(table->hash_table.items, table_bytes);

        /* The header points at the items array, not at the object header */
        f->header->field_hash_table_offset = htole64(object_offset + offsetof(Object, hash_table.items));
        f->header->field_hash_table_size = htole64(table_bytes);

        return 0;
}
2015-07-24 01:55:45 +02:00
/* Makes sure the data hash table is mapped and f->data_hash_table points to it. A no-op if the
 * pointer is already set. Returns 0 on success, or the negative error code of
 * journal_file_move_to() on failure. */
int journal_file_map_data_hash_table(JournalFile *f) {
        uint64_t table_offset, table_bytes;
        void *mapping;
        int r;

        assert(f);
        assert(f->header);

        /* Already mapped? Then there is nothing to do. */
        if (f->data_hash_table)
                return 0;

        table_offset = le64toh(f->header->data_hash_table_offset);
        table_bytes = le64toh(f->header->data_hash_table_size);

        r = journal_file_move_to(
                        f,
                        OBJECT_DATA_HASH_TABLE,
                        true,
                        table_offset, table_bytes,
                        &mapping, NULL);
        if (r < 0)
                return r;

        f->data_hash_table = mapping;
        return 0;
}
2015-07-24 01:55:45 +02:00
/* Makes sure the field hash table is mapped and f->field_hash_table points to it. A no-op if the
 * pointer is already set. Returns 0 on success, or the negative error code of
 * journal_file_move_to() on failure. */
int journal_file_map_field_hash_table(JournalFile *f) {
        uint64_t table_offset, table_bytes;
        void *mapping;
        int r;

        assert(f);
        assert(f->header);

        /* Already mapped? Then there is nothing to do. */
        if (f->field_hash_table)
                return 0;

        table_offset = le64toh(f->header->field_hash_table_offset);
        table_bytes = le64toh(f->header->field_hash_table_size);

        r = journal_file_move_to(
                        f,
                        OBJECT_FIELD_HASH_TABLE,
                        true,
                        table_offset, table_bytes,
                        &mapping, NULL);
        if (r < 0)
                return r;

        f->field_hash_table = mapping;
        return 0;
}
2012-10-18 03:29:19 +02:00
/* Links the field object 'o' (located at file offset 'offset') into the field hash table bucket
 * selected by 'hash', appending it to the end of the bucket's chain.
 *
 * Returns 0 on success, -EINVAL if 'o' is not a field object, -EBADMSG if the hash table has no
 * buckets, or the negative error code of journal_file_move_to_object(). */
static int journal_file_link_field(
                JournalFile *f,
                Object *o,
                uint64_t offset,
                uint64_t hash) {

        uint64_t n_buckets, bucket, chain_tail;
        int r;

        assert(f);
        assert(f->header);
        assert(f->field_hash_table);
        assert(o);
        assert(offset > 0);

        if (o->object.type != OBJECT_FIELD)
                return -EINVAL;

        n_buckets = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
        if (n_buckets == 0)
                return -EBADMSG;

        /* Reset the link fields of the new object first: moving to the previous chain member
         * below might alter the window we are looking at. */
        o->field.next_hash_offset = 0;
        o->field.head_data_offset = 0;

        bucket = hash % n_buckets;
        chain_tail = le64toh(f->field_hash_table[bucket].tail_hash_offset);

        if (chain_tail != 0) {
                Object *previous;

                /* Patch the forward pointer of the object currently at the end of the chain */
                r = journal_file_move_to_object(f, OBJECT_FIELD, chain_tail, &previous);
                if (r < 0)
                        return r;

                previous->field.next_hash_offset = htole64(offset);
        } else
                /* Empty bucket: the new object becomes the head of the chain */
                f->field_hash_table[bucket].head_hash_offset = htole64(offset);

        f->field_hash_table[bucket].tail_hash_offset = htole64(offset);

        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
                f->header->n_fields = htole64(le64toh(f->header->n_fields) + 1);

        return 0;
}
/* Links the data object 'o' (located at file offset 'offset') into the data hash table bucket
 * selected by 'hash', appending it to the end of the bucket's chain.
 *
 * Returns 0 on success, -EINVAL if 'o' is not a data object, -EBADMSG if the hash table has no
 * buckets, or the negative error code of journal_file_move_to_object(). */
static int journal_file_link_data(
                JournalFile *f,
                Object *o,
                uint64_t offset,
                uint64_t hash) {

        uint64_t n_buckets, bucket, chain_tail;
        int r;

        assert(f);
        assert(f->header);
        assert(f->data_hash_table);
        assert(o);
        assert(offset > 0);

        if (o->object.type != OBJECT_DATA)
                return -EINVAL;

        n_buckets = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        if (n_buckets == 0)
                return -EBADMSG;

        /* Reset all link fields of the new object first: moving to the previous chain member
         * below might alter the window we are looking at. */
        o->data.next_hash_offset = 0;
        o->data.next_field_offset = 0;
        o->data.entry_offset = 0;
        o->data.entry_array_offset = 0;
        o->data.n_entries = 0;

        bucket = hash % n_buckets;
        chain_tail = le64toh(f->data_hash_table[bucket].tail_hash_offset);

        if (chain_tail != 0) {
                Object *previous;

                /* Move back to the previous data object in the chain to patch in the pointer */
                r = journal_file_move_to_object(f, OBJECT_DATA, chain_tail, &previous);
                if (r < 0)
                        return r;

                previous->data.next_hash_offset = htole64(offset);
        } else
                /* Being the only entry in the bucket is the easy case */
                f->data_hash_table[bucket].head_hash_offset = htole64(offset);

        f->data_hash_table[bucket].tail_hash_offset = htole64(offset);

        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
                f->header->n_data = htole64(le64toh(f->header->n_data) + 1);

        return 0;
}
2012-10-18 03:29:19 +02:00
/* Looks up the field object whose payload equals the 'size' bytes at 'field', using the
 * caller-supplied 'hash' to pick the hash table bucket.
 *
 * Returns 1 if found (storing the object in *ret and its offset in *offset, where non-NULL),
 * 0 if no such object exists, -EBADMSG if the hash table is smaller than a single item, or the
 * negative error code of a failing helper. */
int journal_file_find_field_object_with_hash(
                JournalFile *f,
                const void *field, uint64_t size, uint64_t hash,
                Object **ret, uint64_t *offset) {

        uint64_t candidate, wanted_size, n_buckets;
        Object *o;
        int r;

        assert(f);
        assert(f->header);
        assert(field && size > 0);

        /* With an empty field hash table there is nothing to be found */
        if (le64toh(f->header->field_hash_table_size) <= 0)
                return 0;

        /* Map the field hash table, unless that has happened already */
        r = journal_file_map_field_hash_table(f);
        if (r < 0)
                return r;

        wanted_size = offsetof(Object, field.payload) + size;

        n_buckets = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);
        if (n_buckets == 0)
                return -EBADMSG;

        /* Walk the chain of the bucket the hash selects */
        for (candidate = le64toh(f->field_hash_table[hash % n_buckets].head_hash_offset);
             candidate > 0;
             candidate = le64toh(o->field.next_hash_offset)) {

                r = journal_file_move_to_object(f, OBJECT_FIELD, candidate, &o);
                if (r < 0)
                        return r;

                /* Hash, size and payload all have to match */
                if (le64toh(o->field.hash) != hash ||
                    le64toh(o->object.size) != wanted_size ||
                    memcmp(o->field.payload, field, size) != 0)
                        continue;

                if (ret)
                        *ret = o;
                if (offset)
                        *offset = candidate;

                return 1;
        }

        return 0;
}
/* Convenience wrapper around journal_file_find_field_object_with_hash() that computes the hash
 * of the field with hash64() itself. Return values are those of the wrapped function. */
int journal_file_find_field_object(
                JournalFile *f,
                const void *field, uint64_t size,
                Object **ret, uint64_t *offset) {

        assert(f);
        assert(field && size > 0);

        return journal_file_find_field_object_with_hash(
                        f,
                        field, size, hash64(field, size),
                        ret, offset);
}
2011-11-08 18:20:03 +01:00
/* Looks up the data object whose (possibly compressed) payload equals the 'size' bytes at 'data',
 * using the caller-supplied 'hash' to pick the hash table bucket.
 *
 * Returns 1 if found (storing the object in *ret and its offset in *offset, where non-NULL),
 * 0 if no such object exists, -EBADMSG on an undersized hash table or compressed object,
 * -EPROTONOSUPPORT if a compressed candidate is hit but compression support is compiled out, or
 * the negative error code of a failing helper. */
int journal_file_find_data_object_with_hash(
                JournalFile *f,
                const void *data, uint64_t size, uint64_t hash,
                Object **ret, uint64_t *offset) {

        uint64_t candidate, wanted_size, n_buckets;
        Object *o;
        int r;

        assert(f);
        assert(f->header);
        assert(data || size == 0);

        /* Without a data hash table there cannot be any entry */
        if (le64toh(f->header->data_hash_table_size) <= 0)
                return 0;

        /* Map the data hash table, unless that has happened already */
        r = journal_file_map_data_hash_table(f);
        if (r < 0)
                return r;

        wanted_size = offsetof(Object, data.payload) + size;

        n_buckets = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        if (n_buckets == 0)
                return -EBADMSG;

        /* Walk the chain of the bucket the hash selects */
        for (candidate = le64toh(f->data_hash_table[hash % n_buckets].head_hash_offset);
             candidate > 0;
             candidate = le64toh(o->data.next_hash_offset)) {

                r = journal_file_move_to_object(f, OBJECT_DATA, candidate, &o);
                if (r < 0)
                        return r;

                /* Cheap check first: different hash means different data */
                if (le64toh(o->data.hash) != hash)
                        continue;

                if (o->object.flags & OBJECT_COMPRESSION_MASK) {
#if HAVE_XZ || HAVE_LZ4
                        uint64_t compressed_size;
                        size_t plain_size = 0;

                        /* A compressed object must carry at least one byte of payload */
                        compressed_size = le64toh(o->object.size);
                        if (compressed_size <= offsetof(Object, data.payload))
                                return -EBADMSG;

                        compressed_size -= offsetof(Object, data.payload);

                        r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
                                            o->data.payload, compressed_size,
                                            &f->compress_buffer, &f->compress_buffer_size, &plain_size, 0);
                        if (r < 0)
                                return r;

                        /* Compare against the decompressed payload */
                        if (plain_size != size ||
                            memcmp(f->compress_buffer, data, size) != 0)
                                continue;
#else
                        return -EPROTONOSUPPORT;
#endif
                } else if (le64toh(o->object.size) != wanted_size ||
                           memcmp(o->data.payload, data, size) != 0)
                        continue;

                if (ret)
                        *ret = o;
                if (offset)
                        *offset = candidate;

                return 1;
        }

        return 0;
}
/* Convenience wrapper around journal_file_find_data_object_with_hash() that computes the hash of
 * the data with hash64() itself. Return values are those of the wrapped function. */
int journal_file_find_data_object(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        assert(f);
        assert(data || size == 0);

        return journal_file_find_data_object_with_hash(
                        f,
                        data, size, hash64(data, size),
                        ret, offset);
}
2012-10-18 03:29:19 +02:00
/* Returns the field object for the 'size' bytes at 'field', creating and linking a new one if no
 * matching object exists yet.
 *
 * On success returns 0 and stores the object in *ret and its offset in *offset (where non-NULL).
 * On failure returns the negative error code of the failing helper. */
static int journal_file_append_field(
                JournalFile *f,
                const void *field, uint64_t size,
                Object **ret, uint64_t *offset) {

        uint64_t hash, where;
        Object *fo;
        int r;

        assert(f);
        assert(field && size > 0);

        hash = hash64(field, size);

        /* Reuse an existing field object if there is one; otherwise create it */
        r = journal_file_find_field_object_with_hash(f, field, size, hash, &fo, &where);
        if (r < 0)
                return r;

        if (r == 0) {
                r = journal_file_append_object(f, OBJECT_FIELD, offsetof(Object, field.payload) + size, &fo, &where);
                if (r < 0)
                        return r;

                fo->field.hash = htole64(hash);
                memcpy(fo->field.payload, field, size);

                r = journal_file_link_field(f, fo, where, hash);
                if (r < 0)
                        return r;

                /* Linking might have altered the window, hence refresh our pointer */
                r = journal_file_move_to_object(f, OBJECT_FIELD, where, &fo);
                if (r < 0)
                        return r;

#if HAVE_GCRYPT
                r = journal_file_hmac_put_object(f, OBJECT_FIELD, fo, where);
                if (r < 0)
                        return r;
#endif
        }

        if (ret)
                *ret = fo;
        if (offset)
                *offset = where;

        return 0;
}
2012-03-06 02:42:32 +01:00
/* Returns the data object for the 'size' bytes at 'data', creating a new one if no matching
 * object exists yet.
 *
 * A newly created object is (optionally) compressed, linked into the data hash table, fed into
 * the HMAC (if built with gcrypt) and, if the payload has the form "FIELD=value" with a non-empty
 * field name, linked into the list of data objects of the matching field object, which is created
 * on demand.
 *
 * On success returns 0 and stores the object in *ret and its offset in *offset (where non-NULL).
 * On failure returns the negative error code of the failing helper. */
static int journal_file_append_data(
                JournalFile *f,
                const void *data, uint64_t size,
                Object **ret, uint64_t *offset) {

        uint64_t hash, p;
        uint64_t osize;
        Object *o;
        int r, compression = 0;
        const void *eq;

        assert(f);
        assert(data || size == 0);

        hash = hash64(data, size);

        /* Deduplicate: if the very same data is already stored, reuse that object */
        r = journal_file_find_data_object_with_hash(f, data, size, hash, &o, &p);
        if (r < 0)
                return r;
        if (r > 0) {

                if (ret)
                        *ret = o;

                if (offset)
                        *offset = p;

                return 0;
        }

        osize = offsetof(Object, data.payload) + size;
        r = journal_file_append_object(f, OBJECT_DATA, osize, &o, &p);
        if (r < 0)
                return r;

        o->data.hash = htole64(hash);

#if HAVE_XZ || HAVE_LZ4
        if (JOURNAL_FILE_COMPRESS(f) && size >= COMPRESSION_SIZE_THRESHOLD) {
                size_t rsize = 0;

                /* Only accept the compressed form if it is strictly smaller than the input */
                compression = compress_blob(data, size, o->data.payload, size - 1, &rsize);

                if (compression >= 0) {
                        o->object.size = htole64(offsetof(Object, data.payload) + rsize);
                        o->object.flags |= compression;

                        log_debug("Compressed data object %"PRIu64" -> %zu using %s",
                                  size, rsize, object_compressed_to_string(compression));
                } else
                        /* Compression didn't work, we don't really care why, let's continue without compression */
                        compression = 0;
        }
#endif

        if (compression == 0)
                memcpy_safe(o->data.payload, data, size);

        r = journal_file_link_data(f, o, p, hash);
        if (r < 0)
                return r;

        /* The linking might have altered the window, so let's refresh our pointer. This has to
         * happen before the object is passed on to the HMAC code below, as 'o' may otherwise
         * point into a window that is no longer valid. */
        r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
        if (r < 0)
                return r;

#if HAVE_GCRYPT
        r = journal_file_hmac_put_object(f, OBJECT_DATA, o, p);
        if (r < 0)
                return r;
#endif

        if (!data)
                eq = NULL;
        else
                eq = memchr(data, '=', size);
        if (eq && eq > data) {
                Object *fo = NULL;
                uint64_t fp;

                /* Create field object ... */
                r = journal_file_append_field(f, data, (uint8_t*) eq - (uint8_t*) data, &fo, &fp);
                if (r < 0)
                        return r;

                /* ... and link it in. The on-disk field is little-endian, hence convert the host
                 * value with htole64(). */
                o->data.next_field_offset = fo->field.head_data_offset;
                fo->field.head_data_offset = htole64(p);
        }

        if (ret)
                *ret = o;

        if (offset)
                *offset = p;

        return 0;
}
/* Returns the number of items stored in the entry object 'o', derived from the object size.
 * Returns 0 if 'o' is not an entry object, or if its recorded size is too small to hold even the
 * fixed part of an entry object (which would otherwise make the unsigned subtraction wrap around
 * and yield a bogus, huge item count). */
uint64_t journal_file_entry_n_items(Object *o) {
        uint64_t sz;

        assert(o);

        if (o->object.type != OBJECT_ENTRY)
                return 0;

        sz = le64toh(o->object.size);
        if (sz < offsetof(Object, entry.items))
                return 0;

        return (sz - offsetof(Object, entry.items)) / sizeof(EntryItem);
}
2012-08-16 01:51:54 +02:00
/* Returns the number of slots of the entry array object 'o', derived from the object size.
 * Returns 0 if 'o' is not an entry array object, or if its recorded size is too small to hold
 * even the fixed part of an entry array object (which would otherwise make the unsigned
 * subtraction wrap around and yield a bogus, huge slot count). */
uint64_t journal_file_entry_array_n_items(Object *o) {
        uint64_t sz;

        assert(o);

        if (o->object.type != OBJECT_ENTRY_ARRAY)
                return 0;

        sz = le64toh(o->object.size);
        if (sz < offsetof(Object, entry_array.items))
                return 0;

        return (sz - offsetof(Object, entry_array.items)) / sizeof(uint64_t);
}
2012-08-18 01:45:39 +02:00
/* Returns the number of buckets of the hash table object 'o', derived from the object size.
 * Returns 0 if 'o' is neither a data nor a field hash table object, or if its recorded size is
 * too small to hold even the fixed part of a hash table object (which would otherwise make the
 * unsigned subtraction wrap around and yield a bogus, huge bucket count). */
uint64_t journal_file_hash_table_n_items(Object *o) {
        uint64_t sz;

        assert(o);

        if (!IN_SET(o->object.type, OBJECT_DATA_HASH_TABLE, OBJECT_FIELD_HASH_TABLE))
                return 0;

        sz = le64toh(o->object.size);
        if (sz < offsetof(Object, hash_table.items))
                return 0;

        return (sz - offsetof(Object, hash_table.items)) / sizeof(HashItem);
}
2011-11-08 18:20:03 +01:00
/* Stores the entry offset 'p' at position *idx of the chain of entry arrays starting at *first,
 * and increments *idx.
 *
 * If the position lies within one of the existing arrays, it is written there. Otherwise a new
 * entry array object is appended to the file, 'p' is stored in it, and it is hooked up either as
 * the first array (*first) or behind the last array of the chain.
 *
 * 'first' and 'idx' point to little-endian values. Returns 0 on success, or the negative error
 * code of the failing helper. */
static int link_entry_into_array(JournalFile *f,
                                 le64_t *first,
                                 le64_t *idx,
                                 uint64_t p) {

        uint64_t capacity = 0, last_array = 0, array, slot, total, new_array;
        Object *o;
        int r;

        assert(f);
        assert(f->header);
        assert(first);
        assert(idx);
        assert(p > 0);

        array = le64toh(*first);
        total = le64toh(*idx);
        slot = total;

        /* Walk the chain, translating the global index into an index local to each array */
        while (array > 0) {
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, array, &o);
                if (r < 0)
                        return r;

                capacity = journal_file_entry_array_n_items(o);
                if (slot >= capacity) {
                        /* Not in this array, proceed with the next one */
                        slot -= capacity;
                        last_array = array;
                        array = le64toh(o->entry_array.next_entry_array_offset);
                        continue;
                }

                o->entry_array.items[slot] = htole64(p);
                *idx = htole64(total + 1);
                return 0;
        }

        /* The chain is exhausted. Pick the size of the new array: twice the index count if that
         * exceeds the size of the last array, otherwise twice the size of the last array, but
         * always at least 4 slots. */
        capacity = total > capacity ? (total + 1) * 2 : capacity * 2;
        if (capacity < 4)
                capacity = 4;

        r = journal_file_append_object(f, OBJECT_ENTRY_ARRAY,
                                       offsetof(Object, entry_array.items) + capacity * sizeof(uint64_t),
                                       &o, &new_array);
        if (r < 0)
                return r;

#if HAVE_GCRYPT
        r = journal_file_hmac_put_object(f, OBJECT_ENTRY_ARRAY, o, new_array);
        if (r < 0)
                return r;
#endif

        o->entry_array.items[slot] = htole64(p);

        if (last_array != 0) {
                /* Hook the new array up behind the previously last one */
                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, last_array, &o);
                if (r < 0)
                        return r;

                o->entry_array.next_entry_array_offset = htole64(new_array);
        } else
                /* There was no array at all so far */
                *first = htole64(new_array);

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                f->header->n_entry_arrays = htole64(le64toh(f->header->n_entry_arrays) + 1);

        *idx = htole64(total + 1);

        return 0;
}
2011-10-08 02:20:44 +02:00
2011-11-08 18:20:03 +01:00
/* Like link_entry_into_array(), but for lists where the very first element is stored inline in
 * *extra and only the subsequent elements live in the entry array chain starting at *first.
 *
 * *idx counts all elements (including the inline one) and is incremented on success. All three
 * pointers refer to little-endian values. Returns 0 on success, or the negative error code of
 * link_entry_into_array(). */
static int link_entry_into_array_plus_one(JournalFile *f,
                                          le64_t *extra,
                                          le64_t *first,
                                          le64_t *idx,
                                          uint64_t p) {

        assert(f);
        assert(extra);
        assert(first);
        assert(idx);
        assert(p > 0);

        if (*idx != 0) {
                le64_t array_idx;
                int r;

                /* The array chain is indexed without the inline element, hence subtract one.
                 * A temporary is used so that the callee's index update does not touch *idx. */
                array_idx = htole64(le64toh(*idx) - 1);

                r = link_entry_into_array(f, first, &array_idx, p);
                if (r < 0)
                        return r;
        } else
                /* The very first element goes into the inline slot */
                *extra = htole64(p);

        *idx = htole64(le64toh(*idx) + 1);
        return 0;
}
/* Registers the entry at file offset 'offset' with the data object referenced by item 'i' of the
 * entry object 'o', i.e. adds the entry to that data object's list of entries.
 *
 * Returns 0 on success, -EINVAL if the item carries no object offset, or the negative error code
 * of the failing helper. */
static int journal_file_link_entry_item(JournalFile *f, Object *o, uint64_t offset, uint64_t i) {
        uint64_t data_offset;
        Object *data_object;
        int r;

        assert(f);
        assert(o);
        assert(offset > 0);

        data_offset = le64toh(o->entry.items[i].object_offset);
        if (data_offset == 0)
                return -EINVAL;

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &data_object);
        if (r < 0)
                return r;

        return link_entry_into_array_plus_one(
                        f,
                        &data_object->data.entry_offset,
                        &data_object->data.entry_array_offset,
                        &data_object->data.n_entries,
                        offset);
}
/* Links the entry object 'o' at file offset 'offset' into the file: first into the global entry
 * array of the header, then into the entry list of each data object the entry refers to. Also
 * updates the head/tail timestamps kept in the header.
 *
 * Returns 0 on success, -EINVAL if 'o' is not an entry object, or the negative error code of the
 * failing helper. */
static int journal_file_link_entry(JournalFile *f, Object *o, uint64_t offset) {
        uint64_t n_items, item;
        int r;

        assert(f);
        assert(f->header);
        assert(o);
        assert(offset > 0);

        if (o->object.type != OBJECT_ENTRY)
                return -EINVAL;

        /* Full memory barrier before the entry is linked in */
        __sync_synchronize();

        /* First, link up the entry itself */
        r = link_entry_into_array(
                        f,
                        &f->header->entry_array_offset,
                        &f->header->n_entries,
                        offset);
        if (r < 0)
                return r;

        /* The first entry ever linked defines the head timestamp */
        if (f->header->head_entry_realtime == 0)
                f->header->head_entry_realtime = o->entry.realtime;

        /* Every linked entry becomes the new tail */
        f->header->tail_entry_realtime = o->entry.realtime;
        f->header->tail_entry_monotonic = o->entry.monotonic;
        f->tail_entry_monotonic_valid = true;

        /* Then, link up each of the items */
        n_items = journal_file_entry_n_items(o);
        for (item = 0; item < n_items; item++) {
                r = journal_file_link_entry_item(f, o, offset, item);
                if (r < 0)
                        return r;
        }

        return 0;
}
static int journal_file_append_entry_internal (
JournalFile * f ,
const dual_timestamp * ts ,
uint64_t xor_hash ,
const EntryItem items [ ] , unsigned n_items ,
2011-11-08 18:20:03 +01:00
uint64_t * seqnum ,
2011-10-08 02:20:44 +02:00
Object * * ret , uint64_t * offset ) {
uint64_t np ;
uint64_t osize ;
Object * o ;
int r ;
assert ( f ) ;
2016-02-05 12:25:03 +01:00
assert ( f - > header ) ;
2011-10-08 02:20:44 +02:00
assert ( items | | n_items = = 0 ) ;
2011-11-08 18:20:03 +01:00
assert ( ts ) ;
2011-10-08 02:20:44 +02:00
osize = offsetof ( Object , entry . items ) + ( n_items * sizeof ( EntryItem ) ) ;
2011-11-08 18:20:03 +01:00
r = journal_file_append_object ( f , OBJECT_ENTRY , osize , & o , & np ) ;
2011-10-08 02:20:44 +02:00
if ( r < 0 )
return r ;
2012-08-13 21:52:58 +02:00
o - > entry . seqnum = htole64 ( journal_file_entry_seqnum ( f , seqnum ) ) ;
2016-02-02 03:57:41 +01:00
memcpy_safe ( o - > entry . items , items , n_items * sizeof ( EntryItem ) ) ;
2011-11-08 18:20:03 +01:00
o - > entry . realtime = htole64 ( ts - > realtime ) ;
o - > entry . monotonic = htole64 ( ts - > monotonic ) ;
2011-10-08 02:20:44 +02:00
o - > entry . xor_hash = htole64 ( xor_hash ) ;
o - > entry . boot_id = f - > header - > boot_id ;
2017-10-03 10:41:51 +02:00
# if HAVE_GCRYPT
2012-09-13 17:06:04 +02:00
r = journal_file_hmac_put_object ( f , OBJECT_ENTRY , o , np ) ;
2012-08-13 20:57:38 +02:00
if ( r < 0 )
return r ;
2012-08-20 16:51:46 +02:00
# endif
2012-08-13 20:57:38 +02:00
2011-10-08 02:20:44 +02:00
r = journal_file_link_entry ( f , o , np ) ;
if ( r < 0 )
return r ;
if ( ret )
* ret = o ;
if ( offset )
* offset = np ;
return 0 ;
}
2011-12-29 15:00:57 +01:00
/* Notifies inotify watchers that the journal file changed.
 *
 * Changes made through mmap() do not generate IN_MODIFY events. Hence, after each such change the
 * file is truncated to its current size (as recorded in f->last_stat), which does trigger
 * IN_MODIFY. A failure to do so is only logged at debug level. */
void journal_file_post_change(JournalFile *f) {
        assert(f);

        /* Full memory barrier before the truncation */
        __sync_synchronize();

        if (ftruncate(f->fd, f->last_stat.st_size) >= 0)
                return;

        log_debug_errno(errno, "Failed to truncate file to its own size: %m");
}
2015-12-11 07:42:22 +01:00
/* Timer event callback: 'userdata' is the JournalFile whose change shall be posted now.
 * Always returns 1. */
static int post_change_thunk(sd_event_source *timer, uint64_t usec, void *userdata) {
        JournalFile *f = userdata;

        assert(userdata);

        journal_file_post_change(f);

        return 1;
}
/* Arms the post-change timer of 'f' so that the change notification is posted
 * f->post_change_timer_period after the event loop's current monotonic time, unless the timer is
 * already armed. If anything goes wrong while arming the timer the change is posted immediately
 * instead. */
static void schedule_post_change(JournalFile *f) {
        sd_event_source *source;
        uint64_t current;
        int state, r;

        assert(f);
        assert(f->post_change_timer);

        source = f->post_change_timer;

        r = sd_event_source_get_enabled(source, &state);
        if (r < 0) {
                log_debug_errno(r, "Failed to get ftruncate timer state: %m");
                goto post_now;
        }

        /* Already armed, the pending timer will take care of this change too */
        if (state == SD_EVENT_ONESHOT)
                return;

        r = sd_event_now(sd_event_source_get_event(source), CLOCK_MONOTONIC, &current);
        if (r < 0) {
                log_debug_errno(r, "Failed to get clock's now for scheduling ftruncate: %m");
                goto post_now;
        }

        r = sd_event_source_set_time(source, current + f->post_change_timer_period);
        if (r < 0) {
                log_debug_errno(r, "Failed to set time for scheduling ftruncate: %m");
                goto post_now;
        }

        r = sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
        if (r < 0) {
                log_debug_errno(r, "Failed to enable scheduled ftruncate: %m");
                goto post_now;
        }

        return;

post_now:
        /* Scheduling failed, so fall back to posting the change right away */
        journal_file_post_change(f);
}
/* Enable coalesced change posting in a timer on the provided sd_event instance */
/* Enables coalesced change posting for 'f': a (initially disabled) CLOCK_MONOTONIC timer event
 * source is created on the event loop 'e', and 't' is stored as the timer period.
 *
 * Returns -EINVAL if a timer has been set up for 'f' before, the negative error code of the
 * failing sd-event call on failure, and otherwise the (non-negative) result of
 * sd_event_source_set_enabled(). */
int journal_file_enable_post_change_timer(JournalFile *f, sd_event *e, usec_t t) {
        _cleanup_(sd_event_source_unrefp) sd_event_source *source = NULL;
        int r;

        assert(f);
        assert_return(!f->post_change_timer, -EINVAL);
        assert(e);
        assert(t);

        r = sd_event_add_time(e, &source, CLOCK_MONOTONIC, 0, 0, post_change_thunk, f);
        if (r < 0)
                return r;

        /* Keep the timer off until there is a change to post */
        r = sd_event_source_set_enabled(source, SD_EVENT_OFF);
        if (r < 0)
                return r;

        f->post_change_timer_period = t;

        /* Hand the reference over to the journal file, so that the cleanup handler leaves it alone */
        f->post_change_timer = source;
        source = NULL;

        return r;
}
2012-10-16 21:40:48 +02:00
static int entry_item_cmp ( const void * _a , const void * _b ) {
const EntryItem * a = _a , * b = _b ;
if ( le64toh ( a - > object_offset ) < le64toh ( b - > object_offset ) )
return - 1 ;
if ( le64toh ( a - > object_offset ) > le64toh ( b - > object_offset ) )
return 1 ;
return 0 ;
}
2011-11-08 18:20:03 +01:00
/* Appends a new entry consisting of the 'n_iovec' fields in 'iovec' to the journal file.
 *
 * If 'ts' is NULL the current time is used. Each field is stored as (or mapped to an existing)
 * data object; the resulting items are sorted by file offset and written as one entry object.
 * Afterwards the change is posted, either via the post-change timer (if set up) or immediately.
 *
 * Returns the result of journal_file_append_entry_internal(), -EIO if the memory mapping
 * triggered a SIGBUS, or the negative error code of an earlier failing helper (in which case the
 * function returns right away, without posting a change). */
int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        struct dual_timestamp now_ts;
        uint64_t xor_hash = 0;
        EntryItem *items;
        unsigned k;
        int r;

        assert(f);
        assert(f->header);
        assert(iovec || n_iovec == 0);

        /* No timestamp supplied? Then use the current time. */
        if (!ts) {
                dual_timestamp_get(&now_ts);
                ts = &now_ts;
        }

#if HAVE_GCRYPT
        r = journal_file_maybe_append_tag(f, ts->realtime);
        if (r < 0)
                return r;
#endif

        /* alloca() can't take 0, hence let's allocate at least one */
        items = alloca(sizeof(EntryItem) * MAX(1u, n_iovec));

        for (k = 0; k < n_iovec; k++) {
                uint64_t data_offset;
                Object *data_object;

                r = journal_file_append_data(f, iovec[k].iov_base, iovec[k].iov_len, &data_object, &data_offset);
                if (r < 0)
                        return r;

                xor_hash ^= le64toh(data_object->data.hash);

                items[k].object_offset = htole64(data_offset);
                items[k].hash = data_object->data.hash;
        }

        /* Sort the items by their position on disk, to improve seek times on rotating media */
        qsort_safe(items, n_iovec, sizeof(EntryItem), entry_item_cmp);

        r = journal_file_append_entry_internal(f, ts, xor_hash, items, n_iovec, seqnum, ret, offset);

        /* If the memory mapping triggered a SIGBUS, return an IO error and ignore the error code
         * passed down to us, since it is very likely just an effect of a nullified replacement
         * mapping page. */
        if (mmap_cache_got_sigbus(f->mmap, f->cache_fd))
                r = -EIO;

        if (!f->post_change_timer)
                journal_file_post_change(f);
        else
                schedule_post_change(f);

        return r;
}
2012-10-26 03:24:03 +02:00
/* One cache record for a chain of entry arrays: remembers an array within the chain together with
 * the bookkeeping needed to resume from it. */
typedef struct ChainCacheItem {
        /* The array at the beginning of the chain */
        uint64_t first;
        /* The cached array */
        uint64_t array;
        /* The first item in the cached array */
        uint64_t begin;
        /* The total number of items in all arrays before this one in the chain */
        uint64_t total;
        /* The last index we looked at, to optimize locality when bisecting */
        uint64_t last_index;
} ChainCacheItem;
static void chain_cache_put (
2014-10-14 17:58:13 +02:00
OrderedHashmap * h ,
2012-10-26 03:24:03 +02:00
ChainCacheItem * ci ,
uint64_t first ,
uint64_t array ,
uint64_t begin ,
2013-11-26 20:37:53 +01:00
uint64_t total ,
uint64_t last_index ) {
2012-10-26 03:24:03 +02:00
if ( ! ci ) {
2012-10-26 20:25:36 +02:00
/* If the chain item to cache for this chain is the
* first one it ' s not worth caching anything */
if ( array = = first )
return ;
2014-10-27 23:50:51 +01:00
if ( ordered_hashmap_size ( h ) > = CHAIN_CACHE_MAX ) {
2014-10-14 17:58:13 +02:00
ci = ordered_hashmap_steal_first ( h ) ;
2014-10-27 23:50:51 +01:00
assert ( ci ) ;
} else {
2012-10-26 03:24:03 +02:00
ci = new ( ChainCacheItem , 1 ) ;
if ( ! ci )
return ;
}
ci - > first = first ;
2014-10-14 17:58:13 +02:00
if ( ordered_hashmap_put ( h , & ci - > first , ci ) < 0 ) {
2012-10-26 03:24:03 +02:00
free ( ci ) ;
return ;
}
} else
assert ( ci - > first = = first ) ;
ci - > array = array ;
ci - > begin = begin ;
ci - > total = total ;
2013-11-26 20:37:53 +01:00
ci - > last_index = last_index ;
2012-10-26 03:24:03 +02:00
}
2013-11-26 20:37:53 +01:00
/* Looks up item number 'i' in the chain of entry arrays that begins at 'first' and moves to the entry
 * object it refers to.
 *
 * Returns 1 if the entry was found ('ret' and 'offset' are filled in when non-NULL), 0 if the chain
 * ends before item 'i', and a negative errno-style value if moving to an object fails. */
static int generic_array_get(
                JournalFile *f,
                uint64_t first,
                uint64_t i,
                Object **ret, uint64_t *offset) {

        uint64_t entry_offset, cur, skipped = 0;
        ChainCacheItem *ci;
        Object *o;
        int r;

        assert(f);

        cur = first;

        /* If a later array of this chain has been cached and the wanted item lies beyond the items
         * preceding it, start there instead of at the head of the chain */
        ci = ordered_hashmap_get(f->chain_cache, &first);
        if (ci && i > ci->total) {
                cur = ci->array;
                skipped = ci->total;
                i -= ci->total;
        }

        for (; cur > 0; cur = le64toh(o->entry_array.next_entry_array_offset)) {
                uint64_t k;

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, cur, &o);
                if (r < 0)
                        return r;

                k = journal_file_entry_array_n_items(o);
                if (i < k)
                        break;

                /* Not in this array, account for its items and go on with the next one */
                i -= k;
                skipped += k;
        }

        /* Ran off the end of the chain */
        if (cur == 0)
                return 0;

        entry_offset = le64toh(o->entry_array.items[i]);

        /* Remember where we ended up, for the next invocation */
        chain_cache_put(f->chain_cache, ci, first, cur, le64toh(o->entry_array.items[0]), skipped, i);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_offset, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = entry_offset;

        return 1;
}
2013-11-26 20:37:53 +01:00
/* Like generic_array_get(), but for a sequence that consists of one separately stored entry ('extra')
 * followed by the chain of entry arrays beginning at 'first': index 0 addresses 'extra', index i > 0
 * addresses item i - 1 of the chain. Return values are those of generic_array_get(). */
static int generic_array_get_plus_one(
                JournalFile *f,
                uint64_t extra,
                uint64_t first,
                uint64_t i,
                Object **ret, uint64_t *offset) {

        Object *o;
        int r;

        assert(f);

        if (i > 0)
                return generic_array_get(f, first, i - 1, ret, offset);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = extra;

        return 1;
}
2011-10-08 02:20:44 +02:00
2011-11-08 18:20:03 +01:00
/* Verdicts of the test_object_*() callbacks used for bisection */
enum {
        TEST_FOUND = 0, /* the tested object matches the needle */
        TEST_LEFT  = 1, /* the tested object is smaller than the needle */
        TEST_RIGHT = 2, /* the tested object is larger than the needle */
};
2011-10-08 02:20:44 +02:00
2013-11-26 20:37:53 +01:00
/* Bisects the first 'n' items of the chain of entry arrays beginning at 'first', using 'test_object'
 * to compare the entry at a given offset against 'needle'.
 *
 * An exact match (TEST_FOUND) is treated as TEST_RIGHT when searching with DIRECTION_DOWN and as
 * TEST_LEFT otherwise. Items whose offset is 0, or for which the callback returns -EBADMSG, cut the
 * searched range short at that item.
 *
 * Returns 1 if an entry was located ('ret', 'offset' and 'idx' are filled in when non-NULL; 'idx' is
 * the position counted from the beginning of the chain), 0 if there is none, and a negative
 * errno-style value on failure. */
static int generic_array_bisect(
                JournalFile *f,
                uint64_t first,
                uint64_t n,
                uint64_t needle,
                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                direction_t direction,
                Object **ret,
                uint64_t *offset,
                uint64_t *idx) {

        uint64_t a, p, t = 0, i = 0, last_p = 0, last_index = (uint64_t) -1;
        bool subtract_one = false;
        Object *o, *array = NULL;
        ChainCacheItem *ci;
        int r, exact;

        assert(f);
        assert(test_object);

        /* What an exact match counts as, given the search direction */
        exact = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

        /* By default begin with the first array of the chain */
        a = first;

        ci = ordered_hashmap_get(f->chain_cache, &first);
        if (ci && n > ci->total) {
                /* This chain has been walked before. We may only skip ahead to the cached array, never
                 * go backwards, hence check whether the needle lies beyond the cached array's first
                 * item before using the cache. */
                r = test_object(f, ci->begin, needle);
                if (r < 0)
                        return r;

                if (r == TEST_LEFT) {
                        /* The needle is right of the beginning of the cached array: jump there */
                        a = ci->array;
                        n -= ci->total;
                        t = ci->total;
                        last_index = ci->last_index;
                }
        }

        while (a > 0) {
                uint64_t left, right, k, lp;

                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &array);
                if (r < 0)
                        return r;

                k = journal_file_entry_array_n_items(array);
                right = MIN(k, n);
                if (right <= 0)
                        return 0;

                /* Look at the last relevant item of this array first */
                i = right - 1;
                lp = p = le64toh(array->entry_array.items[i]);
                r = p <= 0 ? -EBADMSG : test_object(f, p, needle);
                if (r == -EBADMSG) {
                        log_debug_errno(r, " Encountered invalid entry while bisecting, cutting algorithm short. (1) ");
                        n = i;
                        continue;
                }
                if (r < 0)
                        return r;

                if (r == TEST_FOUND)
                        r = exact;

                if (r == TEST_RIGHT) {
                        /* What we look for is inside this array */
                        left = 0;
                        right -= 1;

                        if (last_index != (uint64_t) -1) {
                                assert(last_index <= right);

                                /* We know which index was looked at last time. Probe its immediate
                                 * neighbours first, to narrow the range early and avoid jumping
                                 * around more than necessary. */

                                if (last_index > 0) {
                                        uint64_t x = last_index - 1;

                                        p = le64toh(array->entry_array.items[x]);
                                        if (p <= 0)
                                                return -EBADMSG;

                                        r = test_object(f, p, needle);
                                        if (r < 0)
                                                return r;

                                        if (r == TEST_FOUND)
                                                r = exact;

                                        if (r == TEST_RIGHT)
                                                right = x;
                                        else
                                                left = x + 1;
                                }

                                if (last_index < right) {
                                        uint64_t y = last_index + 1;

                                        p = le64toh(array->entry_array.items[y]);
                                        if (p <= 0)
                                                return -EBADMSG;

                                        r = test_object(f, p, needle);
                                        if (r < 0)
                                                return r;

                                        if (r == TEST_FOUND)
                                                r = exact;

                                        if (r == TEST_RIGHT)
                                                right = y;
                                        else
                                                left = y + 1;
                                }
                        }

                        while (left != right) {
                                assert(left < right);
                                i = (left + right) / 2;

                                p = le64toh(array->entry_array.items[i]);
                                r = p <= 0 ? -EBADMSG : test_object(f, p, needle);
                                if (r == -EBADMSG) {
                                        log_debug_errno(r, " Encountered invalid entry while bisecting, cutting algorithm short. (2) ");
                                        right = n = i;
                                        continue;
                                }
                                if (r < 0)
                                        return r;

                                if (r == TEST_FOUND)
                                        r = exact;

                                if (r == TEST_RIGHT)
                                        right = i;
                                else
                                        left = i + 1;
                        }

                        /* The range collapsed onto a single position */
                        if (direction == DIRECTION_UP)
                                subtract_one = true;

                        i = left;
                        goto found;
                }

                if (k >= n) {
                        /* This array covers all remaining items and every one of them is left of the
                         * needle */
                        if (direction != DIRECTION_UP)
                                return 0;

                        i = n;
                        subtract_one = true;
                        goto found;
                }

                /* Proceed with the next array in the chain */
                last_p = lp;
                n -= k;
                t += k;
                last_index = (uint64_t) -1;
                a = le64toh(array->entry_array.next_entry_array_offset);
        }

        return 0;

found:
        /* Stepping back from the very first item of the chain: nothing there */
        if (subtract_one && t == 0 && i == 0)
                return 0;

        /* Remember this position for the next invocation */
        chain_cache_put(f->chain_cache, ci, first, a, le64toh(array->entry_array.items[0]), t,
                        subtract_one ? (i > 0 ? i - 1 : (uint64_t) -1) : i);

        if (!subtract_one)
                p = le64toh(array->entry_array.items[i]);
        else if (i > 0)
                p = le64toh(array->entry_array.items[i - 1]);
        else
                /* One before item 0 of this array is the last item we saw in the previous array */
                p = last_p;

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = p;

        if (idx)
                *idx = subtract_one ? t + i - 1 : t + i;

        return 1;
}
2013-11-26 20:37:53 +01:00
/* Like generic_array_bisect(), but for a sequence of 'n' entries made up of one separately stored
 * entry ('extra') followed by the chain of entry arrays beginning at 'first'. 'extra' is tested first;
 * the remaining n - 1 items are then bisected. A reported 'idx' counts 'extra' as position 0.
 *
 * Returns 1 if an entry was located, 0 if not, negative errno-style value on failure. */
static int generic_array_bisect_plus_one(
                JournalFile *f,
                uint64_t extra,
                uint64_t first,
                uint64_t n,
                uint64_t needle,
                int (*test_object)(JournalFile *f, uint64_t p, uint64_t needle),
                direction_t direction,
                Object **ret,
                uint64_t *offset,
                uint64_t *idx) {

        bool step_back;
        Object *o;
        int r;

        assert(f);
        assert(test_object);

        if (n <= 0)
                return 0;

        /* Check the extra entry before bisecting the array in object 'first' */
        r = test_object(f, extra, needle);
        if (r < 0)
                return r;

        if (r == TEST_FOUND)
                r = direction == DIRECTION_DOWN ? TEST_RIGHT : TEST_LEFT;

        if (r == TEST_RIGHT) {
                if (direction != DIRECTION_DOWN)
                        return 0;

                goto found;
        }

        /* When searching with DIRECTION_UP the array proper gets precedence: if it contains a matching
         * entry the last such one is returned. Only if it contains none we fall back to the extra
         * entry, hence remember that it qualified. */
        step_back = r == TEST_LEFT && direction == DIRECTION_UP;

        r = generic_array_bisect(f, first, n - 1, needle, test_object, direction, ret, offset, idx);

        if (r == 0 && step_back)
                goto found;

        /* Account for the extra entry occupying position 0 */
        if (r > 0 && idx)
                (*idx)++;

        return r;

found:
        r = journal_file_move_to_object(f, OBJECT_ENTRY, extra, &o);
        if (r < 0)
                return r;

        if (ret)
                *ret = o;

        if (offset)
                *offset = extra;

        if (idx)
                *idx = 0;

        return 1;
}
2013-05-03 04:51:50 +02:00
/* Bisection callback comparing the offset 'p' itself against 'needle' */
_pure_ static int test_object_offset(JournalFile *f, uint64_t p, uint64_t needle) {
        assert(f);
        assert(p > 0);

        if (p < needle)
                return TEST_LEFT;

        if (p > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
2011-11-08 18:20:03 +01:00
/* Bisection callback comparing the sequence number of the entry at offset 'p' against 'needle'.
 * Returns a negative errno-style value if the entry object cannot be accessed. */
static int test_object_seqnum(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t seqnum;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        seqnum = le64toh(o->entry.seqnum);

        if (seqnum < needle)
                return TEST_LEFT;

        if (seqnum > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
2011-10-08 02:20:44 +02:00
2011-11-08 18:20:03 +01:00
/* Seeks to an entry by sequence number, bisecting the file's main entry array chain in the given
 * direction. Return values are those of generic_array_bisect(). */
int journal_file_move_to_entry_by_seqnum(
                JournalFile *f,
                uint64_t seqnum,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t first, n;

        assert(f);
        assert(f->header);

        first = le64toh(f->header->entry_array_offset);
        n = le64toh(f->header->n_entries);

        return generic_array_bisect(f, first, n, seqnum, test_object_seqnum, direction, ret, offset, NULL);
}
2011-10-08 02:20:44 +02:00
2011-11-08 18:20:03 +01:00
/* Bisection callback comparing the realtime timestamp of the entry at offset 'p' against 'needle'.
 * Returns a negative errno-style value if the entry object cannot be accessed. */
static int test_object_realtime(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t realtime;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        realtime = le64toh(o->entry.realtime);

        if (realtime < needle)
                return TEST_LEFT;

        if (realtime > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
2011-11-08 18:20:03 +01:00
/* Seeks to an entry by realtime timestamp, bisecting the file's main entry array chain in the given
 * direction. Return values are those of generic_array_bisect(). */
int journal_file_move_to_entry_by_realtime(
                JournalFile *f,
                uint64_t realtime,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        uint64_t first, n;

        assert(f);
        assert(f->header);

        first = le64toh(f->header->entry_array_offset);
        n = le64toh(f->header->n_entries);

        return generic_array_bisect(f, first, n, realtime, test_object_realtime, direction, ret, offset, NULL);
}
/* Bisection callback comparing the monotonic timestamp of the entry at offset 'p' against 'needle'.
 * Returns a negative errno-style value if the entry object cannot be accessed. */
static int test_object_monotonic(JournalFile *f, uint64_t p, uint64_t needle) {
        uint64_t monotonic;
        Object *o;
        int r;

        assert(f);
        assert(p > 0);

        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
        if (r < 0)
                return r;

        monotonic = le64toh(o->entry.monotonic);

        if (monotonic < needle)
                return TEST_LEFT;

        if (monotonic > needle)
                return TEST_RIGHT;

        return TEST_FOUND;
}
2015-02-10 12:32:54 +01:00
static int find_data_object_by_boot_id (
2013-05-04 01:56:18 +02:00
JournalFile * f ,
sd_id128_t boot_id ,
Object * * o ,
uint64_t * b ) {
2015-02-10 12:32:54 +01:00
2017-12-14 19:02:29 +01:00
char t [ STRLEN ( " _BOOT_ID= " ) + 32 + 1 ] = " _BOOT_ID= " ;
2013-05-04 01:56:18 +02:00
sd_id128_to_string ( boot_id , t + 9 ) ;
return journal_file_find_data_object ( f , t , sizeof ( t ) - 1 , o , b ) ;
}
2011-11-08 18:20:03 +01:00
/* Seeks to an entry by monotonic timestamp within the boot identified by 'boot_id', bisecting the
 * entries referenced by that boot ID's data object.
 *
 * Returns -ENOENT if the file has no data object for the boot ID, otherwise whatever the lookup or
 * generic_array_bisect_plus_one() return. */
int journal_file_move_to_entry_by_monotonic(
                JournalFile *f,
                sd_id128_t boot_id,
                uint64_t monotonic,
                direction_t direction,
                Object **ret,
                uint64_t *offset) {

        Object *boot;
        int r;

        assert(f);

        r = find_data_object_by_boot_id(f, boot_id, &boot, NULL);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOENT;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(boot->data.entry_offset),
                        le64toh(boot->data.entry_array_offset),
                        le64toh(boot->data.n_entries),
                        monotonic,
                        test_object_monotonic,
                        direction,
                        ret, offset, NULL);
}
2014-12-16 20:54:56 +01:00
/* Forgets the saved read position of the file: the location type goes back to LOCATION_HEAD and all
 * remembered properties of the current entry are cleared. */
void journal_file_reset_location(JournalFile *f) {
        f->location_type = LOCATION_HEAD;

        zero(f->current_boot_id);
        f->current_offset = 0;
        f->current_seqnum = 0;
        f->current_realtime = 0;
        f->current_monotonic = 0;
        f->current_xor_hash = 0;
}
2015-02-24 19:45:17 +01:00
/* Remembers entry 'o', located at 'offset', as the current read position of the file: the location
 * type becomes LOCATION_SEEK and the entry's identifying fields are copied in host byte order. */
void journal_file_save_location(JournalFile *f, Object *o, uint64_t offset) {
        f->location_type = LOCATION_SEEK;
        f->current_offset = offset;

        f->current_boot_id = o->entry.boot_id;
        f->current_seqnum = le64toh(o->entry.seqnum);
        f->current_realtime = le64toh(o->entry.realtime);
        f->current_monotonic = le64toh(o->entry.monotonic);
        f->current_xor_hash = le64toh(o->entry.xor_hash);
}
2014-12-17 14:06:28 +01:00
/* Orders the saved locations of two journal files. Both must have a location saved (LOCATION_SEEK).
 * Returns -1, 0 or 1 if the location of 'af' sorts before, equal to, or after the one of 'bf'. */
int journal_file_compare_locations(JournalFile *af, JournalFile *bf) {
        bool same_boot;

        assert(af);
        assert(af->header);
        assert(bf);
        assert(bf->header);
        assert(af->location_type == LOCATION_SEEK);
        assert(bf->location_type == LOCATION_SEEK);

        same_boot = sd_id128_equal(af->current_boot_id, bf->current_boot_id);

        /* Same contents and same timestamps: the entries are identical, regardless of whether their
         * sequence numbers agree */
        if (same_boot &&
            af->current_monotonic == bf->current_monotonic &&
            af->current_realtime == bf->current_realtime &&
            af->current_xor_hash == bf->current_xor_hash)
                return 0;

        /* Sequence numbers are only comparable if they stem from the same source. Should they be equal
         * even though the data differs something is off; make the best of it and fall through to
         * comparing by time. */
        if (sd_id128_equal(af->header->seqnum_id, bf->header->seqnum_id) &&
            af->current_seqnum != bf->current_seqnum)
                return af->current_seqnum < bf->current_seqnum ? -1 : 1;

        /* Monotonic timestamps are only comparable within the same boot */
        if (same_boot && af->current_monotonic != bf->current_monotonic)
                return af->current_monotonic < bf->current_monotonic ? -1 : 1;

        /* Next, order by wallclock time */
        if (af->current_realtime != bf->current_realtime)
                return af->current_realtime < bf->current_realtime ? -1 : 1;

        /* As a last resort, order by contents */
        if (af->current_xor_hash != bf->current_xor_hash)
                return af->current_xor_hash < bf->current_xor_hash ? -1 : 1;

        return 0;
}
2016-10-12 11:54:36 +02:00
/* Moves index '*i' by one step in the given direction within an array of 'n' items. Returns 1 if the
 * index was changed, 0 if it already was at the last (DIRECTION_DOWN) or first (otherwise) item and
 * hence was left alone. */
static int bump_array_index(uint64_t *i, direction_t direction, uint64_t n) {

        if (direction == DIRECTION_DOWN) {
                if (*i >= n - 1)
                        return 0;

                (*i)++;
                return 1;
        }

        if (*i <= 0)
                return 0;

        (*i)--;
        return 1;
}
2016-10-12 12:12:05 +02:00
/* Tells whether 'new_offset' is a plausible successor of 'old_offset' when iterating in the given
 * direction: strictly larger when going down, strictly smaller otherwise. */
static bool check_properly_ordered(uint64_t new_offset, uint64_t old_offset, direction_t direction) {

        /* An uninitialized offset on either side is an error in any case */
        if (old_offset == 0 || new_offset == 0)
                return false;

        if (direction == DIRECTION_DOWN)
                return new_offset > old_offset;

        return new_offset < old_offset;
}
2011-11-08 18:20:03 +01:00
int journal_file_next_entry (
JournalFile * f ,
2014-12-17 15:45:10 +01:00
uint64_t p ,
2011-11-08 18:20:03 +01:00
direction_t direction ,
Object * * ret , uint64_t * offset ) {
2014-02-27 06:07:29 +01:00
uint64_t i , n , ofs ;
2011-10-08 02:20:44 +02:00
int r ;
assert ( f ) ;
2016-02-05 12:25:03 +01:00
assert ( f - > header ) ;
2011-11-08 18:20:03 +01:00
n = le64toh ( f - > header - > n_entries ) ;
if ( n < = 0 )
return 0 ;
2011-10-08 02:20:44 +02:00
2014-12-17 15:45:10 +01:00
if ( p = = 0 )
2011-11-08 18:20:03 +01:00
i = direction = = DIRECTION_DOWN ? 0 : n - 1 ;
2011-10-08 02:20:44 +02:00
else {
2011-11-08 18:20:03 +01:00
r = generic_array_bisect ( f ,
le64toh ( f - > header - > entry_array_offset ) ,
le64toh ( f - > header - > n_entries ) ,
p ,
test_object_offset ,
DIRECTION_DOWN ,
NULL , NULL ,
& i ) ;
if ( r < = 0 )
return r ;
2016-10-12 11:54:36 +02:00
r = bump_array_index ( & i , direction , n ) ;
if ( r < = 0 )
return r ;
2011-10-08 02:20:44 +02:00
}
2011-11-08 18:20:03 +01:00
/* And jump to it */
2016-10-12 12:36:01 +02:00
for ( ; ; ) {
r = generic_array_get ( f ,
le64toh ( f - > header - > entry_array_offset ) ,
i ,
ret , & ofs ) ;
if ( r > 0 )
break ;
if ( r ! = - EBADMSG )
return r ;
/* OK, so this entry is borked. Most likely some entry didn't get synced to disk properly, let's see if
* the next one might work for us instead . */
log_debug_errno ( r , " Entry item % " PRIu64 " is bad, skipping over it. " , i ) ;
r = bump_array_index ( & i , direction , n ) ;
if ( r < = 0 )
return r ;
2016-04-26 11:38:39 +02:00
}
2014-02-27 06:07:29 +01:00
2016-10-12 12:12:05 +02:00
/* Ensure our array is properly ordered. */
if ( p > 0 & & ! check_properly_ordered ( ofs , p , direction ) ) {
log_debug ( " %s: entry array not properly ordered at entry % " PRIu64 , f - > path , i ) ;
2014-02-27 06:07:29 +01:00
return - EBADMSG ;
}
if ( offset )
* offset = ofs ;
return 1 ;
2011-11-08 18:20:03 +01:00
}
2011-10-08 02:20:44 +02:00
2011-11-08 18:20:03 +01:00
/* Moves to the entry following (DIRECTION_DOWN) or preceding (otherwise) the entry 'o' at offset 'p'
 * among the entries referenced by the data object at 'data_offset'. With o == NULL iteration starts
 * at the first resp. last referenced entry.
 *
 * Entries that cannot be accessed because of -EBADMSG are skipped in the iteration direction.
 *
 * Returns 1 on success ('ret' is filled in by the lookup, 'offset' when non-NULL), 0 if there is no
 * further entry, -EINVAL if 'o' is not an entry object, -EBADMSG if the entry found is not ordered
 * properly relative to 'p', or another negative errno-style value on failure. */
int journal_file_next_entry_for_data(
                JournalFile *f,
                Object *o, uint64_t p,
                uint64_t data_offset,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        uint64_t i, n, ofs;
        Object *d;
        int r;

        assert(f);
        assert(p > 0 || !o);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        n = le64toh(d->data.n_entries);
        if (n <= 0)
                return 0;

        if (!o)
                i = direction == DIRECTION_DOWN ? 0 : n - 1;
        else {
                if (o->object.type != OBJECT_ENTRY)
                        return -EINVAL;

                /* Find the index of the entry we are coming from ... */
                r = generic_array_bisect_plus_one(f,
                                                  le64toh(d->data.entry_offset),
                                                  le64toh(d->data.entry_array_offset),
                                                  le64toh(d->data.n_entries),
                                                  p,
                                                  test_object_offset,
                                                  DIRECTION_DOWN,
                                                  NULL, NULL,
                                                  &i);
                if (r <= 0)
                        return r;

                /* ... and step to its neighbour */
                r = bump_array_index(&i, direction, n);
                if (r <= 0)
                        return r;
        }

        for (;;) {
                r = generic_array_get_plus_one(f,
                                               le64toh(d->data.entry_offset),
                                               le64toh(d->data.entry_array_offset),
                                               i,
                                               ret, &ofs);
                if (r > 0)
                        break;
                if (r != -EBADMSG)
                        return r;

                /* This item is broken, try its neighbour in the iteration direction instead */
                log_debug_errno(r, " Data entry item % " PRIu64 " is bad, skipping over it. ", i);

                r = bump_array_index(&i, direction, n);
                if (r <= 0)
                        return r;
        }

        /* Make sure the array is ordered properly: it is an error if the entry we arrived at does
         * NOT lie in iteration direction relative to the one we started from. */
        if (p > 0 && !check_properly_ordered(ofs, p, direction)) {
                log_debug(" %s data entry array not properly ordered at entry % " PRIu64, f->path, i);
                return -EBADMSG;
        }

        if (offset)
                *offset = ofs;

        return 1;
}
2011-10-08 02:20:44 +02:00
2012-07-13 00:29:26 +02:00
/* Seeks by entry offset 'p' among the entries referenced by the data object at 'data_offset'.
 * Returns a negative errno-style value if the data object cannot be accessed, otherwise the result
 * of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_offset_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t p,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *d;
        int r;

        assert(f);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(d->data.entry_offset),
                        le64toh(d->data.entry_array_offset),
                        le64toh(d->data.n_entries),
                        p,
                        test_object_offset,
                        direction,
                        ret, offset, NULL);
}
/* Seeks by monotonic timestamp within boot 'boot_id', restricted to entries that are referenced by
 * the data object at 'data_offset'.
 *
 * Returns 1 if such an entry was found ('ret' and 'offset' are filled in when non-NULL), 0 if not,
 * -ENOENT if the file has no data object for the boot ID, or another negative errno-style value on
 * failure. */
int journal_file_move_to_entry_by_monotonic_for_data(
                JournalFile *f,
                uint64_t data_offset,
                sd_id128_t boot_id,
                uint64_t monotonic,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        uint64_t boot_offset, candidate;
        Object *boot, *d;
        int r;

        assert(f);

        /* First, seek by time among the entries of the boot ID's data object */
        r = find_data_object_by_boot_id(f, boot_id, &boot, &boot_offset);
        if (r < 0)
                return r;
        if (r == 0)
                return -ENOENT;

        r = generic_array_bisect_plus_one(f,
                                          le64toh(boot->data.entry_offset),
                                          le64toh(boot->data.entry_array_offset),
                                          le64toh(boot->data.n_entries),
                                          monotonic,
                                          test_object_monotonic,
                                          direction,
                                          NULL, &candidate, NULL);
        if (r <= 0)
                return r;

        /* Then alternate between the two lists of entries, seeking by offset in each, until both
         * agree on the same entry */
        for (;;) {
                uint64_t in_data, in_boot;
                Object *entry;

                r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
                if (r < 0)
                        return r;

                r = generic_array_bisect_plus_one(f,
                                                  le64toh(d->data.entry_offset),
                                                  le64toh(d->data.entry_array_offset),
                                                  le64toh(d->data.n_entries),
                                                  candidate,
                                                  test_object_offset,
                                                  direction,
                                                  NULL, &in_data, NULL);
                if (r <= 0)
                        return r;

                /* The object pointer obtained earlier is not reused; look the boot ID object up anew */
                r = journal_file_move_to_object(f, OBJECT_DATA, boot_offset, &boot);
                if (r < 0)
                        return r;

                r = generic_array_bisect_plus_one(f,
                                                  le64toh(boot->data.entry_offset),
                                                  le64toh(boot->data.entry_array_offset),
                                                  le64toh(boot->data.n_entries),
                                                  in_data,
                                                  test_object_offset,
                                                  direction,
                                                  &entry, &in_boot, NULL);
                if (r <= 0)
                        return r;

                if (in_data == in_boot) {
                        if (ret)
                                *ret = entry;
                        if (offset)
                                *offset = in_boot;

                        return 1;
                }

                candidate = in_boot;
        }
}
2011-11-08 18:20:03 +01:00
/* Seeks by sequence number among the entries referenced by the data object at 'data_offset'.
 * Returns a negative errno-style value if the data object cannot be accessed, otherwise the result
 * of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_seqnum_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t seqnum,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *d;
        int r;

        assert(f);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(d->data.entry_offset),
                        le64toh(d->data.entry_array_offset),
                        le64toh(d->data.n_entries),
                        seqnum,
                        test_object_seqnum,
                        direction,
                        ret, offset, NULL);
}
2011-10-08 02:20:44 +02:00
2011-11-08 18:20:03 +01:00
/* Seeks by realtime timestamp among the entries referenced by the data object at 'data_offset'.
 * Returns a negative errno-style value if the data object cannot be accessed, otherwise the result
 * of generic_array_bisect_plus_one(). */
int journal_file_move_to_entry_by_realtime_for_data(
                JournalFile *f,
                uint64_t data_offset,
                uint64_t realtime,
                direction_t direction,
                Object **ret, uint64_t *offset) {

        Object *d;
        int r;

        assert(f);

        r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &d);
        if (r < 0)
                return r;

        return generic_array_bisect_plus_one(
                        f,
                        le64toh(d->data.entry_offset),
                        le64toh(d->data.entry_array_offset),
                        le64toh(d->data.n_entries),
                        realtime,
                        test_object_realtime,
                        direction,
                        ret, offset, NULL);
}
2012-08-16 01:51:54 +02:00
/* Prints the file header followed by one line per object (plus a flags line for objects that carry
 * compression flags) to standard output. Objects are visited in file order, from the end of the
 * header up to and including the object at the header's tail object offset. If an object cannot be
 * accessed an error is logged and the dump stops. */
void journal_file_dump(JournalFile *f) {
        Object *o;
        uint64_t p;
        int r;

        assert(f);
        assert(f->header);

        journal_file_print_header(f);

        /* The first object follows the header directly */
        for (p = le64toh(f->header->header_size); p != 0;) {

                r = journal_file_move_to_object(f, OBJECT_UNUSED, p, &o);
                if (r < 0) {
                        log_error(" File corrupt ");
                        return;
                }

                switch (o->object.type) {

                case OBJECT_UNUSED:
                        printf(" Type: OBJECT_UNUSED \n ");
                        break;

                case OBJECT_DATA:
                        printf(" Type: OBJECT_DATA \n ");
                        break;

                case OBJECT_FIELD:
                        printf(" Type: OBJECT_FIELD \n ");
                        break;

                case OBJECT_ENTRY:
                        printf(" Type: OBJECT_ENTRY seqnum=% " PRIu64 " monotonic=% " PRIu64 " realtime=% " PRIu64 " \n ",
                               le64toh(o->entry.seqnum),
                               le64toh(o->entry.monotonic),
                               le64toh(o->entry.realtime));
                        break;

                case OBJECT_FIELD_HASH_TABLE:
                        printf(" Type: OBJECT_FIELD_HASH_TABLE \n ");
                        break;

                case OBJECT_DATA_HASH_TABLE:
                        printf(" Type: OBJECT_DATA_HASH_TABLE \n ");
                        break;

                case OBJECT_ENTRY_ARRAY:
                        printf(" Type: OBJECT_ENTRY_ARRAY \n ");
                        break;

                case OBJECT_TAG:
                        printf(" Type: OBJECT_TAG seqnum=% " PRIu64 " epoch=% " PRIu64 " \n ",
                               le64toh(o->tag.seqnum),
                               le64toh(o->tag.epoch));
                        break;

                default:
                        printf(" Type: unknown (%i) \n ", o->object.type);
                        break;
                }

                if (o->object.flags & OBJECT_COMPRESSION_MASK)
                        printf(" Flags: %s \n ",
                               object_compressed_to_string(o->object.flags & OBJECT_COMPRESSION_MASK));

                /* Stop after the tail object, otherwise advance by the aligned size of this object */
                p = p == le64toh(f->header->tail_object_offset) ? 0 : p + ALIGN64(le64toh(o->object.size));
        }
}
2013-06-18 14:48:14 +02:00
/* Wrapper around format_timestamp() that never returns NULL: if formatting yields no string a
 * placeholder literal is returned instead. */
static const char* format_timestamp_safe(char *buf, size_t l, usec_t t) {
        const char *formatted = format_timestamp(buf, l, t);

        return formatted ? formatted : " --- ";
}
2012-08-16 01:51:54 +02:00
/* Prints a human readable summary of the file header to standard output: IDs, state, flags, sizes,
 * sequence number and timestamp ranges and object counts. Counters that the header of this file does
 * not contain (per JOURNAL_HEADER_CONTAINS()) are left out, and so is the disk usage line if fstat()
 * on the file descriptor fails. */
void journal_file_print_header(JournalFile *f) {
        char file_id[33], machine_id[33], boot_id[33], seqnum_id[33];
        char head_realtime[FORMAT_TIMESTAMP_MAX], tail_realtime[FORMAT_TIMESTAMP_MAX], tail_monotonic[FORMAT_TIMESTAMP_MAX];
        char bytes[FORMAT_BYTES_MAX];
        uint64_t n_data_buckets, n_field_buckets;
        const char *state;
        struct stat st;

        assert(f);
        assert(f->header);

        if (f->header->state == STATE_OFFLINE)
                state = " OFFLINE ";
        else if (f->header->state == STATE_ONLINE)
                state = " ONLINE ";
        else if (f->header->state == STATE_ARCHIVED)
                state = " ARCHIVED ";
        else
                state = " UNKNOWN ";

        /* The hash table sizes in the header are in bytes, we show them in items */
        n_data_buckets = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
        n_field_buckets = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);

        printf(" File Path: %s \n "
               " File ID: %s \n "
               " Machine ID: %s \n "
               " Boot ID: %s \n "
               " Sequential Number ID: %s \n "
               " State: %s \n "
               " Compatible Flags:%s%s \n "
               " Incompatible Flags:%s%s%s \n "
               " Header size: % " PRIu64 " \n "
               " Arena size: % " PRIu64 " \n "
               " Data Hash Table Size: % " PRIu64 " \n "
               " Field Hash Table Size: % " PRIu64 " \n "
               " Rotate Suggested: %s \n "
               " Head Sequential Number: % " PRIu64 " (% " PRIx64 " ) \n "
               " Tail Sequential Number: % " PRIu64 " (% " PRIx64 " ) \n "
               " Head Realtime Timestamp: %s (% " PRIx64 " ) \n "
               " Tail Realtime Timestamp: %s (% " PRIx64 " ) \n "
               " Tail Monotonic Timestamp: %s (% " PRIx64 " ) \n "
               " Objects: % " PRIu64 " \n "
               " Entry Objects: % " PRIu64 " \n ",
               f->path,
               sd_id128_to_string(f->header->file_id, file_id),
               sd_id128_to_string(f->header->machine_id, machine_id),
               sd_id128_to_string(f->header->boot_id, boot_id),
               sd_id128_to_string(f->header->seqnum_id, seqnum_id),
               state,
               /* Known flags are spelled out, any unknown bits are shown collectively */
               JOURNAL_HEADER_SEALED(f->header) ? " SEALED " : " ",
               (le32toh(f->header->compatible_flags) & ~HEADER_COMPATIBLE_ANY) ? " ??? " : " ",
               JOURNAL_HEADER_COMPRESSED_XZ(f->header) ? " COMPRESSED-XZ " : " ",
               JOURNAL_HEADER_COMPRESSED_LZ4(f->header) ? " COMPRESSED-LZ4 " : " ",
               (le32toh(f->header->incompatible_flags) & ~HEADER_INCOMPATIBLE_ANY) ? " ??? " : " ",
               le64toh(f->header->header_size),
               le64toh(f->header->arena_size),
               n_data_buckets,
               n_field_buckets,
               yes_no(journal_file_rotate_suggested(f, 0)),
               /* Each of the following values is shown twice, in two different notations */
               le64toh(f->header->head_entry_seqnum), le64toh(f->header->head_entry_seqnum),
               le64toh(f->header->tail_entry_seqnum), le64toh(f->header->tail_entry_seqnum),
               format_timestamp_safe(head_realtime, sizeof(head_realtime), le64toh(f->header->head_entry_realtime)), le64toh(f->header->head_entry_realtime),
               format_timestamp_safe(tail_realtime, sizeof(tail_realtime), le64toh(f->header->tail_entry_realtime)), le64toh(f->header->tail_entry_realtime),
               format_timespan(tail_monotonic, sizeof(tail_monotonic), le64toh(f->header->tail_entry_monotonic), USEC_PER_MSEC), le64toh(f->header->tail_entry_monotonic),
               le64toh(f->header->n_objects),
               le64toh(f->header->n_entries));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_data))
                printf(" Data Objects: % " PRIu64 " \n "
                       " Data Hash Table Fill: %.1f%% \n ",
                       le64toh(f->header->n_data),
                       100.0 * (double) le64toh(f->header->n_data) / ((double) n_data_buckets));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields))
                printf(" Field Objects: % " PRIu64 " \n "
                       " Field Hash Table Fill: %.1f%% \n ",
                       le64toh(f->header->n_fields),
                       100.0 * (double) le64toh(f->header->n_fields) / ((double) n_field_buckets));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags))
                printf(" Tag Objects: % " PRIu64 " \n ",
                       le64toh(f->header->n_tags));

        if (JOURNAL_HEADER_CONTAINS(f->header, n_entry_arrays))
                printf(" Entry Array Objects: % " PRIu64 " \n ",
                       le64toh(f->header->n_entry_arrays));

        /* Disk usage is derived from the block count reported by fstat(), multiplied by 512 */
        if (fstat(f->fd, &st) >= 0)
                printf(" Disk usage: %s \n ", format_bytes(bytes, sizeof(bytes), (uint64_t) st.st_blocks * 512ULL));
}
2015-04-22 13:20:49 +02:00
/* Logs a notice when a journal file that is about to be written lives on btrfs and still has copy-on-write
 * enabled. Returns 1 if the notice was emitted, 0 if the file is not on btrfs or already carries
 * FS_NOCOW_FL, and otherwise whatever log_warning_errno() returns for the failed probe. */
static int journal_file_warn_btrfs(JournalFile *f) {
        unsigned file_attrs;
        int k;

        assert(f);

        /* Our write pattern is rather unfriendly to COW file systems such as btrfs. Turning COW off
         * should greatly improve performance there, at the expense of data integrity features (which
         * shouldn't be too bad, given that we do our own checksumming). Hence, before anything is
         * written, check whether COW is actually off. */

        k = btrfs_is_filesystem(f->fd);
        if (k < 0)
                return log_warning_errno(k, "Failed to determine if journal is on btrfs: %m");
        if (k == 0)
                return 0;

        k = read_attr_fd(f->fd, &file_attrs);
        if (k < 0)
                return log_warning_errno(k, "Failed to read file attributes: %m");

        if (!(file_attrs & FS_NOCOW_FL)) {
                log_notice("Creating journal file %s on a btrfs file system, and copy-on-write is enabled. "
                           "This is likely to slow down journal access substantially, please consider turning "
                           "off the copy-on-write file attribute on the journal directory, using chattr +C.", f->path);
                return 1;
        }

        log_debug("Detected btrfs file system with copy-on-write disabled, all is good.");
        return 0;
}
2012-08-16 01:51:54 +02:00
int journal_file_open (
2016-04-25 00:31:24 +02:00
int fd ,
2012-08-16 01:51:54 +02:00
const char * fname ,
int flags ,
mode_t mode ,
bool compress ,
2012-08-17 00:45:18 +02:00
bool seal ,
2012-08-16 01:51:54 +02:00
JournalMetrics * metrics ,
MMapCache * mmap_cache ,
2016-02-18 02:37:10 +01:00
Set * deferred_closes ,
2012-08-16 01:51:54 +02:00
JournalFile * template ,
JournalFile * * ret ) {
2012-08-13 20:31:10 +02:00
2014-12-30 20:57:53 +01:00
bool newly_created = false ;
2012-08-16 01:51:54 +02:00
JournalFile * f ;
2014-12-30 20:57:53 +01:00
void * h ;
2012-08-16 01:51:54 +02:00
int r ;
2012-08-13 20:31:10 +02:00
2012-09-21 16:16:39 +02:00
assert ( ret ) ;
2016-04-25 00:31:24 +02:00
assert ( fd > = 0 | | fname ) ;
2012-08-13 20:31:10 +02:00
2017-09-29 09:58:22 +02:00
if ( ! IN_SET ( ( flags & O_ACCMODE ) , O_RDONLY , O_RDWR ) )
2012-08-16 01:51:54 +02:00
return - EINVAL ;
2012-08-13 20:31:10 +02:00
2018-01-27 09:32:36 +01:00
if ( fname & & ( flags & O_CREAT ) & & ! endswith ( fname , " .journal " ) )
return - EINVAL ;
2012-08-13 20:31:10 +02:00
2012-08-16 01:51:54 +02:00
f = new0 ( JournalFile , 1 ) ;
if ( ! f )
return - ENOMEM ;
2012-08-13 20:31:10 +02:00
2016-04-25 00:31:24 +02:00
f - > fd = fd ;
2012-08-16 01:51:54 +02:00
f - > mode = mode ;
2012-08-13 20:31:10 +02:00
2012-08-16 01:51:54 +02:00
f - > flags = flags ;
f - > prot = prot_from_flags ( flags ) ;
f - > writable = ( flags & O_ACCMODE ) ! = O_RDONLY ;
2017-10-03 10:41:51 +02:00
# if HAVE_LZ4
2014-07-04 04:42:22 +02:00
f - > compress_lz4 = compress ;
2017-10-03 10:41:51 +02:00
# elif HAVE_XZ
2014-07-04 04:42:22 +02:00
f - > compress_xz = compress ;
2012-09-03 15:46:44 +02:00
# endif
2017-10-03 10:41:51 +02:00
# if HAVE_GCRYPT
2012-08-17 00:45:18 +02:00
f - > seal = seal ;
2012-09-11 03:03:36 +02:00
# endif
2012-08-13 20:31:10 +02:00
2012-08-16 01:51:54 +02:00
if ( mmap_cache )
f - > mmap = mmap_cache_ref ( mmap_cache ) ;
else {
2012-08-18 01:46:20 +02:00
f - > mmap = mmap_cache_new ( ) ;
2012-08-16 01:51:54 +02:00
if ( ! f - > mmap ) {
r = - ENOMEM ;
goto fail ;
}
}
2012-08-13 20:31:10 +02:00
2017-01-31 17:36:08 +01:00
if ( fname ) {
2016-04-25 00:31:24 +02:00
f - > path = strdup ( fname ) ;
2017-01-31 17:36:08 +01:00
if ( ! f - > path ) {
r = - ENOMEM ;
goto fail ;
}
} else {
2018-02-19 17:37:47 +01:00
assert ( fd > = 0 ) ;
2017-01-31 17:36:08 +01:00
/* If we don't know the path, fill in something explanatory and vaguely useful */
if ( asprintf ( & f - > path , " /proc/self/%i " , fd ) < 0 ) {
r = - ENOMEM ;
goto fail ;
}
2012-08-16 01:51:54 +02:00
}
2012-08-13 20:31:10 +02:00
2014-10-14 17:58:13 +02:00
f - > chain_cache = ordered_hashmap_new ( & uint64_hash_ops ) ;
2012-10-26 03:24:03 +02:00
if ( ! f - > chain_cache ) {
r = - ENOMEM ;
goto fail ;
}
2012-08-16 01:51:54 +02:00
if ( f - > fd < 0 ) {
2018-02-19 17:37:47 +01:00
/* We pass O_NONBLOCK here, so that in case somebody pointed us to some character device node or FIFO
* or so , we likely fail quickly than block for long . For regular files O_NONBLOCK has no effect , hence
* it doesn ' t hurt in that case . */
f - > fd = open ( f - > path , f - > flags | O_CLOEXEC | O_NONBLOCK , f - > mode ) ;
2016-04-25 00:31:24 +02:00
if ( f - > fd < 0 ) {
r = - errno ;
goto fail ;
}
/* fds we opened here by us should also be closed by us. */
f - > close_fd = true ;
2018-02-19 17:37:47 +01:00
r = fd_nonblock ( f - > fd , false ) ;
if ( r < 0 )
goto fail ;
2012-08-13 20:31:10 +02:00
}
2017-07-11 01:24:56 +02:00
f - > cache_fd = mmap_cache_add_fd ( f - > mmap , f - > fd ) ;
if ( ! f - > cache_fd ) {
r = - ENOMEM ;
goto fail ;
}
2015-01-05 02:09:01 +01:00
r = journal_file_fstat ( f ) ;
if ( r < 0 )
2012-08-16 01:51:54 +02:00
goto fail ;
2012-08-13 20:31:10 +02:00
2012-08-16 01:51:54 +02:00
if ( f - > last_stat . st_size = = 0 & & f - > writable ) {
2015-01-08 01:22:29 +01:00
2015-04-22 13:20:49 +02:00
( void ) journal_file_warn_btrfs ( f ) ;
2015-01-08 01:22:29 +01:00
2012-10-16 22:58:07 +02:00
/* Let's attach the creation time to the journal file,
* so that the vacuuming code knows the age of this
* file even if the file might end up corrupted one
* day . . . Ideally we ' d just use the creation time many
* file systems maintain for each file , but there is
* currently no usable API to query this , hence let ' s
* emulate this via extended attributes . If extended
* attributes are not supported we ' ll just skip this ,
2014-12-10 20:00:06 +01:00
* and rely solely on mtime / atime / ctime of the file . */
2012-10-16 22:58:07 +02:00
2015-01-08 01:27:13 +01:00
fd_setcrtime ( f - > fd , 0 ) ;
2012-08-13 20:31:10 +02:00
2017-10-03 10:41:51 +02:00
# if HAVE_GCRYPT
2012-08-16 01:51:54 +02:00
/* Try to load the FSPRG state, and if we can't, then
2012-08-17 00:45:18 +02:00
* just don ' t do sealing */
2012-09-11 03:03:36 +02:00
if ( f - > seal ) {
r = journal_file_fss_load ( f ) ;
if ( r < 0 )
f - > seal = false ;
}
2012-08-20 16:51:46 +02:00
# endif
2012-08-13 20:31:10 +02:00
2012-08-16 01:51:54 +02:00
r = journal_file_init_header ( f , template ) ;
if ( r < 0 )
goto fail ;
2012-08-13 20:31:10 +02:00
2015-01-05 02:09:01 +01:00
r = journal_file_fstat ( f ) ;
if ( r < 0 )
2012-08-16 01:51:54 +02:00
goto fail ;
2012-10-16 22:58:07 +02:00
newly_created = true ;
2012-08-16 01:51:54 +02:00
}
2012-08-13 20:31:10 +02:00
2012-08-16 01:51:54 +02:00
if ( f - > last_stat . st_size < ( off_t ) HEADER_SIZE_MIN ) {
2015-10-25 05:09:44 +01:00
r = - ENODATA ;
2012-08-16 01:51:54 +02:00
goto fail ;
}
2012-08-13 20:31:10 +02:00
2017-07-13 07:08:58 +02:00
r = mmap_cache_get ( f - > mmap , f - > cache_fd , f - > prot , CONTEXT_HEADER , true , 0 , PAGE_ALIGN ( sizeof ( Header ) ) , & f - > last_stat , & h , NULL ) ;
2015-03-09 22:46:30 +01:00
if ( r < 0 )
2012-08-16 01:51:54 +02:00
goto fail ;
2012-08-13 20:31:10 +02:00
2014-12-30 20:57:53 +01:00
f - > header = h ;
2012-08-16 01:51:54 +02:00
if ( ! newly_created ) {
2017-11-28 12:40:14 +01:00
set_clear_with_destructor ( deferred_closes , journal_file_close ) ;
2016-02-18 02:37:10 +01:00
2012-08-16 01:51:54 +02:00
r = journal_file_verify_header ( f ) ;
if ( r < 0 )
goto fail ;
}
2012-08-13 20:31:10 +02:00
2017-10-03 10:41:51 +02:00
# if HAVE_GCRYPT
2012-08-16 01:51:54 +02:00
if ( ! newly_created & & f - > writable ) {
2012-08-17 00:45:18 +02:00
r = journal_file_fss_load ( f ) ;
2012-08-16 01:51:54 +02:00
if ( r < 0 )
goto fail ;
}
2012-08-20 16:51:46 +02:00
# endif
2011-10-08 02:20:44 +02:00
if ( f - > writable ) {
2012-07-17 00:36:15 +02:00
if ( metrics ) {
journal_default_metrics ( metrics , f - > fd ) ;
f - > metrics = * metrics ;
} else if ( template )
f - > metrics = template - > metrics ;
2011-10-08 02:20:44 +02:00
r = journal_file_refresh_header ( f ) ;
if ( r < 0 )
goto fail ;
}
2017-10-03 10:41:51 +02:00
# if HAVE_GCRYPT
2012-08-17 00:45:18 +02:00
r = journal_file_hmac_setup ( f ) ;
2012-08-16 23:58:14 +02:00
if ( r < 0 )
goto fail ;
2012-08-20 16:51:46 +02:00
# endif
2012-08-16 23:58:14 +02:00
2011-10-08 02:20:44 +02:00
if ( newly_created ) {
2011-11-08 18:20:03 +01:00
r = journal_file_setup_field_hash_table ( f ) ;
2011-10-08 02:20:44 +02:00
if ( r < 0 )
goto fail ;
2011-11-08 18:20:03 +01:00
r = journal_file_setup_data_hash_table ( f ) ;
2011-10-08 02:20:44 +02:00
if ( r < 0 )
goto fail ;
2012-08-13 20:31:10 +02:00
2017-10-03 10:41:51 +02:00
# if HAVE_GCRYPT
2012-08-13 20:31:10 +02:00
r = journal_file_append_first_tag ( f ) ;
if ( r < 0 )
goto fail ;
2012-08-20 16:51:46 +02:00
# endif
2011-10-08 02:20:44 +02:00
}
2017-07-11 01:24:56 +02:00
if ( mmap_cache_got_sigbus ( f - > mmap , f - > cache_fd ) ) {
2014-12-30 20:57:53 +01:00
r = - EIO ;
goto fail ;
}
2015-12-11 07:42:22 +01:00
if ( template & & template - > post_change_timer ) {
2016-01-26 14:06:35 +01:00
r = journal_file_enable_post_change_timer (
f ,
sd_event_source_get_event ( template - > post_change_timer ) ,
template - > post_change_timer_period ) ;
2015-12-11 07:42:22 +01:00
if ( r < 0 )
goto fail ;
}
2016-05-04 11:26:17 +02:00
/* The file is opened now successfully, thus we take possession of any passed in fd. */
2016-04-25 00:31:24 +02:00
f - > close_fd = true ;
2012-09-21 16:16:39 +02:00
* ret = f ;
2011-10-08 02:20:44 +02:00
return 0 ;
fail :
2017-07-11 01:24:56 +02:00
if ( f - > cache_fd & & mmap_cache_got_sigbus ( f - > mmap , f - > cache_fd ) )
2014-12-30 20:57:53 +01:00
r = - EIO ;
2016-02-20 01:51:41 +01:00
( void ) journal_file_close ( f ) ;
2011-10-08 02:20:44 +02:00
return r ;
}
2011-10-13 05:19:35 +02:00
2016-02-18 02:37:10 +01:00
/* Renames the journal file *f to its archived name, queues it for archiving and opens a fresh file under
 * the original path. *f is replaced by the new file (NULL if opening it failed); the old file is either
 * handed to deferred_closes and set offline, or closed right away. Returns the result of opening the
 * new file, or a negative errno-style code if rotation could not be started. */
int journal_file_rotate(JournalFile **f, bool compress, bool seal, Set *deferred_closes) {
        _cleanup_free_ char *archived = NULL;
        JournalFile *old_file, *new_file = NULL;
        size_t path_len;
        int r;

        assert(f);
        assert(*f);

        old_file = *f;

        if (!old_file->writable)
                return -EINVAL;

        /* Is this a journal file that was passed to us as fd? If so, we synthesized a path name for it,
         * and we refuse rotation, since we don't know the actual path, and couldn't rename the file
         * hence. Likewise, only files carrying the regular suffix can be rotated. */
        if (path_startswith(old_file->path, "/proc/self/fd") ||
            !endswith(old_file->path, ".journal"))
                return -EINVAL;

        /* Strip the 8 character ".journal" suffix and append the archive identifiers */
        path_len = strlen(old_file->path);
        if (asprintf(&archived, "%.*s@" SD_ID128_FORMAT_STR "-%016"PRIx64"-%016"PRIx64".journal",
                     (int) path_len - 8, old_file->path,
                     SD_ID128_FORMAT_VAL(old_file->header->seqnum_id),
                     le64toh(old_file->header->head_entry_seqnum),
                     le64toh(old_file->header->head_entry_realtime)) < 0)
                return -ENOMEM;

        /* Try to rename the file to the archived version. If the file already was deleted, we'll get
         * ENOENT, let's ignore that case. */
        if (rename(old_file->path, archived) < 0 && errno != ENOENT)
                return -errno;

        /* Sync the rename to disk */
        (void) fsync_directory_of_file(old_file->fd);

        /* Set as archive so offlining commits w/state=STATE_ARCHIVED. Previously we would set
         * old_file->header->state to STATE_ARCHIVED directly here, but journal_file_set_offline()
         * short-circuits when state != STATE_ONLINE, which would result in the rotated journal never
         * getting fsync() called before closing. Now we simply queue the archive state by setting an
         * archive bit, leaving the state as STATE_ONLINE so proper offlining occurs. */
        old_file->archive = true;

        /* Currently, btrfs is not very good with our write patterns and fragments heavily. Let's
         * defrag our journal files when we archive them */
        old_file->defrag_on_close = true;

        r = journal_file_open(-1, old_file->path, old_file->flags, old_file->mode, compress, seal, NULL, old_file->mmap, deferred_closes, old_file, &new_file);

        if (deferred_closes && set_put(deferred_closes, old_file) >= 0)
                (void) journal_file_set_offline(old_file, false);
        else
                (void) journal_file_close(old_file);

        *f = new_file;
        return r;
}
2012-03-15 01:13:01 +01:00
int journal_file_open_reliably (
const char * fname ,
int flags ,
mode_t mode ,
2012-08-13 20:31:10 +02:00
bool compress ,
2012-08-17 00:45:18 +02:00
bool seal ,
2012-07-17 00:36:15 +02:00
JournalMetrics * metrics ,
2012-08-16 19:30:36 +02:00
MMapCache * mmap_cache ,
2016-02-18 02:37:10 +01:00
Set * deferred_closes ,
2012-03-15 01:13:01 +01:00
JournalFile * template ,
JournalFile * * ret ) {
int r ;
size_t l ;
2013-06-06 01:15:43 +02:00
_cleanup_free_ char * p = NULL ;
2012-03-15 01:13:01 +01:00
2016-04-25 00:31:24 +02:00
r = journal_file_open ( - 1 , fname , flags , mode , compress , seal , metrics , mmap_cache , deferred_closes , template , ret ) ;
2015-03-09 22:10:33 +01:00
if ( ! IN_SET ( r ,
2017-02-02 00:36:09 +01:00
- EBADMSG , /* Corrupted */
- ENODATA , /* Truncated */
- EHOSTDOWN , /* Other machine */
- EPROTONOSUPPORT , /* Incompatible feature */
- EBUSY , /* Unclean shutdown */
- ESHUTDOWN , /* Already archived */
2015-03-09 22:10:33 +01:00
- EIO , /* IO error, including SIGBUS on mmap */
2016-10-12 18:53:35 +02:00
- EIDRM , /* File has been deleted */
- ETXTBSY ) ) /* File is from the future */
2012-03-15 01:13:01 +01:00
return r ;
if ( ( flags & O_ACCMODE ) = = O_RDONLY )
return r ;
if ( ! ( flags & O_CREAT ) )
return r ;
2012-08-13 20:31:10 +02:00
if ( ! endswith ( fname , " .journal " ) )
return r ;
2012-03-15 01:35:03 +01:00
/* The file is corrupted. Rotate it away and try it again (but only once) */
2012-03-15 01:13:01 +01:00
l = strlen ( fname ) ;
2015-03-09 22:22:50 +01:00
if ( asprintf ( & p , " %.*s@%016 " PRIx64 " -%016 " PRIx64 " .journal~ " ,
2013-10-10 04:13:04 +02:00
( int ) l - 8 , fname ,
2015-03-09 22:22:50 +01:00
now ( CLOCK_REALTIME ) ,
2013-12-22 19:59:12 +01:00
random_u64 ( ) ) < 0 )
2012-03-15 01:13:01 +01:00
return - ENOMEM ;
2015-10-02 23:19:00 +02:00
if ( rename ( fname , p ) < 0 )
2012-03-15 01:13:01 +01:00
return - errno ;
2015-01-06 19:51:03 +01:00
/* btrfs doesn't cope well with our write pattern and
* fragments heavily . Let ' s defrag all files we rotate */
2015-01-08 01:22:29 +01:00
2016-04-29 20:05:44 +02:00
( void ) chattr_path ( p , 0 , FS_NOCOW_FL ) ;
2015-01-06 19:51:03 +01:00
( void ) btrfs_defrag ( p ) ;
2015-10-02 23:19:00 +02:00
log_warning_errno ( r , " File %s corrupted or uncleanly shut down, renaming and replacing. " , fname ) ;
2012-03-15 01:13:01 +01:00
2016-04-25 00:31:24 +02:00
return journal_file_open ( - 1 , fname , flags , mode , compress , seal , metrics , mmap_cache , deferred_closes , template , ret ) ;
2012-03-15 01:13:01 +01:00
}
2011-12-29 15:00:57 +01:00
/* Copies the entry object o, located at offset p in "from", into "to".
 *
 * Every data object referenced by the entry is looked up in "from", its stored hash is compared against
 * the hash recorded in the entry item, it is decompressed if necessary, and then appended to "to". A new
 * entry carrying the original realtime/monotonic timestamps is appended afterwards; seqnum, ret and
 * offset are passed through to journal_file_append_entry_internal().
 *
 * Returns the result of appending the entry, or a negative errno-style code: -EPERM if "to" is not
 * writable, -EBADMSG on a hash mismatch, -E2BIG if a size does not fit into size_t, -ENOMEM if the item
 * array cannot be allocated, -EPROTONOSUPPORT for compressed data without decompression support, -EIO
 * if the mmap cache saw SIGBUS on "to". */
int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
        _cleanup_free_ EntryItem *items = NULL;
        uint64_t i, n;
        uint64_t q, xor_hash = 0;
        int r;
        dual_timestamp ts;

        assert(from);
        assert(to);
        assert(o);
        assert(p);

        if (!to->writable)
                return -EPERM;

        ts.monotonic = le64toh(o->entry.monotonic);
        ts.realtime = le64toh(o->entry.realtime);

        n = journal_file_entry_n_items(o);

        /* The item count is derived from the entry object and is not bounded here, hence don't size a
         * stack allocation with it. Use the heap instead, and refuse counts whose array size would
         * overflow size_t. */
        if (n > SIZE_MAX / sizeof(EntryItem))
                return -E2BIG;

        /* Allocate at least one item, so that we never request a zero-sized block */
        items = new(EntryItem, MAX(1u, n));
        if (!items)
                return -ENOMEM;

        for (i = 0; i < n; i++) {
                uint64_t l, h;
                le64_t le_hash;
                size_t t;
                void *data;
                Object *u;

                q = le64toh(o->entry.items[i].object_offset);
                le_hash = o->entry.items[i].hash;

                /* From here on o refers to the data object, not the entry */
                r = journal_file_move_to_object(from, OBJECT_DATA, q, &o);
                if (r < 0)
                        return r;

                if (le_hash != o->data.hash)
                        return -EBADMSG;

                l = le64toh(o->object.size) - offsetof(Object, data.payload);
                t = (size_t) l;

                /* We hit the limit on 32bit machines */
                if ((uint64_t) t != l)
                        return -E2BIG;

                if (o->object.flags & OBJECT_COMPRESSION_MASK) {
#if HAVE_XZ || HAVE_LZ4
                        size_t rsize = 0;

                        r = decompress_blob(o->object.flags & OBJECT_COMPRESSION_MASK,
                                            o->data.payload, l, &from->compress_buffer, &from->compress_buffer_size, &rsize, 0);
                        if (r < 0)
                                return r;

                        data = from->compress_buffer;
                        l = rsize;
#else
                        return -EPROTONOSUPPORT;
#endif
                } else
                        data = o->data.payload;

                r = journal_file_append_data(to, data, l, &u, &h);
                if (r < 0)
                        return r;

                xor_hash ^= le64toh(u->data.hash);
                items[i].object_offset = htole64(h);
                items[i].hash = u->data.hash;

                /* Move o back to the source entry, so that the next iteration can read its items */
                r = journal_file_move_to_object(from, OBJECT_ENTRY, p, &o);
                if (r < 0)
                        return r;
        }

        r = journal_file_append_entry_internal(to, &ts, xor_hash, items, n, seqnum, ret, offset);

        if (mmap_cache_got_sigbus(to->mmap, to->cache_fd))
                return -EIO;

        return r;
}
2011-12-31 02:31:54 +01:00
2015-10-02 23:21:59 +02:00
/* Resets all metrics in *m to the all-ones marker, i.e. "pick automatic values". */
void journal_reset_metrics(JournalMetrics *m) {
        static const JournalMetrics automatic = {
                .min_use = UINT64_MAX,
                .max_use = UINT64_MAX,
                .min_size = UINT64_MAX,
                .max_size = UINT64_MAX,
                .keep_free = UINT64_MAX,
                .n_max_files = UINT64_MAX,
        };

        assert(m);

        *m = automatic;
}
2011-12-31 02:31:54 +01:00
/* Fills in and sanitizes the metrics in *m, using the size of the file system behind fd for the
 * automatic values.
 *
 * Fields set to (uint64_t) -1 are replaced by defaults: max_use is 10% of the file system (clamped to
 * DEFAULT_MAX_USE_LOWER..DEFAULT_MAX_USE_UPPER), max_size is an eighth of max_use (capped at
 * DEFAULT_MAX_SIZE_UPPER), keep_free is 15% of the file system (capped at DEFAULT_KEEP_FREE_UPPER).
 * Explicitly configured sizes are page-aligned and raised to the minimum journal file size where
 * needed. If the file system size cannot be determined, the fixed fallback defaults are used. The
 * resulting values are logged at debug level. */
void journal_default_metrics(JournalMetrics *m, int fd) {
        char a[FORMAT_BYTES_MAX], b[FORMAT_BYTES_MAX], c[FORMAT_BYTES_MAX], d[FORMAT_BYTES_MAX], e[FORMAT_BYTES_MAX];
        struct statvfs ss;
        uint64_t fs_size;

        assert(m);
        assert(fd >= 0);

        if (fstatvfs(fd, &ss) >= 0)
                /* Widen before multiplying, the native types of these fields may be only 32bit wide */
                fs_size = (uint64_t) ss.f_frsize * (uint64_t) ss.f_blocks;
        else {
                log_debug_errno(errno, "Failed to determine disk size: %m");
                fs_size = 0;
        }

        if (m->max_use == (uint64_t) -1) {

                if (fs_size > 0) {
                        m->max_use = PAGE_ALIGN(fs_size / 10); /* 10% of file system size */

                        if (m->max_use > DEFAULT_MAX_USE_UPPER)
                                m->max_use = DEFAULT_MAX_USE_UPPER;

                        if (m->max_use < DEFAULT_MAX_USE_LOWER)
                                m->max_use = DEFAULT_MAX_USE_LOWER;
                } else
                        m->max_use = DEFAULT_MAX_USE_LOWER;
        } else {
                m->max_use = PAGE_ALIGN(m->max_use);

                /* A non-zero limit must leave room for at least two minimally sized files */
                if (m->max_use != 0 && m->max_use < JOURNAL_FILE_SIZE_MIN*2)
                        m->max_use = JOURNAL_FILE_SIZE_MIN*2;
        }

        if (m->min_use == (uint64_t) -1)
                m->min_use = DEFAULT_MIN_USE;

        if (m->min_use > m->max_use)
                m->min_use = m->max_use;

        if (m->max_size == (uint64_t) -1) {
                m->max_size = PAGE_ALIGN(m->max_use / 8); /* 8 chunks */

                if (m->max_size > DEFAULT_MAX_SIZE_UPPER)
                        m->max_size = DEFAULT_MAX_SIZE_UPPER;
        } else
                m->max_size = PAGE_ALIGN(m->max_size);

        if (m->max_size != 0) {
                if (m->max_size < JOURNAL_FILE_SIZE_MIN)
                        m->max_size = JOURNAL_FILE_SIZE_MIN;

                /* Keep max_use large enough for two files of maximum size */
                if (m->max_use != 0 && m->max_size*2 > m->max_use)
                        m->max_use = m->max_size*2;
        }

        if (m->min_size == (uint64_t) -1)
                m->min_size = JOURNAL_FILE_SIZE_MIN;
        else {
                m->min_size = PAGE_ALIGN(m->min_size);

                if (m->min_size < JOURNAL_FILE_SIZE_MIN)
                        m->min_size = JOURNAL_FILE_SIZE_MIN;

                if (m->max_size != 0 && m->min_size > m->max_size)
                        m->max_size = m->min_size;
        }

        if (m->keep_free == (uint64_t) -1) {

                if (fs_size > 0) {
                        m->keep_free = PAGE_ALIGN(fs_size * 3 / 20); /* 15% of file system size */

                        if (m->keep_free > DEFAULT_KEEP_FREE_UPPER)
                                m->keep_free = DEFAULT_KEEP_FREE_UPPER;

                } else
                        m->keep_free = DEFAULT_KEEP_FREE;
        }

        if (m->n_max_files == (uint64_t) -1)
                m->n_max_files = DEFAULT_N_MAX_FILES;

        log_debug("Fixed min_use=%s max_use=%s max_size=%s min_size=%s keep_free=%s n_max_files=%" PRIu64,
                  format_bytes(a, sizeof(a), m->min_use),
                  format_bytes(b, sizeof(b), m->max_use),
                  format_bytes(c, sizeof(c), m->max_size),
                  format_bytes(d, sizeof(d), m->min_size),
                  format_bytes(e, sizeof(e), m->keep_free),
                  m->n_max_files);
}
2012-06-09 10:32:38 +02:00
/* Reads the realtime timestamps of the first and last entry from the file header. Either output pointer
 * may be NULL, but not both. Returns 1 on success, or -ENOENT if a requested timestamp is recorded as
 * zero in the header. */
int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *to) {
        Header *hdr;

        assert(f);
        assert(f->header);
        assert(from || to);

        hdr = f->header;

        if (from) {
                if (hdr->head_entry_realtime == 0)
                        return -ENOENT;

                *from = le64toh(hdr->head_entry_realtime);
        }

        if (to) {
                if (hdr->tail_entry_realtime == 0)
                        return -ENOENT;

                *to = le64toh(hdr->tail_entry_realtime);
        }

        return 1;
}
/* Determines the monotonic timestamps of the first and last entry linked from the data object found for
 * boot_id. Either output pointer may be NULL, but not both. Returns 1 on success, 0 if there is nothing
 * to report (lookup returned 0, or the data object references no entries), or a negative errno-style
 * code on failure. */
int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot_id, usec_t *from, usec_t *to) {
        Object *obj;
        uint64_t data_offset;
        int r;

        assert(f);
        assert(from || to);

        r = find_data_object_by_boot_id(f, boot_id, &obj, &data_offset);
        if (r <= 0)
                return r;

        if (le64toh(obj->data.n_entries) == 0)
                return 0;

        if (from) {
                /* The first entry is referenced directly from the data object */
                r = journal_file_move_to_object(f, OBJECT_ENTRY, le64toh(obj->data.entry_offset), &obj);
                if (r < 0)
                        return r;

                *from = le64toh(obj->entry.monotonic);
        }

        if (to) {
                /* obj might point at an entry by now, hence look up the data object again */
                r = journal_file_move_to_object(f, OBJECT_DATA, data_offset, &obj);
                if (r < 0)
                        return r;

                r = generic_array_get_plus_one(f,
                                               le64toh(obj->data.entry_offset),
                                               le64toh(obj->data.entry_array_offset),
                                               le64toh(obj->data.n_entries) - 1,
                                               &obj, NULL);
                if (r <= 0)
                        return r;

                *to = le64toh(obj->entry.monotonic);
        }

        return 1;
}
2012-07-16 22:24:02 +02:00
2012-10-16 22:58:07 +02:00
/* Tells whether the journal file should be rotated: because its header predates the current layout,
 * because one of its hash tables is more than 75% full, because it has data objects but no field
 * objects, or because its first entry is older than max_file_usec (0 disables the age check). */
bool journal_file_rotate_suggested(JournalFile *f, usec_t max_file_usec) {
        assert(f);
        assert(f->header);

        /* If we gained new header fields we gained new features, hence suggest a rotation */
        if (le64toh(f->header->header_size) < sizeof(Header)) {
                log_debug("%s uses an outdated header, suggesting rotation.", f->path);
                return true;
        }

        /* Let's check if the hash tables grew over a certain fill level (75%, borrowing this value
         * from Java's hash table implementation), and if so suggest a rotation. To calculate the fill
         * level we need the n_data field, which only exists in newer versions. */

        if (JOURNAL_HEADER_CONTAINS(f->header, n_data)) {
                uint64_t used = le64toh(f->header->n_data);
                uint64_t slots = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);

                if (used * 4ULL > slots * 3ULL) {
                        log_debug("Data hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items, %llu file size, %"PRIu64" bytes per hash table item), suggesting rotation.",
                                  f->path,
                                  100.0 * (double) used / ((double) slots),
                                  used,
                                  slots,
                                  (unsigned long long) f->last_stat.st_size,
                                  f->last_stat.st_size / used);
                        return true;
                }
        }

        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields)) {
                uint64_t used = le64toh(f->header->n_fields);
                uint64_t slots = le64toh(f->header->field_hash_table_size) / sizeof(HashItem);

                if (used * 4ULL > slots * 3ULL) {
                        log_debug("Field hash table of %s has a fill level at %.1f (%"PRIu64" of %"PRIu64" items), suggesting rotation.",
                                  f->path,
                                  100.0 * (double) used / ((double) slots),
                                  used,
                                  slots);
                        return true;
                }
        }

        /* Are the data objects properly indexed by field objects? */
        if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
            JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
            le64toh(f->header->n_data) > 0 &&
            le64toh(f->header->n_fields) == 0)
                return true;

        if (max_file_usec > 0) {
                usec_t first, current;

                first = le64toh(f->header->head_entry_realtime);
                current = now(CLOCK_REALTIME);

                if (first > 0 && current > first + max_file_usec)
                        return true;
        }

        return false;
}