From c3950a9bbe0d6f8ddd735a6f902281a3b2b4ca15 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 8 Jun 2018 11:41:08 +0200 Subject: [PATCH 1/4] journal: optimize the common case where whitespace stripping is not needed If we can just use the original data, let's do that. --- src/journal/journald-syslog.c | 48 ++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c index 9dea116722..ea50bd766a 100644 --- a/src/journal/journald-syslog.c +++ b/src/journal/journald-syslog.c @@ -225,7 +225,7 @@ size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid) return e; } -static void syslog_skip_date(char **buf) { +static void syslog_skip_date(const char **buf) { enum { LETTER, SPACE, @@ -245,7 +245,7 @@ static void syslog_skip_date(char **buf) { SPACE }; - char *p; + const char *p; unsigned i; assert(buf); @@ -297,23 +297,28 @@ static void syslog_skip_date(char **buf) { void server_process_syslog_message( Server *s, const char *buf, - size_t buf_len, + size_t raw_len, const struct ucred *ucred, const struct timeval *tv, const char *label, size_t label_len) { char syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)], - syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)], *msg; - const char *message = NULL, *syslog_identifier = NULL, *syslog_pid = NULL; + syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)]; + const char *message = NULL, *syslog_identifier = NULL, *syslog_pid = NULL, *msg; _cleanup_free_ char *identifier = NULL, *pid = NULL; int priority = LOG_USER | LOG_INFO, r; ClientContext *context = NULL; struct iovec *iovec; - size_t n = 0, m, i; + size_t n = 0, m, i, leading_ws; assert(s); assert(buf); + /* The message cannot be empty. */ + assert(raw_len > 0); + /* The buffer NUL-terminated and can be used a string. raw_len is the length + * without the terminating NUL byte, the buffer is actually one bigger. */ + assert(buf[raw_len] == '\0'); if (ucred && pid_is_valid(ucred->pid)) { r = client_context_get(s, ucred->pid, ucred, label, label_len, NULL, &context); @@ -321,26 +326,35 @@ void server_process_syslog_message( log_warning_errno(r, "Failed to retrieve credentials for PID " PID_FMT ", ignoring: %m", ucred->pid); } - /* We are creating copy of the message because we want to forward original message verbatim to the legacy - syslog implementation */ - for (i = buf_len; i > 0; i--) + /* We are creating a copy of the message because we want to forward the original message + verbatim to the legacy syslog implementation */ + for (i = raw_len; i > 0; i--) if (!strchr(WHITESPACE, buf[i-1])) break; - msg = newa(char, i + 1); - *((char *) mempcpy(msg, buf, i)) = 0; - msg = skip_leading_chars(msg, WHITESPACE); + leading_ws = strspn(buf, WHITESPACE); - syslog_parse_priority((const char **)&msg, &priority, true); + if (i == raw_len) + /* Nice! No need to strip anything on the end, let's optimize this a bit */ + msg = buf + leading_ws; + else { + char *t; + + msg = t = newa(char, i - leading_ws + 1); + memcpy(t, buf + leading_ws, i - leading_ws); + t[i - leading_ws] = 0; + } + + syslog_parse_priority(&msg, &priority, true); if (!client_context_test_priority(context, priority)) return; - if (s->forward_to_syslog) - forward_syslog_raw(s, priority, buf, buf_len, ucred, tv); - syslog_skip_date(&msg); - syslog_parse_identifier((const char**)&msg, &identifier, &pid); + syslog_parse_identifier(&msg, &identifier, &pid); + + if (s->forward_to_syslog) + forward_syslog_raw(s, priority, buf, raw_len, ucred, tv); if (s->forward_to_kmsg) server_forward_kmsg(s, priority, identifier, msg, ucred); From df8701a3f275aee7f898041285aeb24947c1e451 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Fri, 8 Jun 2018 13:52:22 +0200 Subject: [PATCH 2/4] journal: store the original syslog input as SYSLOG_RAW= This allows the original stream to be recreated and/or verified. The new field is written if any stripping was done or if the input message contained embeded NULs. $ printf '<13>Sep 15 15:07:58 HOST: x\0y' | nc -w1 -u -U /run/systemd/journal/dev-log $ journalctl -o json-pretty ... { ... "MESSAGE" : "x", "SYSLOG_RAW" : [ 60, 49, 51, 62, 83, 101, 112, 32, 49, 53, 32, 49, 53, 58, 48, 55, 58, 53, 56, 32, 72, 79, 83, 84, 58, 32, 120, 0, 121 ] } $ journalctl -o export ... | cat -v ... MESSAGE=x SYSLOG_RAW ^]^@^@^@^@^@^@^@<13>Sep 15 15:07:58 HOST: x^@y This mostly fixes #4863. --- man/systemd.journal-fields.xml | 15 +++++++++++++++ src/journal/journald-syslog.c | 21 ++++++++++++++++++--- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/man/systemd.journal-fields.xml b/man/systemd.journal-fields.xml index c079274c32..aeb5367d24 100644 --- a/man/systemd.journal-fields.xml +++ b/man/systemd.journal-fields.xml @@ -112,7 +112,22 @@ see program_invocation_short_name3.) + + + SYSLOG_RAW= + + The original contents of the syslog line as received in the syslog + datagram. This field is only included if the MESSAGE= + field was modified compared to the original payload. This happens when the + message contains leading or trailing whitespace (trailing and leading + whitespace is stripped), or it contains an embedded + NUL byte (the NUL byte and + anything after it is not included). Thus, the original syslog line is + either stored as SYSLOG_RAW= or it can be recreated + based on the stored priority, timestamp, hostname, and the message payload + stored in MESSAGE=. + diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c index ea50bd766a..e2794ec30f 100644 --- a/src/journal/journald-syslog.c +++ b/src/journal/journald-syslog.c @@ -311,6 +311,7 @@ void server_process_syslog_message( ClientContext *context = NULL; struct iovec *iovec; size_t n = 0, m, i, leading_ws; + bool store_raw; assert(s); assert(buf); @@ -345,6 +346,10 @@ void server_process_syslog_message( t[i - leading_ws] = 0; } + /* We will add the SYSLOG_RAW= field when we stripped anything + * _or_ if the input message contained NUL bytes. */ + store_raw = msg != buf || strlen(msg) != raw_len; + syslog_parse_priority(&msg, &priority, true); if (!client_context_test_priority(context, priority)) @@ -365,7 +370,7 @@ void server_process_syslog_message( if (s->forward_to_wall) server_forward_wall(s, priority, identifier, msg, ucred); - m = N_IOVEC_META_FIELDS + 6 + client_context_extra_fields_n_iovec(context); + m = N_IOVEC_META_FIELDS + 7 + client_context_extra_fields_n_iovec(context); iovec = newa(struct iovec, m); iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog"); @@ -389,8 +394,18 @@ void server_process_syslog_message( } message = strjoina("MESSAGE=", msg); - if (message) - iovec[n++] = IOVEC_MAKE_STRING(message); + iovec[n++] = IOVEC_MAKE_STRING(message); + + if (store_raw) { + const size_t hlen = strlen("SYSLOG_RAW="); + char *t; + + t = newa(char, hlen + raw_len); + memcpy(t, "SYSLOG_RAW=", hlen); + memcpy(t + hlen, buf, raw_len); + + iovec[n++] = IOVEC_MAKE(t, hlen + raw_len); + } server_dispatch_message(s, iovec, n, m, context, tv, priority, 0); } From 855a86a3497178b368d3b04df9503e7051f85b8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Wed, 4 Jul 2018 23:54:43 +0200 Subject: [PATCH 3/4] systemctl: fix assert for failed mktime conversion mktime returns -1 on error, so checking for != 0 is not useful. --- src/systemctl/systemctl.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/systemctl/systemctl.c b/src/systemctl/systemctl.c index d673223a27..34b2969f6a 100644 --- a/src/systemctl/systemctl.c +++ b/src/systemctl/systemctl.c @@ -7823,7 +7823,8 @@ static int parse_shutdown_time_spec(const char *t, usec_t *_u) { tm.tm_min = (int) minute; tm.tm_sec = 0; - assert_se(s = mktime(&tm)); + s = mktime(&tm); + assert(s >= 0); *_u = (usec_t) s * USEC_PER_SEC; From 7c30c3c44f84421bbe510fb3339378e075b3ab48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zbigniew=20J=C4=99drzejewski-Szmek?= Date: Thu, 5 Jul 2018 00:17:55 +0200 Subject: [PATCH 4/4] journal: store the original timestamp as SYSLOG_TIMESTAMP= This is useful if someone wants to recreate the original syslog datagram. We already include timestamp information as _SOURCE_REALTIME_TIMESTAMP=, and in normal use that timestamp, converted back to the form used by syslog (Mth dd HH:MM:SS) would usually give the value. But there are various circumstances where this might not be true. Most obviously, if the datagram is sent a bit later after being prepared, the time is rounded to the nearest second, and it might be off. This is especially bad around New Year when the syslog timestamp wraps around. Then the same timezone and locale need to be used to recreate the original timestamp. In the end doing this reliably is complicated, and it seems much easier to just unconditionally include the original timestamp. If the original timestamp cannot be located, we store the full log line. This way, it should be always possible to recreate the original input. Example: MESSAGE=x SYSLOG_TIMESTAMP=Sep 15 15:07:58 SYSLOG_RAW ^]^@^@^@^@^@^@^@<13>Sep 15 15:07:58 HOST: x^@y _PID=3318 _SOURCE_REALTIME_TIMESTAMP=1530743976393553 Fixes #2398. --- man/systemd.journal-fields.xml | 23 +++++++------ src/journal/journald-syslog.c | 63 ++++++++++++++++++++-------------- 2 files changed, 50 insertions(+), 36 deletions(-) diff --git a/man/systemd.journal-fields.xml b/man/systemd.journal-fields.xml index aeb5367d24..942c6ba91a 100644 --- a/man/systemd.journal-fields.xml +++ b/man/systemd.journal-fields.xml @@ -103,13 +103,13 @@ SYSLOG_FACILITY= SYSLOG_IDENTIFIER= SYSLOG_PID= + SYSLOG_TIMESTAMP= - Syslog compatibility fields containing the facility - (formatted as decimal string), the identifier string (i.e. - "tag"), and the client PID. (Note that the tag is usually - derived from glibc's - program_invocation_short_name variable, - see + Syslog compatibility fields containing the facility (formatted as + decimal string), the identifier string (i.e. "tag"), the client PID, and + the timestamp as specified in the original datagram. (Note that the tag is + usually derived from glibc's + program_invocation_short_name variable, see program_invocation_short_name3.) @@ -119,14 +119,17 @@ The original contents of the syslog line as received in the syslog datagram. This field is only included if the MESSAGE= - field was modified compared to the original payload. This happens when the - message contains leading or trailing whitespace (trailing and leading + field was modified compared to the original payload or the timestamp could + not be located properly and is not included in + SYSLOG_TIMESTAMP=. Message truncation occurs when when + the message contains leading or trailing whitespace (trailing and leading whitespace is stripped), or it contains an embedded NUL byte (the NUL byte and anything after it is not included). Thus, the original syslog line is either stored as SYSLOG_RAW= or it can be recreated - based on the stored priority, timestamp, hostname, and the message payload - stored in MESSAGE=. + based on the stored priority and facility, timestamp, identifier, and the + message payload in MESSAGE=. + diff --git a/src/journal/journald-syslog.c b/src/journal/journald-syslog.c index e2794ec30f..8ad204c38b 100644 --- a/src/journal/journald-syslog.c +++ b/src/journal/journald-syslog.c @@ -225,7 +225,7 @@ size_t syslog_parse_identifier(const char **buf, char **identifier, char **pid) return e; } -static void syslog_skip_date(const char **buf) { +static int syslog_skip_timestamp(const char **buf) { enum { LETTER, SPACE, @@ -245,24 +245,21 @@ static void syslog_skip_date(const char **buf) { SPACE }; - const char *p; + const char *p, *t; unsigned i; assert(buf); assert(*buf); - p = *buf; - - for (i = 0; i < ELEMENTSOF(sequence); i++, p++) { - + for (i = 0, p = *buf; i < ELEMENTSOF(sequence); i++, p++) { if (!*p) - return; + return 0; switch (sequence[i]) { case SPACE: if (*p != ' ') - return; + return 0; break; case SPACE_OR_NUMBER: @@ -272,26 +269,28 @@ static void syslog_skip_date(const char **buf) { _fallthrough_; case NUMBER: if (*p < '0' || *p > '9') - return; + return 0; break; case LETTER: if (!(*p >= 'A' && *p <= 'Z') && !(*p >= 'a' && *p <= 'z')) - return; + return 0; break; case COLON: if (*p != ':') - return; + return 0; break; } } + t = *buf; *buf = p; + return p - t; } void server_process_syslog_message( @@ -303,14 +302,14 @@ void server_process_syslog_message( const char *label, size_t label_len) { - char syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)], - syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)]; - const char *message = NULL, *syslog_identifier = NULL, *syslog_pid = NULL, *msg; + char *t, syslog_priority[sizeof("PRIORITY=") + DECIMAL_STR_MAX(int)], + syslog_facility[sizeof("SYSLOG_FACILITY=") + DECIMAL_STR_MAX(int)]; + const char *msg, *syslog_ts, *a; _cleanup_free_ char *identifier = NULL, *pid = NULL; int priority = LOG_USER | LOG_INFO, r; ClientContext *context = NULL; struct iovec *iovec; - size_t n = 0, m, i, leading_ws; + size_t n = 0, m, i, leading_ws, syslog_ts_len; bool store_raw; assert(s); @@ -339,8 +338,6 @@ void server_process_syslog_message( /* Nice! No need to strip anything on the end, let's optimize this a bit */ msg = buf + leading_ws; else { - char *t; - msg = t = newa(char, i - leading_ws + 1); memcpy(t, buf + leading_ws, i - leading_ws); t[i - leading_ws] = 0; @@ -355,7 +352,12 @@ void server_process_syslog_message( if (!client_context_test_priority(context, priority)) return; - syslog_skip_date(&msg); + syslog_ts = msg; + syslog_ts_len = syslog_skip_timestamp(&msg); + if (syslog_ts_len == 0) + /* We failed to parse the full timestamp, store the raw message too */ + store_raw = true; + syslog_parse_identifier(&msg, &identifier, &pid); if (s->forward_to_syslog) @@ -370,7 +372,7 @@ void server_process_syslog_message( if (s->forward_to_wall) server_forward_wall(s, priority, identifier, msg, ucred); - m = N_IOVEC_META_FIELDS + 7 + client_context_extra_fields_n_iovec(context); + m = N_IOVEC_META_FIELDS + 8 + client_context_extra_fields_n_iovec(context); iovec = newa(struct iovec, m); iovec[n++] = IOVEC_MAKE_STRING("_TRANSPORT=syslog"); @@ -384,21 +386,30 @@ void server_process_syslog_message( } if (identifier) { - syslog_identifier = strjoina("SYSLOG_IDENTIFIER=", identifier); - iovec[n++] = IOVEC_MAKE_STRING(syslog_identifier); + a = strjoina("SYSLOG_IDENTIFIER=", identifier); + iovec[n++] = IOVEC_MAKE_STRING(a); } if (pid) { - syslog_pid = strjoina("SYSLOG_PID=", pid); - iovec[n++] = IOVEC_MAKE_STRING(syslog_pid); + a = strjoina("SYSLOG_PID=", pid); + iovec[n++] = IOVEC_MAKE_STRING(a); } - message = strjoina("MESSAGE=", msg); - iovec[n++] = IOVEC_MAKE_STRING(message); + if (syslog_ts_len > 0) { + const size_t hlen = strlen("SYSLOG_TIMESTAMP="); + + t = newa(char, hlen + raw_len); + memcpy(t, "SYSLOG_TIMESTAMP=", hlen); + memcpy(t + hlen, syslog_ts, syslog_ts_len); + + iovec[n++] = IOVEC_MAKE(t, hlen + syslog_ts_len); + } + + a = strjoina("MESSAGE=", msg); + iovec[n++] = IOVEC_MAKE_STRING(a); if (store_raw) { const size_t hlen = strlen("SYSLOG_RAW="); - char *t; t = newa(char, hlen + raw_len); memcpy(t, "SYSLOG_RAW=", hlen);