diff --git a/src/basic/device-nodes.c b/src/basic/device-nodes.c index 5fcdf24bd2..5ebe5b2483 100644 --- a/src/basic/device-nodes.c +++ b/src/basic/device-nodes.c @@ -28,7 +28,7 @@ int encode_devnode_name(const char *str, char *str_enc, size_t len) { for (i = 0, j = 0; str[i] != '\0'; i++) { int seqlen; - seqlen = utf8_encoded_valid_unichar(&str[i]); + seqlen = utf8_encoded_valid_unichar(str + i, (size_t) -1); if (seqlen > 1) { if (len-j < (size_t)seqlen) diff --git a/src/basic/utf8.c b/src/basic/utf8.c index ffe0adb33c..090c69d140 100644 --- a/src/basic/utf8.c +++ b/src/basic/utf8.c @@ -128,14 +128,14 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) { assert(str); - for (p = str; length;) { + for (p = str; length > 0;) { int encoded_len, r; char32_t val; - encoded_len = utf8_encoded_valid_unichar(p); - if (encoded_len < 0 || - (size_t) encoded_len > length) + encoded_len = utf8_encoded_valid_unichar(p, length); + if (encoded_len < 0) return false; + assert(encoded_len > 0 && (size_t) encoded_len <= length); r = utf8_encoded_to_unichar(p, &val); if (r < 0 || @@ -159,7 +159,7 @@ char *utf8_is_valid(const char *str) { while (*p) { int len; - len = utf8_encoded_valid_unichar(p); + len = utf8_encoded_valid_unichar(p, (size_t) -1); if (len < 0) return NULL; @@ -181,7 +181,7 @@ char *utf8_escape_invalid(const char *str) { while (*str) { int len; - len = utf8_encoded_valid_unichar(str); + len = utf8_encoded_valid_unichar(str, (size_t) -1); if (len > 0) { s = mempcpy(s, str, len); str += len; @@ -208,7 +208,7 @@ char *utf8_escape_non_printable(const char *str) { while (*str) { int len; - len = utf8_encoded_valid_unichar(str); + len = utf8_encoded_valid_unichar(str, (size_t) -1); if (len > 0) { if (utf8_is_printable(str, len)) { s = mempcpy(s, str, len); @@ -452,17 +452,24 @@ static int utf8_unichar_to_encoded_len(char32_t unichar) { } /* validate one encoded unicode char and return its length */ -int utf8_encoded_valid_unichar(const char *str) { +int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) { char32_t unichar; size_t len, i; int r; assert(str); + assert(length > 0); + + /* We read until NUL, at most length bytes. (size_t) -1 may be used to disable the length check. */ len = utf8_encoded_expected_len(str[0]); if (len == 0) return -EINVAL; + /* Do we have a truncated multi-byte character? */ + if (len > length) + return -EINVAL; + /* ascii is valid */ if (len == 1) return 1; @@ -495,7 +502,7 @@ size_t utf8_n_codepoints(const char *str) { while (*str != 0) { int k; - k = utf8_encoded_valid_unichar(str); + k = utf8_encoded_valid_unichar(str, (size_t) -1); if (k < 0) return (size_t) -1; diff --git a/src/basic/utf8.h b/src/basic/utf8.h index 628456936e..6df70921db 100644 --- a/src/basic/utf8.h +++ b/src/basic/utf8.h @@ -32,7 +32,7 @@ char16_t *utf8_to_utf16(const char *s, size_t length); size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */ -int utf8_encoded_valid_unichar(const char *str); +int utf8_encoded_valid_unichar(const char *str, size_t length); int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar); static inline bool utf16_is_surrogate(char16_t c) { diff --git a/src/libudev/libudev-util.c b/src/libudev/libudev-util.c index 7e21719fbf..37660d0313 100644 --- a/src/libudev/libudev-util.c +++ b/src/libudev/libudev-util.c @@ -175,7 +175,7 @@ size_t util_replace_chars(char *str, const char *white) { } /* accept valid utf8 */ - len = utf8_encoded_valid_unichar(&str[i]); + len = utf8_encoded_valid_unichar(str + i, (size_t) -1); if (len > 1) { i += len; continue; diff --git a/src/shared/ask-password-api.c b/src/shared/ask-password-api.c index 072bf72c56..bc5e1cf669 100644 --- a/src/shared/ask-password-api.c +++ b/src/shared/ask-password-api.c @@ -385,13 +385,13 @@ int ask_password_tty( if (!(flags & ASK_PASSWORD_SILENT)) backspace_chars(ttyfd, 1); - /* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one - * begins */ + /* Remove a full UTF-8 codepoint from the end. For that, figure out where the + * last one begins */ q = 0; for (;;) { size_t z; - z = utf8_encoded_valid_unichar(passphrase + q); + z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1); if (z == 0) { q = (size_t) -1; /* Invalid UTF8! */ break; @@ -410,8 +410,8 @@ int ask_password_tty( flags |= ASK_PASSWORD_SILENT; - /* There are two ways to enter silent mode. Either by pressing backspace as first key - * (and only as first key), or ... */ + /* There are two ways to enter silent mode. Either by pressing backspace as + * first key (and only as first key), or ... */ if (ttyfd >= 0) (void) loop_write(ttyfd, "(no echo) ", 10, false); @@ -440,7 +440,7 @@ int ask_password_tty( if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) { /* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */ - n = utf8_encoded_valid_unichar(passphrase + codepoint); + n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1); if (n >= 0) { codepoint = p; (void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false); diff --git a/src/shared/json.c b/src/shared/json.c index 3786ff12b8..7ae1ffb1b2 100644 --- a/src/shared/json.c +++ b/src/shared/json.c @@ -1756,7 +1756,6 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s assert(s || n == 0); while (n > 0) { - if (*s == '\n') { (*line)++; *column = 1; @@ -1765,7 +1764,7 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s else { int w; - w = utf8_encoded_valid_unichar(s); + w = utf8_encoded_valid_unichar(s, n); if (w < 0) /* count invalid unichars as normal characters */ w = 1; else if ((size_t) w > n) /* never read more than the specified number of characters */ @@ -1930,7 +1929,7 @@ static int json_parse_string(const char **p, char **ret) { continue; } - len = utf8_encoded_valid_unichar(c); + len = utf8_encoded_valid_unichar(c, (size_t) -1); if (len < 0) return len; diff --git a/src/test/test-utf8.c b/src/test/test-utf8.c index 9849530ac8..d1e48da2a6 100644 --- a/src/test/test-utf8.c +++ b/src/test/test-utf8.c @@ -36,11 +36,21 @@ static void test_ascii_is_valid_n(void) { } static void test_utf8_encoded_valid_unichar(void) { - assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3); - assert_se(utf8_encoded_valid_unichar("\302\256") == 2); - assert_se(utf8_encoded_valid_unichar("a") == 1); - assert_se(utf8_encoded_valid_unichar("\341\204") < 0); - assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0); + assert_se(utf8_encoded_valid_unichar("\342\204\242", 1) == -EINVAL); /* truncated */ + assert_se(utf8_encoded_valid_unichar("\342\204\242", 2) == -EINVAL); /* truncated */ + assert_se(utf8_encoded_valid_unichar("\342\204\242", 3) == 3); + assert_se(utf8_encoded_valid_unichar("\342\204\242", 4) == 3); + assert_se(utf8_encoded_valid_unichar("\302\256", 1) == -EINVAL); /* truncated */ + assert_se(utf8_encoded_valid_unichar("\302\256", 2) == 2); + assert_se(utf8_encoded_valid_unichar("\302\256", 3) == 2); + assert_se(utf8_encoded_valid_unichar("\302\256", (size_t) -1) == 2); + assert_se(utf8_encoded_valid_unichar("a", 1) == 1); + assert_se(utf8_encoded_valid_unichar("a", 2) == 1); + assert_se(utf8_encoded_valid_unichar("\341\204", 1) == -EINVAL); /* truncated, potentially valid */ + assert_se(utf8_encoded_valid_unichar("\341\204", 2) == -EINVAL); /* truncated, potentially valid */ + assert_se(utf8_encoded_valid_unichar("\341\204", 3) == -EINVAL); + assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 4) == -EINVAL); + assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 5) == -EINVAL); } static void test_utf8_escaping(void) { diff --git a/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 b/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 new file mode 100644 index 0000000000..3ddac6b895 Binary files /dev/null and b/test/fuzz/fuzz-journal-remote/oss-fuzz-9341 differ