basic/utf8: do not read past end of string when looking for a multi-byte character

Fixes https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=9341.
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2019-02-26 12:37:40 +01:00
parent 84319aa76e
commit 92e068b465
8 changed files with 42 additions and 26 deletions

View File

@ -28,7 +28,7 @@ int encode_devnode_name(const char *str, char *str_enc, size_t len) {
for (i = 0, j = 0; str[i] != '\0'; i++) {
int seqlen;
seqlen = utf8_encoded_valid_unichar(&str[i]);
seqlen = utf8_encoded_valid_unichar(str + i, (size_t) -1);
if (seqlen > 1) {
if (len-j < (size_t)seqlen)

View File

@ -128,14 +128,14 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) {
assert(str);
for (p = str; length;) {
for (p = str; length > 0;) {
int encoded_len, r;
char32_t val;
encoded_len = utf8_encoded_valid_unichar(p);
if (encoded_len < 0 ||
(size_t) encoded_len > length)
encoded_len = utf8_encoded_valid_unichar(p, length);
if (encoded_len < 0)
return false;
assert(encoded_len > 0 && (size_t) encoded_len <= length);
r = utf8_encoded_to_unichar(p, &val);
if (r < 0 ||
@ -159,7 +159,7 @@ char *utf8_is_valid(const char *str) {
while (*p) {
int len;
len = utf8_encoded_valid_unichar(p);
len = utf8_encoded_valid_unichar(p, (size_t) -1);
if (len < 0)
return NULL;
@ -181,7 +181,7 @@ char *utf8_escape_invalid(const char *str) {
while (*str) {
int len;
len = utf8_encoded_valid_unichar(str);
len = utf8_encoded_valid_unichar(str, (size_t) -1);
if (len > 0) {
s = mempcpy(s, str, len);
str += len;
@ -208,7 +208,7 @@ char *utf8_escape_non_printable(const char *str) {
while (*str) {
int len;
len = utf8_encoded_valid_unichar(str);
len = utf8_encoded_valid_unichar(str, (size_t) -1);
if (len > 0) {
if (utf8_is_printable(str, len)) {
s = mempcpy(s, str, len);
@ -452,17 +452,24 @@ static int utf8_unichar_to_encoded_len(char32_t unichar) {
}
/* Validate one UTF-8 encoded Unicode character and return its length in bytes, or -EINVAL if the sequence is invalid or truncated */
int utf8_encoded_valid_unichar(const char *str) {
int utf8_encoded_valid_unichar(const char *str, size_t length /* bytes */) {
char32_t unichar;
size_t len, i;
int r;
assert(str);
assert(length > 0);
/* We read until NUL, at most length bytes. (size_t) -1 may be used to disable the length check. */
len = utf8_encoded_expected_len(str[0]);
if (len == 0)
return -EINVAL;
/* Do we have a truncated multi-byte character? */
if (len > length)
return -EINVAL;
/* ascii is valid */
if (len == 1)
return 1;
@ -495,7 +502,7 @@ size_t utf8_n_codepoints(const char *str) {
while (*str != 0) {
int k;
k = utf8_encoded_valid_unichar(str);
k = utf8_encoded_valid_unichar(str, (size_t) -1);
if (k < 0)
return (size_t) -1;

View File

@ -32,7 +32,7 @@ char16_t *utf8_to_utf16(const char *s, size_t length);
size_t char16_strlen(const char16_t *s); /* returns the number of 16bit words in the string (not bytes!) */
int utf8_encoded_valid_unichar(const char *str);
int utf8_encoded_valid_unichar(const char *str, size_t length);
int utf8_encoded_to_unichar(const char *str, char32_t *ret_unichar);
static inline bool utf16_is_surrogate(char16_t c) {

View File

@ -175,7 +175,7 @@ size_t util_replace_chars(char *str, const char *white) {
}
/* accept valid utf8 */
len = utf8_encoded_valid_unichar(&str[i]);
len = utf8_encoded_valid_unichar(str + i, (size_t) -1);
if (len > 1) {
i += len;
continue;

View File

@ -385,13 +385,13 @@ int ask_password_tty(
if (!(flags & ASK_PASSWORD_SILENT))
backspace_chars(ttyfd, 1);
/* Remove a full UTF-8 codepoint from the end. For that, figure out where the last one
* begins */
/* Remove a full UTF-8 codepoint from the end. For that, figure out where the
* last one begins */
q = 0;
for (;;) {
size_t z;
z = utf8_encoded_valid_unichar(passphrase + q);
z = utf8_encoded_valid_unichar(passphrase + q, (size_t) -1);
if (z == 0) {
q = (size_t) -1; /* Invalid UTF8! */
break;
@ -410,8 +410,8 @@ int ask_password_tty(
flags |= ASK_PASSWORD_SILENT;
/* There are two ways to enter silent mode. Either by pressing backspace as first key
* (and only as first key), or ... */
/* There are two ways to enter silent mode. Either by pressing backspace as
* first key (and only as first key), or ... */
if (ttyfd >= 0)
(void) loop_write(ttyfd, "(no echo) ", 10, false);
@ -440,7 +440,7 @@ int ask_password_tty(
if (!(flags & ASK_PASSWORD_SILENT) && ttyfd >= 0) {
/* Check if we got a complete UTF-8 character now. If so, let's output one '*'. */
n = utf8_encoded_valid_unichar(passphrase + codepoint);
n = utf8_encoded_valid_unichar(passphrase + codepoint, (size_t) -1);
if (n >= 0) {
codepoint = p;
(void) loop_write(ttyfd, (flags & ASK_PASSWORD_ECHO) ? &c : "*", 1, false);

View File

@ -1756,7 +1756,6 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
assert(s || n == 0);
while (n > 0) {
if (*s == '\n') {
(*line)++;
*column = 1;
@ -1765,7 +1764,7 @@ static void inc_lines_columns(unsigned *line, unsigned *column, const char *s, s
else {
int w;
w = utf8_encoded_valid_unichar(s);
w = utf8_encoded_valid_unichar(s, n);
if (w < 0) /* count invalid unichars as normal characters */
w = 1;
else if ((size_t) w > n) /* never read more than the specified number of characters */
@ -1930,7 +1929,7 @@ static int json_parse_string(const char **p, char **ret) {
continue;
}
len = utf8_encoded_valid_unichar(c);
len = utf8_encoded_valid_unichar(c, (size_t) -1);
if (len < 0)
return len;

View File

@ -36,11 +36,21 @@ static void test_ascii_is_valid_n(void) {
}
static void test_utf8_encoded_valid_unichar(void) {
assert_se(utf8_encoded_valid_unichar("\342\204\242") == 3);
assert_se(utf8_encoded_valid_unichar("\302\256") == 2);
assert_se(utf8_encoded_valid_unichar("a") == 1);
assert_se(utf8_encoded_valid_unichar("\341\204") < 0);
assert_se(utf8_encoded_valid_unichar("\341\204\341\204") < 0);
assert_se(utf8_encoded_valid_unichar("\342\204\242", 1) == -EINVAL); /* truncated */
assert_se(utf8_encoded_valid_unichar("\342\204\242", 2) == -EINVAL); /* truncated */
assert_se(utf8_encoded_valid_unichar("\342\204\242", 3) == 3);
assert_se(utf8_encoded_valid_unichar("\342\204\242", 4) == 3);
assert_se(utf8_encoded_valid_unichar("\302\256", 1) == -EINVAL); /* truncated */
assert_se(utf8_encoded_valid_unichar("\302\256", 2) == 2);
assert_se(utf8_encoded_valid_unichar("\302\256", 3) == 2);
assert_se(utf8_encoded_valid_unichar("\302\256", (size_t) -1) == 2);
assert_se(utf8_encoded_valid_unichar("a", 1) == 1);
assert_se(utf8_encoded_valid_unichar("a", 2) == 1);
assert_se(utf8_encoded_valid_unichar("\341\204", 1) == -EINVAL); /* truncated, potentially valid */
assert_se(utf8_encoded_valid_unichar("\341\204", 2) == -EINVAL); /* truncated, potentially valid */
assert_se(utf8_encoded_valid_unichar("\341\204", 3) == -EINVAL);
assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 4) == -EINVAL);
assert_se(utf8_encoded_valid_unichar("\341\204\341\204", 5) == -EINVAL);
}
static void test_utf8_escaping(void) {

Binary file not shown.