Systemd/src/shared/json.c
Tom Gundersen 9bae67d49b shared: json - support escaping utf16 surrogate pairs
We originally only supported escaping UCS-2 encoded characters (as \uxxxx). This
only covers the BMP. Also support escaping UTF-16 surrogate pairs (of the form
\uxxxx\uyyyy) to cover all of Unicode.
2014-12-22 20:27:20 +01:00

447 lines
14 KiB
C

/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/types.h>
#include <math.h>
#include "macro.h"
#include "log.h"
#include "util.h"
#include "utf8.h"
#include "json.h"
/* Tokenizer states; json_tokenize() keeps the current one in the caller's opaque state pointer between calls. */
enum {
        /* json_tokenize() resets the line counter to 1 (if one was supplied) and then proceeds as STATE_VALUE. */
        STATE_NULL = 0,

        /* A value or a bracket ('{', '}', '[', ']') is accepted next. */
        STATE_VALUE = 1,

        /* A value or closing bracket was just returned; ':', ',', '}' or ']' is accepted next. */
        STATE_VALUE_POST = 2,
};
/*
 * Adds to *line the number of '\n' bytes found in the first n bytes of s.
 * Does nothing when line is NULL.
 */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor, *newline;
        size_t remaining;

        if (!line)
                return;

        cursor = s;
        remaining = n;

        /* Hop from newline to newline; memchr() returns NULL once the remaining window holds none. */
        while ((newline = memchr(cursor, '\n', remaining))) {
                (*line)++;
                remaining -= (size_t) (newline - cursor) + 1;
                cursor = newline + 1;
        }
}
/*
 * Decodes the four hexadecimal digits c[0]..c[3] into one 16-bit value, most
 * significant digit first, and stores it in *ret.
 *
 * Returns 0 on success. Returns -EINVAL if unhexchar() reports a negative
 * value for any of the digits, or if the decoded value is zero. Decoding stops
 * at the first rejected digit, so later bytes are not read in that case.
 */
static int unhex_ucs2(const char *c, uint16_t *ret) {
        uint16_t value = 0;
        size_t k;

        assert(c);
        assert(ret);

        for (k = 0; k < 4; k++) {
                int digit;

                digit = unhexchar(c[k]);
                if (digit < 0)
                        return -EINVAL;

                /* Shift the digits gathered so far up by one nibble and append the new one. */
                value = (uint16_t) ((value << 4) | (uint16_t) digit);
        }

        /* NOTE(review): zero is refused, presumably because the caller stores the result in a NUL-terminated string — confirm. */
        if (value == 0)
                return -EINVAL;

        *ret = value;
        return 0;
}
/*
 * Parses the JSON string literal at *p, which must start with '"'.
 *
 * Escape sequences are decoded: the single-character escapes \" \\ \/ \b \f
 * \n \r \t, and \uXXXX. A \uXXXX that is a leading UTF-16 surrogate must be
 * followed directly by a second \uXXXX carrying a trailing surrogate; the two
 * are combined into a single code point. A lone trailing surrogate is an
 * error. Unescaped bytes 0x01..0x1f and 0x7f are rejected, and every other
 * unescaped sequence has to be accepted by utf8_encoded_valid_unichar().
 *
 * On success, *ret receives a newly allocated, NUL-terminated copy of the
 * decoded text (the caller owns it), *p is advanced past the closing quote,
 * and JSON_STRING is returned.
 *
 * On failure a negative value is returned (-EINVAL for malformed input,
 * -ENOMEM on allocation failure, or whatever negative value
 * utf8_encoded_valid_unichar() reported) and neither *p nor *ret is modified.
 */
static int json_parse_string(const char **p, char **ret) {
        _cleanup_free_ char *s = NULL;
        size_t n = 0, allocated = 0;
        const char *c;

        assert(p);
        assert(*p);
        assert(ret);

        c = *p;

        if (*c != '"')
                return -EINVAL;

        c++;

        for (;;) {
                int len;

                /* Check for EOF */
                if (*c == 0)
                        return -EINVAL;

                /* Check for control characters 0x00..0x1f */
                if (*c > 0 && *c < ' ')
                        return -EINVAL;

                /* Check for control character 0x7f */
                if (*c == 0x7f)
                        return -EINVAL;

                if (*c == '"') {
                        /* Closing quote. Nothing was allocated if the literal was empty. */
                        if (!s) {
                                s = strdup("");
                                if (!s)
                                        return -ENOMEM;
                        } else
                                /* Every branch that appends to s reserves one extra byte for this terminator. */
                                s[n] = 0;

                        *p = c + 1;

                        /* Hand the buffer to the caller; clearing s keeps the cleanup handler away from it. */
                        *ret = s;
                        s = NULL;

                        return JSON_STRING;
                }

                if (*c == '\\') {
                        char ch = 0;
                        c++;

                        if (*c == 0)
                                return -EINVAL;

                        if (IN_SET(*c, '"', '\\', '/'))
                                ch = *c;
                        else if (*c == 'b')
                                ch = '\b';
                        else if (*c == 'f')
                                ch = '\f';
                        else if (*c == 'n')
                                ch = '\n';
                        else if (*c == 'r')
                                ch = '\r';
                        else if (*c == 't')
                                ch = '\t';
                        else if (*c == 'u') {
                                uint16_t x;
                                int r;

                                /* c points at 'u'; the four hex digits follow it. */
                                r = unhex_ucs2(c + 1, &x);
                                if (r < 0)
                                        return r;

                                /* Skip 'u' and the four digits. */
                                c += 5;

                                /* A surrogate pair decodes to a code point outside the BMP, whose UTF-8
                                 * form is 4 bytes long. Reserve those 4 bytes plus 1 for the terminating
                                 * NUL that is stored when the closing quote is reached; reserving only
                                 * n + 4 would leave no guaranteed room for that terminator. */
                                if (!GREEDY_REALLOC(s, allocated, n + 5))
                                        return -ENOMEM;

                                if (!utf16_is_surrogate(x))
                                        n += utf8_encode_unichar(s + n, x);
                                else if (utf16_is_trailing_surrogate(x))
                                        /* A trailing surrogate may only appear as the second half of a pair. */
                                        return -EINVAL;
                                else {
                                        uint16_t y;

                                        /* Leading surrogate: the second half must follow immediately. */
                                        if (c[0] != '\\' || c[1] != 'u')
                                                return -EINVAL;

                                        r = unhex_ucs2(c + 2, &y);
                                        if (r < 0)
                                                return r;

                                        /* Skip the backslash, 'u' and the four digits. */
                                        c += 6;

                                        if (!utf16_is_trailing_surrogate(y))
                                                return -EINVAL;

                                        n += utf8_encode_unichar(s + n, utf16_surrogate_pair_to_unichar(x, y));
                                }

                                continue;
                        } else
                                return -EINVAL;

                        /* One byte for the decoded character, one for the terminating NUL. */
                        if (!GREEDY_REALLOC(s, allocated, n + 2))
                                return -ENOMEM;

                        s[n++] = ch;
                        c ++;
                        continue;
                }

                /* Plain (unescaped) character: copy the whole UTF-8 sequence verbatim. */
                len = utf8_encoded_valid_unichar(c);
                if (len < 0)
                        return len;

                if (!GREEDY_REALLOC(s, allocated, n + len + 1))
                        return -ENOMEM;

                memcpy(s + n, c, len);
                n += len;
                c += len;
        }
}
/*
 * Parses a JSON number starting at *p.
 *
 * Accepted syntax: an optional '-', then either a single '0' or a non-zero
 * digit followed by more digits, then an optional fraction ('.' plus at least
 * one digit) and an optional exponent ('e' or 'E', optional sign, at least
 * one digit).
 *
 * Returns JSON_INTEGER and fills ret->integer when the number has neither a
 * fraction nor an exponent and its magnitude fits into intmax_t. Otherwise
 * returns JSON_REAL and fills ret->real. In both cases *p is advanced to the
 * first character after the number; that character is not inspected here, so
 * a number may be followed by ',', '}', ']', whitespace or the end of input.
 *
 * Returns -EINVAL, leaving *p and *ret untouched, if the text at *p does not
 * start with a well-formed number.
 */
static int json_parse_number(const char **p, union json_value *ret) {
        bool negative = false, exponent_negative = false, is_double = false;
        double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
        intmax_t i = 0;
        const char *c;

        assert(p);
        assert(*p);
        assert(ret);

        c = *p;

        if (*c == '-') {
                negative = true;
                c++;
        }

        if (*c == '0')
                c++;
        else {
                /* strchr() also matches the terminating NUL of its haystack, hence the explicit check. */
                if (!strchr("123456789", *c) || *c == 0)
                        return -EINVAL;

                do {
                        int digit = *c - '0';

                        if (!is_double) {
                                /* Test for overflow before multiplying: signed overflow is undefined
                                 * behaviour, so it cannot be detected after the fact. Once the value no
                                 * longer fits into intmax_t, report the number as a real instead. */
                                if (i > (INTMAX_MAX - digit) / 10)
                                        is_double = true;
                                else
                                        i = 10 * i + digit;
                        }

                        /* The integer part is always tracked as a double too, for the JSON_REAL case. */
                        x = 10.0 * x + digit;
                        c++;
                } while (strchr("0123456789", *c) && *c != 0);
        }

        if (*c == '.') {
                is_double = true;
                c++;

                if (!strchr("0123456789", *c) || *c == 0)
                        return -EINVAL;

                /* Collect the fraction digits as an integer in y; shift becomes 10^(number of digits). */
                do {
                        y = 10.0 * y + (*c - '0');
                        shift = 10.0 * shift;
                        c++;
                } while (strchr("0123456789", *c) && *c != 0);
        }

        if (*c == 'e' || *c == 'E') {
                is_double = true;
                c++;

                if (*c == '-') {
                        exponent_negative = true;
                        c++;
                } else if (*c == '+')
                        c++;

                if (!strchr("0123456789", *c) || *c == 0)
                        return -EINVAL;

                do {
                        exponent = 10.0 * exponent + (*c - '0');
                        c++;
                } while (strchr("0123456789", *c) && *c != 0);
        }

        /* Do not require end of input here: inside arrays and objects a number is followed by
         * ',', ']' or '}'. Whatever follows is validated by the tokenizer on its next call. */
        *p = c;

        if (is_double) {
                ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
                return JSON_REAL;
        } else {
                ret->integer = negative ? -i : i;
                return JSON_INTEGER;
        }
}
/*
 * Extracts the next JSON token from the text at *p.
 *
 * p          - in/out cursor into the NUL-terminated input; advanced past the
 *              token when one is returned.
 * ret_string - receives the decoded string for JSON_STRING tokens, NULL for
 *              every other token.
 * ret_value  - receives the value for JSON_INTEGER, JSON_REAL and
 *              JSON_BOOLEAN tokens, JSON_VALUE_NULL for every other token.
 * state      - opaque tokenizer state carried between calls; a value that
 *              converts to STATE_NULL starts a new document.
 * line       - optional line counter; set to 1 when a new document starts and
 *              incremented for each newline in the whitespace skipped before a
 *              token.
 *
 * Returns the token type (JSON_OBJECT_OPEN, JSON_OBJECT_CLOSE,
 * JSON_ARRAY_OPEN, JSON_ARRAY_CLOSE, JSON_STRING, JSON_INTEGER, JSON_REAL,
 * JSON_BOOLEAN, JSON_NULL, JSON_COLON or JSON_COMMA), JSON_END when only
 * whitespace remains, or a negative value on error. On JSON_END and on error,
 * *p, *state, *ret_string and *ret_value are left unmodified.
 *
 * Only the local context is checked (what may follow a value versus what may
 * start one); bracket nesting is not tracked here.
 */
int json_tokenize(
                const char **p,
                char **ret_string,
                union json_value *ret_value,
                void **state,
                unsigned *line) {

        const char *c, *b;
        int t, r = -EINVAL, next = STATE_VALUE_POST;

        assert(p);
        assert(*p);
        assert(ret_string);
        assert(ret_value);
        assert(state);

        t = PTR_TO_INT(*state);
        c = *p;

        if (t == STATE_NULL) {
                if (line)
                        *line = 1;

                t = STATE_VALUE;
        }

        /* Skip leading whitespace; if nothing else is left the document is finished. */
        b = c + strspn(c, WHITESPACE);
        if (*b == 0)
                return JSON_END;

        inc_lines(line, c, b - c);
        c = b;

        switch (t) {

        case STATE_VALUE:
                switch (*c) {

                case '{':
                        r = JSON_OBJECT_OPEN;
                        next = STATE_VALUE;
                        break;

                case '}':
                        r = JSON_OBJECT_CLOSE;
                        next = STATE_VALUE_POST;
                        break;

                case '[':
                        r = JSON_ARRAY_OPEN;
                        next = STATE_VALUE;
                        break;

                case ']':
                        r = JSON_ARRAY_CLOSE;
                        next = STATE_VALUE_POST;
                        break;

                case '"':
                        r = json_parse_string(&c, ret_string);
                        if (r < 0)
                                return r;

                        *ret_value = JSON_VALUE_NULL;
                        *p = c;
                        *state = INT_TO_PTR(STATE_VALUE_POST);
                        return r;

                default:
                        /* *c is not NUL here (checked above), so strchr() cannot match the terminator. */
                        if (strchr("-0123456789", *c)) {
                                r = json_parse_number(&c, ret_value);
                                if (r < 0)
                                        return r;

                                *ret_string = NULL;
                                *p = c;
                                *state = INT_TO_PTR(STATE_VALUE_POST);
                                return r;
                        }

                        if (startswith(c, "true")) {
                                *ret_string = NULL;
                                ret_value->boolean = true;
                                *p = c + 4;
                                *state = INT_TO_PTR(STATE_VALUE_POST);
                                return JSON_BOOLEAN;
                        }

                        if (startswith(c, "false")) {
                                *ret_string = NULL;
                                ret_value->boolean = false;
                                *p = c + 5;
                                *state = INT_TO_PTR(STATE_VALUE_POST);
                                return JSON_BOOLEAN;
                        }

                        if (startswith(c, "null")) {
                                *ret_string = NULL;
                                *ret_value = JSON_VALUE_NULL;
                                *p = c + 4;
                                *state = INT_TO_PTR(STATE_VALUE_POST);
                                return JSON_NULL;
                        }

                        return -EINVAL;
                }
                break;

        case STATE_VALUE_POST:
                switch (*c) {

                case ':':
                        r = JSON_COLON;
                        next = STATE_VALUE;
                        break;

                case ',':
                        r = JSON_COMMA;
                        next = STATE_VALUE;
                        break;

                case '}':
                        r = JSON_OBJECT_CLOSE;
                        next = STATE_VALUE_POST;
                        break;

                case ']':
                        r = JSON_ARRAY_CLOSE;
                        next = STATE_VALUE_POST;
                        break;

                default:
                        return -EINVAL;
                }
                break;

        default:
                /* Unrecognized state value: fail cleanly rather than spinning without consuming input. */
                return -EINVAL;
        }

        /* Shared tail for all single-character punctuation tokens. */
        *ret_string = NULL;
        *ret_value = JSON_VALUE_NULL;
        *p = c + 1;
        *state = INT_TO_PTR(next);
        return r;
}