shared: add minimal JSON tokenizer

This commit is contained in:
Lennart Poettering 2014-12-15 22:26:56 +01:00
parent c532d8a00c
commit e7eebcfc42
8 changed files with 595 additions and 25 deletions

1
.gitignore vendored
View file

@ -199,6 +199,7 @@
/test-journal-stream
/test-journal-syslog
/test-journal-verify
/test-json
/test-libsystemd-sym*
/test-libudev
/test-libudev-sym*

View file

@ -868,6 +868,8 @@ libsystemd_shared_la_SOURCES = \
src/shared/audit.h \
src/shared/xml.c \
src/shared/xml.h \
src/shared/json.c \
src/shared/json.h \
src/shared/bus-label.c \
src/shared/bus-label.h \
src/shared/gpt.h \
@ -1366,6 +1368,7 @@ tests += \
test-tables \
test-device-nodes \
test-xml \
test-json \
test-architecture \
test-socket-util \
test-fdset \
@ -1686,6 +1689,13 @@ test_xml_SOURCES = \
test_xml_LDADD = \
libsystemd-shared.la
test_json_SOURCES = \
src/test/test-json.c
test_json_LDADD = \
libsystemd-shared.la \
-lm
test_list_SOURCES = \
src/test/test-list.c

409
src/shared/json.c Normal file
View file

@ -0,0 +1,409 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <sys/types.h>
#include <math.h>
#include "macro.h"
#include "log.h"
#include "util.h"
#include "utf8.h"
#include "json.h"
/* Tokenizer states. The current state is stored in the caller's opaque
 * "state" pointer between calls to json_tokenize(). STATE_NULL has to be 0,
 * so that a state pointer initialized to NULL denotes a fresh tokenizer. */
enum {
        /* Nothing has been tokenized yet */
        STATE_NULL = 0,

        /* A value (or a bracket) is expected next */
        STATE_VALUE = 1,

        /* A value was just completed; ':', ',', '}' or ']' is expected next */
        STATE_VALUE_POST = 2,
};
/* Adds the number of newline characters found in the first n bytes of s to
 * *line. Does nothing if line is NULL, i.e. if the caller does not care about
 * line numbers. */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *end = s + n;

        if (!line)
                return;

        while (s < end) {
                const char *nl;

                nl = memchr(s, '\n', (size_t) (end - s));
                if (!nl)
                        break;

                (*line)++;

                /* Continue searching right behind the newline we just counted */
                s = nl + 1;
        }
}
/* Parses exactly four hexadecimal digits at c into a 16 bit code unit.
 * Returns 0 on success and -EINVAL if one of the four characters is not a
 * hexadecimal digit. */
static int json_parse_hex4(const char *c, uint16_t *ret) {
        uint16_t x = 0;
        unsigned i;

        for (i = 0; i < 4; i++) {
                int d;

                /* Digits are checked one at a time and we bail out on the
                 * first bad one, so we never look beyond a character that
                 * unhexchar() rejects (presumably that includes a premature
                 * terminating NUL). */
                d = unhexchar(c[i]);
                if (d < 0)
                        return -EINVAL;

                x = (uint16_t) ((x << 4) | (uint16_t) d);
        }

        *ret = x;
        return 0;
}

/* Parses a double-quoted JSON string starting at *p.
 *
 * On success returns JSON_STRING, stores a newly allocated, NUL-terminated
 * and unescaped copy of the string in *ret (to be freed by the caller) and
 * advances *p to the character following the closing quote.
 *
 * Returns -EINVAL if *p does not point to a '"', if the string is not
 * terminated, contains raw control characters (0x01..0x1f, 0x7f), an unknown
 * escape sequence, a "\u0000" escape or an unpaired UTF-16 surrogate escape.
 * Returns -ENOMEM on allocation failure, and the (negative) result of
 * utf8_encoded_valid_unichar() for byte sequences it rejects. On failure
 * neither *p nor *ret are modified. */
static int json_parse_string(const char **p, char **ret) {
        _cleanup_free_ char *s = NULL;
        size_t n = 0, allocated = 0;
        const char *c;

        assert(p);
        assert(*p);
        assert(ret);

        c = *p;

        if (*c != '"')
                return -EINVAL;

        c++;

        for (;;) {
                int len;

                /* Check for EOF */
                if (*c == 0)
                        return -EINVAL;

                /* Check for control characters 0x00..0x1f */
                if (*c > 0 && *c < ' ')
                        return -EINVAL;

                /* Check for control character 0x7f */
                if (*c == 0x7f)
                        return -EINVAL;

                if (*c == '"') {
                        if (!s) {
                                /* Nothing was ever appended, return an empty string */
                                s = strdup("");
                                if (!s)
                                        return -ENOMEM;
                        } else
                                s[n] = 0;

                        *p = c + 1;

                        /* Pass ownership to the caller, and disarm the cleanup handler */
                        *ret = s;
                        s = NULL;

                        return JSON_STRING;
                }

                if (*c == '\\') {
                        char ch = 0;

                        c++;

                        if (*c == 0)
                                return -EINVAL;

                        if (IN_SET(*c, '"', '\\', '/'))
                                ch = *c;
                        else if (*c == 'b')
                                ch = '\b';
                        else if (*c == 'f')
                                ch = '\f';
                        else if (*c == 'n')
                                ch = '\n';
                        else if (*c == 'r')
                                ch = '\r';
                        else if (*c == 't')
                                ch = '\t';
                        else if (*c == 'u') {
                                uint16_t x;

                                if (json_parse_hex4(c + 1, &x) < 0)
                                        return -EINVAL;

                                /* We cannot store an embedded NUL in a C string */
                                if (x == 0)
                                        return -EINVAL;

                                /* Skip the 'u' and the four hex digits */
                                c += 5;

                                /* A trailing surrogate without a leading one is invalid */
                                if (x >= 0xdc00 && x <= 0xdfff)
                                        return -EINVAL;

                                if (x >= 0xd800 && x <= 0xdbff) {
                                        uint32_t u;
                                        uint16_t y;
                                        uint8_t *t;

                                        /* A leading surrogate: characters outside of the BMP are written as
                                         * two consecutive \uXXXX escapes that form a UTF-16 surrogate pair.
                                         * Encoding the two halves individually would generate invalid UTF-8,
                                         * hence combine them into one code point first. */
                                        if (c[0] != '\\' || c[1] != 'u')
                                                return -EINVAL;

                                        if (json_parse_hex4(c + 2, &y) < 0)
                                                return -EINVAL;

                                        if (y < 0xdc00 || y > 0xdfff)
                                                return -EINVAL;

                                        /* Skip the backslash, the 'u' and the four hex digits */
                                        c += 6;

                                        u = 0x10000 + (((uint32_t) x - 0xd800) << 10) + ((uint32_t) y - 0xdc00);

                                        /* Four bytes of UTF-8 plus room for the trailing NUL */
                                        if (!GREEDY_REALLOC(s, allocated, n + 5))
                                                return -ENOMEM;

                                        t = (uint8_t*) s + n;
                                        t[0] = (uint8_t) (0xf0 | (u >> 18));
                                        t[1] = (uint8_t) (0x80 | ((u >> 12) & 0x3f));
                                        t[2] = (uint8_t) (0x80 | ((u >> 6) & 0x3f));
                                        t[3] = (uint8_t) (0x80 | (u & 0x3f));
                                        n += 4;

                                        continue;
                                }

                                /* Up to three bytes of UTF-8 plus room for the trailing NUL */
                                if (!GREEDY_REALLOC(s, allocated, n + 4))
                                        return -ENOMEM;

                                n += utf8_encode_unichar(x, s + n);
                                continue;
                        } else
                                return -EINVAL;

                        /* One byte for the unescaped character plus room for the trailing NUL */
                        if (!GREEDY_REALLOC(s, allocated, n + 2))
                                return -ENOMEM;

                        s[n++] = ch;
                        c++;
                        continue;
                }

                /* Anything else is copied verbatim, one validated UTF-8 sequence at a time */
                len = utf8_encoded_valid_unichar(c);
                if (len < 0)
                        return len;

                if (!GREEDY_REALLOC(s, allocated, n + len + 1))
                        return -ENOMEM;

                memcpy(s + n, c, len);
                n += len;
                c += len;
        }
}
/* Parses a JSON number starting at *p.
 *
 * On success advances *p to the first character that is not part of the
 * number and returns either JSON_INTEGER (value stored in ret->integer) or
 * JSON_REAL (value stored in ret->real). A number is returned as JSON_REAL if
 * it has a fraction or an exponent, or if its integer part does not fit into
 * intmax_t.
 *
 * Returns -EINVAL if the text at *p is not a well-formed JSON number; in that
 * case neither *p nor *ret are modified. What follows the number is not
 * checked here, that is left to the caller. */
static int json_parse_number(const char **p, union json_value *ret) {
        bool negative = false, exponent_negative = false, is_double = false;
        double x = 0.0, y = 0.0, exponent = 0.0, shift = 1.0;
        intmax_t i = 0;
        const char *c;

        assert(p);
        assert(*p);
        assert(ret);

        c = *p;

        if (*c == '-') {
                negative = true;
                c++;
        }

        if (*c == '0')
                /* A leading zero may not be followed by further digits */
                c++;
        else {
                if (*c < '1' || *c > '9')
                        return -EINVAL;

                do {
                        int digit = *c - '0';

                        if (!is_double) {
                                /* Check for overflow before multiplying, since signed overflow
                                 * is undefined. If the value does not fit into an integer,
                                 * fall back to the floating point value accumulated in x. */
                                if (i > (INTMAX_MAX - digit) / 10)
                                        is_double = true;
                                else
                                        i = 10 * i + digit;
                        }

                        x = 10.0 * x + digit;
                        c++;
                } while (*c >= '0' && *c <= '9');
        }

        if (*c == '.') {
                is_double = true;
                c++;

                /* At least one digit has to follow the decimal point */
                if (*c < '0' || *c > '9')
                        return -EINVAL;

                do {
                        y = 10.0 * y + (*c - '0');
                        shift = 10.0 * shift;
                        c++;
                } while (*c >= '0' && *c <= '9');
        }

        if (*c == 'e' || *c == 'E') {
                is_double = true;
                c++;

                if (*c == '-') {
                        exponent_negative = true;
                        c++;
                } else if (*c == '+')
                        c++;

                /* At least one digit has to follow the exponent marker */
                if (*c < '0' || *c > '9')
                        return -EINVAL;

                do {
                        exponent = 10.0 * exponent + (*c - '0');
                        c++;
                } while (*c >= '0' && *c <= '9');
        }

        /* Note that we do not insist on the number being the last thing in the
         * input: numbers inside of arrays and objects are followed by ',', ']'
         * or '}', which the tokenizer validates when asked for the next token. */
        *p = c;

        if (is_double) {
                ret->real = ((negative ? -1.0 : 1.0) * (x + (y / shift))) * exp10((exponent_negative ? -1.0 : 1.0) * exponent);
                return JSON_REAL;
        } else {
                ret->integer = negative ? -i : i;
                return JSON_INTEGER;
        }
}
int json_tokenize(
const char **p,
char **ret_string,
union json_value *ret_value,
void **state,
unsigned *line) {
const char *c;
int t;
int r;
assert(p);
assert(*p);
assert(ret_string);
assert(ret_value);
assert(state);
t = PTR_TO_INT(*state);
c = *p;
if (t == STATE_NULL) {
if (line)
*line = 1;
t = STATE_VALUE;
}
for (;;) {
const char *b;
b = c + strspn(c, WHITESPACE);
if (*b == 0)
return JSON_END;
inc_lines(line, c, b - c);
c = b;
switch (t) {
case STATE_VALUE:
if (*c == '{') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE);
return JSON_OBJECT_OPEN;
} else if (*c == '}') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_OBJECT_CLOSE;
} else if (*c == '[') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE);
return JSON_ARRAY_OPEN;
} else if (*c == ']') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_ARRAY_CLOSE;
} else if (*c == '"') {
r = json_parse_string(&c, ret_string);
if (r < 0)
return r;
*ret_value = JSON_VALUE_NULL;
*p = c;
*state = INT_TO_PTR(STATE_VALUE_POST);
return r;
} else if (strchr("-0123456789", *c)) {
r = json_parse_number(&c, ret_value);
if (r < 0)
return r;
*ret_string = NULL;
*p = c;
*state = INT_TO_PTR(STATE_VALUE_POST);
return r;
} else if (startswith(c, "true")) {
*ret_string = NULL;
ret_value->boolean = true;
*p = c + 4;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_BOOLEAN;
} else if (startswith(c, "false")) {
*ret_string = NULL;
ret_value->boolean = false;
*p = c + 5;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_BOOLEAN;
} else if (startswith(c, "null")) {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 4;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_NULL;
} else
return -EINVAL;
case STATE_VALUE_POST:
if (*c == ':') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE);
return JSON_COLON;
} else if (*c == ',') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE);
return JSON_COMMA;
} else if (*c == '}') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_OBJECT_CLOSE;
} else if (*c == ']') {
*ret_string = NULL;
*ret_value = JSON_VALUE_NULL;
*p = c + 1;
*state = INT_TO_PTR(STATE_VALUE_POST);
return JSON_ARRAY_CLOSE;
} else
return -EINVAL;
}
}
}

50
src/shared/json.h Normal file
View file

@ -0,0 +1,50 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#pragma once
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <stdbool.h>
#include <inttypes.h>
/* Token types returned by json_tokenize(). All of them are non-negative, as
 * negative return values are reserved for errors. */
enum {
        /* End of input */
        JSON_END = 0,

        /* Punctuation: ':' and ',' */
        JSON_COLON = 1,
        JSON_COMMA = 2,

        /* Brackets: '{', '}', '[' and ']' */
        JSON_OBJECT_OPEN = 3,
        JSON_OBJECT_CLOSE = 4,
        JSON_ARRAY_OPEN = 5,
        JSON_ARRAY_CLOSE = 6,

        /* A string, returned via the ret_string parameter */
        JSON_STRING = 7,

        /* Scalars, returned via the ret_value parameter */
        JSON_REAL = 8,
        JSON_INTEGER = 9,
        JSON_BOOLEAN = 10,

        /* The literal "null" */
        JSON_NULL = 11,
};
/* Scalar payload of a token. Which member is valid depends on the token type
 * json_tokenize() returned. */
union json_value {
/* Valid for JSON_BOOLEAN tokens */
bool boolean;
/* Valid for JSON_REAL tokens */
double real;
/* Valid for JSON_INTEGER tokens */
intmax_t integer;
};
/* The empty value json_tokenize() stores for tokens that carry no scalar
 * payload. Note that this uses an empty brace initializer in a compound
 * literal, which is a GNU C extension before C23. */
#define JSON_VALUE_NULL ((union json_value) {})
/* Extracts the next token from the NUL-terminated JSON text at *p and
 * advances *p past it.
 *
 * The pointer "state" refers to must be NULL before the first call and has to
 * be passed back unmodified on subsequent calls. "line" may be NULL; otherwise
 * it is set to 1 on the first call and increased for each newline skipped in
 * front of a token.
 *
 * Returns one of the non-negative JSON_xxx token types, or a negative
 * errno-style error code. For JSON_STRING a newly allocated string is stored
 * in *ret_string, which the caller has to free. For JSON_BOOLEAN, JSON_INTEGER
 * and JSON_REAL the matching member of *ret_value is set. On JSON_END or on
 * error the output parameters are not guaranteed to be updated. */
int json_tokenize(const char **p, char **ret_string, union json_value *ret_value, void **state, unsigned *line);

View file

@ -263,39 +263,37 @@ char *ascii_is_valid(const char *str) {
return (char*) str;
}
/* Encodes the 16 bit code point c as UTF-8 into the buffer at p.
 *
 * Returns the number of bytes written, which is between 1 and 3. No trailing
 * NUL byte is written, and the buffer has to provide room for 3 bytes.
 * Code points are encoded as they are, i.e. UTF-16 surrogates are neither
 * rejected nor combined here. */
int utf8_encode_unichar(uint16_t c, char *p) {
        uint8_t *t = (uint8_t*) p;

        if (c < 0x80) {
                /* 7 bit: plain ASCII */
                t[0] = (uint8_t) c;
                return 1;
        } else if (c < 0x800) {
                /* 11 bit: 110xxxxx 10xxxxxx */
                t[0] = (uint8_t) (0xc0 | (c >> 6));
                t[1] = (uint8_t) (0x80 | (c & 0x3f));
                return 2;
        } else {
                /* 16 bit: 1110xxxx 10xxxxxx 10xxxxxx */
                t[0] = (uint8_t) (0xe0 | (c >> 12));
                t[1] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
                t[2] = (uint8_t) (0x80 | (c & 0x3f));
                return 3;
        }
}
/* Converts the UTF-16 data at s, which is "length" bytes long, into a newly
 * allocated NUL-terminated UTF-8 string. The input is consumed two bytes at a
 * time, with the low byte of each code unit first. Returns NULL if the
 * allocation fails.
 *
 * NOTE(review): this hunk appears to show removed and added lines of the
 * change interleaved without +/- markers (r and t are declared twice, and
 * there are two conversion loops). Presumably only the second declaration and
 * the short loop that calls utf8_encode_unichar() remain in the resulting
 * file; confirm against the actual source. */
char *utf16_to_utf8(const void *s, size_t length) {
char *r;
const uint8_t *f;
uint8_t *t;
char *r, *t;
/* Each 2 byte input unit expands to at most 3 output bytes, plus the trailing NUL */
r = new(char, (length*3+1)/2 + 1);
if (!r)
return NULL;
t = (uint8_t*) r;
for (f = s; f < (const uint8_t*) s + length; f += 2) {
uint16_t c;
c = (f[1] << 8) | f[0];
if (c == 0) {
*t = 0;
return r;
} else if (c < 0x80) {
*(t++) = (uint8_t) c;
} else if (c < 0x800) {
*(t++) = (uint8_t) (0xc0 | (c >> 6));
*(t++) = (uint8_t) (0x80 | (c & 0x3f));
} else {
*(t++) = (uint8_t) (0xe0 | (c >> 12));
*(t++) = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
*(t++) = (uint8_t) (0x80 | (c & 0x3f));
}
}
for (f = s, t = r; f < (const uint8_t*) s + length; f += 2)
t += utf8_encode_unichar((f[1] << 8) | f[0], t);
*t = 0;
return r;
}

View file

@ -36,6 +36,7 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool newline) _pu
char *utf8_escape_invalid(const char *s);
char *utf8_escape_non_printable(const char *str);
int utf8_encode_unichar(uint16_t c, char *p);
char *utf16_to_utf8(const void *s, size_t length);
int utf8_encoded_valid_unichar(const char *str);

View file

@ -28,7 +28,7 @@ enum {
XML_TAG_CLOSE,
XML_TAG_CLOSE_EMPTY,
XML_ATTRIBUTE_NAME,
XML_ATTRIBUTE_VALUE
XML_ATTRIBUTE_VALUE,
};
int xml_tokenize(const char **p, char **name, void **state, unsigned *line);

101
src/test/test-json.c Normal file
View file

@ -0,0 +1,101 @@
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2014 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <math.h>

#include "log.h"
#include "util.h"
#include "json.h"
/* Tokenizes "data" and compares the resulting token stream with the expected
 * one passed as variadic arguments.
 *
 * The expectation is a sequence of token types (int), each immediately
 * followed by its expected payload where there is one: a const char* for
 * JSON_STRING, a double for JSON_REAL, an intmax_t for JSON_INTEGER and an int
 * for JSON_BOOLEAN. The sequence has to end with JSON_END or with the negative
 * error code the tokenizer is expected to fail with. */
static void test_one(const char *data, ...) {
        void *state = NULL;
        va_list ap;

        va_start(ap, data);

        for (;;) {
                _cleanup_free_ char *str = NULL;
                union json_value v = {};
                int t, tt;

                t = json_tokenize(&data, &str, &v, &state, NULL);
                tt = va_arg(ap, int);

                assert_se(t == tt);

                if (t == JSON_END || t < 0)
                        break;

                else if (t == JSON_STRING) {
                        const char *nn;

                        nn = va_arg(ap, const char *);
                        assert_se(streq_ptr(nn, str));

                } else if (t == JSON_REAL) {
                        double d;

                        d = va_arg(ap, double);

                        /* This has to be fabs(): abs() takes an int, and would hence truncate
                         * the difference so that any error smaller than 1.0 goes unnoticed. */
                        assert_se(fabs(d - v.real) < 0.001);

                } else if (t == JSON_INTEGER) {
                        intmax_t i;

                        i = va_arg(ap, intmax_t);
                        assert_se(i == v.integer);

                } else if (t == JSON_BOOLEAN) {
                        bool b;

                        /* bool is promoted to int when passed through "..." */
                        b = va_arg(ap, int);
                        assert_se(b == v.boolean);
                }
        }

        va_end(ap);
}
/* Runs the tokenizer against a fixed set of inputs; each test_one() call lists
 * the input followed by the expected token stream. */
int main(int argc, char *argv[]) {

        /* Garbage and empty input */
        test_one("x", -EINVAL);
        test_one("", JSON_END);
        test_one(" ", JSON_END);

        /* Integers */
        test_one("0",
                 JSON_INTEGER, (intmax_t) 0,
                 JSON_END);
        test_one("1234",
                 JSON_INTEGER, (intmax_t) 1234,
                 JSON_END);

        /* Reals */
        test_one("3.141",
                 JSON_REAL, 3.141,
                 JSON_END);
        test_one("0.0",
                 JSON_REAL, 0.0,
                 JSON_END);
        test_one("7e3",
                 JSON_REAL, 7e3,
                 JSON_END);
        test_one("-7e-3",
                 JSON_REAL, -7e-3,
                 JSON_END);

        /* Literals */
        test_one("true",
                 JSON_BOOLEAN, true,
                 JSON_END);
        test_one("false",
                 JSON_BOOLEAN, false,
                 JSON_END);
        test_one("null",
                 JSON_NULL,
                 JSON_END);

        /* Empty objects and arrays, with and without surrounding whitespace */
        test_one("{}",
                 JSON_OBJECT_OPEN,
                 JSON_OBJECT_CLOSE,
                 JSON_END);
        test_one("\t {\n} \n",
                 JSON_OBJECT_OPEN,
                 JSON_OBJECT_CLOSE,
                 JSON_END);
        test_one("[]",
                 JSON_ARRAY_OPEN,
                 JSON_ARRAY_CLOSE,
                 JSON_END);
        test_one("\t [] \n\n",
                 JSON_ARRAY_OPEN,
                 JSON_ARRAY_CLOSE,
                 JSON_END);

        /* Strings, including escape sequences */
        test_one("\"\"",
                 JSON_STRING, "",
                 JSON_END);
        test_one("\"foo\"",
                 JSON_STRING, "foo",
                 JSON_END);
        test_one("\"foo\\nfoo\"",
                 JSON_STRING, "foo\nfoo",
                 JSON_END);

        /* Objects with members */
        test_one("{\"foo\" : \"bar\"}",
                 JSON_OBJECT_OPEN,
                 JSON_STRING, "foo",
                 JSON_COLON,
                 JSON_STRING, "bar",
                 JSON_OBJECT_CLOSE,
                 JSON_END);
        test_one("{\"foo\" : [true, false]}",
                 JSON_OBJECT_OPEN,
                 JSON_STRING, "foo",
                 JSON_COLON,
                 JSON_ARRAY_OPEN,
                 JSON_BOOLEAN, true,
                 JSON_COMMA,
                 JSON_BOOLEAN, false,
                 JSON_ARRAY_CLOSE,
                 JSON_OBJECT_CLOSE,
                 JSON_END);

        /* U+FFFD, once as raw UTF-8 and once as \u escape */
        test_one("\"\xef\xbf\xbd\"",
                 JSON_STRING, "\xef\xbf\xbd",
                 JSON_END);
        test_one("\"\\ufffd\"",
                 JSON_STRING, "\xef\xbf\xbd",
                 JSON_END);

        return 0;
}