0c69794138
These lines are generally out-of-date, incomplete and unnecessary. With SPDX and git repository much more accurate and fine grained information about licensing and authorship is available, hence let's drop the per-file copyright notice. Of course, removing copyright lines of others is problematic, hence this commit only removes my own lines and leaves all others untouched. It might be nicer if sooner or later those could go away too, making git the only and accurate source of authorship information.
239 lines
7.1 KiB
C
239 lines
7.1 KiB
C
/* SPDX-License-Identifier: LGPL-2.1+ */
|
|
|
|
#include <errno.h>
|
|
#include <stddef.h>
|
|
#include <string.h>
|
|
|
|
#include "macro.h"
|
|
#include "string-util.h"
|
|
#include "xml.h"
|
|
|
|
/* Tokenizer states. xml_tokenize() reads the current one out of the caller's
 * opaque state pointer on entry and writes the next one back when it returns
 * a token. */
enum {
        /* Nothing parsed yet: xml_tokenize() sets the line counter to 1 and
         * continues as STATE_TEXT. */
        STATE_NULL = 0,

        /* Outside of a tag: text, comments, processing instructions, DTD
         * declarations or the start of a tag follow. */
        STATE_TEXT = 1,

        /* Inside a tag, behind its name or behind an attribute value. */
        STATE_TAG = 2,

        /* Directly behind an attribute name; an "=value" part may follow. */
        STATE_ATTRIBUTE = 3,
};
|
|
|
|
/* Adds the number of '\n' bytes found in the first n bytes of s to *line.
 * Does nothing when line is NULL (line tracking is optional). */
static void inc_lines(unsigned *line, const char *s, size_t n) {
        const char *cursor = s, *end = s + n, *newline;

        if (!line)
                return;

        /* Hop from newline to newline, never looking past the end of the span */
        while ((newline = memchr(cursor, '\n', (size_t) (end - cursor)))) {
                cursor = newline + 1;
                ++*line;
        }
}
|
|
|
|
/* We don't actually do real XML here. We only read a simplistic
 * subset that is a bit less strict than XML and lacks all the more
 * complex features, like entities or namespaces. However, we do
 * support some HTML5-like simplifications. */
|
|
|
|
int xml_tokenize(const char **p, char **name, void **state, unsigned *line) {
|
|
const char *c, *e, *b;
|
|
char *ret;
|
|
int t;
|
|
|
|
assert(p);
|
|
assert(*p);
|
|
assert(name);
|
|
assert(state);
|
|
|
|
t = PTR_TO_INT(*state);
|
|
c = *p;
|
|
|
|
if (t == STATE_NULL) {
|
|
if (line)
|
|
*line = 1;
|
|
t = STATE_TEXT;
|
|
}
|
|
|
|
for (;;) {
|
|
if (*c == 0)
|
|
return XML_END;
|
|
|
|
switch (t) {
|
|
|
|
case STATE_TEXT: {
|
|
int x;
|
|
|
|
e = strchrnul(c, '<');
|
|
if (e > c) {
|
|
/* More text... */
|
|
ret = strndup(c, e - c);
|
|
if (!ret)
|
|
return -ENOMEM;
|
|
|
|
inc_lines(line, c, e - c);
|
|
|
|
*name = ret;
|
|
*p = e;
|
|
*state = INT_TO_PTR(STATE_TEXT);
|
|
|
|
return XML_TEXT;
|
|
}
|
|
|
|
assert(*e == '<');
|
|
b = c + 1;
|
|
|
|
if (startswith(b, "!--")) {
|
|
/* A comment */
|
|
e = strstr(b + 3, "-->");
|
|
if (!e)
|
|
return -EINVAL;
|
|
|
|
inc_lines(line, b, e + 3 - b);
|
|
|
|
c = e + 3;
|
|
continue;
|
|
}
|
|
|
|
if (*b == '?') {
|
|
/* Processing instruction */
|
|
|
|
e = strstr(b + 1, "?>");
|
|
if (!e)
|
|
return -EINVAL;
|
|
|
|
inc_lines(line, b, e + 2 - b);
|
|
|
|
c = e + 2;
|
|
continue;
|
|
}
|
|
|
|
if (*b == '!') {
|
|
/* DTD */
|
|
|
|
e = strchr(b + 1, '>');
|
|
if (!e)
|
|
return -EINVAL;
|
|
|
|
inc_lines(line, b, e + 1 - b);
|
|
|
|
c = e + 1;
|
|
continue;
|
|
}
|
|
|
|
if (*b == '/') {
|
|
/* A closing tag */
|
|
x = XML_TAG_CLOSE;
|
|
b++;
|
|
} else
|
|
x = XML_TAG_OPEN;
|
|
|
|
e = strpbrk(b, WHITESPACE "/>");
|
|
if (!e)
|
|
return -EINVAL;
|
|
|
|
ret = strndup(b, e - b);
|
|
if (!ret)
|
|
return -ENOMEM;
|
|
|
|
*name = ret;
|
|
*p = e;
|
|
*state = INT_TO_PTR(STATE_TAG);
|
|
|
|
return x;
|
|
}
|
|
|
|
case STATE_TAG:
|
|
|
|
b = c + strspn(c, WHITESPACE);
|
|
if (*b == 0)
|
|
return -EINVAL;
|
|
|
|
inc_lines(line, c, b - c);
|
|
|
|
e = b + strcspn(b, WHITESPACE "=/>");
|
|
if (e > b) {
|
|
/* An attribute */
|
|
|
|
ret = strndup(b, e - b);
|
|
if (!ret)
|
|
return -ENOMEM;
|
|
|
|
*name = ret;
|
|
*p = e;
|
|
*state = INT_TO_PTR(STATE_ATTRIBUTE);
|
|
|
|
return XML_ATTRIBUTE_NAME;
|
|
}
|
|
|
|
if (startswith(b, "/>")) {
|
|
/* An empty tag */
|
|
|
|
*name = NULL; /* For empty tags we return a NULL name, the caller must be prepared for that */
|
|
*p = b + 2;
|
|
*state = INT_TO_PTR(STATE_TEXT);
|
|
|
|
return XML_TAG_CLOSE_EMPTY;
|
|
}
|
|
|
|
if (*b != '>')
|
|
return -EINVAL;
|
|
|
|
c = b + 1;
|
|
t = STATE_TEXT;
|
|
continue;
|
|
|
|
case STATE_ATTRIBUTE:
|
|
|
|
if (*c == '=') {
|
|
c++;
|
|
|
|
if (IN_SET(*c, '\'', '\"')) {
|
|
/* Tag with a quoted value */
|
|
|
|
e = strchr(c+1, *c);
|
|
if (!e)
|
|
return -EINVAL;
|
|
|
|
inc_lines(line, c, e - c);
|
|
|
|
ret = strndup(c+1, e - c - 1);
|
|
if (!ret)
|
|
return -ENOMEM;
|
|
|
|
*name = ret;
|
|
*p = e + 1;
|
|
*state = INT_TO_PTR(STATE_TAG);
|
|
|
|
return XML_ATTRIBUTE_VALUE;
|
|
|
|
}
|
|
|
|
/* Tag with a value without quotes */
|
|
|
|
b = strpbrk(c, WHITESPACE ">");
|
|
if (!b)
|
|
b = c;
|
|
|
|
ret = strndup(c, b - c);
|
|
if (!ret)
|
|
return -ENOMEM;
|
|
|
|
*name = ret;
|
|
*p = b;
|
|
*state = INT_TO_PTR(STATE_TAG);
|
|
return XML_ATTRIBUTE_VALUE;
|
|
}
|
|
|
|
t = STATE_TAG;
|
|
continue;
|
|
}
|
|
|
|
}
|
|
|
|
assert_not_reached("Bad state");
|
|
}
|