util: rework word parsing and c unescaping code

When parsing words from input files, optionally unescape C-style escape
sequences in the parsed strings; this is controlled via a new flags parameter.

Make use of this in tmpfiles, and port everything else over, too.

This improves parsing quite a bit, since we no longer have to process the
same string multiple times with different calls, where an earlier call
might corrupt the input for a later call.
This commit is contained in:
Lennart Poettering 2015-03-23 18:55:36 +07:00
parent be3ce3014e
commit 4034a06ddb
11 changed files with 250 additions and 175 deletions

View file

@ -143,7 +143,7 @@ static int spawn_getter(const char *getter, const char *url) {
_cleanup_strv_free_ char **words = NULL;
assert(getter);
r = strv_split_quoted(&words, getter, false);
r = strv_split_quoted(&words, getter, 0);
if (r < 0)
return log_error_errno(r, "Failed to split getter option: %m");

View file

@ -227,7 +227,7 @@ static int x11_read_data(Context *c) {
if (in_section && first_word(l, "Option")) {
_cleanup_strv_free_ char **a = NULL;
r = strv_split_quoted(&a, l, false);
r = strv_split_quoted(&a, l, 0);
if (r < 0)
return r;
@ -250,7 +250,7 @@ static int x11_read_data(Context *c) {
} else if (!in_section && first_word(l, "Section")) {
_cleanup_strv_free_ char **a = NULL;
r = strv_split_quoted(&a, l, false);
r = strv_split_quoted(&a, l, 0);
if (r < 0)
return -ENOMEM;
@ -539,7 +539,7 @@ static int read_next_mapping(const char* filename,
if (l[0] == 0 || l[0] == '#')
continue;
r = strv_split_quoted(&b, l, false);
r = strv_split_quoted(&b, l, 0);
if (r < 0)
return r;

View file

@ -100,7 +100,7 @@ static int condition_test_kernel_command_line(Condition *c) {
_cleanup_free_ char *word = NULL;
bool found;
r = unquote_first_word(&p, &word, true);
r = unquote_first_word(&p, &word, UNQUOTE_RELAX);
if (r < 0)
return r;
if (r == 0)

View file

@ -278,7 +278,7 @@ char **strv_split_newlines(const char *s) {
return l;
}
int strv_split_quoted(char ***t, const char *s, bool relax) {
int strv_split_quoted(char ***t, const char *s, UnquoteFlags flags) {
size_t n = 0, allocated = 0;
_cleanup_strv_free_ char **l = NULL;
int r;
@ -289,7 +289,7 @@ int strv_split_quoted(char ***t, const char *s, bool relax) {
for (;;) {
_cleanup_free_ char *word = NULL;
r = unquote_first_word(&s, &word, relax);
r = unquote_first_word(&s, &word, flags);
if (r < 0)
return r;
if (r == 0)

View file

@ -73,7 +73,7 @@ static inline bool strv_isempty(char * const *l) {
char **strv_split(const char *s, const char *separator);
char **strv_split_newlines(const char *s);
int strv_split_quoted(char ***t, const char *s, bool relax);
int strv_split_quoted(char ***t, const char *s, UnquoteFlags flags);
char *strv_join(char **l, const char *separator);
char *strv_join_quoted(char **l);

View file

@ -1347,6 +1347,125 @@ char *cescape(const char *s) {
return r;
}
/* Decodes a single C-style escape sequence.
 *
 * p points at the first character *after* the backslash. length is the number
 * of bytes available at p, or (size_t) -1 if p is NUL-terminated and no
 * explicit bound applies.
 *
 * On success the decoded byte is stored in *ret and the number of input
 * characters consumed is returned: 1 for the single-character escapes, 3 for
 * the "xHH" and three-digit octal forms. *ret is left untouched on failure.
 *
 * Returns -EINVAL if the sequence is unknown, is cut short by length, contains
 * an invalid digit, or would decode to a NUL byte or to a value above 255. */
static int cunescape_one(const char *p, size_t length, char *ret) {
        int r = 1;

        assert(p);
        assert(ret);

        /* Check the bound before looking at *p, so that an empty bounded
         * buffer is never dereferenced. */
        if (length != (size_t) -1 && length < 1)
                return -EINVAL;

        assert(*p);

        switch (p[0]) {

        case 'a':
                *ret = '\a';
                break;
        case 'b':
                *ret = '\b';
                break;
        case 'f':
                *ret = '\f';
                break;
        case 'n':
                *ret = '\n';
                break;
        case 'r':
                *ret = '\r';
                break;
        case 't':
                *ret = '\t';
                break;
        case 'v':
                *ret = '\v';
                break;
        case '\\':
                *ret = '\\';
                break;
        case '"':
                *ret = '"';
                break;
        case '\'':
                *ret = '\'';
                break;

        case 's':
                /* This is an extension of the XDG syntax files */
                *ret = ' ';
                break;

        case 'x': {
                /* hexadecimal encoding: 'x' plus two hex digits */
                int a, b;

                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                /* In the NUL-terminated case a premature NUL fails the digit
                 * check, so p[2] is never read past the terminator. */
                a = unhexchar(p[1]);
                if (a < 0)
                        return -EINVAL;

                b = unhexchar(p[2]);
                if (b < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (a == 0 && b == 0)
                        return -EINVAL;

                *ret = (char) ((a << 4) | b);
                r = 3;
                break;
        }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7': {
                /* octal encoding: exactly three octal digits */
                int a, b, c, m;

                /* Only p[0], p[1] and p[2] are consumed, hence three bytes
                 * are sufficient. */
                if (length != (size_t) -1 && length < 3)
                        return -EINVAL;

                a = unoctchar(p[0]);
                if (a < 0)
                        return -EINVAL;

                b = unoctchar(p[1]);
                if (b < 0)
                        return -EINVAL;

                c = unoctchar(p[2]);
                if (c < 0)
                        return -EINVAL;

                /* don't allow NUL bytes */
                if (a == 0 && b == 0 && c == 0)
                        return -EINVAL;

                /* Don't allow bytes above 255 */
                m = (a << 6) | (b << 3) | c;
                if (m > 255)
                        return -EINVAL;

                *ret = (char) m;
                r = 3;
                break;
        }

        default:
                return -EINVAL;
        }

        return r;
}
char *cunescape_length_with_prefix(const char *s, size_t length, const char *prefix) {
char *r, *t;
const char *f;
@ -1366,115 +1485,27 @@ char *cunescape_length_with_prefix(const char *s, size_t length, const char *pre
memcpy(r, prefix, pl);
for (f = s, t = r + pl; f < s + length; f++) {
size_t remaining = s + length - f;
size_t remaining;
int k;
remaining = s + length - f;
assert(remaining > 0);
if (*f != '\\') { /* a literal literal */
if (*f != '\\' || remaining == 1) {
/* a literal literal, or a trailing backslash, copy verbatim */
*(t++) = *f;
continue;
}
if (--remaining == 0) { /* copy trailing backslash verbatim */
*(t++) = *f;
break;
}
f++;
switch (*f) {
case 'a':
*(t++) = '\a';
break;
case 'b':
*(t++) = '\b';
break;
case 'f':
*(t++) = '\f';
break;
case 'n':
*(t++) = '\n';
break;
case 'r':
*(t++) = '\r';
break;
case 't':
*(t++) = '\t';
break;
case 'v':
*(t++) = '\v';
break;
case '\\':
*(t++) = '\\';
break;
case '"':
*(t++) = '"';
break;
case '\'':
*(t++) = '\'';
break;
case 's':
/* This is an extension of the XDG syntax files */
*(t++) = ' ';
break;
case 'x': {
/* hexadecimal encoding */
int a = -1, b = -1;
if (remaining >= 2) {
a = unhexchar(f[1]);
b = unhexchar(f[2]);
}
if (a < 0 || b < 0 || (a == 0 && b == 0)) {
/* Invalid escape code, let's take it literal then */
*(t++) = '\\';
*(t++) = 'x';
} else {
*(t++) = (char) ((a << 4) | b);
f += 2;
}
break;
}
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7': {
/* octal encoding */
int a = -1, b = -1, c = -1;
if (remaining >= 3) {
a = unoctchar(f[0]);
b = unoctchar(f[1]);
c = unoctchar(f[2]);
}
if (a < 0 || b < 0 || c < 0 || (a == 0 && b == 0 && c == 0)) {
/* Invalid escape code, let's take it literal then */
*(t++) = '\\';
*(t++) = f[0];
} else {
*(t++) = (char) ((a << 6) | (b << 3) | c);
f += 2;
}
break;
}
default:
k = cunescape_one(f + 1, remaining - 1, t);
if (k < 0) {
/* Invalid escape code, let's take it literal then */
*(t++) = '\\';
*(t++) = *f;
break;
continue;
}
f += k;
t++;
}
*t = 0;
@ -3411,7 +3442,7 @@ char **replace_env_argv(char **argv, char **env) {
if (e) {
int r;
r = strv_split_quoted(&m, e, true);
r = strv_split_quoted(&m, e, UNQUOTE_RELAX);
if (r < 0) {
ret[k] = NULL;
strv_free(ret);
@ -6382,7 +6413,7 @@ int parse_proc_cmdline(int (*parse_item)(const char *key, const char *value)) {
_cleanup_free_ char *word = NULL;
char *value = NULL;
r = unquote_first_word(&p, &word, true);
r = unquote_first_word(&p, &word, UNQUOTE_RELAX);
if (r < 0)
return r;
if (r == 0)
@ -6422,7 +6453,7 @@ int get_proc_cmdline_key(const char *key, char **value) {
_cleanup_free_ char *word = NULL;
const char *e;
r = unquote_first_word(&p, &word, true);
r = unquote_first_word(&p, &word, UNQUOTE_RELAX);
if (r < 0)
return r;
if (r == 0)
@ -7275,9 +7306,10 @@ int is_dir(const char* path, bool follow) {
return !!S_ISDIR(st.st_mode);
}
int unquote_first_word(const char **p, char **ret, bool relax) {
int unquote_first_word(const char **p, char **ret, UnquoteFlags flags) {
_cleanup_free_ char *s = NULL;
size_t allocated = 0, sz = 0;
int r;
enum {
START,
@ -7335,7 +7367,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
case VALUE_ESCAPE:
if (c == 0) {
if (relax)
if (flags & UNQUOTE_RELAX)
goto finish;
return -EINVAL;
}
@ -7343,6 +7375,14 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
if (!GREEDY_REALLOC(s, allocated, sz+2))
return -ENOMEM;
if (flags & UNQUOTE_CUNESCAPE) {
r = cunescape_one(*p, (size_t) -1, &c);
if (r < 0)
return -EINVAL;
(*p) += r - 1;
}
s[sz++] = c;
state = VALUE;
@ -7350,7 +7390,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
case SINGLE_QUOTE:
if (c == 0) {
if (relax)
if (flags & UNQUOTE_RELAX)
goto finish;
return -EINVAL;
} else if (c == '\'')
@ -7368,7 +7408,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
case SINGLE_QUOTE_ESCAPE:
if (c == 0) {
if (relax)
if (flags & UNQUOTE_RELAX)
goto finish;
return -EINVAL;
}
@ -7376,6 +7416,14 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
if (!GREEDY_REALLOC(s, allocated, sz+2))
return -ENOMEM;
if (flags & UNQUOTE_CUNESCAPE) {
r = cunescape_one(*p, (size_t) -1, &c);
if (r < 0)
return -EINVAL;
(*p) += r - 1;
}
s[sz++] = c;
state = SINGLE_QUOTE;
break;
@ -7398,7 +7446,7 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
case DOUBLE_QUOTE_ESCAPE:
if (c == 0) {
if (relax)
if (flags & UNQUOTE_RELAX)
goto finish;
return -EINVAL;
}
@ -7406,6 +7454,14 @@ int unquote_first_word(const char **p, char **ret, bool relax) {
if (!GREEDY_REALLOC(s, allocated, sz+2))
return -ENOMEM;
if (flags & UNQUOTE_CUNESCAPE) {
r = cunescape_one(*p, (size_t) -1, &c);
if (r < 0)
return -EINVAL;
(*p) += r - 1;
}
s[sz++] = c;
state = DOUBLE_QUOTE;
break;
@ -7435,7 +7491,7 @@ finish:
return 1;
}
int unquote_many_words(const char **p, ...) {
int unquote_many_words(const char **p, UnquoteFlags flags, ...) {
va_list ap;
char **l;
int n = 0, i, c, r;
@ -7446,7 +7502,7 @@ int unquote_many_words(const char **p, ...) {
assert(p);
/* Count how many words are expected */
va_start(ap, p);
va_start(ap, flags);
for (;;) {
if (!va_arg(ap, char **))
break;
@ -7461,7 +7517,7 @@ int unquote_many_words(const char **p, ...) {
l = newa0(char*, n);
for (c = 0; c < n; c++) {
r = unquote_first_word(p, &l[c], false);
r = unquote_first_word(p, &l[c], flags);
if (r < 0) {
int j;
@ -7477,7 +7533,7 @@ int unquote_many_words(const char **p, ...) {
/* If we managed to parse all words, return them in the passed
* in parameters */
va_start(ap, p);
va_start(ap, flags);
for (i = 0; i < n; i++) {
char **v;

View file

@ -1017,8 +1017,13 @@ int take_password_lock(const char *root);
int is_symlink(const char *path);
int is_dir(const char *path, bool follow);
int unquote_first_word(const char **p, char **ret, bool relax);
int unquote_many_words(const char **p, ...) _sentinel_;
/* Bit flags accepted by unquote_first_word(), unquote_many_words() and
 * strv_split_quoted(); may be OR-ed together, 0 selects strict parsing. */
typedef enum UnquoteFlags {
        /* Accept input that ends inside a quote or right after a backslash
         * instead of failing with -EINVAL. */
        UNQUOTE_RELAX     = 1 << 0,
        /* Decode C-style escape sequences following a backslash. */
        UNQUOTE_CUNESCAPE = 1 << 1,
} UnquoteFlags;
int unquote_first_word(const char **p, char **ret, UnquoteFlags flags);
int unquote_many_words(const char **p, UnquoteFlags flags, ...) _sentinel_;
int free_and_strdup(char **p, const char *s);

View file

@ -1384,7 +1384,7 @@ static int parse_line(const char *fname, unsigned line, const char *buffer) {
/* Parse columns */
p = buffer;
r = unquote_many_words(&p, &action, &name, &id, &description, &home, NULL);
r = unquote_many_words(&p, 0, &action, &name, &id, &description, &home, NULL);
if (r < 0) {
log_error("[%s:%u] Syntax error.", fname, line);
return r;

View file

@ -165,7 +165,7 @@ static void test_strv_quote_unquote(const char* const *split, const char *quoted
assert_se(p);
assert_se(streq(p, quoted));
r = strv_split_quoted(&s, quoted, false);
r = strv_split_quoted(&s, quoted, 0);
assert_se(r == 0);
assert_se(s);
STRV_FOREACH(t, s) {
@ -182,7 +182,7 @@ static void test_strv_unquote(const char *quoted, char **list) {
char **t;
int r;
r = strv_split_quoted(&s, quoted, false);
r = strv_split_quoted(&s, quoted, 0);
assert_se(r == 0);
assert_se(s);
j = strv_join(s, " | ");
@ -199,7 +199,7 @@ static void test_invalid_unquote(const char *quoted) {
char **s = NULL;
int r;
r = strv_split_quoted(&s, quoted, false);
r = strv_split_quoted(&s, quoted, 0);
assert_se(s == NULL);
assert_se(r == -EINVAL);
}

View file

@ -1283,64 +1283,76 @@ static void test_unquote_first_word(void) {
char *t;
p = original = "foobar waldo";
assert_se(unquote_first_word(&p, &t, false) > 0);
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "foobar"));
free(t);
assert_se(p == original + 7);
assert_se(unquote_first_word(&p, &t, false) > 0);
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "waldo"));
free(t);
assert_se(p == original + 12);
assert_se(unquote_first_word(&p, &t, false) == 0);
assert_se(unquote_first_word(&p, &t, 0) == 0);
assert_se(!t);
assert_se(p == original + 12);
p = original = "\"foobar\" \'waldo\'";
assert_se(unquote_first_word(&p, &t, false) > 0);
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "foobar"));
free(t);
assert_se(p == original + 9);
assert_se(unquote_first_word(&p, &t, false) > 0);
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "waldo"));
free(t);
assert_se(p == original + 16);
assert_se(unquote_first_word(&p, &t, false) == 0);
assert_se(unquote_first_word(&p, &t, 0) == 0);
assert_se(!t);
assert_se(p == original + 16);
p = original = "\"";
assert_se(unquote_first_word(&p, &t, false) == -EINVAL);
assert_se(unquote_first_word(&p, &t, 0) == -EINVAL);
assert_se(p == original + 1);
p = original = "\'";
assert_se(unquote_first_word(&p, &t, false) == -EINVAL);
assert_se(unquote_first_word(&p, &t, 0) == -EINVAL);
assert_se(p == original + 1);
p = original = "\'fooo";
assert_se(unquote_first_word(&p, &t, false) == -EINVAL);
assert_se(unquote_first_word(&p, &t, 0) == -EINVAL);
assert_se(p == original + 5);
p = original = "\'fooo";
assert_se(unquote_first_word(&p, &t, true) > 0);
assert_se(unquote_first_word(&p, &t, UNQUOTE_RELAX) > 0);
assert_se(streq(t, "fooo"));
free(t);
assert_se(p == original + 5);
p = original = "yay\'foo\'bar";
assert_se(unquote_first_word(&p, &t, false) > 0);
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "yayfoobar"));
free(t);
assert_se(p == original + 11);
p = original = " foobar ";
assert_se(unquote_first_word(&p, &t, false) > 0);
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "foobar"));
free(t);
assert_se(p == original + 12);
p = original = " foo\\ba\\x6ar ";
assert_se(unquote_first_word(&p, &t, UNQUOTE_CUNESCAPE) > 0);
assert_se(streq(t, "foo\ba\x6ar"));
free(t);
assert_se(p == original + 13);
p = original = " foo\\ba\\x6ar ";
assert_se(unquote_first_word(&p, &t, 0) > 0);
assert_se(streq(t, "foobax6ar"));
free(t);
assert_se(p == original + 13);
}
static void test_unquote_many_words(void) {
@ -1348,7 +1360,7 @@ static void test_unquote_many_words(void) {
char *a, *b, *c;
p = original = "foobar waldi piep";
assert_se(unquote_many_words(&p, &a, &b, &c, NULL) == 3);
assert_se(unquote_many_words(&p, 0, &a, &b, &c, NULL) == 3);
assert_se(p == original + 17);
assert_se(streq_ptr(a, "foobar"));
assert_se(streq_ptr(b, "waldi"));
@ -1358,7 +1370,7 @@ static void test_unquote_many_words(void) {
free(c);
p = original = "'foobar' wa\"ld\"i ";
assert_se(unquote_many_words(&p, &a, &b, &c, NULL) == 2);
assert_se(unquote_many_words(&p, 0, &a, &b, &c, NULL) == 2);
assert_se(p == original + 19);
assert_se(streq_ptr(a, "foobar"));
assert_se(streq_ptr(b, "waldi"));
@ -1367,31 +1379,31 @@ static void test_unquote_many_words(void) {
free(b);
p = original = "";
assert_se(unquote_many_words(&p, &a, &b, &c, NULL) == 0);
assert_se(unquote_many_words(&p, 0, &a, &b, &c, NULL) == 0);
assert_se(p == original);
assert_se(streq_ptr(a, NULL));
assert_se(streq_ptr(b, NULL));
assert_se(streq_ptr(c, NULL));
p = original = " ";
assert_se(unquote_many_words(&p, &a, &b, &c, NULL) == 0);
assert_se(unquote_many_words(&p, 0, &a, &b, &c, NULL) == 0);
assert_se(p == original+2);
assert_se(streq_ptr(a, NULL));
assert_se(streq_ptr(b, NULL));
assert_se(streq_ptr(c, NULL));
p = original = "foobar";
assert_se(unquote_many_words(&p, NULL) == 0);
assert_se(unquote_many_words(&p, 0, NULL) == 0);
assert_se(p == original);
p = original = "foobar waldi";
assert_se(unquote_many_words(&p, &a, NULL) == 1);
assert_se(unquote_many_words(&p, 0, &a, NULL) == 1);
assert_se(p == original+7);
assert_se(streq_ptr(a, "foobar"));
free(a);
p = original = " foobar ";
assert_se(unquote_many_words(&p, &a, NULL) == 1);
assert_se(unquote_many_words(&p, 0, &a, NULL) == 1);
assert_se(p == original+15);
assert_se(streq_ptr(a, "foobar"));
free(a);

View file

@ -620,7 +620,6 @@ static int path_set_perms(Item *i, const char *path) {
}
static int get_xattrs_from_arg(Item *i) {
char *xattr;
const char *p;
int r;
@ -629,35 +628,33 @@ static int get_xattrs_from_arg(Item *i) {
p = i->argument;
while ((r = unquote_first_word(&p, &xattr, false)) > 0) {
_cleanup_free_ char *tmp = NULL, *name = NULL,
*value = NULL, *value2 = NULL, *_xattr = xattr;
for (;;) {
_cleanup_free_ char *name = NULL, *value = NULL, *xattr = NULL;
r = unquote_first_word(&p, &xattr, UNQUOTE_CUNESCAPE);
if (r < 0)
log_warning_errno(r, "Failed to parse extended attribute, ignoring: %s", p);
if (r <= 0)
break;
r = split_pair(xattr, "=", &name, &value);
if (r < 0) {
log_warning("Illegal xattr found: \"%s\" - ignoring.", xattr);
log_warning_errno(r, "Failed to parse extended attribute, ignoring: %s", xattr);
continue;
}
if (strempty(name) || strempty(value)) {
log_warning("Malformed xattr found: \"%s\" - ignoring.", xattr);
if (isempty(name) || isempty(value)) {
log_warning("Malformed xattr found, ignoring: %s", xattr);
continue;
}
tmp = unquote(value, "\"");
if (!tmp)
if (strv_push_pair(&i->xattrs, name, value) < 0)
return log_oom();
value2 = cunescape(tmp);
if (!value2)
return log_oom();
if (strv_push_pair(&i->xattrs, name, value2) < 0)
return log_oom();
name = value2 = NULL;
name = value = NULL;
}
return r;
return 0;
}
static int path_set_xattrs(Item *i, const char *path) {
@ -690,8 +687,7 @@ static int get_acls_from_arg(Item *item) {
* afterwards, so the mask can be added now if necessary. */
r = parse_acl(item->argument, &item->acl_access, &item->acl_default, !item->force);
if (r < 0)
log_warning_errno(r, "Failed to parse ACL \"%s\": %m. Ignoring",
item->argument);
log_warning_errno(r, "Failed to parse ACL \"%s\": %m. Ignoring", item->argument);
#else
log_warning_errno(ENOSYS, "ACLs are not supported. Ignoring");
#endif
@ -918,8 +914,7 @@ static int write_one_file(Item *i, const char *path) {
if (i->argument) {
_cleanup_free_ char *unescaped;
log_debug("%s to \"%s\".",
i->type == CREATE_FILE ? "Appending" : "Writing", path);
log_debug("%s to \"%s\".", i->type == CREATE_FILE ? "Appending" : "Writing", path);
unescaped = cunescape(i->argument);
if (!unescaped)
@ -1651,15 +1646,16 @@ static int parse_line(const char *fname, unsigned line, const char *buffer) {
assert(line >= 1);
assert(buffer);
r = unquote_many_words(&buffer,
&action,
&path,
&mode,
&user,
&group,
&age,
&i.argument,
NULL);
r = unquote_many_words(
&buffer,
0,
&action,
&path,
&mode,
&user,
&group,
&age,
NULL);
if (r < 0)
return log_error_errno(r, "[%s:%u] Failed to parse line: %m", fname, line);
else if (r < 2) {
@ -1667,6 +1663,12 @@ static int parse_line(const char *fname, unsigned line, const char *buffer) {
return -EIO;
}
if (!isempty(buffer)) {
i.argument = strdup(buffer);
if (!i.argument)
return log_oom();
}
if (isempty(action)) {
log_error("[%s:%u] Command too short '%s'.", fname, line, action);
return -EINVAL;