Systemd/src/basic/extract-word.c

291 lines
11 KiB
C
Raw Normal View History

/* SPDX-License-Identifier: LGPL-2.1-or-later */
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <syslog.h>
#include "alloc-util.h"
#include "escape.h"
#include "extract-word.h"
#include "log.h"
#include "macro.h"
#include "string-util.h"
#include "strv.h"
#include "utf8.h"
int extract_first_word(const char **p, char **ret, const char *separators, ExtractFlags flags) {
_cleanup_free_ char *s = NULL;
size_t allocated = 0, sz = 0;
char c;
int r;
char quote = 0; /* 0 or ' or " */
bool backslash = false; /* whether we've just seen a backslash */
assert(p);
assert(ret);
/* Bail early if called after last value or with no input */
if (!*p)
goto finish;
c = **p;
if (!separators)
separators = WHITESPACE;
/* Parses the first word of a string, and returns it in
* *ret. Removes all quotes in the process. When parsing fails
* (because of an uneven number of quotes or similar), leaves
* the pointer *p at the first invalid character. */
if (flags & EXTRACT_DONT_COALESCE_SEPARATORS)
if (!GREEDY_REALLOC(s, allocated, sz+1))
return -ENOMEM;
for (;; (*p)++, c = **p) {
if (c == 0)
goto finish_force_terminate;
else if (strchr(separators, c)) {
if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
(*p)++;
goto finish_force_next;
}
} else {
/* We found a non-blank character, so we will always
* want to return a string (even if it is empty),
* allocate it here. */
if (!GREEDY_REALLOC(s, allocated, sz+1))
return -ENOMEM;
break;
}
}
for (;; (*p)++, c = **p) {
if (backslash) {
if (!GREEDY_REALLOC(s, allocated, sz+7))
return -ENOMEM;
if (c == 0) {
if ((flags & EXTRACT_CUNESCAPE_RELAX) &&
(!quote || flags & EXTRACT_RELAX)) {
/* If we find an unquoted trailing backslash and we're in
* EXTRACT_CUNESCAPE_RELAX mode, keep it verbatim in the
* output.
*
* Unbalanced quotes will only be allowed in EXTRACT_RELAX
* mode, EXTRACT_CUNESCAPE_RELAX mode does not allow them.
*/
s[sz++] = '\\';
goto finish_force_terminate;
}
if (flags & EXTRACT_RELAX)
goto finish_force_terminate;
return -EINVAL;
}
if (flags & (EXTRACT_CUNESCAPE|EXTRACT_UNESCAPE_SEPARATORS)) {
bool eight_bit = false;
char32_t u;
if ((flags & EXTRACT_CUNESCAPE) &&
(r = cunescape_one(*p, (size_t) -1, &u, &eight_bit, false)) >= 0) {
/* A valid escaped sequence */
assert(r >= 1);
(*p) += r - 1;
if (eight_bit)
s[sz++] = u;
else
sz += utf8_encode_unichar(s + sz, u);
} else if ((flags & EXTRACT_UNESCAPE_SEPARATORS) &&
strchr(separators, **p))
/* An escaped separator char */
s[sz++] = c;
else if (flags & EXTRACT_CUNESCAPE_RELAX) {
s[sz++] = '\\';
s[sz++] = c;
} else
return -EINVAL;
} else
s[sz++] = c;
backslash = false;
} else if (quote) { /* inside either single or double quotes */
for (;; (*p)++, c = **p) {
if (c == 0) {
if (flags & EXTRACT_RELAX)
goto finish_force_terminate;
return -EINVAL;
} else if (c == quote) { /* found the end quote */
quote = 0;
break;
} else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
backslash = true;
break;
} else {
if (!GREEDY_REALLOC(s, allocated, sz+2))
return -ENOMEM;
s[sz++] = c;
}
}
} else {
for (;; (*p)++, c = **p) {
if (c == 0)
goto finish_force_terminate;
else if (IN_SET(c, '\'', '"') && (flags & EXTRACT_UNQUOTE)) {
quote = c;
break;
} else if (c == '\\' && !(flags & EXTRACT_RETAIN_ESCAPE)) {
backslash = true;
break;
} else if (strchr(separators, c)) {
if (flags & EXTRACT_DONT_COALESCE_SEPARATORS) {
(*p)++;
goto finish_force_next;
}
/* Skip additional coalesced separators. */
for (;; (*p)++, c = **p) {
if (c == 0)
goto finish_force_terminate;
if (!strchr(separators, c))
break;
}
goto finish;
} else {
if (!GREEDY_REALLOC(s, allocated, sz+2))
return -ENOMEM;
s[sz++] = c;
}
}
}
}
finish_force_terminate:
*p = NULL;
finish:
if (!s) {
*p = NULL;
*ret = NULL;
return 0;
}
finish_force_next:
s[sz] = 0;
*ret = TAKE_PTR(s);
return 1;
}
int extract_first_word_and_warn(
const char **p,
char **ret,
const char *separators,
ExtractFlags flags,
const char *unit,
const char *filename,
unsigned line,
const char *rvalue) {
/* Try to unquote it, if it fails, warn about it and try again
* but this time using EXTRACT_CUNESCAPE_RELAX to keep the
* backslashes verbatim in invalid escape sequences. */
const char *save;
int r;
save = *p;
r = extract_first_word(p, ret, separators, flags);
if (r >= 0)
return r;
if (r == -EINVAL && !(flags & EXTRACT_CUNESCAPE_RELAX)) {
/* Retry it with EXTRACT_CUNESCAPE_RELAX. */
*p = save;
r = extract_first_word(p, ret, separators, flags|EXTRACT_CUNESCAPE_RELAX);
if (r >= 0) {
/* It worked this time, hence it must have been an invalid escape sequence. */
log_syntax(unit, LOG_WARNING, filename, line, EINVAL, "Ignoring unknown escape sequences: \"%s\"", *ret);
return r;
}
/* If it's still EINVAL; then it must be unbalanced quoting, report this. */
if (r == -EINVAL)
return log_syntax(unit, LOG_ERR, filename, line, r, "Unbalanced quoting, ignoring: \"%s\"", rvalue);
}
/* Can be any error, report it */
return log_syntax(unit, LOG_ERR, filename, line, r, "Unable to decode word \"%s\", ignoring: %m", rvalue);
}
shared/extract-word: replace enum with unsigned int to avoid undefined behaviour ../src/basic/extract-word.c:255:22: warning: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Wvarargs] va_start(ap, flags); ^ ../src/basic/extract-word.c:244:77: note: parameter of type 'ExtractFlags' (aka 'enum ExtractFlags') is declared here int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) { ^ ../src/basic/extract-word.c:286:22: warning: passing an object that undergoes default argument promotion to 'va_start' has undefined behavior [-Wvarargs] va_start(ap, flags); ^ ../src/basic/extract-word.c:244:77: note: parameter of type 'ExtractFlags' (aka 'enum ExtractFlags') is declared here int extract_many_words(const char **p, const char *separators, ExtractFlags flags, ...) { ^ 2 warnings generated. I think the relevant part of C99 is 6.7.2.2 Enumeration specifiers: Each enumerated type shall be compatible with char, a signed integer type, or an unsigned integer type. The choice of type is implementation-defined, but shall be capable of representing the values of all the members of the enumeration. and 7.16.1.4: The parameter parmN is the identifier of the rightmost parameter in the variable parameter list in the function definition (the one just before the ...). If the parameter parmN is declared with the register storage class, with a function or array type, or with a type that is not compatible with the type that results after application of the default argument promotions, the behavior is undefined. This might cause a real issue if the compiler chooses something that is not an integer for ExtractFlags. Rework the code to avoid the warning, but add an assert_cc in a large-valued ExtractFlags element is ever defined and the type is bumped to something wider than an int.
2017-04-23 01:04:02 +02:00
/* We pass ExtractFlags as unsigned int (to avoid undefined behaviour when passing
* an object that undergoes default argument promotion as an argument to va_start).
* Let's make sure that ExtractFlags fits into an unsigned int. */
assert_cc(sizeof(enum ExtractFlags) <= sizeof(unsigned));
int extract_many_words(const char **p, const char *separators, unsigned flags, ...) {
va_list ap;
char **l;
int n = 0, i, c, r;
/* Parses a number of words from a string, stripping any
* quotes if necessary. */
assert(p);
/* Count how many words are expected */
va_start(ap, flags);
for (;;) {
if (!va_arg(ap, char **))
break;
n++;
}
va_end(ap);
if (n <= 0)
return 0;
/* Read all words into a temporary array */
l = newa0(char*, n);
for (c = 0; c < n; c++) {
r = extract_first_word(p, &l[c], separators, flags);
if (r < 0) {
int j;
for (j = 0; j < c; j++)
free(l[j]);
return r;
}
if (r == 0)
break;
}
/* If we managed to parse all words, return them in the passed
* in parameters */
va_start(ap, flags);
for (i = 0; i < n; i++) {
char **v;
v = va_arg(ap, char **);
assert(v);
*v = l[i];
}
va_end(ap);
return c;
}