Systemd/src/basic/env-util.c
Zbigniew Jędrzejewski-Szmek b45c068dd8 basic/env-util: (mostly) follow POSIX for what variable names are allowed
There was some confusion about what POSIX says about variable names:

   names shall not contain the character '='. For values to be portable
   across systems conforming to POSIX.1-2008, the value shall be composed
   of characters from the portable character set (except NUL and as
   indicated below).

i.e. it allows almost all ASCII in variable names (without NUL and DEL and
'='). OTOH, it says that *utilities* use a smaller set of characters:

   Environment variable names used by the utilities in the Shell and
   Utilities volume of POSIX.1-2008 consist solely of uppercase letters,
   digits, and the <underscore> ( '_' ) from the characters defined in
   Portable Character Set and do not begin with a digit.

When enforcing variable names in environment blocks, we need to use this
first definition, so that we can propagate all valid variables.
I think allowing non-printable characters in variable names goes too far, so
I excluded whitespace and control characters when applying the first definition.

OTOH, when we use *shell syntax*, for example doing variable expansion,
it seems enough to support expansion of variables that the shell would allow.

Fixes #14878,
https://bugzilla.redhat.com/show_bug.cgi?id=1754395,
https://bugzilla.redhat.com/show_bug.cgi?id=1879216.
2020-10-12 18:24:28 +02:00

756 lines
21 KiB
C

/* SPDX-License-Identifier: LGPL-2.1+ */
#include <errno.h>
#include <limits.h>
#include <stdarg.h>
#include <stdlib.h>
#include <unistd.h>
#include "alloc-util.h"
#include "env-util.h"
#include "escape.h"
#include "extract-word.h"
#include "macro.h"
#include "parse-util.h"
#include "string-util.h"
#include "strv.h"
#include "utf8.h"
/* Set of characters accepted as part of a braceless "$NAME" reference by replace_env_n() below.
 * We follow bash for the character set. Different shells have different rules.
 * NOTE(review): DIGITS and LETTERS are string constants defined outside this file. */
#define VALID_BASH_ENV_NAME_CHARS \
DIGITS LETTERS \
"_"
/* Tells whether 'c' is a printable, non-blank ASCII character, i.e. lies in the range '!' (0x21)
 * through '~' (0x7E).
 *
 * POSIX.1-2008 considers almost all of ASCII "portable" (only DEL is excluded, and NUL and '=' are
 * additionally not allowed in variable names). We are stricter: control characters such as BEL, BS,
 * HT, LF, VT, FF and CR as well as SPACE are refused too. Note that '=' is inside the accepted
 * range, hence callers validating names have to reject it separately. */
static bool printable_portable_character(char c) {
        if (c < '!')
                return false;

        return c <= '~';
}
/* Checks whether the first 'n' bytes at 'e' make up an acceptable environment variable name.
 *
 * Returns false for a NULL pointer, for an empty name, for an over-long name, and for any name that
 * contains '=' or a character refused by printable_portable_character(). */
static bool env_name_is_valid_n(const char *e, size_t n) {
        const char *end;

        if (!e || n <= 0)
                return false;

        /* POSIX says the overall size of the environment block cannot be > ARG_MAX, so a single
         * assignment cannot be either. Subtracting the equal sign and the trailing NUL leaves
         * ARG_MAX-2 as the longest possible variable name. */
        if (n > (size_t) sysconf(_SC_ARG_MAX) - 2)
                return false;

        end = e + n;
        while (e < end) {
                char c = *e++;

                if (c == '=' || !printable_portable_character(c))
                        return false;
        }

        return true;
}
/* Validates a NUL-terminated environment variable name. NULL is considered invalid; everything
 * else is delegated to env_name_is_valid_n() using the full string length. */
bool env_name_is_valid(const char *e) {
        return e ? env_name_is_valid_n(e, strlen(e)) : false;
}
/* Validates the value part of an environment assignment. Returns false for NULL, for a string that
 * utf8_is_valid() rejects, for a string that string_has_cc() flags, and for over-long values. */
bool env_value_is_valid(const char *e) {
        if (!e || !utf8_is_valid(e))
                return false;

        /* bash allows tabs and newlines in environment variables, and so do we: they are passed to
         * string_has_cc() as the characters to tolerate. */
        if (string_has_cc(e, "\t\n"))
                return false;

        /* POSIX says the overall size of the environment block cannot be > ARG_MAX, so a single
         * assignment cannot be either. Subtracting the shortest possible variable name (length 1),
         * the equal sign and the trailing NUL leaves ARG_MAX-3 as the longest possible value. */
        return strlen(e) <= sc_arg_max() - 3;
}
/* Validates a complete "NAME=VALUE" assignment.
 *
 * Returns false if 'e' is NULL, if it contains no '=', if the part before the first '=' is not a
 * valid name (env_name_is_valid_n()), if the part after it is not a valid value
 * (env_value_is_valid()), or if the assignment as a whole is too long. Returns true otherwise. */
bool env_assignment_is_valid(const char *e) {
        const char *eq;

        /* Like the other validators in this file, treat NULL as "not valid" rather than handing it to
         * strchr(). */
        if (!e)
                return false;

        eq = strchr(e, '=');
        if (!eq)
                return false;

        if (!env_name_is_valid_n(e, eq - e))
                return false;

        if (!env_value_is_valid(eq + 1))
                return false;

        /* POSIX says the overall size of the environment block cannot be > ARG_MAX, hence the
         * individual variable assignments cannot be either, but let's leave room for one trailing
         * NUL byte. */
        if (strlen(e) > sc_arg_max() - 1)
                return false;

        return true;
}
/* Returns true if every entry of the string list 'e' is a valid assignment and no variable name is
 * assigned more than once. */
bool strv_env_is_valid(char **e) {
        char **entry, **later;

        STRV_FOREACH(entry, e) {
                size_t name_len;

                if (!env_assignment_is_valid(*entry))
                        return false;

                /* Length of the name, i.e. everything before the first '=' */
                name_len = strcspn(*entry, "=");

                /* Any later entry carrying the same name is a duplicate assignment */
                STRV_FOREACH(later, entry + 1) {
                        if (!strneq(*entry, *later, name_len))
                                continue;

                        if ((*later)[name_len] == '=')
                                return false;
                }
        }

        return true;
}
bool strv_env_name_is_valid(char **l) {
char **p;
STRV_FOREACH(p, l) {
if (!env_name_is_valid(*p))
return false;
if (strv_contains(p + 1, *p))
return false;
}
return true;
}
/* Returns true if every entry of 'l' is either a valid assignment or a valid bare variable name,
 * and no entry string is repeated verbatim later in the list. */
bool strv_env_name_or_assignment_is_valid(char **l) {
        char **item;

        STRV_FOREACH(item, l) {
                bool acceptable = env_assignment_is_valid(*item) || env_name_is_valid(*item);

                if (!acceptable)
                        return false;

                if (strv_contains(item + 1, *item))
                        return false;
        }

        return true;
}
/* Adds a copy of every entry of the strv 'a' to the strv starting at 'r'.
 *
 * 'r' points to the beginning of the destination array, '*k' to the NULL entry terminating it; 'a'
 * may be NULL, in which case nothing happens. An entry of 'a' replaces the first existing entry
 * that shares its "NAME=" prefix (or its whole text as a prefix, if it has no '='), otherwise it
 * is appended and '*k' is advanced.
 *
 * The caller must have pre-allocated enough room in 'r' for all items of 'a'.
 *
 * Returns 0 on success, -ENOMEM if duplicating an entry failed. */
static int env_append(char **r, char ***k, char **a) {
        assert(r);
        assert(k);
        assert(*k >= r);

        if (!a)
                return 0;

        for (char **src = a; *src; src++) {
                char **slot = r, *copy;
                size_t prefix;

                /* Compare the name including the '=' where there is one */
                prefix = strcspn(*src, "=");
                if ((*src)[prefix] == '=')
                        prefix++;

                /* Look for an existing entry to override */
                while (slot < *k && !strneq(*slot, *src, prefix))
                        slot++;

                copy = strdup(*src);
                if (!copy)
                        return -ENOMEM;

                if (slot < *k)
                        free_and_replace(*slot, copy); /* Override existing item */
                else {
                        /* Nothing matched, append to the end */
                        (*k)[0] = copy;
                        (*k)[1] = NULL;
                        (*k)++;
                }
        }

        return 0;
}
char **strv_env_merge(size_t n_lists, ...) {
_cleanup_strv_free_ char **ret = NULL;
size_t n = 0, i;
char **l, **k;
va_list ap;
/* Merges an arbitrary number of environment sets */
va_start(ap, n_lists);
for (i = 0; i < n_lists; i++) {
l = va_arg(ap, char**);
n += strv_length(l);
}
va_end(ap);
ret = new(char*, n+1);
if (!ret)
return NULL;
*ret = NULL;
k = ret;
va_start(ap, n_lists);
for (i = 0; i < n_lists; i++) {
l = va_arg(ap, char**);
if (env_append(ret, &k, l) < 0) {
va_end(ap);
return NULL;
}
}
va_end(ap);
return TAKE_PTR(ret);
}
/* Tells whether the environment entry 't' is selected by 'pattern'. A pattern without '=' selects
 * by name, a pattern with '=' only selects the identical string:
 *
 *   pattern a   matches string a
 *           a   matches a=
 *           a   matches a=b
 *           a=  matches a=
 *           a=b matches a=b
 *           a=  does not match a
 *           a=b does not match a=
 *           a=b does not match a
 *           a=b does not match a=c */
static bool env_match(const char *t, const char *pattern) {
        size_t name_len;

        assert(t);
        assert(pattern);

        if (streq(t, pattern))
                return true;

        /* A full assignment only ever matches verbatim, which we already ruled out */
        if (strchr(pattern, '='))
                return false;

        name_len = strlen(pattern);
        if (!strneq(t, pattern, name_len))
                return false;

        return t[name_len] == '=';
}
/* Returns true if 'entry' begins with 'name' immediately followed by '=', i.e. if it is an
 * assignment to exactly that variable. */
static bool env_entry_has_name(const char *entry, const char *name) {
        const char *rest;

        assert(entry);
        assert(name);

        rest = startswith(entry, name);

        return rest && *rest == '=';
}
/* Builds a new strv holding copies of all entries of 'x' that are not matched (see env_match()) by
 * any entry of the 'n_lists' string lists passed as variadic 'char**' arguments. 'x' itself is
 * left untouched. Returns NULL if an allocation fails. */
char **strv_env_delete(char **x, size_t n_lists, ...) {
        size_t capacity, kept = 0;
        char **entry, **result;
        va_list ap;

        capacity = strv_length(x);

        result = new(char*, capacity+1);
        if (!result)
                return NULL;

        STRV_FOREACH(entry, x) {
                bool listed = false;

                /* Walk the variadic lists afresh for every entry */
                va_start(ap, n_lists);
                for (size_t v = 0; v < n_lists && !listed; v++) {
                        char **list, **pattern;

                        list = va_arg(ap, char**);
                        STRV_FOREACH(pattern, list)
                                if (env_match(*entry, *pattern)) {
                                        listed = true;
                                        break;
                                }
                }
                va_end(ap);

                if (listed)
                        continue;

                result[kept] = strdup(*entry);
                if (!result[kept]) {
                        /* result[kept] is NULL here, so the array is properly terminated */
                        strv_free(result);
                        return NULL;
                }

                kept++;
        }

        result[kept] = NULL;

        assert(kept <= capacity);

        return result;
}
/* Removes, in place, every entry of 'l' that env_match() selects for 'p', freeing the removed
 * strings and compacting the array. Returns 'l', or NULL if 'l' is NULL. */
char **strv_env_unset(char **l, const char *p) {
        char **src, **dst;

        if (!l)
                return NULL;

        assert(p);

        dst = l;
        for (src = l; *src; src++)
                if (env_match(*src, p))
                        free(*src);
                else
                        *(dst++) = *src;

        *dst = NULL;
        return l;
}
/* Like strv_env_unset(), but takes a NULL-terminated variadic list of 'const char*' patterns and
 * removes every entry matched by any of them. Edits 'l' in place and returns it, or NULL if 'l'
 * is NULL. */
char **strv_env_unset_many(char **l, ...) {
        char **src, **dst;

        if (!l)
                return NULL;

        dst = l;
        for (src = l; *src; src++) {
                const char *pattern;
                va_list ap;

                /* Step through the patterns until one matches or the terminating NULL is reached;
                 * afterwards 'pattern' is non-NULL exactly if there was a match. */
                va_start(ap, l);
                do
                        pattern = va_arg(ap, const char*);
                while (pattern && !env_match(*src, pattern));
                va_end(ap);

                if (pattern)
                        free(*src);
                else
                        *(dst++) = *src;
        }

        *dst = NULL;
        return l;
}
/* Replaces the first assignment of a variable in the string list '*l' by 'p', or adds 'p' as a new
 * entry if the variable is not assigned yet. All further assignments of the same variable are
 * dropped. Edits in place. Does not copy 'p': the string itself is stored in the list. 'p' must be
 * a "NAME=VALUE" assignment.
 *
 * Returns 0 if an existing entry was replaced, 1 if 'p' was newly added, -EINVAL if 'p' contains
 * no '=', or the negative error returned by strv_push(). */
int strv_env_replace(char ***l, char *p) {
        const char *t, *name;
        char **f;
        int r;

        assert(p);

        t = strchr(p, '=');
        if (!t)
                return -EINVAL;

        /* Stack copy of just the variable name */
        name = strndupa(p, t - p);

        STRV_FOREACH(f, *l)
                if (env_entry_has_name(*f, name)) {
                        free_and_replace(*f, p);

                        /* Drop the remaining assignments of this variable. This has to match by name:
                         * a full "NAME=VALUE" pattern would only remove exact duplicates of the new
                         * assignment, leaving stale "NAME=other" entries behind, which would then take
                         * precedence in strv_env_get_n()'s backwards search. */
                        strv_env_unset(f + 1, name);
                        return 0;
                }

        /* We didn't find a match, we need to append p or create a new strv */
        r = strv_push(l, p);
        if (r < 0)
                return r;

        return 1;
}
char **strv_env_set(char **x, const char *p) {
_cleanup_strv_free_ char **ret = NULL;
size_t n, m;
char **k;
/* Overrides the env var setting of p, returns a new copy */
n = strv_length(x);
m = n + 2;
if (m < n) /* overflow? */
return NULL;
ret = new(char*, m);
if (!ret)
return NULL;
*ret = NULL;
k = ret;
if (env_append(ret, &k, x) < 0)
return NULL;
if (env_append(ret, &k, STRV_MAKE(p)) < 0)
return NULL;
return TAKE_PTR(ret);
}
char *strv_env_get_n(char **l, const char *name, size_t k, unsigned flags) {
char **i;
assert(name);
if (k <= 0)
return NULL;
STRV_FOREACH_BACKWARDS(i, l)
if (strneq(*i, name, k) &&
(*i)[k] == '=')
return *i + k + 1;
if (flags & REPLACE_ENV_USE_ENVIRONMENT) {
const char *t;
t = strndupa(name, k);
return getenv(t);
};
return NULL;
}
/* Looks up the NUL-terminated variable 'name' in the string list 'l' only (flags are 0, so the
 * process environment is not consulted). See strv_env_get_n() for the return value. */
char *strv_env_get(char **l, const char *name) {
        size_t name_len;

        assert(name);

        name_len = strlen(name);
        return strv_env_get_n(l, name, name_len, 0);
}
/* Cleans up the environment 'e' in place: entries that are not valid assignments are freed (after
 * 'invalid_callback', if given, has been called with the entry and 'userdata'), and so are entries
 * whose variable is assigned again later in the list. The surviving entries are moved to the front
 * and the array is re-terminated. Returns 'e'. */
char **strv_env_clean_with_callback(char **e, void (*invalid_callback)(const char *p, void *userdata), void *userdata) {
        char **cur, **ahead;
        int kept = 0;

        STRV_FOREACH(cur, e) {
                size_t name_len;
                bool shadowed = false;

                if (!env_assignment_is_valid(*cur)) {
                        if (invalid_callback)
                                invalid_callback(*cur, userdata);

                        free(*cur);
                        continue;
                }

                /* Is the same variable assigned again further down? Then the later one wins. */
                name_len = strcspn(*cur, "=");
                STRV_FOREACH(ahead, cur + 1) {
                        shadowed = strneq(*cur, *ahead, name_len) && (*ahead)[name_len] == '=';
                        if (shadowed)
                                break;
                }

                if (shadowed)
                        free(*cur);
                else
                        e[kept++] = *cur;
        }

        if (e)
                e[kept] = NULL;

        return e;
}
/* Expands shell-style variable references in at most the first 'n' bytes of 'format' (processing
 * also stops at a NUL byte) and returns the result as a newly built string.
 *
 * Recognized syntax:
 *   ${NAME}        replaced by the value of NAME (nothing if it is not found)
 *   $$             replaced by a single '$'
 *   $NAME          like ${NAME}; only if REPLACE_ENV_ALLOW_BRACELESS is set in 'flags', NAME is made
 *                  up of VALID_BASH_ENV_NAME_CHARS
 *   ${NAME:-word}  value of NAME if found, otherwise the expansion of 'word'; only if
 *                  REPLACE_ENV_ALLOW_EXTENDED is set
 *   ${NAME:+word}  expansion of 'word' if NAME is found, otherwise nothing; only if
 *                  REPLACE_ENV_ALLOW_EXTENDED is set
 *
 * Anything else is copied verbatim. Variables are looked up with strv_env_get_n() in 'env', to which
 * 'flags' is passed on.
 *
 * Returns NULL on failure (the string helpers used here returned NULL, presumably out of memory). */
char *replace_env_n(const char *format, size_t n, char **env, unsigned flags) {
        enum {
                WORD,            /* copying literal text */
                CURLY,           /* previous character was '$' */
                VARIABLE,        /* after "${", reading the variable name */
                VARIABLE_RAW,    /* after '$', reading a braceless variable name */
                TEST,            /* after "${NAME:", expecting '-' or '+' */
                DEFAULT_VALUE,   /* inside the word of "${NAME:-word}" */
                ALTERNATE_VALUE, /* inside the word of "${NAME:+word}" */
        } state = WORD;

        /* 'word' marks the start of the text not yet appended to 'r'; while a reference is being
         * parsed it points at the '$' introducing it. */
        const char *e, *word = format, *test_value;
        char *k;
        _cleanup_free_ char *r = NULL;
        size_t i, len;
        int nest = 0;

        assert(format);

        for (e = format, i = 0; *e && i < n; e ++, i ++)
                switch (state) {

                case WORD:
                        if (*e == '$')
                                state = CURLY;
                        break;

                case CURLY:
                        if (*e == '{') {
                                /* Flush the literal text preceding the '$' */
                                k = strnappend(r, word, e-word-1);
                                if (!k)
                                        return NULL;

                                free_and_replace(r, k);

                                word = e-1;
                                state = VARIABLE;

                                /* This state is only ever entered from top level, hence the brace
                                 * depth is exactly 1 now. Setting it (rather than incrementing) keeps
                                 * the counter right after a plain "${NAME}" or an abandoned reference,
                                 * neither of which decrements it; otherwise a later "${NAME:-word}"
                                 * would never see the depth drop to zero and would not be expanded. */
                                nest = 1;
                        } else if (*e == '$') {
                                /* "$$": emit everything up to and including the first '$' */
                                k = strnappend(r, word, e-word);
                                if (!k)
                                        return NULL;

                                free_and_replace(r, k);

                                word = e+1;
                                state = WORD;

                        } else if (flags & REPLACE_ENV_ALLOW_BRACELESS && strchr(VALID_BASH_ENV_NAME_CHARS, *e)) {
                                k = strnappend(r, word, e-word-1);
                                if (!k)
                                        return NULL;

                                free_and_replace(r, k);

                                word = e-1;
                                state = VARIABLE_RAW;

                        } else
                                state = WORD;
                        break;

                case VARIABLE:
                        if (*e == '}') {
                                const char *t;

                                /* The name sits between "${" (word+2) and the closing brace */
                                t = strv_env_get_n(env, word+2, e-word-2, flags);

                                k = strjoin(r, t);
                                if (!k)
                                        return NULL;

                                free_and_replace(r, k);

                                word = e+1;
                                state = WORD;
                        } else if (*e == ':') {
                                if (!(flags & REPLACE_ENV_ALLOW_EXTENDED))
                                        /* Treat this as unsupported syntax, i.e. do no replacement */
                                        state = WORD;
                                else {
                                        len = e-word-2;
                                        state = TEST;
                                }
                        }
                        break;

                case TEST:
                        if (*e == '-')
                                state = DEFAULT_VALUE;
                        else if (*e == '+')
                                state = ALTERNATE_VALUE;
                        else {
                                /* Unknown operator, leave the text as it is */
                                state = WORD;
                                break;
                        }

                        test_value = e+1;
                        break;

                case DEFAULT_VALUE: /* fall through */
                case ALTERNATE_VALUE:
                        assert(flags & REPLACE_ENV_ALLOW_EXTENDED);

                        if (*e == '{') {
                                nest++;
                                break;
                        }

                        if (*e != '}')
                                break;

                        nest--;
                        if (nest == 0) {
                                const char *t;
                                _cleanup_free_ char *v = NULL;

                                t = strv_env_get_n(env, word+2, len, flags);

                                /* Expand the word recursively where it is needed. A NULL result is a
                                 * failure and must be propagated like any other one here, instead of
                                 * silently being treated as an empty value. */
                                if (t && state == ALTERNATE_VALUE) {
                                        t = v = replace_env_n(test_value, e-test_value, env, flags);
                                        if (!v)
                                                return NULL;
                                } else if (!t && state == DEFAULT_VALUE) {
                                        t = v = replace_env_n(test_value, e-test_value, env, flags);
                                        if (!v)
                                                return NULL;
                                }

                                k = strjoin(r, t);
                                if (!k)
                                        return NULL;

                                free_and_replace(r, k);

                                word = e+1;
                                state = WORD;
                        }
                        break;

                case VARIABLE_RAW:
                        assert(flags & REPLACE_ENV_ALLOW_BRACELESS);

                        if (!strchr(VALID_BASH_ENV_NAME_CHARS, *e)) {
                                const char *t;

                                t = strv_env_get_n(env, word+1, e-word-1, flags);

                                k = strjoin(r, t);
                                if (!k)
                                        return NULL;

                                free_and_replace(r, k);

                                /* The current character is not part of the name: step back so that the
                                 * next iteration processes it again in WORD state. */
                                word = e--;
                                i--;
                                state = WORD;
                        }
                        break;
                }

        if (state == VARIABLE_RAW) {
                /* Input ended in the middle of a braceless name: expand it now */
                const char *t;

                assert(flags & REPLACE_ENV_ALLOW_BRACELESS);

                t = strv_env_get_n(env, word+1, e-word-1, flags);
                return strjoin(r, t);
        } else
                /* Append whatever is left, including any unterminated reference, verbatim */
                return strnappend(r, word, e-word);
}
/* Expands variable references in the argument vector 'argv' using the environment 'env' and
 * returns the result as a new strv; NULL is returned on failure.
 *
 * An argument that begins with '$' not followed by '{' or '$' is taken as a variable name as a
 * whole: it is replaced by the words its value splits into (and hence by nothing at all if the
 * variable is not set). Every other argument is passed through replace_env() and yields exactly
 * one output word. */
char **replace_env_argv(char **argv, char **env) {
        char **out, **arg;
        size_t used = 0, capacity;

        capacity = strv_length(argv);

        out = new(char*, capacity+1);
        if (!out)
                return NULL;

        STRV_FOREACH(arg, argv) {
                char *value, **grown, **pieces = NULL;
                size_t n_pieces;
                bool whole_word;

                whole_word = (*arg)[0] == '$' && !IN_SET((*arg)[1], '{', '$');

                if (!whole_word) {
                        /* If ${FOO} appears as part of a word, replace it by the variable as-is */
                        out[used] = replace_env(*arg, env, 0);
                        if (!out[used]) {
                                strv_free(out);
                                return NULL;
                        }

                        used++;
                        continue;
                }

                /* If $FOO appears as single word, replace it by the split up variable */
                value = strv_env_get(env, *arg + 1);
                if (value &&
                    strv_split_full(&pieces, value, WHITESPACE, EXTRACT_RELAX|EXTRACT_UNQUOTE) < 0) {
                        out[used] = NULL;
                        strv_free(out);
                        return NULL;
                }

                /* This one argument turns into n_pieces words, resize accordingly */
                n_pieces = strv_length(pieces);
                capacity = capacity + n_pieces - 1;

                grown = reallocarray(out, capacity + 1, sizeof(char *));
                if (!grown) {
                        out[used] = NULL;
                        strv_free(out);
                        strv_free(pieces);
                        return NULL;
                }
                out = grown;

                if (pieces) {
                        /* Move the strings over; only the now empty container is released */
                        memcpy(out + used, pieces, n_pieces * sizeof(char*));
                        free(pieces);
                }

                used += n_pieces;
        }

        out[used] = NULL;
        return out;
}
/* Reads the environment variable 'p' with getenv() and parses it with parse_boolean(). Returns
 * -ENXIO if the variable is not set, otherwise whatever parse_boolean() returns. */
int getenv_bool(const char *p) {
        const char *value = getenv(p);

        return value ? parse_boolean(value) : -ENXIO;
}
/* Same as getenv_bool(), but reads the variable with secure_getenv(). Returns -ENXIO if that
 * yields NULL, otherwise whatever parse_boolean() returns. */
int getenv_bool_secure(const char *p) {
        const char *value = secure_getenv(p);

        return value ? parse_boolean(value) : -ENXIO;
}