Systemd/src/basic/string-util.c
Zbigniew Jędrzejewski-Szmek 481a2b02a3 Always use unicode ellipsis when ellipsizing
We were already unconditionally using the unicode character when the
input string was not pure ASCII, leading to different behaviour in
depending on the input string.

systemd[1]: Starting printit.service.
python3[19962]: foooooooooooooooooooooooooooooooooooo…oooo
python3[19964]: fooąęoooooooooooooooooooooooooooooooo…oooo
python3[19966]: fooąęoooooooooooooooooooooooooooooooo…ąęąę
python3[19968]: fooąęoooooooooooooooooąęąęąęąęąęąęąęą…ąęąę
systemd[1]: Started printit.service.
2016-09-13 20:10:57 -04:00

871 lines
20 KiB
C

/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "alloc-util.h"
#include "gunicode.h"
#include "macro.h"
#include "string-util.h"
#include "utf8.h"
#include "util.h"
int strcmp_ptr(const char *a, const char *b) {
/* Like strcmp(), but tries to make sense of NULL pointers */
if (a && b)
return strcmp(a, b);
if (!a && b)
return -1;
if (a && !b)
return 1;
return 0;
}
char* endswith(const char *s, const char *postfix) {
size_t sl, pl;
assert(s);
assert(postfix);
sl = strlen(s);
pl = strlen(postfix);
if (pl == 0)
return (char*) s + sl;
if (sl < pl)
return NULL;
if (memcmp(s + sl - pl, postfix, pl) != 0)
return NULL;
return (char*) s + sl - pl;
}
char* endswith_no_case(const char *s, const char *postfix) {
size_t sl, pl;
assert(s);
assert(postfix);
sl = strlen(s);
pl = strlen(postfix);
if (pl == 0)
return (char*) s + sl;
if (sl < pl)
return NULL;
if (strcasecmp(s + sl - pl, postfix) != 0)
return NULL;
return (char*) s + sl - pl;
}
char* first_word(const char *s, const char *word) {
size_t sl, wl;
const char *p;
assert(s);
assert(word);
/* Checks if the string starts with the specified word, either
* followed by NUL or by whitespace. Returns a pointer to the
* NUL or the first character after the whitespace. */
sl = strlen(s);
wl = strlen(word);
if (sl < wl)
return NULL;
if (wl == 0)
return (char*) s;
if (memcmp(s, word, wl) != 0)
return NULL;
p = s + wl;
if (*p == 0)
return (char*) p;
if (!strchr(WHITESPACE, *p))
return NULL;
p += strspn(p, WHITESPACE);
return (char*) p;
}
static size_t strcspn_escaped(const char *s, const char *reject) {
bool escaped = false;
int n;
for (n=0; s[n]; n++) {
if (escaped)
escaped = false;
else if (s[n] == '\\')
escaped = true;
else if (strchr(reject, s[n]))
break;
}
/* if s ends in \, return index of previous char */
return n - escaped;
}
/* Split a string into words. */
const char* split(const char **state, size_t *l, const char *separator, bool quoted) {
const char *current;
current = *state;
if (!*current) {
assert(**state == '\0');
return NULL;
}
current += strspn(current, separator);
if (!*current) {
*state = current;
return NULL;
}
if (quoted && strchr("\'\"", *current)) {
char quotechars[2] = {*current, '\0'};
*l = strcspn_escaped(current + 1, quotechars);
if (current[*l + 1] == '\0' || current[*l + 1] != quotechars[0] ||
(current[*l + 2] && !strchr(separator, current[*l + 2]))) {
/* right quote missing or garbage at the end */
*state = current;
return NULL;
}
*state = current++ + *l + 2;
} else if (quoted) {
*l = strcspn_escaped(current, separator);
if (current[*l] && !strchr(separator, current[*l])) {
/* unfinished escape */
*state = current;
return NULL;
}
*state = current + *l;
} else {
*l = strcspn(current, separator);
*state = current + *l;
}
return current;
}
char *strnappend(const char *s, const char *suffix, size_t b) {
size_t a;
char *r;
if (!s && !suffix)
return strdup("");
if (!s)
return strndup(suffix, b);
if (!suffix)
return strdup(s);
assert(s);
assert(suffix);
a = strlen(s);
if (b > ((size_t) -1) - a)
return NULL;
r = new(char, a+b+1);
if (!r)
return NULL;
memcpy(r, s, a);
memcpy(r+a, suffix, b);
r[a+b] = 0;
return r;
}
char *strappend(const char *s, const char *suffix) {
return strnappend(s, suffix, suffix ? strlen(suffix) : 0);
}
char *strjoin(const char *x, ...) {
va_list ap;
size_t l;
char *r, *p;
va_start(ap, x);
if (x) {
l = strlen(x);
for (;;) {
const char *t;
size_t n;
t = va_arg(ap, const char *);
if (!t)
break;
n = strlen(t);
if (n > ((size_t) -1) - l) {
va_end(ap);
return NULL;
}
l += n;
}
} else
l = 0;
va_end(ap);
r = new(char, l+1);
if (!r)
return NULL;
if (x) {
p = stpcpy(r, x);
va_start(ap, x);
for (;;) {
const char *t;
t = va_arg(ap, const char *);
if (!t)
break;
p = stpcpy(p, t);
}
va_end(ap);
} else
r[0] = 0;
return r;
}
char *strstrip(char *s) {
char *e;
/* Drops trailing whitespace. Modifies the string in
* place. Returns pointer to first non-space character */
s += strspn(s, WHITESPACE);
for (e = strchr(s, 0); e > s; e --)
if (!strchr(WHITESPACE, e[-1]))
break;
*e = 0;
return s;
}
char *delete_chars(char *s, const char *bad) {
char *f, *t;
/* Drops all whitespace, regardless where in the string */
for (f = s, t = s; *f; f++) {
if (strchr(bad, *f))
continue;
*(t++) = *f;
}
*t = 0;
return s;
}
char *truncate_nl(char *s) {
assert(s);
s[strcspn(s, NEWLINE)] = 0;
return s;
}
char ascii_tolower(char x) {
if (x >= 'A' && x <= 'Z')
return x - 'A' + 'a';
return x;
}
char ascii_toupper(char x) {
if (x >= 'a' && x <= 'z')
return x - 'a' + 'A';
return x;
}
char *ascii_strlower(char *t) {
char *p;
assert(t);
for (p = t; *p; p++)
*p = ascii_tolower(*p);
return t;
}
char *ascii_strupper(char *t) {
char *p;
assert(t);
for (p = t; *p; p++)
*p = ascii_toupper(*p);
return t;
}
char *ascii_strlower_n(char *t, size_t n) {
size_t i;
if (n <= 0)
return t;
for (i = 0; i < n; i++)
t[i] = ascii_tolower(t[i]);
return t;
}
int ascii_strcasecmp_n(const char *a, const char *b, size_t n) {
for (; n > 0; a++, b++, n--) {
int x, y;
x = (int) (uint8_t) ascii_tolower(*a);
y = (int) (uint8_t) ascii_tolower(*b);
if (x != y)
return x - y;
}
return 0;
}
int ascii_strcasecmp_nn(const char *a, size_t n, const char *b, size_t m) {
int r;
r = ascii_strcasecmp_n(a, b, MIN(n, m));
if (r != 0)
return r;
if (n < m)
return -1;
else if (n > m)
return 1;
else
return 0;
}
bool chars_intersect(const char *a, const char *b) {
const char *p;
/* Returns true if any of the chars in a are in b. */
for (p = a; *p; p++)
if (strchr(b, *p))
return true;
return false;
}
bool string_has_cc(const char *p, const char *ok) {
const char *t;
assert(p);
/*
* Check if a string contains control characters. If 'ok' is
* non-NULL it may be a string containing additional CCs to be
* considered OK.
*/
for (t = p; *t; t++) {
if (ok && strchr(ok, *t))
continue;
if (*t > 0 && *t < ' ')
return true;
if (*t == 127)
return true;
}
return false;
}
static char *ascii_ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
size_t x;
char *r;
assert(s);
assert(percent <= 100);
assert(new_length >= 3);
if (old_length <= 3 || old_length <= new_length)
return strndup(s, old_length);
r = new0(char, new_length+3);
if (!r)
return NULL;
x = (new_length * percent) / 100;
if (x > new_length - 3)
x = new_length - 3;
memcpy(r, s, x);
r[x] = 0xe2; /* tri-dot ellipsis: … */
r[x+1] = 0x80;
r[x+2] = 0xa6;
memcpy(r + x + 3,
s + old_length - (new_length - x - 1),
new_length - x - 1);
return r;
}
char *ellipsize_mem(const char *s, size_t old_length, size_t new_length, unsigned percent) {
size_t x;
char *e;
const char *i, *j;
unsigned k, len, len2;
int r;
assert(s);
assert(percent <= 100);
assert(new_length >= 3);
/* if no multibyte characters use ascii_ellipsize_mem for speed */
if (ascii_is_valid(s))
return ascii_ellipsize_mem(s, old_length, new_length, percent);
if (old_length <= 3 || old_length <= new_length)
return strndup(s, old_length);
x = (new_length * percent) / 100;
if (x > new_length - 3)
x = new_length - 3;
k = 0;
for (i = s; k < x && i < s + old_length; i = utf8_next_char(i)) {
char32_t c;
r = utf8_encoded_to_unichar(i, &c);
if (r < 0)
return NULL;
k += unichar_iswide(c) ? 2 : 1;
}
if (k > x) /* last character was wide and went over quota */
x++;
for (j = s + old_length; k < new_length && j > i; ) {
char32_t c;
j = utf8_prev_char(j);
r = utf8_encoded_to_unichar(j, &c);
if (r < 0)
return NULL;
k += unichar_iswide(c) ? 2 : 1;
}
assert(i <= j);
/* we don't actually need to ellipsize */
if (i == j)
return memdup(s, old_length + 1);
/* make space for ellipsis */
j = utf8_next_char(j);
len = i - s;
len2 = s + old_length - j;
e = new(char, len + 3 + len2 + 1);
if (!e)
return NULL;
/*
printf("old_length=%zu new_length=%zu x=%zu len=%u len2=%u k=%u\n",
old_length, new_length, x, len, len2, k);
*/
memcpy(e, s, len);
e[len] = 0xe2; /* tri-dot ellipsis: … */
e[len + 1] = 0x80;
e[len + 2] = 0xa6;
memcpy(e + len + 3, j, len2 + 1);
return e;
}
char *ellipsize(const char *s, size_t length, unsigned percent) {
return ellipsize_mem(s, strlen(s), length, percent);
}
bool nulstr_contains(const char*nulstr, const char *needle) {
const char *i;
if (!nulstr)
return false;
NULSTR_FOREACH(i, nulstr)
if (streq(i, needle))
return true;
return false;
}
char* strshorten(char *s, size_t l) {
assert(s);
if (l < strlen(s))
s[l] = 0;
return s;
}
char *strreplace(const char *text, const char *old_string, const char *new_string) {
const char *f;
char *t, *r;
size_t l, old_len, new_len;
assert(text);
assert(old_string);
assert(new_string);
old_len = strlen(old_string);
new_len = strlen(new_string);
l = strlen(text);
r = new(char, l+1);
if (!r)
return NULL;
f = text;
t = r;
while (*f) {
char *a;
size_t d, nl;
if (!startswith(f, old_string)) {
*(t++) = *(f++);
continue;
}
d = t - r;
nl = l - old_len + new_len;
a = realloc(r, nl + 1);
if (!a)
goto oom;
l = nl;
r = a;
t = r + d;
t = stpcpy(t, new_string);
f += old_len;
}
*t = 0;
return r;
oom:
free(r);
return NULL;
}
char *strip_tab_ansi(char **ibuf, size_t *_isz) {
const char *i, *begin = NULL;
enum {
STATE_OTHER,
STATE_ESCAPE,
STATE_BRACKET
} state = STATE_OTHER;
char *obuf = NULL;
size_t osz = 0, isz;
FILE *f;
assert(ibuf);
assert(*ibuf);
/* Strips ANSI color and replaces TABs by 8 spaces */
isz = _isz ? *_isz : strlen(*ibuf);
f = open_memstream(&obuf, &osz);
if (!f)
return NULL;
for (i = *ibuf; i < *ibuf + isz + 1; i++) {
switch (state) {
case STATE_OTHER:
if (i >= *ibuf + isz) /* EOT */
break;
else if (*i == '\x1B')
state = STATE_ESCAPE;
else if (*i == '\t')
fputs(" ", f);
else
fputc(*i, f);
break;
case STATE_ESCAPE:
if (i >= *ibuf + isz) { /* EOT */
fputc('\x1B', f);
break;
} else if (*i == '[') {
state = STATE_BRACKET;
begin = i + 1;
} else {
fputc('\x1B', f);
fputc(*i, f);
state = STATE_OTHER;
}
break;
case STATE_BRACKET:
if (i >= *ibuf + isz || /* EOT */
(!(*i >= '0' && *i <= '9') && *i != ';' && *i != 'm')) {
fputc('\x1B', f);
fputc('[', f);
state = STATE_OTHER;
i = begin-1;
} else if (*i == 'm')
state = STATE_OTHER;
break;
}
}
if (ferror(f)) {
fclose(f);
free(obuf);
return NULL;
}
fclose(f);
free(*ibuf);
*ibuf = obuf;
if (_isz)
*_isz = osz;
return obuf;
}
char *strextend(char **x, ...) {
va_list ap;
size_t f, l;
char *r, *p;
assert(x);
l = f = *x ? strlen(*x) : 0;
va_start(ap, x);
for (;;) {
const char *t;
size_t n;
t = va_arg(ap, const char *);
if (!t)
break;
n = strlen(t);
if (n > ((size_t) -1) - l) {
va_end(ap);
return NULL;
}
l += n;
}
va_end(ap);
r = realloc(*x, l+1);
if (!r)
return NULL;
p = r + f;
va_start(ap, x);
for (;;) {
const char *t;
t = va_arg(ap, const char *);
if (!t)
break;
p = stpcpy(p, t);
}
va_end(ap);
*p = 0;
*x = r;
return r + l;
}
char *strrep(const char *s, unsigned n) {
size_t l;
char *r, *p;
unsigned i;
assert(s);
l = strlen(s);
p = r = malloc(l * n + 1);
if (!r)
return NULL;
for (i = 0; i < n; i++)
p = stpcpy(p, s);
*p = 0;
return r;
}
int split_pair(const char *s, const char *sep, char **l, char **r) {
char *x, *a, *b;
assert(s);
assert(sep);
assert(l);
assert(r);
if (isempty(sep))
return -EINVAL;
x = strstr(s, sep);
if (!x)
return -EINVAL;
a = strndup(s, x - s);
if (!a)
return -ENOMEM;
b = strdup(x + strlen(sep));
if (!b) {
free(a);
return -ENOMEM;
}
*l = a;
*r = b;
return 0;
}
int free_and_strdup(char **p, const char *s) {
char *t;
assert(p);
/* Replaces a string pointer with an strdup()ed new string,
* possibly freeing the old one. */
if (streq_ptr(*p, s))
return 0;
if (s) {
t = strdup(s);
if (!t)
return -ENOMEM;
} else
t = NULL;
free(*p);
*p = t;
return 1;
}
/*
* Pointer to memset is volatile so that compiler must de-reference
* the pointer and can't assume that it points to any function in
* particular (such as memset, which it then might further "optimize")
* This approach is inspired by openssl's crypto/mem_clr.c.
*/
typedef void *(*memset_t)(void *,int,size_t);
static volatile memset_t memset_func = memset;
void* memory_erase(void *p, size_t l) {
return memset_func(p, 'x', l);
}
char* string_erase(char *x) {
if (!x)
return NULL;
/* A delicious drop of snake-oil! To be called on memory where
* we stored passphrases or so, after we used them. */
return memory_erase(x, strlen(x));
}
char *string_free_erase(char *s) {
return mfree(string_erase(s));
}
bool string_is_safe(const char *p) {
const char *t;
if (!p)
return false;
for (t = p; *t; t++) {
if (*t > 0 && *t < ' ') /* no control characters */
return false;
if (strchr(QUOTES "\\\x7f", *t))
return false;
}
return true;
}