basic/escape: add truncation to xescape too

This does for ASCII and non-unicode encodings what utf8_escape_non_printable_full()
does for utf8-based encodings.
This commit is contained in:
Zbigniew Jędrzejewski-Szmek 2019-05-16 13:12:37 +02:00
parent 390f0875e4
commit 70d558199c
3 changed files with 96 additions and 17 deletions

View file

@ -368,33 +368,71 @@ int cunescape(const char *s, UnescapeFlags flags, char **ret) {
return cunescape_length(s, strlen(s), flags, ret);
}
/* NOTE(review): this span appears to interleave the removed lines of the old xescape() with the added
 * lines of the new xescape_full() (two function headers, two sets of locals, `r` next to `ans`) —
 * confirm against the original diff before treating it as compilable code.
 *
 * xescape_full() returns a newly allocated copy of s in which selected characters are replaced by
 * "\xHH" sequences, truncated so that it does not exceed console_width characters. It returns NULL
 * when the buffer allocation fails. */
char *xescape(const char *s, const char *bad) {
char *r, *t;
char *xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits) {
char *ans, *t, *prev, *prev2;
const char *f;
/* Escapes all chars in bad, in addition to \ and all special
* chars, in \xFF style escaping. May be reversed with
* cunescape(). */
/* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be
* reversed with cunescape(). If eight_bits is true, characters >= 127 are let through unchanged.
* This corresponds to non-ASCII printable characters in pre-unicode encodings.
*
* If console_width is reached, output is truncated and "..." is appended. */
r = new(char, strlen(s) * 4 + 1);
if (!r)
/* A zero-width console gets an empty, freshly allocated string. */
if (console_width == 0)
return strdup("");
/* Every input char produces at most four output chars ("\xHH") and at least one, so at most
 * MIN(strlen(s), console_width) input chars are ever emitted; the +1 is for the trailing NUL. */
ans = new(char, MIN(strlen(s), console_width) * 4 + 1);
if (!ans)
return NULL;
for (f = s, t = r; *f; f++) {
/* Pre-fill the whole buffer with '_' and terminate it; presumably a debugging aid that makes
 * never-written positions visible — TODO confirm. */
memset(ans, '_', MIN(strlen(s), console_width) * 4);
ans[MIN(strlen(s), console_width) * 4] = 0;
/* f walks the input, t is the write position; prev and prev2 trail t by one and two emitted items. */
for (f = s, t = prev = prev2 = ans; ; f++) {
char *tmp_t = t;
/* End of input reached without running out of columns: terminate and return the full result. */
if (!*f) {
*t = 0;
return ans;
}
/* Escape control chars, backslash, anything listed in bad and, unless eight_bits is set,
 * bytes >= 127. */
if ((unsigned char) *f < ' ' || (!eight_bits && (unsigned char) *f >= 127) ||
*f == '\\' || strchr(bad, *f)) {
/* An escape sequence needs four columns; stop if it would exceed console_width. */
if ((size_t) (t - ans) + 4 > console_width)
break;
if ((*f < ' ') || (*f >= 127) ||
(*f == '\\') || strchr(bad, *f)) {
*(t++) = '\\';
*(t++) = 'x';
*(t++) = hexchar(*f >> 4);
*(t++) = hexchar(*f);
} else
} else {
/* A literal char needs one column; stop if it would exceed console_width. */
if ((size_t) (t - ans) + 1 > console_width)
break;
*(t++) = *f;
}
/* We might need to go back two cycles to fit three dots, so remember two positions */
prev2 = prev;
prev = tmp_t;
}
*t = 0;
/* We can just write where we want, since chars are one-byte */
size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */
size_t off;
/* Pick where the dots start: at the current end if they still fit, otherwise back up by one or
 * two emitted items, otherwise at the fixed offset that leaves exactly c columns. */
if (console_width - c >= (size_t) (t - ans))
off = (size_t) (t - ans);
else if (console_width - c >= (size_t) (prev - ans))
off = (size_t) (prev - ans);
else if (console_width - c >= (size_t) (prev2 - ans))
off = (size_t) (prev2 - ans);
else
off = console_width - c;
/* The dots never start beyond what has been written so far. */
assert(off <= (size_t) (t - ans));
return r;
/* Copy only the first c bytes of "..." and terminate right after them. */
memcpy(ans + off, "...", c);
ans[off + c] = '\0';
return ans;
}
char *octescape(const char *s, size_t len) {

View file

@ -46,7 +46,10 @@ int cunescape_length(const char *s, size_t length, UnescapeFlags flags, char **r
int cunescape_length_with_prefix(const char *s, size_t length, const char *prefix, UnescapeFlags flags, char **ret);
int cunescape_one(const char *p, size_t length, char32_t *ret, bool *eight_bit);
char *xescape(const char *s, const char *bad);
char *xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits);
/* Convenience wrapper around xescape_full(): escapes s (plus every char listed in bad) with the
 * largest possible width limit (SIZE_MAX) and with eight_bits disabled. Returns whatever
 * xescape_full() returns. */
static inline char *xescape(const char *s, const char *bad) {
        const size_t unlimited_width = SIZE_MAX;
        const bool pass_eight_bits = false;

        return xescape_full(s, bad, unlimited_width, pass_eight_bits);
}
char *octescape(const char *s, size_t len);
char *shell_escape(const char *s, const char *bad);

View file

@ -6,10 +6,45 @@
#include "tests.h"
/* Checks that cescape() turns a string containing a backslash, a double quote, control characters
 * and high bytes into the expected backslash-escaped form (named escapes such as \b and \n, octal
 * escapes such as \003 and \313).
 *
 * NOTE(review): the same check appears twice, once with `escaped` and once with `t`; this looks like
 * the removed and the added diff lines shown together — confirm against the original diff. */
static void test_cescape(void) {
_cleanup_free_ char *escaped;
_cleanup_free_ char *t;
assert_se(escaped = cescape("abc\\\"\b\f\n\r\t\v\a\003\177\234\313"));
assert_se(streq(escaped, "abc\\\\\\\"\\b\\f\\n\\r\\t\\v\\a\\003\\177\\234\\313"));
assert_se(t = cescape("abc\\\"\b\f\n\r\t\v\a\003\177\234\313"));
assert_se(streq(t, "abc\\\\\\\"\\b\\f\\n\\r\\t\\v\\a\\003\\177\\234\\313"));
}
static void test_xescape(void) {
_cleanup_free_ char *t;
assert_se(t = xescape("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", ""));
assert_se(streq(t, "abc\\x5c\"\\x08\\x0c\\x0a\\x0d\\x09\\x0b\\x07\\x03\\x7f\\x9c\\xcb"));
}
/* Exercises xescape_full() with "b" as the extra character to escape, at every console width from
 * 0 to 59.
 *
 * eight_bits is passed through to xescape_full() and selects the reference output: with it unset the
 * trailing bytes \177\234\313 are expected as \x7f\x9c\xcb, with it set they are expected verbatim.
 * full_fit is the length of the respective reference string, i.e. the smallest width at which no
 * truncation is expected. */
static void test_xescape_full(bool eight_bits) {
        const char* escaped = !eight_bits ?
                "a\\x62c\\x5c\"\\x08\\x0c\\x0a\\x0d\\x09\\x0b\\x07\\x03\\x7f\\x9c\\xcb" :
                "a\\x62c\\x5c\"\\x08\\x0c\\x0a\\x0d\\x09\\x0b\\x07\\x03\177\234\313";
        const unsigned full_fit = !eight_bits ? 55 : 46;

        for (unsigned i = 0; i < 60; i++) {
                _cleanup_free_ char *t;
                size_t l;

                assert_se(t = xescape_full("abc\\\"\b\f\n\r\t\v\a\003\177\234\313", "b", i, eight_bits));

                /* i is unsigned, hence %u and not %d */
                log_info("%02u: %s", i, t);

                if (i >= full_fit)
                        /* Wide enough: the output must be the complete reference string. */
                        assert_se(streq(t, escaped));
                else if (i >= 3) {
                        l = strlen(t);

                        /* We need up to four columns, so up to three columns may be wasted */
                        assert_se(l == i || l == i - 1 || l == i - 2 || l == i - 3);

                        /* The part before the dots must be a prefix of the reference string.
                         * NOTE(review): for i < 6 some of these unsigned subtractions wrap around to
                         * a huge length; presumably strneq() then just compares up to the NUL —
                         * confirm. */
                        assert_se(strneq(t, escaped, i - 3) || strneq(t, escaped, i - 4) ||
                                  strneq(t, escaped, i - 5) || strneq(t, escaped, i - 6));
                        assert_se(endswith(t, "..."));
                } else {
                        /* Narrower than three columns: only (a prefix of) the dots fits. */
                        assert_se(strlen(t) == i);
                        assert_se(strneq(t, "...", i));
                }
        }
}
static void test_cunescape(void) {
@ -123,6 +158,9 @@ int main(int argc, char *argv[]) {
test_setup_logging(LOG_DEBUG);
test_cescape();
test_xescape();
test_xescape_full(false);
test_xescape_full(true);
test_cunescape();
test_shell_escape();
test_shell_maybe_quote();