string-util: teach strip_tab_ansi() to deal with CSO sequences

With the recent terminal_urlify() APIs we'll now sometimes generate
clickable link CSO sequences. Hence we should also be able to remove
them again from strings. This beefs up the logic to do so.

Follow-up for: 23b27b39d2
This commit is contained in:
Lennart Poettering 2018-04-20 15:16:13 +02:00
parent 679def2a06
commit 695a944c64
2 changed files with 58 additions and 8 deletions

View File

@ -694,7 +694,8 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
enum {
STATE_OTHER,
STATE_ESCAPE,
STATE_BRACKET
STATE_CSI,
STATE_CSO,
} state = STATE_OTHER;
char *obuf = NULL;
size_t osz = 0, isz, shift[2] = {};
@ -703,7 +704,17 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
assert(ibuf);
assert(*ibuf);
/* Strips ANSI color and replaces TABs by 8 spaces */
/* This does three things:
*
* 1. Replaces TABs by 8 spaces
* 2. Strips ANSI color sequences (a subset of CSI), i.e. ESC '[' 'm' sequences
* 3. Strips ANSI operating system sequences (CSO), i.e. ESC ']' BEL sequences
*
* Everything else will be left as it is. In particular other ANSI sequences are left as they are, as are any
* other special characters. Truncated ANSI sequences are left-as is too. This call is supposed to suppress the
* most basic formatting noise, but nothing else.
*
* Why care for CSO sequences? Well, to undo what terminal_urlify() and friends generate. */
isz = _isz ? *_isz : strlen(*ibuf);
@ -738,8 +749,11 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
fputc('\x1B', f);
advance_offsets(i - *ibuf, highlight, shift, 1);
break;
} else if (*i == '[') {
state = STATE_BRACKET;
} else if (*i == '[') { /* ANSI CSI */
state = STATE_CSI;
begin = i + 1;
} else if (*i == ']') { /* ANSI CSO */
state = STATE_CSO;
begin = i + 1;
} else {
fputc('\x1B', f);
@ -750,10 +764,10 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
break;
case STATE_BRACKET:
case STATE_CSI:
if (i >= *ibuf + isz || /* EOT */
(!(*i >= '0' && *i <= '9') && !IN_SET(*i, ';', 'm'))) {
if (i >= *ibuf + isz || /* EOT */
!strchr("01234567890;m", *i)) { /* … or invalid chars in sequence */
fputc('\x1B', f);
fputc('[', f);
advance_offsets(i - *ibuf, highlight, shift, 2);
@ -761,6 +775,21 @@ char *strip_tab_ansi(char **ibuf, size_t *_isz, size_t highlight[2]) {
i = begin-1;
} else if (*i == 'm')
state = STATE_OTHER;
break;
case STATE_CSO:
if (i >= *ibuf + isz || /* EOT … */
(*i != '\a' && (uint8_t) *i < 32U) || (uint8_t) *i > 126U) { /* … or invalid chars in sequence */
fputc('\x1B', f);
fputc(']', f);
advance_offsets(i - *ibuf, highlight, shift, 2);
state = STATE_OTHER;
i = begin-1;
} else if (*i == '\a')
state = STATE_OTHER;
break;
}
}

View File

@ -7,12 +7,14 @@
#include <stdio.h>
#include "alloc-util.h"
#include "string-util.h"
#include "terminal-util.h"
#include "util.h"
int main(int argc, char *argv[]) {
char *p;
_cleanup_free_ char *urlified = NULL, *q = NULL, *qq = NULL;
char *p, *z;
assert_se(p = strdup("\tFoobar\tbar\twaldo\t"));
assert_se(strip_tab_ansi(&p, NULL, NULL));
@ -36,5 +38,24 @@ int main(int argc, char *argv[]) {
assert_se(streq(p, "\x1B[waldo"));
free(p);
assert_se(terminal_urlify_path("/etc/fstab", "i am a fabulous link", &urlified) >= 0);
assert_se(p = strjoin("something ", urlified, " something-else"));
assert_se(q = strdup(p));
printf("<%s>\n", p);
assert_se(strip_tab_ansi(&p, NULL, NULL));
printf("<%s>\n", p);
assert_se(streq(p, "something i am a fabulous link something-else"));
p = mfree(p);
/* Truncate the formatted string in the middle of an ANSI sequence (in which case we shouldn't touch the
* incomplete sequence) */
z = strstr(q, "fstab");
if (z) {
*z = 0;
assert_se(qq = strdup(q));
assert_se(strip_tab_ansi(&q, NULL, NULL));
assert_se(streq(q, qq));
}
return 0;
}