shared/utf8: add utf8_is_valid_n()
Sometimes we need to check strings without the terminating NUL. Add a variant that does that.
This commit is contained in:
parent
e12b6e1951
commit
80ab31a435
|
@ -150,18 +150,22 @@ bool utf8_is_printable_newline(const char* str, size_t length, bool allow_newlin
|
|||
return true;
|
||||
}
|
||||
|
||||
char *utf8_is_valid(const char *str) {
|
||||
const char *p;
|
||||
char *utf8_is_valid_n(const char *str, size_t len_bytes) {
|
||||
/* Check if the string is composed of valid utf8 characters. If length len_bytes is given, stop after
|
||||
* len_bytes. Otherwise, stop at NUL. */
|
||||
|
||||
assert(str);
|
||||
|
||||
p = str;
|
||||
while (*p) {
|
||||
for (const char *p = str; len_bytes != (size_t) -1 ? (size_t) (p - str) < len_bytes : *p != '\0'; ) {
|
||||
int len;
|
||||
|
||||
len = utf8_encoded_valid_unichar(p, (size_t) -1);
|
||||
if (len < 0)
|
||||
return NULL;
|
||||
if (_unlikely_(*p == '\0') && len_bytes != (size_t) -1)
|
||||
return NULL; /* embedded NUL */
|
||||
|
||||
len = utf8_encoded_valid_unichar(p,
|
||||
len_bytes != (size_t) -1 ? len_bytes - (p - str) : (size_t) -1);
|
||||
if (_unlikely_(len < 0))
|
||||
return NULL; /* invalid character */
|
||||
|
||||
p += len;
|
||||
}
|
||||
|
|
|
@ -14,7 +14,10 @@
|
|||
|
||||
bool unichar_is_valid(char32_t c);
|
||||
|
||||
char *utf8_is_valid(const char *s) _pure_;
|
||||
char *utf8_is_valid_n(const char *str, size_t len_bytes) _pure_;
|
||||
/* Convenience wrapper around utf8_is_valid_n(): passes (size_t) -1 as len_bytes, i.e. no explicit
 * byte length is supplied, and returns whatever utf8_is_valid_n() returns for s. */
static inline char *utf8_is_valid(const char *s) {
|
||||
return utf8_is_valid_n(s, (size_t) -1);
|
||||
}
|
||||
char *ascii_is_valid(const char *s) _pure_;
|
||||
char *ascii_is_valid_n(const char *str, size_t len);
|
||||
|
||||
|
|
|
@ -18,6 +18,25 @@ static void test_utf8_is_printable(void) {
|
|||
assert_se(utf8_is_printable("\t", 1));
|
||||
}
|
||||
|
||||
/* Exercises utf8_is_valid_n() with explicit byte lengths on three string literals, checking
 * prefixes, the full string, and a length that reaches one byte past the visible characters. */
static void test_utf8_n_is_valid(void) {
|
||||
/* Log the name of the running test. */
log_info("/* %s */", __func__);
|
||||
|
||||
/* The literal has 22 characters: 21 and 22 bytes are expected to be valid, while 23 bytes
 * would include the literal's terminating NUL and is expected to be rejected. */
assert_se( utf8_is_valid_n("ascii is valid unicode", 21));
|
||||
assert_se( utf8_is_valid_n("ascii is valid unicode", 22));
|
||||
assert_se(!utf8_is_valid_n("ascii is valid unicode", 23));
|
||||
/* "\342\204\242" is a single three-byte sequence: zero bytes is expected to be valid, a cut
 * after 1 or 2 bytes invalid, all 3 bytes valid, and 4 bytes (including the NUL) invalid. */
assert_se( utf8_is_valid_n("\342\204\242", 0));
|
||||
assert_se(!utf8_is_valid_n("\342\204\242", 1));
|
||||
assert_se(!utf8_is_valid_n("\342\204\242", 2));
|
||||
assert_se( utf8_is_valid_n("\342\204\242", 3));
|
||||
assert_se(!utf8_is_valid_n("\342\204\242", 4));
|
||||
/* "<ZZ>" is four single-byte characters: every length from 0 to 4 is expected to be valid,
 * while 5 bytes would include the terminating NUL and is expected to be rejected. */
assert_se( utf8_is_valid_n("<ZZ>", 0));
|
||||
assert_se( utf8_is_valid_n("<ZZ>", 1));
|
||||
assert_se( utf8_is_valid_n("<ZZ>", 2));
|
||||
assert_se( utf8_is_valid_n("<ZZ>", 3));
|
||||
assert_se( utf8_is_valid_n("<ZZ>", 4));
|
||||
assert_se(!utf8_is_valid_n("<ZZ>", 5));
|
||||
}
|
||||
|
||||
static void test_utf8_is_valid(void) {
|
||||
log_info("/* %s */", __func__);
|
||||
|
||||
|
@ -216,6 +235,7 @@ static void test_utf8_to_utf16(void) {
|
|||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
test_utf8_n_is_valid();
|
||||
test_utf8_is_valid();
|
||||
test_utf8_is_printable();
|
||||
test_ascii_is_valid();
|
||||
|
|
Loading…
Reference in a new issue