Fix posix/tst-regex by using UTF-8 and own test input

Problem reported by Stefan Liebler in:
https://sourceware.org/ml/libc-alpha/2019-08/msg00658.html
* posix/tst-regex.c: Convert this file from Latin-1 to UTF-8.
(do_test, test_expr): Adjust to the fact that this source file,
and the test data in ChangeLog.8, are now UTF-8 instead of Latin-1.
* posix/tst-regex.input: Copy from ChangeLog.old/ChangeLog.8,
so that it is now UTF-8.
This commit is contained in:
Paul Eggert 2019-08-29 02:34:13 -07:00
parent 0b3c9e57a4
commit a22a582e49
3 changed files with 46 additions and 36 deletions

View File

@ -1,3 +1,14 @@
2019-08-27 Paul Eggert <eggert@cs.ucla.edu>
Fix posix/tst-regex by using UTF-8 and own test input
Problem reported by Stefan Liebler in:
https://sourceware.org/ml/libc-alpha/2019-08/msg00658.html
* posix/tst-regex.c: Convert this file from Latin-1 to UTF-8.
(do_test, test_expr): Adjust to the fact that this source file,
and the test data in ChangeLog.8, are now UTF-8 instead of Latin-1.
* posix/tst-regex.input: Copy from ChangeLog.old/ChangeLog.8,
so that it is now UTF-8.
2019-08-28 Paul A. Clarke <pc@us.ibm.com>
* sysdeps/powerpc/fpu/fenv_libc.h (fegetenv_status_ISA300): Delete.

View File

@ -86,27 +86,26 @@ do_test (void)
close (fd);
/* We have to convert a few things from Latin-1 to UTF-8. */
cd = iconv_open ("UTF-8", "ISO-8859-1");
/* We have to convert a few things from UTF-8 to Latin-1. */
cd = iconv_open ("ISO-8859-1", "UTF-8");
if (cd == (iconv_t) -1)
error (EXIT_FAILURE, errno, "cannot get conversion descriptor");
/* For the second test we have to convert the file content to UTF-8.
Since the text is mostly ASCII it should be enough to allocate
twice as much memory for the UTF-8 text than for the Latin-1
text. */
umem = (char *) calloc (2, memlen);
/* For the second test we have to convert the file content to Latin-1.
This cannot grow the data. */
umem = (char *) malloc (memlen + 1);
if (umem == NULL)
error (EXIT_FAILURE, errno, "while allocating buffer");
inmem = mem;
inlen = memlen;
outmem = umem;
outlen = 2 * memlen - 1;
outlen = memlen;
iconv (cd, &inmem, &inlen, &outmem, &outlen);
umemlen = outmem - umem;
if (inlen != 0)
error (EXIT_FAILURE, errno, "cannot convert buffer");
umem[umemlen] = '\0';
#if defined _POSIX_CPUTIME && _POSIX_CPUTIME >= 0
# if _POSIX_CPUTIME == 0
@ -124,11 +123,11 @@ do_test (void)
/* Run the actual tests. All tests are run in a single-byte and a
multi-byte locale. */
result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 2, 2);
result = test_expr ("[äáàâéèêíìîñöóòôüúùû]", 4, 4);
result |= test_expr ("G.ran", 2, 3);
result |= test_expr ("G.\\{1\\}ran", 2, 3);
result |= test_expr ("G.*ran", 3, 44);
result |= test_expr ("[äáàâ]", 0, 0);
result |= test_expr ("[äáàâ]", 0, 0);
result |= test_expr ("Uddeborg", 2, 2);
result |= test_expr (".Uddeborg", 2, 2);
@ -151,27 +150,27 @@ test_expr (const char *expr, int expected, int expectedicase)
size_t outlen;
char *uexpr;
/* First test: search with an ISO-8859-1 locale. */
if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
printf ("\nTest \"%s\" with 8-bit locale\n", expr);
result = run_test (expr, mem, memlen, 0, expected);
printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
result |= run_test (expr, mem, memlen, 1, expectedicase);
printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
result |= run_test_backwards (expr, mem, memlen, 0, expected);
printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
expr);
result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
/* Second test: search with an UTF-8 locale. */
/* First test: search with an UTF-8 locale. */
if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
error (EXIT_FAILURE, 0, "cannot set locale de_DE.UTF-8");
printf ("\nTest \"%s\" with multi-byte locale\n", expr);
result = run_test (expr, mem, memlen, 0, expected);
printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
result |= run_test (expr, mem, memlen, 1, expectedicase);
printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
result |= run_test_backwards (expr, mem, memlen, 0, expected);
printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
expr);
result |= run_test_backwards (expr, mem, memlen, 1, expectedicase);
/* Second test: search with an ISO-8859-1 locale. */
if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
error (EXIT_FAILURE, 0, "cannot set locale de_DE.ISO-8859-1");
inmem = (char *) expr;
inlen = strlen (expr);
outlen = inlen * MB_CUR_MAX;
outlen = inlen;
outmem = uexpr = alloca (outlen + 1);
memset (outmem, '\0', outlen + 1);
iconv (cd, &inmem, &inlen, &outmem, &outlen);
@ -179,13 +178,13 @@ test_expr (const char *expr, int expected, int expectedicase)
error (EXIT_FAILURE, errno, "cannot convert expression");
/* Run the tests. */
printf ("\nTest \"%s\" with multi-byte locale\n", expr);
printf ("\nTest \"%s\" with 8-bit locale\n", expr);
result |= run_test (uexpr, umem, umemlen, 0, expected);
printf ("\nTest \"%s\" with multi-byte locale, case insensitive\n", expr);
printf ("\nTest \"%s\" with 8-bit locale, case insensitive\n", expr);
result |= run_test (uexpr, umem, umemlen, 1, expectedicase);
printf ("\nTest \"%s\" backwards with multi-byte locale\n", expr);
printf ("\nTest \"%s\" backwards with 8-bit locale\n", expr);
result |= run_test_backwards (uexpr, umem, umemlen, 0, expected);
printf ("\nTest \"%s\" backwards with multi-byte locale, case insensitive\n",
printf ("\nTest \"%s\" backwards with 8-bit locale, case insensitive\n",
expr);
result |= run_test_backwards (uexpr, umem, umemlen, 1, expectedicase);

View File

@ -6025,7 +6025,7 @@
(Host Address Functions): Use uint32_t consequently and add a
number of clarifications for IPv4/IPv6, classless addresses.
(Internet Namespace): Added some paragraphs about IPv6.
Based on suggestions by Francesco Potorti` <F.Potorti@cnuce.cnr.it>.
Based on suggestions by Francesco Potortì <F.Potorti@cnuce.cnr.it>.
1998-04-05 Philip Blundell <Philip.Blundell@pobox.com>
@ -6565,7 +6565,7 @@
* manual/examples/mkfsock.c (make_named_socket): Removed blank
lines for clarification.
(make_named_socket): Use strncpy instead of strcpy.
Reported by Francesco Potorti` <F.Potorti@cnuce.cnr.it>.
Reported by Francesco Potortì <F.Potorti@cnuce.cnr.it>.
1998-03-30 13:28 Ulrich Drepper <drepper@cygnus.com>
@ -7975,7 +7975,7 @@
* sysdeps/generic/getresuid.c (__getresuid): Use ISO C
declaration style to avoid warnings.
1998-03-06 11:48 Mark M._Kettenis <kettenis@hall.phys.uva.nl>
1998-03-06 11:48 Mark M. Kettenis <kettenis@hall.phys.uva.nl>
* elf/rtld.c (process_dl_debug): Fix typo: "DL_DEBUG" ->
"LD_DEBUG".
@ -8314,7 +8314,7 @@
1998-02-27 Ulrich Drepper <drepper@cygnus.com>
* misc/efgcvt_r.c (APPEND): Handle printing of 0.0 correctly.
Reported by Göran Uddeborg <goeran@uddeborg.pp.se>.
Reported by Göran Uddeborg <goeran@uddeborg.pp.se>.
* misc/tst-efgcvt.c (ecvt_tests): Add new test case for reported
bug.
@ -8322,7 +8322,7 @@
1998-02-25 Andreas Jaeger <aj@arthur.rhein-neckar.de>
* manual/arith.texi (Old-style number conversion): Correct
typo. Reported by Göran Uddeborg <goeran@uddeborg.pp.se>.
typo. Reported by Göran Uddeborg <goeran@uddeborg.pp.se>.
1998-02-27 Ulrich Drepper <drepper@cygnus.com>
@ -12044,7 +12044,7 @@
* libio/stdio.h: Correct comment of sys_nerr/sys_errlist.
1997-11-25 Paul Eggert <eggert@shade.twinsun.com>
1997-11-25 Paul Eggert <eggert@twinsun.com>
* strftime.c (strftime):
No longer any need to undef or declare if emacs is defined.