gconv: Correct Big5-HKSCS conversion to preserve all state bits. [BZ #25744]

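This patch corrects the Big5-HKSCS converter to preserve the lowest 3 bits of
the mbstate_t __count data member when the converter encounters an incomplete
multibyte character.  The converter queues a pending character in the bits
above those (__count >> 3) and previously assigned to __count as a whole, which
overwrote the lowest 3 bits.  It now stores a queued value as
(value << 3) | (__count & 7), clears the queue with __count &= 7, and makes
EMIT_SHIFT_TO_INIT test only __count >> 3.  A test that feeds mbrtowc one byte
at a time is added.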

This fixes BZ #25744.

Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
This commit is contained in:
Tom Honermann 2022-06-30 08:52:13 -04:00 committed by Adhemerval Zanella
parent 3c99806989
commit 598f790fb1
2 changed files with 73 additions and 8 deletions

View File

@ -17769,7 +17769,7 @@ static struct
the output state to the initial state. This has to be done during the the output state to the initial state. This has to be done during the
flushing. */ flushing. */
#define EMIT_SHIFT_TO_INIT \ #define EMIT_SHIFT_TO_INIT \
if (data->__statep->__count != 0) \ if ((data->__statep->__count >> 3) != 0) \
{ \ { \
if (FROM_DIRECTION) \ if (FROM_DIRECTION) \
{ \ { \
@ -17778,7 +17778,7 @@ static struct
/* Write out the last character. */ \ /* Write out the last character. */ \
*((uint32_t *) outbuf) = data->__statep->__count >> 3; \ *((uint32_t *) outbuf) = data->__statep->__count >> 3; \
outbuf += sizeof (uint32_t); \ outbuf += sizeof (uint32_t); \
data->__statep->__count = 0; \ data->__statep->__count &= 7; \
} \ } \
else \ else \
/* We don't have enough room in the output buffer. */ \ /* We don't have enough room in the output buffer. */ \
@ -17792,7 +17792,7 @@ static struct
uint32_t lasttwo = data->__statep->__count >> 3; \ uint32_t lasttwo = data->__statep->__count >> 3; \
*outbuf++ = (lasttwo >> 8) & 0xff; \ *outbuf++ = (lasttwo >> 8) & 0xff; \
*outbuf++ = lasttwo & 0xff; \ *outbuf++ = lasttwo & 0xff; \
data->__statep->__count = 0; \ data->__statep->__count &= 7; \
} \ } \
else \ else \
/* We don't have enough room in the output buffer. */ \ /* We don't have enough room in the output buffer. */ \
@ -17878,7 +17878,7 @@ static struct
\ \
/* Otherwise store only the first character now, and \ /* Otherwise store only the first character now, and \
put the second one into the queue. */ \ put the second one into the queue. */ \
*statep = ch2 << 3; \ *statep = (ch2 << 3) | (*statep & 7); \
/* Tell the caller why we terminate the loop. */ \ /* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \ result = __GCONV_FULL_OUTPUT; \
break; \ break; \
@ -17895,7 +17895,7 @@ static struct
} \ } \
else \ else \
/* Clear the queue and proceed to output the saved character. */ \ /* Clear the queue and proceed to output the saved character. */ \
*statep = 0; \ *statep &= 7; \
\ \
put32 (outptr, ch); \ put32 (outptr, ch); \
outptr += 4; \ outptr += 4; \
@ -17946,7 +17946,7 @@ static struct
} \ } \
*outptr++ = (ch >> 8) & 0xff; \ *outptr++ = (ch >> 8) & 0xff; \
*outptr++ = ch & 0xff; \ *outptr++ = ch & 0xff; \
*statep = 0; \ *statep &= 7; \
inptr += 4; \ inptr += 4; \
continue; \ continue; \
\ \
@ -17959,7 +17959,7 @@ static struct
} \ } \
*outptr++ = (lasttwo >> 8) & 0xff; \ *outptr++ = (lasttwo >> 8) & 0xff; \
*outptr++ = lasttwo & 0xff; \ *outptr++ = lasttwo & 0xff; \
*statep = 0; \ *statep &= 7; \
continue; \ continue; \
} \ } \
\ \
@ -17996,7 +17996,7 @@ static struct
/* Check for possible combining character. */ \ /* Check for possible combining character. */ \
if (__glibc_unlikely (ch == 0xca || ch == 0xea)) \ if (__glibc_unlikely (ch == 0xca || ch == 0xea)) \
{ \ { \
*statep = ((cp[0] << 8) | cp[1]) << 3; \ *statep = (((cp[0] << 8) | cp[1]) << 3) | (*statep & 7); \
inptr += 4; \ inptr += 4; \
continue; \ continue; \
} \ } \

View File

@ -128,6 +128,71 @@ check_conversion (struct testdata test)
printf ("error: Result of third conversion was wrong.\n"); printf ("error: Result of third conversion was wrong.\n");
err++; err++;
} }
/* Now perform the same test as above consuming one byte at a time. */
mbs = test.input;
memset (&st, 0, sizeof (st));
/* Consume the first byte; expect an incomplete multibyte character. */
ret = mbrtowc (&wc, mbs, 1, &st);
if (ret != -2)
{
printf ("error: First byte conversion returned %zd.\n", ret);
err++;
}
/* Advance past the first consumed byte. */
mbs += 1;
/* Consume the second byte; expect the first wchar_t. */
ret = mbrtowc (&wc, mbs, 1, &st);
if (ret != 1)
{
printf ("error: Second byte conversion returned %zd.\n", ret);
err++;
}
/* Advance past the second consumed byte. */
mbs += 1;
if (wc != test.expected[0])
{
printf ("error: Result of first wchar_t conversion was wrong.\n");
err++;
}
/* Consume no bytes; expect the second wchar_t. */
ret = mbrtowc (&wc, mbs, 1, &st);
if (ret != 0)
{
printf ("error: First attempt of third byte conversion returned %zd.\n", ret);
err++;
}
/* Do not advance past the third byte. */
mbs += 0;
if (wc != test.expected[1])
{
printf ("error: Result of second wchar_t conversion was wrong.\n");
err++;
}
/* After the second wchar_t conversion, the converter should be in
the initial state since the two input BIG5-HKSCS bytes have been
consumed and the two wchar_t's have been output. */
if (mbsinit (&st) == 0)
{
printf ("error: Converter not in initial state.\n");
err++;
}
/* Consume the third byte; expect the third wchar_t. */
ret = mbrtowc (&wc, mbs, 1, &st);
if (ret != 1)
{
printf ("error: Third byte conversion returned %zd.\n", ret);
err++;
}
/* Advance past the third consumed byte. */
mbs += 1;
if (wc != test.expected[2])
{
printf ("error: Result of third wchar_t conversion was wrong.\n");
err++;
}
/* Return 0 if we saw no errors. */ /* Return 0 if we saw no errors. */
return err; return err;
} }