* stdio-common/vfscanf.c: Handle localized digits etc for floating
	point numbers.
	Patch mostly by Hamed Malek <hamed@farsiweb.info>.
This commit is contained in:
Ulrich Drepper 2007-02-19 04:40:58 +00:00
parent ac2ca0229a
commit ecb72bd5b6
5 changed files with 296 additions and 17 deletions

View file

@ -1,5 +1,10 @@
2007-02-18 Ulrich Drepper <drepper@redhat.com>
[BZ #2211]
* stdio-common/vfscanf.c: Handle localized digits etc for floating
point numbers.
Patch mostly by Hamed Malek <hamed@farsiweb.info>.
* stdio-common/vfscanf.c: Fix problems in width accounting.
* stdio-common/tst-sscanf.c (double_tests): New tests.
(main): Hook them up.

View file

@ -1,5 +1,9 @@
2007-02-18 Ulrich Drepper <drepper@redhat.com>
* Makefile (tests): Add tst-sscanf.
(LOCALES): Add fa_IR.UTF-8.
* tst-sscanf.c: New file.
* da_DK.in: Adjust for unified collation.
* locales/vi_VN: Don't define HOK here as well.

View file

@ -1,4 +1,4 @@
# Copyright (C) 1996-2002, 2003, 2005 Free Software Foundation, Inc.
# Copyright (C) 1996-2002, 2003, 2005, 2007 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@ -93,7 +93,7 @@ locale_test_suite := tst_iswalnum tst_iswalpha tst_iswcntrl \
tests = $(locale_test_suite) tst-digits tst-setlocale bug-iconv-trans \
tst-leaks tst-mbswcs6 tst-xlocale1 tst-xlocale2 bug-usesetlocale \
tst-strfmon1
tst-strfmon1 tst-sscanf
ifeq (yes,$(build-shared))
ifneq (no,$(PERL))
tests: $(objpfx)mtrace-tst-leaks
@ -133,7 +133,7 @@ LOCALES := de_DE.ISO-8859-1 de_DE.UTF-8 en_US.ANSI_X3.4-1968 \
en_US.ISO-8859-1 ja_JP.EUC-JP da_DK.ISO-8859-1 \
hr_HR.ISO-8859-2 sv_SE.ISO-8859-1 ja_JP.SJIS fr_FR.ISO-8859-1 \
vi_VN.TCVN5712-1 nb_NO.ISO-8859-1 nn_NO.ISO-8859-1 \
tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW
tr_TR.UTF-8 cs_CZ.UTF-8 zh_TW.EUC-TW fa_IR.UTF-8
LOCALE_SRCS := $(shell echo "$(LOCALES)"|sed 's/\([^ .]*\)[^ ]*/\1/g')
CHARMAPS := $(shell echo "$(LOCALES)" | \
sed -e 's/[^ .]*[.]\([^ ]*\)/\1/g' -e s/SJIS/SHIFT_JIS/g)

56
localedata/tst-sscanf.c Normal file
View file

@ -0,0 +1,56 @@
#include <stdio.h>
#include <locale.h>
#include <assert.h>
#define P0 "\xDB\xB0"
#define P1 "\xDB\xB1"
#define P2 "\xDB\xB2"
#define P3 "\xDB\xB3"
#define P4 "\xDB\xB4"
#define P5 "\xDB\xB5"
#define P6 "\xDB\xB6"
#define P7 "\xDB\xB7"
#define P8 "\xDB\xB8"
#define P9 "\xDB\xB9"
#define PD "\xd9\xab"
#define PT "\xd9\xac"
/* Scan S with sscanf using FORMAT, which is expected to contain a
   single float conversion, and compare the converted value with N.
   Return 0 when exactly one item was converted and it equals N.
   Otherwise print a diagnostic to stdout and return 1.  */
static int
check_sscanf (const char *s, const char *format, const float n)
{
  float parsed;
  const int converted = sscanf (s, format, &parsed);

  /* PARSED is only inspected once the conversion is known to have
     succeeded; the short-circuit keeps it unread otherwise.  */
  if (converted == 1 && parsed == n)
    return 0;

  if (converted != 1)
    printf ("nothing found for \"%s\"\n", s);
  else
    printf ("got %f expected %f from \"%s\"\n", parsed, n, s);

  return 1;
}
/* Exercise sscanf's `I' flag for floating-point conversions using
   Persian digits and separators.  Returns 0 if every check passes and
   nonzero if the locale cannot be selected or any check fails.  */
static int
do_test (void)
{
  /* The inputs below are hard-coded UTF-8 byte sequences (the P* macros)
     and the Makefile builds the test locale as fa_IR.UTF-8, so name the
     codeset explicitly instead of relying on how a bare "fa_IR" is
     resolved.  */
  if (setlocale (LC_ALL, "fa_IR.UTF-8") == NULL)
    {
      puts ("cannot set fa_IR.UTF-8 locale");
      return 1;
    }

  /* Accumulate failures with |= so that every case runs and reports
     its own diagnostic even after an earlier one failed.  */

  /* Explicit field width together with the I flag.  */
  int r = check_sscanf (P3 PD P1 P4, "%I8f", 3.14);
  /* Thousands separator, accepted because of the ' flag.  */
  r |= check_sscanf (P3 PT P1 P4 P5, "%I'f", 3145);
  /* Plain localized digits with a localized decimal point.  */
  r |= check_sscanf (P3 PD P1 P4 P1 P5 P9, "%If", 3.14159);
  /* ASCII sign in front of localized digits.  */
  r |= check_sscanf ("-" P3 PD P1 P4 P1 P5, "%If", -3.1415);
  /* Sign followed directly by the decimal point (no integer part).  */
  r |= check_sscanf ("+" PD P1 P4 P1 P5, "%If", +.1415);
  /* ASCII exponent marker and sign followed by a localized digit.  */
  r |= check_sscanf (P3 PD P1 P4 P1 P5 "e+" P2, "%Ie", 3.1415e+2);

  return r;
}
#define TEST_FUNCTION do_test ()
#include "../test-skeleton.c"

View file

@ -1,5 +1,4 @@
/* Copyright (C) 1991-2002, 2003, 2004, 2005, 2006, 2007
Free Software Foundation, Inc.
/* Copyright (C) 1991-2006, 2007 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@ -1264,13 +1263,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
cmpp = mbdigits[n];
while ((unsigned char) *cmpp == c && avail > 0)
while ((unsigned char) *cmpp == c && avail >= 0)
{
if (*++cmpp == '\0')
break;
else
{
if ((c = inchar ()) == EOF)
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@ -1317,13 +1316,13 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
int avail = width > 0 ? width : INT_MAX;
cmpp = mbdigits[n];
while ((unsigned char) *cmpp == c && avail > 0)
while ((unsigned char) *cmpp == c && avail >= 0)
{
if (*++cmpp == '\0')
break;
else
{
if ((c = inchar ()) == EOF)
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@ -1378,14 +1377,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
const char *cmpp = thousands;
int avail = width > 0 ? width : INT_MAX;
while ((unsigned char) *cmpp == c && avail > 0)
while ((unsigned char) *cmpp == c && avail >= 0)
{
ADDW (c);
if (*++cmpp == '\0')
break;
else
{
if ((c = inchar ()) == EOF)
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@ -1450,14 +1449,14 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
const char *cmpp = thousands;
int avail = width > 0 ? width : INT_MAX;
while ((unsigned char) *cmpp == c && avail > 0)
while ((unsigned char) *cmpp == c && avail >= 0)
{
ADDW (c);
if (*++cmpp == '\0')
break;
else
{
if ((c = inchar ()) == EOF)
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@ -1753,12 +1752,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
if (! got_dot)
{
while ((unsigned char) *cmpp == c && avail > 0)
while ((unsigned char) *cmpp == c && avail >= 0)
if (*++cmpp == '\0')
break;
else
{
if (inchar () == EOF)
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@ -1790,12 +1789,12 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
++cmp2p;
if (cmp2p - thousands == cmpp - decimal)
{
while ((unsigned char) *cmp2p == c && avail > 0)
while ((unsigned char) *cmp2p == c && avail >= 0)
if (*++cmp2p == '\0')
break;
else
{
if (inchar () == EOF)
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
@ -1828,6 +1827,221 @@ _IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
--width;
}
wctrans_t map;
if (__builtin_expect ((flags & I18N) != 0, 0)
/* Hexadecimal floats make no sense, mixing localized
digits with ASCII letters. */
&& !is_hexa
/* Minimum requirement. */
&& (wpsize == 0 || got_dot)
&& (map = __wctrans ("to_inpunct")) != NULL)
{
/* Reget the first character. */
inchar ();
/* Localized digits, decimal points, and thousands
separator. */
wint_t wcdigits[12];
/* First get decimal equivalent to check if we read it
or not. */
wcdigits[11] = __towctrans (L'.', map);
/* If we have not read any character or have just read
locale decimal point which matches the decimal point
for localized FP numbers, then we may have localized
digits. Note, we test GOT_DOT above. */
#ifdef COMPILE_WSCANF
if (wpsize == 0 || (wpsize == 1 && wcdigits[11] == decimal))
#else
char mbdigits[12][MB_LEN_MAX + 1];
mbstate_t state;
memset (&state, '\0', sizeof (state));
bool match_so_far = wpsize == 0;
size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
if (mblen != (size_t) -1)
{
mbdigits[11][mblen] = '\0';
match_so_far |= (wpsize == strlen (decimal)
&& strcmp (decimal, mbdigits[11]) == 0);
}
else
{
size_t decimal_len = strlen (decimal);
/* This should always be the case but the data comes
from a file. */
if (decimal_len <= MB_LEN_MAX)
{
match_so_far |= wpsize == decimal_len;
memcpy (mbdigits[11], decimal, decimal_len + 1);
}
else
match_so_far = false;
}
if (match_so_far)
#endif
{
int have_locthousands = true;
/* Now get the digits and the thousands-sep equivalents. */
for (int n = 0; n < 11; ++n)
{
if (n < 10)
wcdigits[n] = __towctrans (L'0' + n, map);
else if (n == 10)
wcdigits[10] = __towctrans (L',', map);
#ifndef COMPILE_WSCANF
memset (&state, '\0', sizeof (state));
size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
&state);
if (mblen == (size_t) -1)
{
if (n == 10)
{
if (thousands == NULL || (flags & GROUP) == 0)
have_locthousands = false;
else
{
size_t thousands_len = strlen (thousands);
if (thousands_len <= MB_LEN_MAX)
memcpy (mbdigits[10], thousands,
thousands_len + 1);
else
have_locthousands = false;
}
}
else
/* Ignore checking against localized digits. */
goto no_i18nflt;
}
else
mbdigits[n][mblen] = '\0';
#endif
}
/* Start checking against localized digits, if
conversion is done correctly. */
while (1)
{
if (got_e && wp[wpsize - 1] == exp_char
&& (c == L_('-') || c == L_('+')))
ADDW (c);
else if (wpsize > 0 && !got_e
&& (CHAR_T) TOLOWER (c) == exp_char)
{
ADDW (exp_char);
got_e = got_dot = 1;
}
else
{
/* Check against localized digits, decimal point,
and thousands separator. */
int n;
for (n = 0; n < 12; ++n)
{
#ifdef COMPILE_WSCANF
if (c == wcdigits[n])
{
if (n < 10)
ADDW (L_('0') + n);
else if (n == 11 && !got_dot)
{
ADDW (decimal);
got_dot = 1;
}
else if (n == 10 && have_locthousands
&& ! got_dot)
ADDW (thousands);
else
/* The last read character is not part
of the number anymore. */
n = 12;
break;
}
#else
const char *cmpp = mbdigits[n];
int avail = width > 0 ? width : INT_MAX;
while ((unsigned char) *cmpp == c && avail >= 0)
if (*++cmpp == '\0')
break;
else
{
if (avail == 0 || inchar () == EOF)
break;
--avail;
}
if (*cmpp == '\0')
{
if (width > 0)
width = avail;
if (n < 10)
ADDW (L_('0') + n);
else if (n == 11 && !got_dot)
{
/* Add all the characters. */
for (cmpp = decimal; *cmpp != '\0';
++cmpp)
ADDW ((unsigned char) *cmpp);
got_dot = 1;
}
else if (n == 10 && (flags & GROUP) != 0
&& thousands != NULL && ! got_dot)
{
/* Add all the characters. */
for (cmpp = thousands; *cmpp != '\0';
++cmpp)
ADDW ((unsigned char) *cmpp);
}
else
/* The last read character is not part
of the number anymore. */
n = 12;
break;
}
/* We are pushing all read characters back. */
if (cmpp > mbdigits[n])
{
ungetc (c, s);
while (--cmpp > mbdigits[n])
ungetc_not_eof ((unsigned char) *cmpp, s);
c = (unsigned char) *cmpp;
}
#endif
}
if (n >= 12)
{
/* The last read character is not part
of the number anymore. */
ungetc (c, s);
break;
}
}
if (width == 0 || inchar () == EOF)
break;
if (width > 0)
--width;
}
}
#ifndef COMPILE_WSCANF
no_i18nflt:
;
#endif
}
/* Have we read any character? If we try to read a number
in hexadecimal notation and we have read only the `0x'
prefix or no exponent this is an error. */