* locales/zh_CN: Remove non-ASCII digit entry.
	* locales/ja_JP: Likewise.

2000-09-30  Bruno Haible  <haible@clisp.cons.org>

	* gen-unicode-ctype.c (is_digit, is_xdigit): Remove all non-ASCII
	digits.
	(is_alpha): Add them here.
	* locales/i18n (digit): Remove all non-ASCII digits.
	(alpha): Add them here.

	* dump-ctype.c: Pretty print.

2000-10-04  Ulrich Drepper  <drepper@redhat.com>
This commit is contained in:
Ulrich Drepper 2000-10-05 02:01:22 +00:00
parent 84ca314775
commit f00f95d188
6 changed files with 93 additions and 45 deletions

View file

@ -1,3 +1,18 @@
2000-10-04 Ulrich Drepper <drepper@redhat.com>
* locales/zh_CN: Remove non-ASCII digit entry.
* locales/ja_JP: Likewise.
2000-09-30 Bruno Haible <haible@clisp.cons.org>
* gen-unicode-ctype.c (is_digit, is_xdigit): Remove all non-ASCII
digits.
(is_alpha): Add them here.
* locales/i18n (digit): Remove all non-ASCII digits.
(alpha): Add them here.
* dump-ctype.c: Pretty print.
2000-10-04 Ulrich Drepper <drepper@redhat.com>
* charmaps/EUC-KR: Change \x5c mapping to U20a9.

View file

@ -113,7 +113,8 @@ static void dump_map (const char *map_name)
}
}
int main (int argc, char *argv[])
int
main (int argc, char *argv[])
{
size_t i;

View file

@ -166,21 +166,21 @@ fill_attributes (const char *unicodedata_filename)
int n;
lineno++;
n = getfield(stream, field0, ';');
n += getfield(stream, field1, ';');
n += getfield(stream, field2, ';');
n += getfield(stream, field3, ';');
n += getfield(stream, field4, ';');
n += getfield(stream, field5, ';');
n += getfield(stream, field6, ';');
n += getfield(stream, field7, ';');
n += getfield(stream, field8, ';');
n += getfield(stream, field9, ';');
n += getfield(stream, field10, ';');
n += getfield(stream, field11, ';');
n += getfield(stream, field12, ';');
n += getfield(stream, field13, ';');
n += getfield(stream, field14, '\n');
n = getfield (stream, field0, ';');
n += getfield (stream, field1, ';');
n += getfield (stream, field2, ';');
n += getfield (stream, field3, ';');
n += getfield (stream, field4, ';');
n += getfield (stream, field5, ';');
n += getfield (stream, field6, ';');
n += getfield (stream, field7, ';');
n += getfield (stream, field8, ';');
n += getfield (stream, field9, ';');
n += getfield (stream, field10, ';');
n += getfield (stream, field11, ';');
n += getfield (stream, field12, ';');
n += getfield (stream, field13, ';');
n += getfield (stream, field14, '\n');
if (n == 0)
break;
if (n != 15)
@ -196,21 +196,21 @@ fill_attributes (const char *unicodedata_filename)
{
/* Deal with a range. */
lineno++;
n = getfield(stream, field0, ';');
n += getfield(stream, field1, ';');
n += getfield(stream, field2, ';');
n += getfield(stream, field3, ';');
n += getfield(stream, field4, ';');
n += getfield(stream, field5, ';');
n += getfield(stream, field6, ';');
n += getfield(stream, field7, ';');
n += getfield(stream, field8, ';');
n += getfield(stream, field9, ';');
n += getfield(stream, field10, ';');
n += getfield(stream, field11, ';');
n += getfield(stream, field12, ';');
n += getfield(stream, field13, ';');
n += getfield(stream, field14, '\n');
n = getfield (stream, field0, ';');
n += getfield (stream, field1, ';');
n += getfield (stream, field2, ';');
n += getfield (stream, field3, ';');
n += getfield (stream, field4, ';');
n += getfield (stream, field5, ';');
n += getfield (stream, field6, ';');
n += getfield (stream, field7, ';');
n += getfield (stream, field8, ';');
n += getfield (stream, field9, ';');
n += getfield (stream, field10, ';');
n += getfield (stream, field11, ';');
n += getfield (stream, field12, ';');
n += getfield (stream, field13, ';');
n += getfield (stream, field14, '\n');
if (n != 15)
{
fprintf (stderr, "missing end range in '%s':%d\n",
@ -390,17 +390,35 @@ is_alpha (unsigned int ch)
|| (unicode_attributes[ch].category[0] == 'S'
&& unicode_attributes[ch].category[1] == 'o'
&& strstr (unicode_attributes[ch].name, " LETTER ")
!= NULL)));
!= NULL)
/* Consider all the non-ASCII digits as alphabetic.
ISO C 99 forbids us to have them in category "digit",
but we want iswalnum to return true on them. */
|| (unicode_attributes[ch].category[0] == 'N'
&& unicode_attributes[ch].category[1] == 'd'
&& !(ch >= 0x0030 && ch <= 0x0039))));
}
/* Return true if CH is a member of the "digit" class, i.e. one of the ten
   ASCII decimal digits U+0030..U+0039.

   SUSV2 would leave some latitude for this class, but ISO C 99 does not:
     7.25.2.1.5:
       The iswdigit function tests for any wide character that corresponds
       to a decimal-digit character (as defined in 5.2.1).
     5.2.1:
       the 10 decimal digits 0 1 2 3 4 5 6 7 8 9  */
static bool
is_digit (unsigned int ch)
{
#if 1
  /* Everything below U+0030 is rejected first; what remains only has to
     be checked against the upper bound U+0039.  */
  if (ch < 0x0030)
    return false;
  return ch <= 0x0039;
#else
  /* Disabled alternative: treat every assigned character of category "Nd"
     as a digit.
     Note: U+0BE7..U+0BEF and U+1369..U+1371 are digit systems without
     a zero.  Must add <0> in front of them by hand.  */
  return (unicode_attributes[ch].name != NULL
          && unicode_attributes[ch].category[0] == 'N'
          && unicode_attributes[ch].category[1] == 'd');
#endif
}
static bool
@ -455,9 +473,23 @@ is_cntrl (unsigned int ch)
/* Return true if CH is a member of the "xdigit" class, i.e. one of the
   ASCII characters 0-9, A-F or a-f.

   SUSV2 would leave some latitude for this class, but ISO C 99 does not:
     7.25.2.1.12:
       The iswxdigit function tests for any wide character that corresponds
       to a hexadecimal-digit character (as defined in 6.4.4.1).
     6.4.4.1:
       hexadecimal-digit: one of 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F  */
static bool
is_xdigit (unsigned int ch)
{
#if 1
  /* U+0030..U+0039: the decimal digits 0-9.  */
  if (ch >= 0x0030 && ch <= 0x0039)
    return true;

  /* U+0041..U+0046: the upper-case letters A-F.  */
  if (ch >= 0x0041 && ch <= 0x0046)
    return true;

  /* U+0061..U+0066: the lower-case letters a-f.  */
  return ch >= 0x0061 && ch <= 0x0066;
#else
  /* Disabled alternative: defer the decimal part to is_digit.  */
  return is_digit (ch)
         || (ch >= 0x0041 && ch <= 0x0046)
         || (ch >= 0x0061 && ch <= 0x0066);
#endif
}
static bool

View file

@ -305,13 +305,10 @@ alpha /
<UFE70>..<UFE72>;<UFE74>;<UFE76>..<UFEFC>;/
% HALFWIDTH AND FULLWIDTH FORMS/
<UFF21>..<UFF3A>;<UFF41>..<UFF5A>;<UFF66>..<UFFBE>;<UFFC2>..<UFFC7>;/
<UFFCA>..<UFFCF>;<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>
% The "digit" class of the "i18n" FDCC-set is reflecting
% the recommendations in TR 10176 annex A
digit /
% TABLE 1 BASIC LATIN/
<U0030>..<U0039>;/
<UFFCA>..<UFFCF>;<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>;/
% The non-ASCII number characters are included here because ISO C 99 /
% forbids us to classify them as digits; however, they behave more like /
% alphanumeric than like punctuation. /
% TABLE 15 and 16 ARABIC/
<U0660>..<U0669>;<U06F0>..<U06F9>;/
% TABLE 17 DEVANAGARI/
@ -349,6 +346,11 @@ digit /
% HALFWIDTH AND FULLWIDTH FORMS/
<UFF10>..<UFF19>
% The "digit" class must only contain the BASIC LATIN digits, says ISO C 99
% (sections 7.25.2.1.5 and 5.2.1).
digit /
<U0030>..<U0039>
outdigit <U0030>..<U0039>
space /
@ -602,6 +604,8 @@ print /
<UFFD2>..<UFFD7>;<UFFDA>..<UFFDC>;<UFFE0>..<UFFE6>;<UFFE8>..<UFFEE>;/
<UFFF9>..<UFFFD>
% The "xdigit" class must only contain the BASIC LATIN digits and A-F, a-f,
% says ISO C 99 (sections 7.25.2.1.12 and 6.4.4.1).
xdigit /
<U0030>..<U0039>;<U0041>..<U0046>;<U0061>..<U0066>

View file

@ -155,9 +155,7 @@ alpha <U0041>;<U0042>;<U0043>;<U0044>;<U0045>;<U0046>;<U0047>;<U0048>;/
<U0175>;<U00FD>;<U00FF>;<U0177>;<U017A>;<U017E>;<U017C>
digit <U0030>;<U0031>;<U0032>;<U0033>;<U0034>;/
<U0035>;<U0036>;<U0037>;<U0038>;<U0039>;/
<UFF10>;<UFF11>;<UFF12>;<UFF13>;<UFF14>;/
<UFF15>;<UFF16>;<UFF17>;<UFF18>;<UFF19>
<U0035>;<U0036>;<U0037>;<U0038>;<U0039>
xdigit <U0030>;<U0031>;<U0032>;<U0033>;<U0034>;/
<U0035>;<U0036>;<U0037>;<U0038>;<U0039>;/

View file

@ -90,9 +90,7 @@ alpha <U0041>..<U005A>;<U0061>..<U007A>;/
% Bopomofo/
<U3105>..<U3129>
digit <U0030>..<U0039>;/
% Fullwidth digits/
<UFF10>..<UFF19>
digit <U0030>..<U0039>
outdigit <U0030>..<U0039>