* iconv/iconv_prog.c (main): Provide an error message that identifies
	the wrong encoding.

2002-09-22  Bruno Haible  <bruno@clisp.org>

	* iconvdata/tscii.c: New file.
	* iconvdata/testdata/TSCII: New file.
	* iconvdata/testdata/TSCII..UTF8: New file.
	* iconvdata/TSCII.precomposed: New file.
	* iconvdata/TSCII.irreversible: New file.
	* iconvdata/gconv-modules (TSCII): New module.
	* iconvdata/Makefile (modules): Add TSCII.
	(distribute): Add tscii.c.
	* iconvdata/tst-table-from.c (try, utf8_decode, main): Double output
	buffer size.
	* iconvdata/tst-tables.sh: Add TSCII.
	* iconvdata/TESTS: Add TSCII.

2002-09-22  Bruno Haible  <bruno@clisp.org>

	Revert 2002-04-18 patch.
	* iconvdata/euc-jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
	FROM_DIRECTION): Make the FROM direction stateful again.
	* iconvdata/shift_jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
	FROM_DIRECTION): Likewise.

2002-09-22  Bruno Haible  <bruno@clisp.org>
This commit is contained in:
Ulrich Drepper 2002-09-24 04:19:03 +00:00
parent f2a444335f
commit fa00744e51
16 changed files with 1659 additions and 181 deletions

View File

@ -1,3 +1,31 @@
2002-09-22 Bruno Haible <bruno@clisp.org>
* iconv/iconv_prog.c (main): Provide an error message that identifies
the wrong encoding.
2002-09-22 Bruno Haible <bruno@clisp.org>
* iconvdata/tscii.c: New file.
* iconvdata/testdata/TSCII: New file.
* iconvdata/testdata/TSCII..UTF8: New file.
* iconvdata/TSCII.precomposed: New file.
* iconvdata/TSCII.irreversible: New file.
* iconvdata/gconv-modules (TSCII): New module.
* iconvdata/Makefile (modules): Add TSCII.
(distribute): Add tscii.c.
* iconvdata/tst-table-from.c (try, utf8_decode, main): Double output
buffer size.
* iconvdata/tst-tables.sh: Add TSCII.
* iconvdata/TESTS: Add TSCII.
2002-09-22 Bruno Haible <bruno@clisp.org>
Revert 2002-04-18 patch.
* iconvdata/euc-jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
FROM_DIRECTION): Make the FROM direction stateful again.
* iconvdata/shift_jisx0213.c (EMIT_SHIFT_TO_INIT, BODY for
FROM_DIRECTION): Likewise.
2002-09-22 Bruno Haible <bruno@clisp.org>
* iconvdata/tst-e2big.c: New file.

View File

@ -28,6 +28,7 @@
#include <langinfo.h>
#include <locale.h>
#include <search.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@ -216,10 +217,47 @@ main (int argc, char *argv[])
if (cd == (iconv_t) -1)
{
if (errno == EINVAL)
error (EXIT_FAILURE, 0,
_("conversion from `%s' to `%s' not supported"),
from_code[0] ? from_code : nl_langinfo (CODESET),
orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET));
{
/* Try to be nice with the user and tell her which of the
two encoding names is wrong. This is possible because
all supported encodings can be converted from/to Unicode,
in other words, because the graph of encodings is
connected. */
bool from_wrong =
(iconv_open ("UTF-8", from_code) == (iconv_t) -1
&& errno == EINVAL);
bool to_wrong =
(iconv_open (to_code, "UTF-8") == (iconv_t) -1
&& errno == EINVAL);
const char *from_pretty =
(from_code[0] ? from_code : nl_langinfo (CODESET));
const char *to_pretty =
(orig_to_code[0] ? orig_to_code : nl_langinfo (CODESET));
if (from_wrong)
{
if (to_wrong)
error (EXIT_FAILURE, 0,
_("\
conversions from `%s' and to `%s' are not supported"),
from_pretty, to_pretty);
else
error (EXIT_FAILURE, 0,
_("conversion from `%s' is not supported"),
from_pretty);
}
else
{
if (to_wrong)
error (EXIT_FAILURE, 0,
_("conversion to `%s' is not supported"),
to_pretty);
else
error (EXIT_FAILURE, 0,
_("conversion from `%s' to `%s' is not supported"),
from_pretty, to_pretty);
}
}
else
error (EXIT_FAILURE, errno,
_("failed to start conversion processing"));

View File

@ -51,7 +51,7 @@ modules := ISO8859-1 ISO8859-2 ISO8859-3 ISO8859-4 ISO8859-5 \
GB18030 ISO-2022-CN-EXT VISCII GBBIG5 CP10007 KOI8-T \
GEORGIAN-PS GEORGIAN-ACADEMY ISO-IR-209 MAC-SAMI ARMSCII-8 \
TCVN5712-1 libJISX0213 EUC-JISX0213 SHIFT_JISX0213 \
ISO-2022-JP-3
ISO-2022-JP-3 TSCII
modules.so := $(addsuffix .so, $(modules))
@ -166,7 +166,7 @@ distribute := gconv-modules extra-module.mk gap.awk gaptab.awk \
mac-sami.c ibm1160.c ibm1160.h ibm1161.c ibm1161.h \
ibm1163.c ibm1163.h ibm1164.c ibm1164.h jisx0213.c jisx0213.h \
euc-jisx0213.c shift_jisx0213.c iso-2022-jp-3.c \
tcvn5712-1.c armscii-8.c
tcvn5712-1.c armscii-8.c tscii.c
# We build the transformation modules only when we build shared libs.
ifeq (yes,$(build-shared))

View File

@ -118,3 +118,4 @@ TCVN-5712 TCVN-5712 Y UTF8
EUC-JISX0213 EUC-JISX0213 Y UTF8
SHIFT_JISX0213 SHIFT_JISX0213 Y UTF8
ISO-2022-JP-3 ISO-2022-JP-3 N UTF8
TSCII TSCII Y UTF8

View File

@ -0,0 +1 @@
0xAD 0x0B87

View File

@ -0,0 +1,66 @@
0x82 0x0BB8 0x0BCD 0x0BB0 0x0BC0
0x87 0x0B95 0x0BCD 0x0BB7
0x88 0x0B9C 0x0BCD
0x89 0x0BB7 0x0BCD
0x8A 0x0BB8 0x0BCD
0x8B 0x0BB9 0x0BCD
0x8C 0x0B95 0x0BCD 0x0BB7 0x0BCD
0x99 0x0B99 0x0BC1
0x9A 0x0B9E 0x0BC1
0x9B 0x0B99 0x0BC2
0x9C 0x0B9E 0x0BC2
0xA6A1 0x0BCA
0xA7A1 0x0BCB
0xA7AA 0x0BCC
0xCA 0x0B9F 0x0BBF
0xCB 0x0B9F 0x0BC0
0xCC 0x0B95 0x0BC1
0xCD 0x0B9A 0x0BC1
0xCE 0x0B9F 0x0BC1
0xCF 0x0BA3 0x0BC1
0xD0 0x0BA4 0x0BC1
0xD1 0x0BA8 0x0BC1
0xD2 0x0BAA 0x0BC1
0xD3 0x0BAE 0x0BC1
0xD4 0x0BAF 0x0BC1
0xD5 0x0BB0 0x0BC1
0xD6 0x0BB2 0x0BC1
0xD7 0x0BB5 0x0BC1
0xD8 0x0BB4 0x0BC1
0xD9 0x0BB3 0x0BC1
0xDA 0x0BB1 0x0BC1
0xDB 0x0BA9 0x0BC1
0xDC 0x0B95 0x0BC2
0xDD 0x0B9A 0x0BC2
0xDE 0x0B9F 0x0BC2
0xDF 0x0BA3 0x0BC2
0xE0 0x0BA4 0x0BC2
0xE1 0x0BA8 0x0BC2
0xE2 0x0BAA 0x0BC2
0xE3 0x0BAE 0x0BC2
0xE4 0x0BAF 0x0BC2
0xE5 0x0BB0 0x0BC2
0xE6 0x0BB2 0x0BC2
0xE7 0x0BB5 0x0BC2
0xE8 0x0BB4 0x0BC2
0xE9 0x0BB3 0x0BC2
0xEA 0x0BB1 0x0BC2
0xEB 0x0BA9 0x0BC2
0xEC 0x0B95 0x0BCD
0xED 0x0B99 0x0BCD
0xEE 0x0B9A 0x0BCD
0xEF 0x0B9E 0x0BCD
0xF0 0x0B9F 0x0BCD
0xF1 0x0BA3 0x0BCD
0xF2 0x0BA4 0x0BCD
0xF3 0x0BA8 0x0BCD
0xF4 0x0BAA 0x0BCD
0xF5 0x0BAE 0x0BCD
0xF6 0x0BAF 0x0BCD
0xF7 0x0BB0 0x0BCD
0xF8 0x0BB2 0x0BCD
0xF9 0x0BB5 0x0BCD
0xFA 0x0BB4 0x0BCD
0xFB 0x0BB3 0x0BCD
0xFC 0x0BB1 0x0BCD
0xFD 0x0BA9 0x0BCD

View File

@ -67,7 +67,9 @@
*statep = saved_state
/* During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
/* During EUC-JISX0213 to UCS-4 conversion, the COUNT element of the state
contains the last UCS-4 character, shifted by 3 bits.
During UCS-4 to EUC-JISX0213 conversion, the COUNT element of the state
contains the last two bytes to be output, shifted by 3 bits. */
/* Since this is a stateful encoding we have to provide code which resets
@ -77,8 +79,17 @@
if (data->__statep->__count != 0) \
{ \
if (FROM_DIRECTION) \
/* We don't use shift states in the FROM_DIRECTION. */ \
data->__statep->__count = 0; \
{ \
if (__builtin_expect (outbuf + 4 <= outend, 1)) \
{ \
/* Write out the last character. */ \
*((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \
data->__statep->__count = 0; \
} \
else \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
} \
else \
{ \
if (__builtin_expect (outbuf + 2 <= outend, 1)) \
@ -104,104 +115,114 @@
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
uint32_t ch = *inptr; \
uint32_t ch; \
\
if (ch < 0x80) \
/* Plain ASCII character. */ \
++inptr; \
else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
/* Determine whether there is a buffered character pending. */ \
ch = *statep >> 3; \
if (__builtin_expect (ch == 0, 1)) \
{ \
/* Two or three byte character. */ \
uint32_t ch2; \
/* No - so look at the next input byte. */ \
ch = *inptr; \
\
if (__builtin_expect (inptr + 1 >= inend, 0)) \
if (ch < 0x80) \
/* Plain ASCII character. */ \
++inptr; \
else if ((ch >= 0xa1 && ch <= 0xfe) || ch == 0x8e || ch == 0x8f) \
{ \
/* The second byte is not available. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
/* Two or three byte character. */ \
uint32_t ch2; \
\
ch2 = inptr[1]; \
\
/* The second byte must be >= 0xa1 and <= 0xfe. */ \
if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
{ \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
if (ch == 0x8e) \
{ \
/* Half-width katakana. */ \
if (__builtin_expect (ch2 > 0xdf, 0)) \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
ch = ch2 + 0xfec0; \
inptr += 2; \
} \
else \
{ \
const unsigned char *endp; \
\
if (ch == 0x8f) \
if (__builtin_expect (inptr + 1 >= inend, 0)) \
{ \
/* JISX 0213 plane 2. */ \
uint32_t ch3; \
/* The second byte is not available. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
if (__builtin_expect (inptr + 2 >= inend, 0)) \
{ \
/* The third byte is not available. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
ch2 = inptr[1]; \
\
ch3 = inptr[2]; \
endp = inptr + 3; \
/* The second byte must be >= 0xa1 and <= 0xfe. */ \
if (__builtin_expect (ch2 < 0xa1 || ch2 > 0xfe, 0)) \
{ \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
if (ch == 0x8e) \
{ \
/* Half-width katakana. */ \
if (__builtin_expect (ch2 > 0xdf, 0)) \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
ch = ch2 + 0xfec0; \
inptr += 2; \
} \
else \
{ \
/* JISX 0213 plane 1. */ \
endp = inptr + 2; \
const unsigned char *endp; \
\
ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
} \
\
if (ch == 0) \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
if (ch < 0x80) \
{ \
/* It's a combining character. */ \
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\
/* See whether we have room for two characters. */ \
if (outptr + 8 <= outend) \
if (ch == 0x8f) \
{ \
inptr = endp; \
put32 (outptr, u1); \
outptr += 4; \
put32 (outptr, u2); \
outptr += 4; \
continue; \
/* JISX 0213 plane 2. */ \
uint32_t ch3; \
\
if (__builtin_expect (inptr + 2 >= inend, 0)) \
{ \
/* The third byte is not available. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
ch3 = inptr[2]; \
endp = inptr + 3; \
\
ch = jisx0213_to_ucs4 (0x200 - 0x80 + ch2, ch3 ^ 0x80); \
} \
else \
{ \
/* JISX 0213 plane 1. */ \
endp = inptr + 2; \
\
ch = jisx0213_to_ucs4 (0x100 - 0x80 + ch, ch2 ^ 0x80); \
} \
\
if (ch == 0) \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
\
inptr = endp; \
\
if (ch < 0x80) \
{ \
/* It's a combining character. */ \
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\
put32 (outptr, u1); \
outptr += 4; \
\
/* See whether we have room for two characters. */ \
if (outptr + 4 <= outend) \
{ \
put32 (outptr, u2); \
outptr += 4; \
continue; \
} \
\
/* Otherwise store only the first character now, and \
put the second one into the queue. */ \
*statep = u2 << 3; \
/* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
\
inptr = endp; \
} \
} \
else \
{ \
/* This is illegal. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
else \
{ \
/* This is illegal. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
} \
\
put32 (outptr, ch); \

View File

@ -1523,3 +1523,7 @@ module INTERNAL EUC-JISX0213// EUC-JISX0213 1
# from to module cost
module Shift_JISX0213// INTERNAL SHIFT_JISX0213 1
module INTERNAL Shift_JISX0213// SHIFT_JISX0213 1
# from to module cost
module TSCII// INTERNAL TSCII 1
module INTERNAL TSCII// TSCII 1

View File

@ -67,7 +67,9 @@
*statep = saved_state
/* During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
/* During Shift_JISX0213 to UCS-4 conversion, the COUNT element of the state
contains the last UCS-4 character, shifted by 3 bits.
During UCS-4 to Shift_JISX0213 conversion, the COUNT element of the state
contains the last two bytes to be output, shifted by 3 bits. */
/* Since this is a stateful encoding we have to provide code which resets
@ -77,8 +79,17 @@
if (data->__statep->__count != 0) \
{ \
if (FROM_DIRECTION) \
/* We don't use shift states in the FROM_DIRECTION. */ \
data->__statep->__count = 0; \
{ \
if (__builtin_expect (outbuf + 4 <= outend, 1)) \
{ \
/* Write out the last character. */ \
*((uint32_t *) outbuf)++ = data->__statep->__count >> 3; \
data->__statep->__count = 0; \
} \
else \
/* We don't have enough room in the output buffer. */ \
status = __GCONV_FULL_OUTPUT; \
} \
else \
{ \
if (__builtin_expect (outbuf + 2 <= outend, 1)) \
@ -104,106 +115,116 @@
#define LOOPFCT FROM_LOOP
#define BODY \
{ \
uint32_t ch = *inptr; \
uint32_t ch; \
\
if (ch < 0x80) \
/* Determine whether there is a buffered character pending. */ \
ch = *statep >> 3; \
if (__builtin_expect (ch == 0, 1)) \
{ \
/* Plain ISO646-JP character. */ \
if (__builtin_expect (ch == 0x5c, 0)) \
ch = 0xa5; \
else if (__builtin_expect (ch == 0x7e, 0)) \
ch = 0x203e; \
++inptr; \
} \
else if (ch >= 0xa1 && ch <= 0xdf) \
{ \
/* Half-width katakana. */ \
ch += 0xfec0; \
++inptr; \
} \
else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc)) \
{ \
/* Two byte character. */ \
uint32_t ch2; \
\
if (__builtin_expect (inptr + 1 >= inend, 0)) \
{ \
/* The second byte is not available. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
ch2 = inptr[1]; \
\
/* The second byte must be in the range 0x{40..7E,80..FC}. */ \
if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0)) \
{ \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
/* Convert to row and column. */ \
if (ch < 0xe0) \
ch -= 0x81; \
else \
ch -= 0xc1; \
if (ch2 < 0x80) \
ch2 -= 0x40; \
else \
ch2 -= 0x41; \
/* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb. */ \
ch = 2 * ch; \
if (ch2 >= 0x5e) \
ch2 -= 0x5e, ch++; \
ch2 += 0x21; \
if (ch >= 0x5e) \
{ \
/* Handling of JISX 0213 plane 2 rows. */ \
if (ch >= 0x67) \
ch += 230; \
else if (ch >= 0x63 || ch == 0x5f) \
ch += 168; \
else \
ch += 162; \
} \
\
ch = jisx0213_to_ucs4 (0x121 + ch, ch2); \
\
if (ch == 0) \
{ \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
/* No - so look at the next input byte. */ \
ch = *inptr; \
\
if (ch < 0x80) \
{ \
/* It's a combining character. */ \
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
/* Plain ISO646-JP character. */ \
if (__builtin_expect (ch == 0x5c, 0)) \
ch = 0xa5; \
else if (__builtin_expect (ch == 0x7e, 0)) \
ch = 0x203e; \
++inptr; \
} \
else if (ch >= 0xa1 && ch <= 0xdf) \
{ \
/* Half-width katakana. */ \
ch += 0xfec0; \
++inptr; \
} \
else if ((ch >= 0x81 && ch <= 0x9f) || (ch >= 0xe0 && ch <= 0xfc)) \
{ \
/* Two byte character. */ \
uint32_t ch2; \
\
/* See whether we have room for two characters. */ \
if (outptr + 8 <= outend) \
if (__builtin_expect (inptr + 1 >= inend, 0)) \
{ \
inptr += 2; \
/* The second byte is not available. */ \
result = __GCONV_INCOMPLETE_INPUT; \
break; \
} \
\
ch2 = inptr[1]; \
\
/* The second byte must be in the range 0x{40..7E,80..FC}. */ \
if (__builtin_expect (ch2 < 0x40 || ch2 == 0x7f || ch2 > 0xfc, 0))\
{ \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
/* Convert to row and column. */ \
if (ch < 0xe0) \
ch -= 0x81; \
else \
ch -= 0xc1; \
if (ch2 < 0x80) \
ch2 -= 0x40; \
else \
ch2 -= 0x41; \
/* Now 0 <= ch <= 0x3b, 0 <= ch2 <= 0xbb. */ \
ch = 2 * ch; \
if (ch2 >= 0x5e) \
ch2 -= 0x5e, ch++; \
ch2 += 0x21; \
if (ch >= 0x5e) \
{ \
/* Handling of JISX 0213 plane 2 rows. */ \
if (ch >= 0x67) \
ch += 230; \
else if (ch >= 0x63 || ch == 0x5f) \
ch += 168; \
else \
ch += 162; \
} \
\
ch = jisx0213_to_ucs4 (0x121 + ch, ch2); \
\
if (ch == 0) \
{ \
/* This is an illegal character. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
\
inptr += 2; \
\
if (ch < 0x80) \
{ \
/* It's a combining character. */ \
uint32_t u1 = __jisx0213_to_ucs_combining[ch - 1][0]; \
uint32_t u2 = __jisx0213_to_ucs_combining[ch - 1][1]; \
\
put32 (outptr, u1); \
outptr += 4; \
put32 (outptr, u2); \
outptr += 4; \
continue; \
} \
else \
{ \
\
/* See whether we have room for two characters. */ \
if (outptr + 4 <= outend) \
{ \
put32 (outptr, u2); \
outptr += 4; \
continue; \
} \
\
/* Otherwise store only the first character now, and \
put the second one into the queue. */ \
*statep = u2 << 3; \
/* Tell the caller why we terminate the loop. */ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
\
inptr += 2; \
} \
else \
{ \
/* This is illegal. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
else \
{ \
/* This is illegal. */ \
STANDARD_FROM_LOOP_ERR_HANDLER (1); \
} \
} \
\
put32 (outptr, ch); \

27
iconvdata/testdata/TSCII vendored Normal file
View File

@ -0,0 +1,27 @@
╚ ╛ Ч ╝ ╞ ╟ ╠ ╡ ╢ ╣ ╤ ╥
╦ ╦║ ╦╒ ╦ё л э ╕╦ ╖╦ ╗╦ ╕╦║ ╖╦║ ╕╦╙
╧ ╧║ ╧╒ ╧ё ≥ ⌡ ╕╧ ╖╧ ╗╧ ╕╧║ ╕╧║ ╕╧╙
╨ ╨║ ╨╒ ╨ё м щ ╕╨ ╖╨ ╗╨ ╕╨║ ╖╨║ ╕╨╙
╩ ╩║ ╩╒ ╩ё   ° ╕╩ ╖╩ ╗╩ ╕╩║ ╖╩║ ╕╩╙
╪ ╪║ й к н ч ╕╪ ╖╪ ╗╪ ╕╪║ ╖╪║ ╕╪╙
╫ ╫║ ╫╒ ╫ё о ъ ╕╫ ╖╫ ╗╫ ╕╫║ ╖╫║ ╕╫╙
╬ ╬║ ╬╒ ╬ё п Ю ╕╬ ╖╬ ╗╬ ╕╬║ ╖╬║ ╕╬╙
© ©║ ©╒ ©ё я А ╕© ╖© ╗© ╕©║ ╖©║ ╕©╙
ю ю║ ю╒ юё р Б ╕ю ╖ю ╗ю ╕ю║ ╖ю║ ╕ю╙
а а║ а╒ аё с Ц ╕а ╖а ╗а ╕а║ ╖а║ ╕а╙
б б║ б╒ бё т Д ╕б ╖б ╗б ╕б║ ╖б║ ╕б╙
ц ц║ ц╒ цё у Е ╕ц ╖ц ╗ц ╕ц║ ╖ц║ ╕ц╙
д д║ д╒ дё ж Ф ╕д ╖д ╗д ╕д║ ╖д║ ╕д╙
е е║ е╒ её в Г ╕е ╖е ╗е ╕е║ ╖е║ ╕е╙
ф ф║ ф╒ фё ь Х ╕ф ╖ф ╗ф ╕ф║ ╖ф║ ╕ф╙
х х║ х╒ хё з Й ╕х ╖х ╗х ╕х║ ╖х║ ╕х╙
и и║ и╒ иё ш К ╕и ╖и ╗и ╕и║ ╖и║ ╕и╙
┐ ┐║ ┐╒ ┐ё ┐╓ ┐╔ ╕┐ ╖┐ ╗┐ ╕┐║ ╖┐║ ╕┐╙
└ └║ └╒ └ё └╓ └╔ ╕└ ╖└ ╗└ ╕└║ ╖└║ ╕└╙
┘ ┘║ ┘╒ ┘ё ┼╓ ┼╔ ╕┘ ╖┘ ╗┘ ╕┘║ ╖┘║ ╕┘╙
├ ├║ ├╒ ├ё ▀╓ ▀╔ ╕├ ╖├ ╗├ ╕├║ ╖├║ ╕├╙
┤ ┤║ ┤╒ ┤ё ┤╔ ┤╔ ╕┤ ╖┤ ╗┤ ╕┤║ ╖┤║ ╕┤╙

27
iconvdata/testdata/TSCII..UTF8 vendored Normal file
View File

@ -0,0 +1,27 @@
அ ஆ இ ஈ உ ஊ எ ஏ ஒ ஓ ஔ ஃ
க கா கி கீ கு கூ கெ கே கை கொ கோ கௌ
ங ஙா ஙி ஙீ ஙு ஙூ ஙெ ஙே ஙை ஙொ ஙொ ஙௌ
ச சா சி சீ சு சூ செ சே சை சொ சோ சௌ
ஞ ஞா ஞி ஞீ ஞு ஞூ ஞெ ஞே ஞை ஞொ ஞோ ஞௌ
ட டா டி டீ டு டூ டெ டே டை டொ டோ டௌ
ண ணா ணி ணீ ணு ணூ ணெ ணே ணை ணொ ணோ ணௌ
த தா தி தீ து தூ தெ தே தை தொ தோ தௌ
ந நா நி நீ நு நூ நெ நே நை நொ நோ நௌ
ப பா பி பீ பு பூ பெ பே பை பொ போ பௌ
ம மா மி மீ மு மூ மெ மே மை மொ மோ மௌ
ய யா யி யீ யு யூ யெ யே யை யொ யோ யௌ
ர ரா ரி ரீ ரு ரூ ரெ ரே ரை ரொ ரோ ரௌ
ல லா லி லீ லு லூ லெ லே லை லொ லோ லௌ
வ வா வி வீ வு வூ வெ வே வை வொ வோ வௌ
ழ ழா ழி ழீ ழு ழூ ழெ ழே ழை ழொ ழோ ழௌ
ற றா றி றீ று றூ றெ றே றை றொ றோ றௌ
ன னா னி னீ னு னூ னெ னே னை னொ னோ னௌ
ஸ்ரீ
ஜ ஜா ஜி ஜீ ஜு ஜூ ெஜ ேஜ ைஜ ெஜா ேஜா ெஜௗ
ஷ ஷா ஷி ஷீ ஷு ஷூ ெஷ ேஷ ைஷ ெஷா ேஷா ெஷௗ
ஸ ஸா ஸி ஸீ ஸு ஸூ ெஸ ேஸ ைஸ ெஸா ேஸா ெஸௗ
ஹ ஹா ஹி ஹீ ஹு ஹூ ெஹ ேஹ ைஹ ெஹா ேஹா ெஹௗ
க்ஷ க்ஷா க்ஷி க்ஷீ க்ஷூ க்ஷூ ெக்ஷ ேக்ஷ ைக்ஷ ெக்ஷா ேக்ஷா ெக்ஷௗ

851
iconvdata/tscii.c Normal file
View File

@ -0,0 +1,851 @@
/* Conversion from and to TSCII.
Copyright (C) 2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Bruno Haible <bruno@clisp.org>, 2002.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <dlfcn.h>
#include <stdint.h>
#include <gconv.h>
#include <assert.h>
/* TSCII is an 8-bit encoding consisting of:
0x00..0x7F: ASCII
0x80..0x90, 0x95..0x9F, 0xAB..0xFE:
Tamil letters and glyphs
0xA1..0xA5, 0xAA: Tamil combining letters (after the base character)
0xA6..0xA8: Tamil combining letters (before the base character)
0x91..0x94: Punctuation
0xA9: Symbols
*/
/* Definitions used in the body of the `gconv' function. */
/* Name of the character set handled by this module.  */
#define CHARSET_NAME "TSCII//"
/* Names of the two conversion loop functions: FROM_LOOP is the TSCII to
   UCS-4 loop, TO_LOOP the UCS-4 to TSCII loop (each is used as LOOPFCT
   further down in this file).  */
#define FROM_LOOP from_tscii
#define TO_LOOP to_tscii
/* NOTE(review): presumably these ask the gconv skeleton to generate the
   module's init and fini functions -- confirm against the skeleton.  */
#define DEFINE_INIT 1
#define DEFINE_FINI 1
/* Input/output sizes, in bytes, of the TSCII to UCS-4 direction.  The
   output is written in units of 4 bytes; a single TSCII byte such as 0x82
   or 0x8c expands to four UCS-4 characters, hence 16.
   NOTE(review): the FROM body in this file reads one input byte per
   iteration; confirm why the maximum input is declared as 2.  */
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 2
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 16
/* Input/output sizes, in bytes, of the UCS-4 to TSCII direction: one
   4-byte UCS-4 character is read per step and up to three TSCII bytes are
   written.  */
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 3
/* Declares SAVED_STATE (scratch copy used by SAVE_RESET_STATE) and STATEP,
   a pointer to the __count member of the conversion state.  */
#define PREPARE_LOOP \
int saved_state; \
int *statep = &data->__statep->__count;
/* Extra argument appended to the loop function calls; it matches the
   `int *statep' parameter declared through EXTRA_LOOP_DECLS.  */
#define EXTRA_LOOP_ARGS , statep
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  With a nonzero SAVE the current state is copied
   into saved_state, otherwise the state is restored from saved_state.
   The expansion ends without a semicolon, so the user supplies it.  */
#define SAVE_RESET_STATE(Save) \
if (Save) \
saved_state = *statep; \
else \
*statep = saved_state
/* During TSCII to UCS-4 conversion, the COUNT element of the state contains
the last UCS-4 character to be output, shifted by 8 bits, and an encoded
representation of additional UCS-4 characters to be output (if any),
shifted by 4 bits. This character can be:
0x0000 Nothing pending.
0x0BCD Pending VIRAMA sign. If bit 3 is set, it may be
omitted if followed by a vowel sign U or UU.
0x0BC6, 0x0BC7, 0x0BC8 Pending vowel sign. Bit 3 is set after the
consonant was seen.
Other Bit 3 always cleared. */
/* During UCS-4 to TSCII conversion, the COUNT element of the state contains
the last byte (or sometimes the last two bytes) to be output, shifted by
3 bits. This can be:
0x00 Nothing pending.
0xB8..0xC9, 0x83..0x86 A consonant.
0xEC, 0x8A A consonant with VIRAMA sign (final or joining).
0x87, 0xC38A Two consonants combined through a VIRAMA sign. */
/* Since this is a stateful encoding we have to provide code which resets
the output state to the initial state. This has to be done during the
flushing. */
/* EMIT_SHIFT_TO_INIT writes whatever is still buffered in
   data->__statep->__count to OUTBUF and returns the state to 0.

   In the FROM direction (TSCII to UCS-4) the state holds a queue of UCS-4
   characters: the head is in the bits above bit 7 and the rest is encoded
   as an index into tscii_next_state.  The characters are written one at a
   time, so if the output buffer fills up the state still describes exactly
   the characters that remain.

   In the TO direction (UCS-4 to TSCII) the state holds one or two pending
   TSCII bytes, shifted left by 3 bits; the low byte is written first.

   When there is not enough room, STATUS is set to __GCONV_FULL_OUTPUT.

   The store and the pointer advance are separate statements on purpose:
   incrementing the result of a cast ("*((uint32_t *) outbuf)++") is not
   valid C and is rejected by current compilers.  */
#define EMIT_SHIFT_TO_INIT \
  if (data->__statep->__count != 0) \
    { \
      if (FROM_DIRECTION) \
        { \
          do \
            { \
              if (__builtin_expect (outbuf + 4 > outend, 0)) \
                { \
                  /* We don't have enough room in the output buffer.  */ \
                  status = __GCONV_FULL_OUTPUT; \
                  break; \
                } \
              /* Write out the pending character.  */ \
              *((uint32_t *) outbuf) = data->__statep->__count >> 8; \
              outbuf += sizeof (uint32_t); \
              /* Retrieve the successor state.  */ \
              data->__statep->__count = \
                tscii_next_state[(data->__statep->__count >> 4) & 0x0f]; \
            } \
          while (data->__statep->__count != 0); \
        } \
      else \
        { \
          uint32_t last = data->__statep->__count >> 3; \
          if (__builtin_expect (last >> 8, 0)) \
            { \
              /* Write out the last character, two bytes.  */ \
              if (__builtin_expect (outbuf + 2 <= outend, 1)) \
                { \
                  *outbuf++ = last & 0xff; \
                  *outbuf++ = (last >> 8) & 0xff; \
                  data->__statep->__count = 0; \
                } \
              else \
                /* We don't have enough room in the output buffer.  */ \
                status = __GCONV_FULL_OUTPUT; \
            } \
          else \
            { \
              /* Write out the last character, a single byte.  */ \
              if (__builtin_expect (outbuf < outend, 1)) \
                { \
                  *outbuf++ = last & 0xff; \
                  data->__statep->__count = 0; \
                } \
              else \
                /* We don't have enough room in the output buffer.  */ \
                status = __GCONV_FULL_OUTPUT; \
            } \
        } \
    }
/* First define the conversion function from TSCII to UCS-4. */
/* Table for the TSCII bytes 0x80..0xFF, indexed by (byte - 0x80).  Each
   row holds up to two UCS-4 characters; a second element of 0 means the
   byte maps to a single character.  A first element of 0 means the byte
   has no direct entry here: the conversion body treats a few such bytes
   specially and rejects the others.  */
static const uint16_t tscii_to_ucs4[128][2] =
  {
    { 0x0BE6, 0 },       /* 0x80 */
    { 0x0BE7, 0 },       /* 0x81 */
    { 0, 0 },            /* 0x82 - maps to <U0BB8><U0BCD><U0BB0><U0BC0> */
    { 0x0B9C, 0 },       /* 0x83 */
    { 0x0BB7, 0 },       /* 0x84 */
    { 0x0BB8, 0 },       /* 0x85 */
    { 0x0BB9, 0 },       /* 0x86 */
    { 0, 0 },            /* 0x87 - maps to <U0B95><U0BCD><U0BB7> */
    { 0x0B9C, 0x0BCD },  /* 0x88 */
    { 0x0BB7, 0x0BCD },  /* 0x89 */
    { 0, 0 },            /* 0x8a - maps to <U0BB8> and buffers <U0BCD> */
    { 0, 0 },            /* 0x8b - maps to <U0BB9> and buffers <U0BCD> */
    { 0, 0 },            /* 0x8c - maps to <U0B95><U0BCD><U0BB7><U0BCD> */
    { 0x0BE8, 0 },       /* 0x8d */
    { 0x0BE9, 0 },       /* 0x8e */
    { 0x0BEA, 0 },       /* 0x8f */
    { 0x0BEB, 0 },       /* 0x90 */
    { 0x2018, 0 },       /* 0x91 */
    { 0x2019, 0 },       /* 0x92 */
    { 0x201C, 0 },       /* 0x93 */
    { 0x201D, 0 },       /* 0x94 */
    { 0x0BEC, 0 },       /* 0x95 */
    { 0x0BED, 0 },       /* 0x96 */
    { 0x0BEE, 0 },       /* 0x97 */
    { 0x0BEF, 0 },       /* 0x98 */
    { 0x0B99, 0x0BC1 },  /* 0x99 */
    { 0x0B9E, 0x0BC1 },  /* 0x9a */
    { 0x0B99, 0x0BC2 },  /* 0x9b */
    { 0x0B9E, 0x0BC2 },  /* 0x9c */
    { 0x0BF0, 0 },       /* 0x9d */
    { 0x0BF1, 0 },       /* 0x9e */
    { 0x0BF2, 0 },       /* 0x9f */
    { 0, 0 },            /* 0xa0 - unmapped */
    { 0x0BBE, 0 },       /* 0xa1 */
    { 0x0BBF, 0 },       /* 0xa2 */
    { 0x0BC0, 0 },       /* 0xa3 */
    { 0x0BC1, 0 },       /* 0xa4 */
    { 0x0BC2, 0 },       /* 0xa5 */
    { 0, 0 },            /* 0xa6 - buffers <U0BC6> */
    { 0, 0 },            /* 0xa7 - buffers <U0BC7> */
    { 0, 0 },            /* 0xa8 - buffers <U0BC8> */
    { 0x00A9, 0 },       /* 0xa9 */
    { 0x0BD7, 0 },       /* 0xaa */
    { 0x0B85, 0 },       /* 0xab */
    { 0x0B86, 0 },       /* 0xac */
    { 0x0B87, 0 },       /* 0xad */
    { 0x0B88, 0 },       /* 0xae */
    { 0x0B89, 0 },       /* 0xaf */
    { 0x0B8A, 0 },       /* 0xb0 */
    { 0x0B8E, 0 },       /* 0xb1 */
    { 0x0B8F, 0 },       /* 0xb2 */
    { 0x0B90, 0 },       /* 0xb3 */
    { 0x0B92, 0 },       /* 0xb4 */
    { 0x0B93, 0 },       /* 0xb5 */
    { 0x0B94, 0 },       /* 0xb6 */
    { 0x0B83, 0 },       /* 0xb7 */
    { 0x0B95, 0 },       /* 0xb8 */
    { 0x0B99, 0 },       /* 0xb9 */
    { 0x0B9A, 0 },       /* 0xba */
    { 0x0B9E, 0 },       /* 0xbb */
    { 0x0B9F, 0 },       /* 0xbc */
    { 0x0BA3, 0 },       /* 0xbd */
    { 0x0BA4, 0 },       /* 0xbe */
    { 0x0BA8, 0 },       /* 0xbf */
    { 0x0BAA, 0 },       /* 0xc0 */
    { 0x0BAE, 0 },       /* 0xc1 */
    { 0x0BAF, 0 },       /* 0xc2 */
    { 0x0BB0, 0 },       /* 0xc3 */
    { 0x0BB2, 0 },       /* 0xc4 */
    { 0x0BB5, 0 },       /* 0xc5 */
    { 0x0BB4, 0 },       /* 0xc6 */
    { 0x0BB3, 0 },       /* 0xc7 */
    { 0x0BB1, 0 },       /* 0xc8 */
    { 0x0BA9, 0 },       /* 0xc9 */
    { 0x0B9F, 0x0BBF },  /* 0xca */
    { 0x0B9F, 0x0BC0 },  /* 0xcb */
    { 0x0B95, 0x0BC1 },  /* 0xcc */
    { 0x0B9A, 0x0BC1 },  /* 0xcd */
    { 0x0B9F, 0x0BC1 },  /* 0xce */
    { 0x0BA3, 0x0BC1 },  /* 0xcf */
    { 0x0BA4, 0x0BC1 },  /* 0xd0 */
    { 0x0BA8, 0x0BC1 },  /* 0xd1 */
    { 0x0BAA, 0x0BC1 },  /* 0xd2 */
    { 0x0BAE, 0x0BC1 },  /* 0xd3 */
    { 0x0BAF, 0x0BC1 },  /* 0xd4 */
    { 0x0BB0, 0x0BC1 },  /* 0xd5 */
    { 0x0BB2, 0x0BC1 },  /* 0xd6 */
    { 0x0BB5, 0x0BC1 },  /* 0xd7 */
    { 0x0BB4, 0x0BC1 },  /* 0xd8 */
    { 0x0BB3, 0x0BC1 },  /* 0xd9 */
    { 0x0BB1, 0x0BC1 },  /* 0xda */
    { 0x0BA9, 0x0BC1 },  /* 0xdb */
    { 0x0B95, 0x0BC2 },  /* 0xdc */
    { 0x0B9A, 0x0BC2 },  /* 0xdd */
    { 0x0B9F, 0x0BC2 },  /* 0xde */
    { 0x0BA3, 0x0BC2 },  /* 0xdf */
    { 0x0BA4, 0x0BC2 },  /* 0xe0 */
    { 0x0BA8, 0x0BC2 },  /* 0xe1 */
    { 0x0BAA, 0x0BC2 },  /* 0xe2 */
    { 0x0BAE, 0x0BC2 },  /* 0xe3 */
    { 0x0BAF, 0x0BC2 },  /* 0xe4 */
    { 0x0BB0, 0x0BC2 },  /* 0xe5 */
    { 0x0BB2, 0x0BC2 },  /* 0xe6 */
    { 0x0BB5, 0x0BC2 },  /* 0xe7 */
    { 0x0BB4, 0x0BC2 },  /* 0xe8 */
    { 0x0BB3, 0x0BC2 },  /* 0xe9 */
    { 0x0BB1, 0x0BC2 },  /* 0xea */
    { 0x0BA9, 0x0BC2 },  /* 0xeb */
    { 0x0B95, 0x0BCD },  /* 0xec */
    { 0x0B99, 0x0BCD },  /* 0xed */
    { 0x0B9A, 0x0BCD },  /* 0xee */
    { 0x0B9E, 0x0BCD },  /* 0xef */
    { 0x0B9F, 0x0BCD },  /* 0xf0 */
    { 0x0BA3, 0x0BCD },  /* 0xf1 */
    { 0x0BA4, 0x0BCD },  /* 0xf2 */
    { 0x0BA8, 0x0BCD },  /* 0xf3 */
    { 0x0BAA, 0x0BCD },  /* 0xf4 */
    { 0x0BAE, 0x0BCD },  /* 0xf5 */
    { 0x0BAF, 0x0BCD },  /* 0xf6 */
    { 0x0BB0, 0x0BCD },  /* 0xf7 */
    { 0x0BB2, 0x0BCD },  /* 0xf8 */
    { 0x0BB5, 0x0BCD },  /* 0xf9 */
    { 0x0BB4, 0x0BCD },  /* 0xfa */
    { 0x0BB3, 0x0BCD },  /* 0xfb */
    { 0x0BB1, 0x0BCD },  /* 0xfc */
    { 0x0BA9, 0x0BCD },  /* 0xfd */
    { 0x0B87, 0 },       /* 0xfe */
    { 0, 0 }             /* 0xff - unmapped */
  };
/* Successor states for the queue of pending UCS-4 characters in the TSCII
   to UCS-4 direction.  The index is the 4-bit code stored in bits 4..7 of
   the state; the value is the state to continue with once the current head
   character has been written: the next character shifted left by 8 bits,
   combined with the code of what follows it.  */
static const uint32_t tscii_next_state[6] =
  {
    [0] = 0,                          /* Nothing further is pending.  */
    [1] = 0x0BB7 << 8,                /* <U0BB7> */
    [2] = 0x0BC0 << 8,                /* <U0BC0> */
    [3] = 0x0BCD << 8,                /* <U0BCD> */
    [4] = (0x0BB0 << 8) | (2 << 4),   /* <U0BB0><U0BC0> */
    [5] = (0x0BB7 << 8) | (3 << 4)    /* <U0BB7><U0BCD> */
  };
/* Parameters of the TSCII to UCS-4 loop: the generic names are bound to
   the FROM_LOOP_* sizes defined at the top of the file, and LOOPFCT names
   the function to generate (from_tscii).
   NOTE(review): presumably consumed by the <iconv/loop.c> include that
   follows the BODY definition -- confirm against that file.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
/* One step of the TSCII to UCS-4 conversion.  The macro expects the names
   inptr, outptr, outend, statep and result in scope and must be expanded
   inside a loop, because it uses `continue' and `break'.

   *statep holds the pending UCS-4 character (bits 8 and up), a code for
   further queued characters (bits 4..7, an index into tscii_next_state)
   and a flag in bit 3.  A step first tries to merge the pending character
   with the current input byte; if that is not possible the queue is
   flushed and the same input byte is looked at again on the next step.
   With an empty queue the input byte is translated through tscii_to_ucs4
   or handled by one of the special cases below.  Whenever a multi-character
   expansion does not fit into the output buffer, the unwritten rest is
   stored in *statep and result is set to __GCONV_FULL_OUTPUT.  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    if ((*statep >> 8) != 0) \
      { \
        /* Attempt to combine the last character with this one.  */ \
        uint32_t last = *statep >> 8; \
        \
        if (last == 0x0BCD && (*statep & (1 << 3))) \
          { \
            /* A droppable VIRAMA is pending: a following vowel sign U \
               or UU replaces it.  */ \
            if (ch == 0xa4 || ch == 0xa5) \
              { \
                ch += 0xb1d; \
                /* Now ch = 0x0BC1 or ch = 0x0BC2.  */ \
                put32 (outptr, ch); \
                outptr += 4; \
                *statep = 0; \
                inptr++; \
                continue; \
              } \
          } \
        else if (last >= 0x0BC6 && last <= 0x0BC8) \
          { \
            /* A prefixed vowel sign is pending; it may merge with a \
               trailing sign into a two-part vowel sign.  */ \
            if ((last == 0x0BC6 && ch == 0xa1) \
                || (last == 0x0BC7 && (ch == 0xa1 || ch == 0xaa))) \
              { \
                ch = last + 4 + (ch != 0xa1); \
                /* Now ch = 0x0BCA or ch = 0x0BCB or ch = 0x0BCC.  */ \
                put32 (outptr, ch); \
                outptr += 4; \
                *statep = 0; \
                inptr++; \
                continue; \
              } \
            /* The consonant that follows the prefixed vowel sign is \
               written first; bit 3 records that it has been seen.  */ \
            if ((ch >= 0xb8 && ch <= 0xc9) && (*statep & (1 << 3)) == 0) \
              { \
                ch = tscii_to_ucs4[ch - 0x80][0]; \
                put32 (outptr, ch); \
                outptr += 4; \
                *statep |= 1 << 3; \
                inptr++; \
                continue; \
              } \
          } \
        \
        do \
          { \
            /* Output the buffered character.  */ \
            put32 (outptr, last); \
            outptr += 4; \
            /* Retrieve the successor state.  */ \
            *statep = tscii_next_state[(*statep >> 4) & 0x0f]; \
            /* The successor state has a new head character; pick it up \
               so the next iteration does not write the old one again.  */ \
            last = *statep >> 8; \
          } \
        while (*statep != 0 && __builtin_expect (outptr + 4 <= outend, 1)); \
        \
        if (*statep != 0) \
          { \
            /* We don't have enough room in the output buffer. \
               Tell the caller why we terminate the loop.  */ \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        \
        /* The input byte has not been consumed yet; look at it again \
           with an empty queue.  */ \
        continue; \
      } \
    \
    if (ch < 0x80) \
      { \
        /* Plain ASCII character.  */ \
        put32 (outptr, ch); \
        outptr += 4; \
      } \
    else \
      { \
        /* Tamil character.  */ \
        uint32_t u1 = tscii_to_ucs4[ch - 0x80][0]; \
        \
        if (u1 != 0) \
          { \
            uint32_t u2 = tscii_to_ucs4[ch - 0x80][1]; \
            \
            inptr++; \
            \
            put32 (outptr, u1); \
            outptr += 4; \
            \
            if (u2 != 0) \
              { \
                /* See whether we have room for two characters.  Otherwise \
                   store only the first character now, and put the second \
                   one into the queue.  */ \
                if (__builtin_expect (outptr + 4 > outend, 0)) \
                  { \
                    *statep = u2 << 8; \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                put32 (outptr, u2); \
                outptr += 4; \
              } \
            continue; \
          } \
        /* Special handling of a few Tamil characters.  */ \
        else if (ch == 0xa6 || ch == 0xa7 || ch == 0xa8) \
          { \
            ch += 0x0b20; \
            /* Now ch = 0x0BC6 or ch = 0x0BC7 or ch = 0x0BC8.  */ \
            *statep = ch << 8; \
            inptr++; \
            continue; \
          } \
        else if (ch == 0x8a || ch == 0x8b) \
          { \
            ch += 0x0b2e; \
            /* Now ch = 0x0BB8 or ch = 0x0BB9.  */ \
            put32 (outptr, ch); \
            outptr += 4; \
            *statep = (0x0BCD << 8) + (1 << 3); \
            inptr++; \
            continue; \
          } \
        else if (ch == 0x82) \
          { \
            /* Output <U0BB8><U0BCD><U0BB0><U0BC0>, if we have room for \
               four characters.  */ \
            inptr++; \
            put32 (outptr, 0x0BB8); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BCD << 8) + (4 << 4); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BCD); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BB0 << 8) + (2 << 4); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BB0); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BC0 << 8); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BC0); \
            outptr += 4; \
            continue; \
          } \
        else if (ch == 0x87) \
          { \
            /* Output <U0B95><U0BCD><U0BB7>, if we have room for \
               three characters.  */ \
            inptr++; \
            put32 (outptr, 0x0B95); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BCD << 8) + (1 << 4); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BCD); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BB7 << 8); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BB7); \
            outptr += 4; \
            continue; \
          } \
        else if (ch == 0x8c) \
          { \
            /* Output <U0B95><U0BCD><U0BB7><U0BCD>, if we have room for \
               four characters.  */ \
            inptr++; \
            put32 (outptr, 0x0B95); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BCD << 8) + (5 << 4); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BCD); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BB7 << 8) + (3 << 4); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BB7); \
            outptr += 4; \
            if (__builtin_expect (outptr + 4 > outend, 0)) \
              { \
                *statep = (0x0BCD << 8); \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            put32 (outptr, 0x0BCD); \
            outptr += 4; \
            continue; \
          } \
        else \
          { \
            /* This is illegal.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr++; \
  }
/* Instantiate the TSCII to UCS-4 loop by including iconv/loop.c with the
   macros defined above.  EXTRA_LOOP_DECLS adds the `statep' parameter that
   BODY reads and writes; it pairs with EXTRA_LOOP_ARGS.
   NOTE(review): the effect of LOOP_NEED_FLAGS is defined in iconv/loop.c,
   which is not visible here -- confirm there.  */
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , int *statep
#include <iconv/loop.c>
/* Next, define the other direction, from UCS-4 to TSCII. */
/* TSCII byte for each UCS-4 character of the range 0x0B80..0x0BFF, indexed
   by (character - 0x0B80).  Only the characters that have a TSCII byte
   are listed; every other element is implicitly 0.  */
static const uint8_t ucs4_to_tscii[128] =
  {
    [0x03] = 0xb7,                                  /* 0x0B83 */
    [0x05] = 0xab, [0x06] = 0xac, [0x07] = 0xfe,    /* 0x0B85..0x0B87 */
    [0x08] = 0xae, [0x09] = 0xaf, [0x0a] = 0xb0,    /* 0x0B88..0x0B8A */
    [0x0e] = 0xb1, [0x0f] = 0xb2, [0x10] = 0xb3,    /* 0x0B8E..0x0B90 */
    [0x12] = 0xb4, [0x13] = 0xb5, [0x14] = 0xb6,    /* 0x0B92..0x0B94 */
    [0x15] = 0xb8,                                  /* 0x0B95 */
    [0x19] = 0xb9, [0x1a] = 0xba,                   /* 0x0B99..0x0B9A */
    [0x1c] = 0x83,                                  /* 0x0B9C */
    [0x1e] = 0xbb, [0x1f] = 0xbc,                   /* 0x0B9E..0x0B9F */
    [0x23] = 0xbd, [0x24] = 0xbe,                   /* 0x0BA3..0x0BA4 */
    [0x28] = 0xbf, [0x29] = 0xc9, [0x2a] = 0xc0,    /* 0x0BA8..0x0BAA */
    [0x2e] = 0xc1, [0x2f] = 0xc2,                   /* 0x0BAE..0x0BAF */
    [0x30] = 0xc3, [0x31] = 0xc8, [0x32] = 0xc4,    /* 0x0BB0..0x0BB2 */
    [0x33] = 0xc7, [0x34] = 0xc6, [0x35] = 0xc5,    /* 0x0BB3..0x0BB5 */
    [0x37] = 0x84, [0x38] = 0x85, [0x39] = 0x86,    /* 0x0BB7..0x0BB9 */
    [0x3e] = 0xa1, [0x3f] = 0xa2,                   /* 0x0BBE..0x0BBF */
    [0x40] = 0xa3, [0x41] = 0xa4, [0x42] = 0xa5,    /* 0x0BC0..0x0BC2 */
    [0x46] = 0xa6, [0x47] = 0xa7, [0x48] = 0xa8,    /* 0x0BC6..0x0BC8 */
    [0x57] = 0xaa,                                  /* 0x0BD7 */
    [0x66] = 0x80, [0x67] = 0x81,                   /* 0x0BE6..0x0BE7 */
    [0x68] = 0x8d, [0x69] = 0x8e, [0x6a] = 0x8f,    /* 0x0BE8..0x0BEA */
    [0x6b] = 0x90, [0x6c] = 0x95, [0x6d] = 0x96,    /* 0x0BEB..0x0BED */
    [0x6e] = 0x97, [0x6f] = 0x98,                   /* 0x0BEE..0x0BEF */
    [0x70] = 0x9d, [0x71] = 0x9e, [0x72] = 0x9f     /* 0x0BF0..0x0BF2 */
  };
/* TSCII byte for a consonant followed by the vowel sign U (0x0BC1).  The
   index is the TSCII byte of the plain consonant minus 0xb8, so the rows
   below cover the consonants 0xb8..0xc9.  */
static const uint8_t consonant_with_u[18] =
  {
    0xcc, 0x99, 0xcd, 0x9a, 0xce, 0xcf,   /* consonants 0xb8..0xbd */
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5,   /* consonants 0xbe..0xc3 */
    0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb    /* consonants 0xc4..0xc9 */
  };
/* TSCII byte for a consonant followed by the vowel sign UU (0x0BC2).  The
   index is the TSCII byte of the plain consonant minus 0xb8, so the rows
   below cover the consonants 0xb8..0xc9.  */
static const uint8_t consonant_with_uu[18] =
  {
    0xdc, 0x9b, 0xdd, 0x9c, 0xde, 0xdf,   /* consonants 0xb8..0xbd */
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5,   /* consonants 0xbe..0xc3 */
    0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb    /* consonants 0xc4..0xc9 */
  };
/* TSCII byte for a consonant followed by the VIRAMA sign (0x0BCD).  The
   index is the TSCII byte of the plain consonant minus 0xb8, so the rows
   below cover the consonants 0xb8..0xc9.  */
static const uint8_t consonant_with_virama[18] =
  {
    0xec, 0xed, 0xee, 0xef, 0xf0, 0xf1,   /* consonants 0xb8..0xbd */
    0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,   /* consonants 0xbe..0xc3 */
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd    /* consonants 0xc4..0xc9 */
  };
/* Parameters of the UCS-4 to TSCII loop: the generic names are bound to
   the TO_LOOP_* sizes defined at the top of the file, and LOOPFCT names
   the function to generate (to_tscii).
   NOTE(review): presumably consumed by a later <iconv/loop.c> include, as
   for the other direction -- that include is not visible here.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
{ \
uint32_t ch = get32 (inptr); \
\
if ((*statep >> 3) != 0) \
{ \
/* Attempt to combine the last character with this one. */ \
uint32_t last = *statep >> 3; \
\
if (last >= 0xb8 && last <= 0xc9) \
{ \
if (ch == 0x0BC1) \
{ \
*outptr++ = consonant_with_u[last - 0xb8]; \
*statep = 0; \
inptr += 4; \
continue; \
} \
if (ch == 0x0BC2) \
{ \
*outptr++ = consonant_with_uu[last - 0xb8]; \
*statep = 0; \
inptr += 4; \
continue; \
} \
if (ch == 0x0BC6) \
{ \
if (__builtin_expect (outptr + 2 <= outend, 1)) \
{ \
*outptr++ = 0xa6; \
*outptr++ = last; \
*statep = 0; \
inptr += 4; \
continue; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
if (ch == 0x0BC7) \
{ \
if (__builtin_expect (outptr + 2 <= outend, 1)) \
{ \
*outptr++ = 0xa7; \
*outptr++ = last; \
*statep = 0; \
inptr += 4; \
continue; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
if (ch == 0x0BC8) \
{ \
if (__builtin_expect (outptr + 2 <= outend, 1)) \
{ \
*outptr++ = 0xa8; \
*outptr++ = last; \
*statep = 0; \
inptr += 4; \
continue; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
if (ch == 0x0BCA) \
{ \
if (__builtin_expect (outptr + 3 <= outend, 1)) \
{ \
*outptr++ = 0xa6; \
*outptr++ = last; \
*outptr++ = 0xa1; \
*statep = 0; \
inptr += 4; \
continue; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
if (ch == 0x0BCB) \
{ \
if (__builtin_expect (outptr + 3 <= outend, 1)) \
{ \
*outptr++ = 0xa7; \
*outptr++ = last; \
*outptr++ = 0xa1; \
*statep = 0; \
inptr += 4; \
continue; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
if (ch == 0x0BCC) \
{ \
if (__builtin_expect (outptr + 3 <= outend, 1)) \
{ \
*outptr++ = 0xa7; \
*outptr++ = last; \
*outptr++ = 0xaa; \
*statep = 0; \
inptr += 4; \
continue; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
if (ch == 0x0BCD) \
{ \
if (last != 0xb8) \
{ \
*outptr++ = consonant_with_virama[last - 0xb8]; \
*statep = 0; \
} \
else \
*statep = 0xec << 3; \
inptr += 4; \
continue; \
} \
if (last == 0xbc && (ch == 0x0BBF || ch == 0x0BC0)) \
{ \
*outptr++ = ch - 0x0af5; \
*statep = 0; \
inptr += 4; \
continue; \
} \
} \
else if (last >= 0x83 && last <= 0x86) \
{ \
if (last >= 0x85 && (ch == 0x0BC1 || ch == 0x0BC2)) \
{ \
*outptr++ = last + 5; \
*statep = 0; \
continue; \
} \
if (ch == 0x0BCD) \
{ \
if (last != 0x85) \
{ \
*outptr++ = last + 5; \
*statep = 0; \
} \
else \
*statep = 0x8a << 3; \
inptr += 4; \
continue; \
} \
} \
else if (last == 0xec) \
{ \
if (ch == 0x0BB7) \
{ \
*statep = 0x87 << 3; \
inptr += 4; \
continue; \
} \
} \
else if (last == 0x8a) \
{ \
if (ch == 0x0BB0) \
{ \
*statep = 0xc38a << 3; \
inptr += 4; \
continue; \
} \
} \
else if (last == 0x87) \
{ \
if (ch == 0x0BCD) \
{ \
*outptr++ = 0x8c; \
*statep = 0; \
inptr += 4; \
continue; \
} \
} \
else \
{ \
assert (last == 0xc38a); \
if (ch == 0x0BC0) \
{ \
*outptr++ = 0x82; \
*statep = 0; \
inptr += 4; \
continue; \
} \
} \
\
/* Output the buffered character. */ \
if (__builtin_expect (last >> 8, 0)) \
{ \
if (__builtin_expect (outptr + 2 <= outend, 1)) \
{ \
*outptr++ = last & 0xff; \
*outptr++ = (last >> 8) & 0xff; \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
else \
*outptr++ = last & 0xff; \
*statep = 0; \
continue; \
} \
\
if (ch < 0x80) \
/* Plain ASCII character. */ \
*outptr++ = ch; \
else if (ch >= 0x0B80 && ch <= 0x0BFF) \
{ \
/* Tamil character. */ \
uint8_t t = ucs4_to_tscii[ch - 0x0B80]; \
\
if (t != 0) \
{ \
if ((t >= 0xb8 && t <= 0xc9) || (t >= 0x83 && t <= 0x86)) \
*statep = (uint32_t) t << 3; \
else \
*outptr++ = t; \
} \
else if (ch >= 0x0BCA && ch <= 0x0BCC) \
{ \
/* See whether we have room for two bytes. */ \
if (__builtin_expect (outptr + 2 <= outend, 1)) \
{ \
*outptr++ = (ch == 0x0BCA ? 0xa6 : 0xa7); \
*outptr++ = (ch != 0x0BCC ? 0xa1 : 0xaa); \
} \
else \
{ \
result = __GCONV_FULL_OUTPUT; \
break; \
} \
} \
else \
{ \
/* Illegal character. */ \
STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
} \
else if (ch == 0x00A9) \
*outptr++ = ch; \
else if (ch == 0x2018 || ch == 0x2019) \
*outptr++ = ch - 0x1f87; \
else if (ch == 0x201C || ch == 0x201D) \
*outptr++ = ch - 0x1f89; \
else \
{ \
UNICODE_TAG_HANDLER (ch, 4); \
\
/* Illegal character. */ \
STANDARD_TO_LOOP_ERR_HANDLER (4); \
} \
\
/* Now that we wrote the output increment the input pointer. */ \
inptr += 4; \
}
/* Hand the UCS-4 -> TSCII BODY to the generic loop template.
   EXTRA_LOOP_DECLS appends the `int *statep' parameter that BODY reads and
   writes.  NOTE(review): LOOP_NEED_FLAGS presumably makes the template
   provide the flags used by the error-handler macros; confirm against
   <iconv/loop.c>.  */
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , int *statep
#include <iconv/loop.c>
/* Now define the toplevel functions. */
#include <iconv/skeleton.c>

View File

@ -57,7 +57,7 @@ hexbuf (unsigned char buf[], unsigned int buflen)
return msg;
}
/* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (6 bytes)
/* Attempts to convert a byte buffer BUF (BUFLEN bytes) to OUT (12 bytes)
using the conversion descriptor CD. Returns the number of written bytes,
or 0 if ambiguous, or -1 if invalid. */
static int
@ -66,7 +66,7 @@ try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
const char *inbuf = (const char *) buf;
size_t inbytesleft = buflen;
char *outbuf = (char *) out;
size_t outbytesleft = 6;
size_t outbytesleft = 12;
size_t result;
iconv (cd, NULL, NULL, NULL, NULL);
@ -100,10 +100,10 @@ try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
fprintf (stderr, "%s: inbytes = %ld, outbytes = %ld\n",
hexbuf (buf, buflen),
(long) (buflen - inbytesleft),
(long) (6 - outbytesleft));
(long) (12 - outbytesleft));
exit (1);
}
return 6 - outbytesleft;
return 12 - outbytesleft;
}
}
@ -111,7 +111,7 @@ try (iconv_t cd, unsigned char buf[], unsigned int buflen, unsigned char *out)
static const char *
utf8_decode (const unsigned char *out, unsigned int outlen)
{
static char hexbuf[42];
static char hexbuf[84];
char *p = hexbuf;
while (outlen > 0)
@ -203,7 +203,7 @@ main (int argc, char *argv[])
search_depth = (strcmp (charset, "UTF-8") == 0 ? 3 : 4);
{
unsigned char out[6];
unsigned char out[12];
unsigned char buf[4];
unsigned int i0, i1, i2, i3;
int result;

View File

@ -202,6 +202,7 @@ cat <<EOF |
MAC-SAMI
ARMSCII-8
TCVN5712-1
TSCII
#
# Multibyte encodings come here
#

View File

@ -1,3 +1,7 @@
2002-09-22 Bruno Haible <bruno@clisp.org>
* charmaps/TSCII: New file.
2002-09-20 Roland McGrath <roland@redhat.com>
* gen-locale.sh (generate_locale): Clean up overly baroque sh syntax.

388
localedata/charmaps/TSCII Normal file
View File

@ -0,0 +1,388 @@
<code_set_name> TSCII
<comment_char> %
<escape_char> /
<mb_cur_min> 1
<mb_cur_max> 1
% based on TSCII version 1.7
CHARMAP
<U0000> /x00 NULL
<U0001> /x01 START OF HEADING
<U0002> /x02 START OF TEXT
<U0003> /x03 END OF TEXT
<U0004> /x04 END OF TRANSMISSION
<U0005> /x05 ENQUIRY
<U0006> /x06 ACKNOWLEDGE
<U0007> /x07 BELL
<U0008> /x08 BACKSPACE
<U0009> /x09 HORIZONTAL TABULATION
<U000A> /x0a LINE FEED
<U000B> /x0b VERTICAL TABULATION
<U000C> /x0c FORM FEED
<U000D> /x0d CARRIAGE RETURN
<U000E> /x0e SHIFT OUT
<U000F> /x0f SHIFT IN
<U0010> /x10 DATA LINK ESCAPE
<U0011> /x11 DEVICE CONTROL ONE
<U0012> /x12 DEVICE CONTROL TWO
<U0013> /x13 DEVICE CONTROL THREE
<U0014> /x14 DEVICE CONTROL FOUR
<U0015> /x15 NEGATIVE ACKNOWLEDGE
<U0016> /x16 SYNCHRONOUS IDLE
<U0017> /x17 END OF TRANSMISSION BLOCK
<U0018> /x18 CANCEL
<U0019> /x19 END OF MEDIUM
<U001A> /x1a SUBSTITUTE
<U001B> /x1b ESCAPE
<U001C> /x1c FILE SEPARATOR
<U001D> /x1d GROUP SEPARATOR
<U001E> /x1e RECORD SEPARATOR
<U001F> /x1f UNIT SEPARATOR
<U0020> /x20 SPACE
<U0021> /x21 EXCLAMATION MARK
<U0022> /x22 QUOTATION MARK
<U0023> /x23 NUMBER SIGN
<U0024> /x24 DOLLAR SIGN
<U0025> /x25 PERCENT SIGN
<U0026> /x26 AMPERSAND
<U0027> /x27 APOSTROPHE
<U0028> /x28 LEFT PARENTHESIS
<U0029> /x29 RIGHT PARENTHESIS
<U002A> /x2a ASTERISK
<U002B> /x2b PLUS SIGN
<U002C> /x2c COMMA
<U002D> /x2d HYPHEN-MINUS
<U002E> /x2e FULL STOP
<U002F> /x2f SOLIDUS
<U0030> /x30 DIGIT ZERO
<U0031> /x31 DIGIT ONE
<U0032> /x32 DIGIT TWO
<U0033> /x33 DIGIT THREE
<U0034> /x34 DIGIT FOUR
<U0035> /x35 DIGIT FIVE
<U0036> /x36 DIGIT SIX
<U0037> /x37 DIGIT SEVEN
<U0038> /x38 DIGIT EIGHT
<U0039> /x39 DIGIT NINE
<U003A> /x3a COLON
<U003B> /x3b SEMICOLON
<U003C> /x3c LESS-THAN SIGN
<U003D> /x3d EQUALS SIGN
<U003E> /x3e GREATER-THAN SIGN
<U003F> /x3f QUESTION MARK
<U0040> /x40 COMMERCIAL AT
<U0041> /x41 LATIN CAPITAL LETTER A
<U0042> /x42 LATIN CAPITAL LETTER B
<U0043> /x43 LATIN CAPITAL LETTER C
<U0044> /x44 LATIN CAPITAL LETTER D
<U0045> /x45 LATIN CAPITAL LETTER E
<U0046> /x46 LATIN CAPITAL LETTER F
<U0047> /x47 LATIN CAPITAL LETTER G
<U0048> /x48 LATIN CAPITAL LETTER H
<U0049> /x49 LATIN CAPITAL LETTER I
<U004A> /x4a LATIN CAPITAL LETTER J
<U004B> /x4b LATIN CAPITAL LETTER K
<U004C> /x4c LATIN CAPITAL LETTER L
<U004D> /x4d LATIN CAPITAL LETTER M
<U004E> /x4e LATIN CAPITAL LETTER N
<U004F> /x4f LATIN CAPITAL LETTER O
<U0050> /x50 LATIN CAPITAL LETTER P
<U0051> /x51 LATIN CAPITAL LETTER Q
<U0052> /x52 LATIN CAPITAL LETTER R
<U0053> /x53 LATIN CAPITAL LETTER S
<U0054> /x54 LATIN CAPITAL LETTER T
<U0055> /x55 LATIN CAPITAL LETTER U
<U0056> /x56 LATIN CAPITAL LETTER V
<U0057> /x57 LATIN CAPITAL LETTER W
<U0058> /x58 LATIN CAPITAL LETTER X
<U0059> /x59 LATIN CAPITAL LETTER Y
<U005A> /x5a LATIN CAPITAL LETTER Z
<U005B> /x5b LEFT SQUARE BRACKET
<U005C> /x5c REVERSE SOLIDUS
<U005D> /x5d RIGHT SQUARE BRACKET
<U005E> /x5e CIRCUMFLEX ACCENT
<U005F> /x5f LOW LINE
<U0060> /x60 GRAVE ACCENT
<U0061> /x61 LATIN SMALL LETTER A
<U0062> /x62 LATIN SMALL LETTER B
<U0063> /x63 LATIN SMALL LETTER C
<U0064> /x64 LATIN SMALL LETTER D
<U0065> /x65 LATIN SMALL LETTER E
<U0066> /x66 LATIN SMALL LETTER F
<U0067> /x67 LATIN SMALL LETTER G
<U0068> /x68 LATIN SMALL LETTER H
<U0069> /x69 LATIN SMALL LETTER I
<U006A> /x6a LATIN SMALL LETTER J
<U006B> /x6b LATIN SMALL LETTER K
<U006C> /x6c LATIN SMALL LETTER L
<U006D> /x6d LATIN SMALL LETTER M
<U006E> /x6e LATIN SMALL LETTER N
<U006F> /x6f LATIN SMALL LETTER O
<U0070> /x70 LATIN SMALL LETTER P
<U0071> /x71 LATIN SMALL LETTER Q
<U0072> /x72 LATIN SMALL LETTER R
<U0073> /x73 LATIN SMALL LETTER S
<U0074> /x74 LATIN SMALL LETTER T
<U0075> /x75 LATIN SMALL LETTER U
<U0076> /x76 LATIN SMALL LETTER V
<U0077> /x77 LATIN SMALL LETTER W
<U0078> /x78 LATIN SMALL LETTER X
<U0079> /x79 LATIN SMALL LETTER Y
<U007A> /x7a LATIN SMALL LETTER Z
<U007B> /x7b LEFT CURLY BRACKET
<U007C> /x7c VERTICAL LINE
<U007D> /x7d RIGHT CURLY BRACKET
<U007E> /x7e TILDE
<U007F> /x7f DELETE
<U0BE6> /x80 TAMIL DIGIT ZERO (currently unassigned)
<U0BE7> /x81 TAMIL DIGIT ONE
<U0BB8><U0BCD><U0BB0><U0BC0> /x82 TAMIL GLYPH SRI
<U0B9C> /x83 TAMIL LETTER JA
<U0B9C><U0BC1> /x83/xa4 TAMIL GLYPH JU
<U0B9C><U0BC2> /x83/xa5 TAMIL GLYPH JUU
<U0BB7> /x84 TAMIL LETTER SSA
<U0BB7><U0BC1> /x84/xa4 TAMIL GLYPH SSU
<U0BB7><U0BC2> /x84/xa5 TAMIL GLYPH SSUU
<U0BB8> /x85 TAMIL LETTER SA
<U0BB9> /x86 TAMIL LETTER HA
<U0B95><U0BCD><U0BB7> /x87 TAMIL GLYPH KSHA
<U0B9C><U0BCD> /x88 TAMIL GLYPH J
<U0BB7><U0BCD> /x89 TAMIL GLYPH SS
<U0BB8><U0BCD> /x8a TAMIL GLYPH S
<U0BB8><U0BC1> /x8a/xa4 TAMIL GLYPH SU
<U0BB8><U0BC2> /x8a/xa5 TAMIL GLYPH SUU
<U0BB9><U0BCD> /x8b TAMIL GLYPH H
<U0BB9><U0BC1> /x8b/xa4 TAMIL GLYPH HU
<U0BB9><U0BC2> /x8b/xa5 TAMIL GLYPH HUU
<U0B95><U0BCD><U0BB7><U0BCD> /x8c TAMIL GLYPH KSH
<U0BE8> /x8d TAMIL DIGIT TWO
<U0BE9> /x8e TAMIL DIGIT THREE
<U0BEA> /x8f TAMIL DIGIT FOUR
<U0BEB> /x90 TAMIL DIGIT FIVE
<U2018> /x91 LEFT SINGLE QUOTATION MARK
<U2019> /x92 RIGHT SINGLE QUOTATION MARK
<U201C> /x93 LEFT DOUBLE QUOTATION MARK
<U201D> /x94 RIGHT DOUBLE QUOTATION MARK
<U0BEC> /x95 TAMIL DIGIT SIX
<U0BED> /x96 TAMIL DIGIT SEVEN
<U0BEE> /x97 TAMIL DIGIT EIGHT
<U0BEF> /x98 TAMIL DIGIT NINE
<U0B99><U0BC1> /x99 TAMIL GLYPH NGU
<U0B9E><U0BC1> /x9a TAMIL GLYPH NYU
<U0B99><U0BC2> /x9b TAMIL GLYPH NGUU
<U0B9E><U0BC2> /x9c TAMIL GLYPH NYUU
<U0BF0> /x9d TAMIL NUMBER TEN
<U0BF1> /x9e TAMIL NUMBER ONE HUNDRED
<U0BF2> /x9f TAMIL NUMBER ONE THOUSAND
<U0BBE> /xa1 TAMIL VOWEL SIGN AA
<U0BBF> /xa2 TAMIL VOWEL SIGN I
<U0BC0> /xa3 TAMIL VOWEL SIGN II
<U0BC1> /xa4 TAMIL VOWEL SIGN U
<U0BC2> /xa5 TAMIL VOWEL SIGN UU
<U0BC6> /xa6 TAMIL VOWEL SIGN E
<U0BCA> /xa6/xa1 TAMIL VOWEL SIGN O
<U0B95><U0BC6> /xa6/xb8 TAMIL GLYPH KE
<U0B95><U0BCA> /xa6/xb8/xa1 TAMIL GLYPH KO
<U0B99><U0BC6> /xa6/xb9 TAMIL GLYPH NGE
<U0B99><U0BCA> /xa6/xb9/xa1 TAMIL GLYPH NGO
<U0B9A><U0BC6> /xa6/xba TAMIL GLYPH CE
<U0B9A><U0BCA> /xa6/xba/xa1 TAMIL GLYPH CO
<U0B9E><U0BC6> /xa6/xbb TAMIL GLYPH NYE
<U0B9E><U0BCA> /xa6/xbb/xa1 TAMIL GLYPH NYO
<U0B9F><U0BC6> /xa6/xbc TAMIL GLYPH TTE
<U0B9F><U0BCA> /xa6/xbc/xa1 TAMIL GLYPH TTO
<U0BA3><U0BC6> /xa6/xbd TAMIL GLYPH NNE
<U0BA3><U0BCA> /xa6/xbd/xa1 TAMIL GLYPH NNO
<U0BA4><U0BC6> /xa6/xbe TAMIL GLYPH TE
<U0BA4><U0BCA> /xa6/xbe/xa1 TAMIL GLYPH TO
<U0BA8><U0BC6> /xa6/xbf TAMIL GLYPH NE
<U0BA8><U0BCA> /xa6/xbf/xa1 TAMIL GLYPH NO
<U0BAA><U0BC6> /xa6/xc0 TAMIL GLYPH PE
<U0BAA><U0BCA> /xa6/xc0/xa1 TAMIL GLYPH PO
<U0BAE><U0BC6> /xa6/xc1 TAMIL GLYPH ME
<U0BAE><U0BCA> /xa6/xc1/xa1 TAMIL GLYPH MO
<U0BAF><U0BC6> /xa6/xc2 TAMIL GLYPH YE
<U0BAF><U0BCA> /xa6/xc2/xa1 TAMIL GLYPH YO
<U0BB0><U0BC6> /xa6/xc3 TAMIL GLYPH RE
<U0BB0><U0BCA> /xa6/xc3/xa1 TAMIL GLYPH RO
<U0BB2><U0BC6> /xa6/xc4 TAMIL GLYPH LE
<U0BB2><U0BCA> /xa6/xc4/xa1 TAMIL GLYPH LO
<U0BB5><U0BC6> /xa6/xc5 TAMIL GLYPH VE
<U0BB5><U0BCA> /xa6/xc5/xa1 TAMIL GLYPH VO
<U0BB4><U0BC6> /xa6/xc6 TAMIL GLYPH LLLE
<U0BB4><U0BCA> /xa6/xc6/xa1 TAMIL GLYPH LLLO
<U0BB3><U0BC6> /xa6/xc7 TAMIL GLYPH LLE
<U0BB3><U0BCA> /xa6/xc7/xa1 TAMIL GLYPH LLO
<U0BB1><U0BC6> /xa6/xc8 TAMIL GLYPH RRE
<U0BB1><U0BCA> /xa6/xc8/xa1 TAMIL GLYPH RRO
<U0BA9><U0BC6> /xa6/xc9 TAMIL GLYPH NNNE
<U0BA9><U0BCA> /xa6/xc9/xa1 TAMIL GLYPH NNNO
<U0BC7> /xa7 TAMIL VOWEL SIGN EE
<U0BCB> /xa7/xa1 TAMIL VOWEL SIGN OO
<U0BCC> /xa7/xaa TAMIL VOWEL SIGN AU
<U0B95><U0BC7> /xa7/xb8 TAMIL GLYPH KEE
<U0B95><U0BCB> /xa7/xb8/xa1 TAMIL GLYPH KOO
<U0B95><U0BCC> /xa7/xb8/xaa TAMIL GLYPH KAU
<U0B99><U0BC7> /xa7/xb9 TAMIL GLYPH NGEE
<U0B99><U0BCB> /xa7/xb9/xa1 TAMIL GLYPH NGOO
<U0B99><U0BCC> /xa7/xb9/xaa TAMIL GLYPH NGAU
<U0B9A><U0BC7> /xa7/xba TAMIL GLYPH CEE
<U0B9A><U0BCB> /xa7/xba/xa1 TAMIL GLYPH COO
<U0B9A><U0BCC> /xa7/xba/xaa TAMIL GLYPH CAU
<U0B9E><U0BC7> /xa7/xbb TAMIL GLYPH NYEE
<U0B9E><U0BCB> /xa7/xbb/xa1 TAMIL GLYPH NYOO
<U0B9E><U0BCC> /xa7/xbb/xaa TAMIL GLYPH NYAU
<U0B9F><U0BC7> /xa7/xbc TAMIL GLYPH TTEE
<U0B9F><U0BCB> /xa7/xbc/xa1 TAMIL GLYPH TTOO
<U0B9F><U0BCC> /xa7/xbc/xaa TAMIL GLYPH TTAU
<U0BA3><U0BC7> /xa7/xbd TAMIL GLYPH NNEE
<U0BA3><U0BCB> /xa7/xbd/xa1 TAMIL GLYPH NNOO
<U0BA3><U0BCC> /xa7/xbd/xaa TAMIL GLYPH NNAU
<U0BA4><U0BC7> /xa7/xbe TAMIL GLYPH TEE
<U0BA4><U0BCB> /xa7/xbe/xa1 TAMIL GLYPH TOO
<U0BA4><U0BCC> /xa7/xbe/xaa TAMIL GLYPH TAU
<U0BA8><U0BC7> /xa7/xbf TAMIL GLYPH NEE
<U0BA8><U0BCB> /xa7/xbf/xa1 TAMIL GLYPH NOO
<U0BA8><U0BCC> /xa7/xbf/xaa TAMIL GLYPH NAU
<U0BAA><U0BC7> /xa7/xc0 TAMIL GLYPH PEE
<U0BAA><U0BCB> /xa7/xc0/xa1 TAMIL GLYPH POO
<U0BAA><U0BCC> /xa7/xc0/xaa TAMIL GLYPH PAU
<U0BAE><U0BC7> /xa7/xc1 TAMIL GLYPH MEE
<U0BAE><U0BCB> /xa7/xc1/xa1 TAMIL GLYPH MOO
<U0BAE><U0BCC> /xa7/xc1/xaa TAMIL GLYPH MAU
<U0BAF><U0BC7> /xa7/xc2 TAMIL GLYPH YEE
<U0BAF><U0BCB> /xa7/xc2/xa1 TAMIL GLYPH YOO
<U0BAF><U0BCC> /xa7/xc2/xaa TAMIL GLYPH YAU
<U0BB0><U0BC7> /xa7/xc3 TAMIL GLYPH REE
<U0BB0><U0BCB> /xa7/xc3/xa1 TAMIL GLYPH ROO
<U0BB0><U0BCC> /xa7/xc3/xaa TAMIL GLYPH RAU
<U0BB2><U0BC7> /xa7/xc4 TAMIL GLYPH LEE
<U0BB2><U0BCB> /xa7/xc4/xa1 TAMIL GLYPH LOO
<U0BB2><U0BCC> /xa7/xc4/xaa TAMIL GLYPH LAU
<U0BB5><U0BC7> /xa7/xc5 TAMIL GLYPH VEE
<U0BB5><U0BCB> /xa7/xc5/xa1 TAMIL GLYPH VOO
<U0BB5><U0BCC> /xa7/xc5/xaa TAMIL GLYPH VAU
<U0BB4><U0BC7> /xa7/xc6 TAMIL GLYPH LLLEE
<U0BB4><U0BCB> /xa7/xc6/xa1 TAMIL GLYPH LLLOO
<U0BB4><U0BCC> /xa7/xc6/xaa TAMIL GLYPH LLLAU
<U0BB3><U0BC7> /xa7/xc7 TAMIL GLYPH LLEE
<U0BB3><U0BCB> /xa7/xc7/xa1 TAMIL GLYPH LLOO
<U0BB3><U0BCC> /xa7/xc7/xaa TAMIL GLYPH LLAU
<U0BB1><U0BC7> /xa7/xc8 TAMIL GLYPH RREE
<U0BB1><U0BCB> /xa7/xc8/xa1 TAMIL GLYPH RROO
<U0BB1><U0BCC> /xa7/xc8/xaa TAMIL GLYPH RRAU
<U0BA9><U0BC7> /xa7/xc9 TAMIL GLYPH NNNEE
<U0BA9><U0BCB> /xa7/xc9/xa1 TAMIL GLYPH NNNOO
<U0BA9><U0BCC> /xa7/xc9/xaa TAMIL GLYPH NNNAU
<U0BC8> /xa8 TAMIL VOWEL SIGN AI
<U0B95><U0BC8> /xa8/xb8 TAMIL GLYPH KAI
<U0B99><U0BC8> /xa8/xb9 TAMIL GLYPH NGAI
<U0B9A><U0BC8> /xa8/xba TAMIL GLYPH CAI
<U0B9E><U0BC8> /xa8/xbb TAMIL GLYPH NYAI
<U0B9F><U0BC8> /xa8/xbc TAMIL GLYPH TTAI
<U0BA3><U0BC8> /xa8/xbd TAMIL GLYPH NNAI
<U0BA4><U0BC8> /xa8/xbe TAMIL GLYPH TAI
<U0BA8><U0BC8> /xa8/xbf TAMIL GLYPH NAI
<U0BAA><U0BC8> /xa8/xc0 TAMIL GLYPH PAI
<U0BAE><U0BC8> /xa8/xc1 TAMIL GLYPH MAI
<U0BAF><U0BC8> /xa8/xc2 TAMIL GLYPH YAI
<U0BB0><U0BC8> /xa8/xc3 TAMIL GLYPH RAI
<U0BB2><U0BC8> /xa8/xc4 TAMIL GLYPH LAI
<U0BB5><U0BC8> /xa8/xc5 TAMIL GLYPH VAI
<U0BB4><U0BC8> /xa8/xc6 TAMIL GLYPH LLLAI
<U0BB3><U0BC8> /xa8/xc7 TAMIL GLYPH LLAI
<U0BB1><U0BC8> /xa8/xc8 TAMIL GLYPH RRAI
<U0BA9><U0BC8> /xa8/xc9 TAMIL GLYPH NNNAI
<U00A9> /xa9 COPYRIGHT SIGN
<U0BD7> /xaa TAMIL AU LENGTH MARK
<U0B85> /xab TAMIL LETTER A
<U0B86> /xac TAMIL LETTER AA
%IRREVERSIBLE%<U0B87> /xad TAMIL LETTER I
<U0B88> /xae TAMIL LETTER II
<U0B89> /xaf TAMIL LETTER U
<U0B8A> /xb0 TAMIL LETTER UU
<U0B8E> /xb1 TAMIL LETTER E
<U0B8F> /xb2 TAMIL LETTER EE
<U0B90> /xb3 TAMIL LETTER AI
<U0B92> /xb4 TAMIL LETTER O
<U0B93> /xb5 TAMIL LETTER OO
<U0B94> /xb6 TAMIL LETTER AU
<U0B83> /xb7 TAMIL SIGN VISARGA
<U0B95> /xb8 TAMIL LETTER KA
<U0B99> /xb9 TAMIL LETTER NGA
<U0B9A> /xba TAMIL LETTER CA
<U0B9E> /xbb TAMIL LETTER NYA
<U0B9F> /xbc TAMIL LETTER TTA
<U0BA3> /xbd TAMIL LETTER NNA
<U0BA4> /xbe TAMIL LETTER TA
<U0BA8> /xbf TAMIL LETTER NA
<U0BAA> /xc0 TAMIL LETTER PA
<U0BAE> /xc1 TAMIL LETTER MA
<U0BAF> /xc2 TAMIL LETTER YA
<U0BB0> /xc3 TAMIL LETTER RA
<U0BB2> /xc4 TAMIL LETTER LA
<U0BB5> /xc5 TAMIL LETTER VA
<U0BB4> /xc6 TAMIL LETTER LLLA
<U0BB3> /xc7 TAMIL LETTER LLA
<U0BB1> /xc8 TAMIL LETTER RRA
<U0BA9> /xc9 TAMIL LETTER NNNA
<U0B9F><U0BBF> /xca TAMIL GLYPH TI
<U0B9F><U0BC0> /xcb TAMIL GLYPH TII
<U0B95><U0BC1> /xcc TAMIL GLYPH KU
<U0B9A><U0BC1> /xcd TAMIL GLYPH CU
<U0B9F><U0BC1> /xce TAMIL GLYPH TTU
<U0BA3><U0BC1> /xcf TAMIL GLYPH NNU
<U0BA4><U0BC1> /xd0 TAMIL GLYPH TU
<U0BA8><U0BC1> /xd1 TAMIL GLYPH NU
<U0BAA><U0BC1> /xd2 TAMIL GLYPH PU
<U0BAE><U0BC1> /xd3 TAMIL GLYPH MU
<U0BAF><U0BC1> /xd4 TAMIL GLYPH YU
<U0BB0><U0BC1> /xd5 TAMIL GLYPH RU
<U0BB2><U0BC1> /xd6 TAMIL GLYPH LU
<U0BB5><U0BC1> /xd7 TAMIL GLYPH VU
<U0BB4><U0BC1> /xd8 TAMIL GLYPH LLLU
<U0BB3><U0BC1> /xd9 TAMIL GLYPH LLU
<U0BB1><U0BC1> /xda TAMIL GLYPH RRU
<U0BA9><U0BC1> /xdb TAMIL GLYPH NNNU
<U0B95><U0BC2> /xdc TAMIL GLYPH KUU
<U0B9A><U0BC2> /xdd TAMIL GLYPH CUU
<U0B9F><U0BC2> /xde TAMIL GLYPH TTUU
<U0BA3><U0BC2> /xdf TAMIL GLYPH NNUU
<U0BA4><U0BC2> /xe0 TAMIL GLYPH TUU
<U0BA8><U0BC2> /xe1 TAMIL GLYPH NUU
<U0BAA><U0BC2> /xe2 TAMIL GLYPH PUU
<U0BAE><U0BC2> /xe3 TAMIL GLYPH MUU
<U0BAF><U0BC2> /xe4 TAMIL GLYPH YUU
<U0BB0><U0BC2> /xe5 TAMIL GLYPH RUU
<U0BB2><U0BC2> /xe6 TAMIL GLYPH LUU
<U0BB5><U0BC2> /xe7 TAMIL GLYPH VUU
<U0BB4><U0BC2> /xe8 TAMIL GLYPH LLLUU
<U0BB3><U0BC2> /xe9 TAMIL GLYPH LLUU
<U0BB1><U0BC2> /xea TAMIL GLYPH RRUU
<U0BA9><U0BC2> /xeb TAMIL GLYPH NNNUU
<U0B95><U0BCD> /xec TAMIL GLYPH K
<U0B99><U0BCD> /xed TAMIL GLYPH NG
<U0B9A><U0BCD> /xee TAMIL GLYPH C
<U0B9E><U0BCD> /xef TAMIL GLYPH NY
<U0B9F><U0BCD> /xf0 TAMIL GLYPH TT
<U0BA3><U0BCD> /xf1 TAMIL GLYPH NN
<U0BA4><U0BCD> /xf2 TAMIL GLYPH T
<U0BA8><U0BCD> /xf3 TAMIL GLYPH N
<U0BAA><U0BCD> /xf4 TAMIL GLYPH P
<U0BAE><U0BCD> /xf5 TAMIL GLYPH M
<U0BAF><U0BCD> /xf6 TAMIL GLYPH Y
<U0BB0><U0BCD> /xf7 TAMIL GLYPH R
<U0BB2><U0BCD> /xf8 TAMIL GLYPH L
<U0BB5><U0BCD> /xf9 TAMIL GLYPH V
<U0BB4><U0BCD> /xfa TAMIL GLYPH LLL
<U0BB3><U0BCD> /xfb TAMIL GLYPH LL
<U0BB1><U0BCD> /xfc TAMIL GLYPH RR
<U0BA9><U0BCD> /xfd TAMIL GLYPH NNN
<U0B87> /xfe TAMIL LETTER I
END CHARMAP
WIDTH
<U0B82> 0
<U0BC0> 0
<U0BCD> 0
END WIDTH