glibc/iconv/gconv_trans.c
Ulrich Drepper 1d96d74da7 Update.
* locale/langinfo.h: Add entries for default_missing information.
	* locale/C-ctype.c: Add initializers for new fields.
	* iconv/gconv_trans.c: If nothing matched, try to use default_missing
	information.
	* locale/categories.h: Add entries for all LC_CTYPE values.
	* locale/programs/ld-ctype.c (ctype_output): Write out default_missing
	information.

	* localedata/tst-trans.c: Write out an error message if class is
	not found.
2000-06-17 03:08:26 +00:00

180 lines
5.3 KiB
C

/* Transliteration using the locale's data.
Copyright (C) 2000 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 2000.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <dlfcn.h>
#include <stdint.h>
#include "gconv_int.h"
#include "../locale/localeinfo.h"
int
__gconv_transliterate (struct __gconv_step *step,
struct __gconv_step_data *step_data,
const unsigned char *inbufstart,
const unsigned char **inbufp,
const unsigned char *inbufend,
unsigned char **outbufstart, size_t *irreversible)
{
/* Find out about the locale's transliteration. */
uint_fast32_t size;
uint_fast32_t layers;
uint32_t *from_idx;
uint32_t *from_tbl;
uint32_t *to_idx;
uint32_t *to_tbl;
uint32_t *winbuf;
uint32_t *winbufend;
uint_fast32_t low;
uint_fast32_t high;
uint32_t *default_missing;
/* If there is no transliteration information in the locale don't do
anything and return the error. */
size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
if (size == 0)
goto no_rules;
/* Get the rest of the values. */
layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
to_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
/* The input buffer. There are actually 4-byte values. */
winbuf = (uint32_t *) *inbufp;
winbufend = (uint32_t *) inbufend;
/* Test whether there is enough input. */
if (winbuf + 1 > winbufend)
return (winbuf == winbufend
? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
/* The array starting at FROM_IDX contains indeces to the string table
in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
are doing binary search. */
low = 0;
high = size;
while (low < high)
{
uint_fast32_t med = (low + high) / 2;
uint32_t idx;
int cnt;
/* Compare the string at this index with the string at the current
position in the input buffer. */
idx = from_idx[med];
cnt = 0;
do
{
if (from_tbl[idx + cnt] != winbuf[cnt])
/* Does not match. */
break;
++cnt;
}
while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
{
/* Found a matching input sequence. Now try to convert the
possible replacements. */
uint32_t idx2 = to_idx[med];
do
{
/* Determine length of replacement. */
uint_fast32_t len = 0;
int res;
const unsigned char *toinptr;
while (to_tbl[idx2 + len] != L'\0')
++len;
/* Try this input text. */
toinptr = (const unsigned char *) &to_tbl[idx2];
res = DL_CALL_FCT (step->__fct,
(step, step_data, &toinptr,
(const unsigned char *) &to_tbl[idx2 + len],
(unsigned char **) outbufstart,
irreversible, 0, 0));
if (res != __GCONV_ILLEGAL_INPUT)
{
/* If the conversion succeeds we have to increment the
input buffer. */
if (res == __GCONV_EMPTY_INPUT)
{
*inbufp += cnt * sizeof (uint32_t);
++*irreversible;
}
return res;
}
/* Next replacement. */
idx2 += len + 1;
}
while (to_tbl[idx2] != L'\0');
/* Nothing found, continue searching. */
}
if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
low = idx;
else
high = idx;
}
/* One last chance: use the default replacement. */
no_rules:
default_missing = (uint32_t *)
_NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING);
if (default_missing[0] != L'\0')
{
const unsigned char *toinptr = (const unsigned char *) default_missing;
uint32_t len = _NL_CURRENT_WORD (LC_CTYPE,
_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN);
int res;
res = DL_CALL_FCT (step->__fct,
(step, step_data, &toinptr,
(const unsigned char *) (default_missing + len),
(unsigned char **) outbufstart,
irreversible, 0, 0));
if (res != __GCONV_ILLEGAL_INPUT)
{
/* If the conversion succeeds we have to increment the
input buffer. */
if (res == __GCONV_EMPTY_INPUT)
{
/* We consuming one character. */
++*inbufp;
++*irreversible;
}
return res;
}
}
/* Haven't found a match. */
return __GCONV_ILLEGAL_INPUT;
}