locale: Introduce translate_unicode_codepoint into linereader.c
This will permit reusing the Unicode character processing for different character encodings, not just the current <U...> encoding.

Reviewed-by: Carlos O'Donell <carlos@redhat.com>
Tested-by: Carlos O'Donell <carlos@redhat.com>
This commit is contained in:
parent
19d4944459
commit
7dcaabb94c
|
@ -596,6 +596,83 @@ get_ident (struct linereader *lr)
|
||||||
return &lr->token;
|
return &lr->token;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Process a decoded Unicode codepoint WCH in a string, placing the
|
||||||
|
multibyte sequence into LRB. Return false if the character is not
|
||||||
|
found in CHARMAP/REPERTOIRE. */
|
||||||
|
static bool
|
||||||
|
translate_unicode_codepoint (struct localedef_t *locale,
|
||||||
|
const struct charmap_t *charmap,
|
||||||
|
const struct repertoire_t *repertoire,
|
||||||
|
uint32_t wch, struct lr_buffer *lrb)
|
||||||
|
{
|
||||||
|
/* See whether the charmap contains the Uxxxxxxxx names. */
|
||||||
|
char utmp[10];
|
||||||
|
snprintf (utmp, sizeof (utmp), "U%08X", wch);
|
||||||
|
struct charseq *seq = charmap_find_value (charmap, utmp, 9);
|
||||||
|
|
||||||
|
if (seq == NULL)
|
||||||
|
{
|
||||||
|
/* No, this isn't the case. Now determine from
|
||||||
|
the repertoire the name of the character and
|
||||||
|
find it in the charmap. */
|
||||||
|
if (repertoire != NULL)
|
||||||
|
{
|
||||||
|
const char *symbol = repertoire_find_symbol (repertoire, wch);
|
||||||
|
if (symbol != NULL)
|
||||||
|
seq = charmap_find_value (charmap, symbol, strlen (symbol));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seq == NULL)
|
||||||
|
{
|
||||||
|
#ifndef NO_TRANSLITERATION
|
||||||
|
/* Transliterate if possible. */
|
||||||
|
if (locale != NULL)
|
||||||
|
{
|
||||||
|
if ((locale->avail & CTYPE_LOCALE) == 0)
|
||||||
|
{
|
||||||
|
/* Load the CTYPE data now. */
|
||||||
|
int old_needed = locale->needed;
|
||||||
|
|
||||||
|
locale->needed = 0;
|
||||||
|
locale = load_locale (LC_CTYPE, locale->name,
|
||||||
|
locale->repertoire_name,
|
||||||
|
charmap, locale);
|
||||||
|
locale->needed = old_needed;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t *translit;
|
||||||
|
if ((locale->avail & CTYPE_LOCALE) != 0
|
||||||
|
&& ((translit = find_translit (locale, charmap, wch))
|
||||||
|
!= NULL))
|
||||||
|
/* The CTYPE data contains a matching
|
||||||
|
transliteration. */
|
||||||
|
{
|
||||||
|
for (int i = 0; translit[i] != 0; ++i)
|
||||||
|
{
|
||||||
|
snprintf (utmp, sizeof (utmp), "U%08X", translit[i]);
|
||||||
|
seq = charmap_find_value (charmap, utmp, 9);
|
||||||
|
assert (seq != NULL);
|
||||||
|
adds (lrb, seq->bytes, seq->nbytes);
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif /* NO_TRANSLITERATION */
|
||||||
|
|
||||||
|
/* Not a known name. */
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (seq != NULL)
|
||||||
|
{
|
||||||
|
adds (lrb, seq->bytes, seq->nbytes);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static struct token *
|
static struct token *
|
||||||
get_string (struct linereader *lr, const struct charmap_t *charmap,
|
get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
|
@ -635,7 +712,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int illegal_string = 0;
|
bool illegal_string = false;
|
||||||
size_t buf2act = 0;
|
size_t buf2act = 0;
|
||||||
size_t buf2max = 56 * sizeof (uint32_t);
|
size_t buf2max = 56 * sizeof (uint32_t);
|
||||||
int ch;
|
int ch;
|
||||||
|
@ -695,7 +772,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
{
|
{
|
||||||
/* <> is no correct name. Ignore it and also signal an
|
/* <> is no correct name. Ignore it and also signal an
|
||||||
error. */
|
error. */
|
||||||
illegal_string = 1;
|
illegal_string = true;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -709,8 +786,6 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
|
|
||||||
if (cp == &lrb.buf[lrb.act])
|
if (cp == &lrb.buf[lrb.act])
|
||||||
{
|
{
|
||||||
char utmp[10];
|
|
||||||
|
|
||||||
/* Yes, it is. */
|
/* Yes, it is. */
|
||||||
addc (&lrb, '\0');
|
addc (&lrb, '\0');
|
||||||
wch = strtoul (lrb.buf + startidx + 1, NULL, 16);
|
wch = strtoul (lrb.buf + startidx + 1, NULL, 16);
|
||||||
|
@ -721,81 +796,9 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
if (return_widestr)
|
if (return_widestr)
|
||||||
ADDWC (wch);
|
ADDWC (wch);
|
||||||
|
|
||||||
/* See whether the charmap contains the Uxxxxxxxx names. */
|
if (!translate_unicode_codepoint (locale, charmap,
|
||||||
snprintf (utmp, sizeof (utmp), "U%08X", wch);
|
repertoire, wch, &lrb))
|
||||||
seq = charmap_find_value (charmap, utmp, 9);
|
illegal_string = true;
|
||||||
|
|
||||||
if (seq == NULL)
|
|
||||||
{
|
|
||||||
/* No, this isn't the case. Now determine from
|
|
||||||
the repertoire the name of the character and
|
|
||||||
find it in the charmap. */
|
|
||||||
if (repertoire != NULL)
|
|
||||||
{
|
|
||||||
const char *symbol;
|
|
||||||
|
|
||||||
symbol = repertoire_find_symbol (repertoire, wch);
|
|
||||||
|
|
||||||
if (symbol != NULL)
|
|
||||||
seq = charmap_find_value (charmap, symbol,
|
|
||||||
strlen (symbol));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (seq == NULL)
|
|
||||||
{
|
|
||||||
#ifndef NO_TRANSLITERATION
|
|
||||||
/* Transliterate if possible. */
|
|
||||||
if (locale != NULL)
|
|
||||||
{
|
|
||||||
uint32_t *translit;
|
|
||||||
|
|
||||||
if ((locale->avail & CTYPE_LOCALE) == 0)
|
|
||||||
{
|
|
||||||
/* Load the CTYPE data now. */
|
|
||||||
int old_needed = locale->needed;
|
|
||||||
|
|
||||||
locale->needed = 0;
|
|
||||||
locale = load_locale (LC_CTYPE,
|
|
||||||
locale->name,
|
|
||||||
locale->repertoire_name,
|
|
||||||
charmap, locale);
|
|
||||||
locale->needed = old_needed;
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((locale->avail & CTYPE_LOCALE) != 0
|
|
||||||
&& ((translit = find_translit (locale,
|
|
||||||
charmap, wch))
|
|
||||||
!= NULL))
|
|
||||||
/* The CTYPE data contains a matching
|
|
||||||
transliteration. */
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; translit[i] != 0; ++i)
|
|
||||||
{
|
|
||||||
char utmp[10];
|
|
||||||
|
|
||||||
snprintf (utmp, sizeof (utmp), "U%08X",
|
|
||||||
translit[i]);
|
|
||||||
seq = charmap_find_value (charmap, utmp,
|
|
||||||
9);
|
|
||||||
assert (seq != NULL);
|
|
||||||
adds (&lrb, seq->bytes, seq->nbytes);
|
|
||||||
}
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif /* NO_TRANSLITERATION */
|
|
||||||
|
|
||||||
/* Not a known name. */
|
|
||||||
illegal_string = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (seq != NULL)
|
|
||||||
adds (&lrb, seq->bytes, seq->nbytes);
|
|
||||||
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -812,7 +815,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
/* This name is not in the charmap. */
|
/* This name is not in the charmap. */
|
||||||
lr_error (lr, _("symbol `%.*s' not in charmap"),
|
lr_error (lr, _("symbol `%.*s' not in charmap"),
|
||||||
(int) (lrb.act - startidx), &lrb.buf[startidx]);
|
(int) (lrb.act - startidx), &lrb.buf[startidx]);
|
||||||
illegal_string = 1;
|
illegal_string = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (return_widestr)
|
if (return_widestr)
|
||||||
|
@ -833,7 +836,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
/* This name is not in the repertoire map. */
|
/* This name is not in the repertoire map. */
|
||||||
lr_error (lr, _("symbol `%.*s' not in repertoire map"),
|
lr_error (lr, _("symbol `%.*s' not in repertoire map"),
|
||||||
(int) (lrb.act - startidx), &lrb.buf[startidx]);
|
(int) (lrb.act - startidx), &lrb.buf[startidx]);
|
||||||
illegal_string = 1;
|
illegal_string = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ADDWC (wch);
|
ADDWC (wch);
|
||||||
|
@ -850,7 +853,7 @@ get_string (struct linereader *lr, const struct charmap_t *charmap,
|
||||||
if (ch == '\n' || ch == EOF)
|
if (ch == '\n' || ch == EOF)
|
||||||
{
|
{
|
||||||
lr_error (lr, _("unterminated string"));
|
lr_error (lr, _("unterminated string"));
|
||||||
illegal_string = 1;
|
illegal_string = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (illegal_string)
|
if (illegal_string)
|
||||||
|
|
Loading…
Reference in a new issue