* misc/mntent_r.c (decode_name): Fix decoding of tab, add decoding
	of newline.
	* manual/sysinfo.texi (mtab): Adjust description accordingly.
	Reported by Andries.Brouwer@cwi.nl.
This commit is contained in:
Ulrich Drepper 2003-11-29 06:40:52 +00:00
parent bb3f4825c4
commit 37369d1cef
4 changed files with 360 additions and 126 deletions

View file

@ -1,5 +1,10 @@
2003-11-28 Ulrich Drepper <drepper@redhat.com>
* misc/mntent_r.c (decode_name): Fix decoding of tab, add decoding
of newline.
* manual/sysinfo.texi (mtab): Adjust description accordingly.
Reported by Andries.Brouwer@cwi.nl.
* sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes
to compensate other setup.

View file

@ -673,12 +673,13 @@ filled with the information from the next entry from the file currently
read.
The file format used prescribes the use of spaces or tab characters to
separate the fields. This makes it harder to use name containing one of
these characters (e.g., mount points using spaces). Therefore these
characters are encoded in the files and the @code{getmntent} function
takes care of the decoding while reading the entries back in.
@code{'\040'} is used to encode a space character, @code{'\012'} to
encode a tab character and @code{'\\'} to encode a backslash.
separate the fields. This makes it harder to use name containing one
of these characters (e.g., mount points using spaces). Therefore
these characters are encoded in the files and the @code{getmntent}
function takes care of the decoding while reading the entries back in.
@code{'\040'} is used to encode a space character, @code{'\011'} to
encode a tab character, @code{'\012'} to encode a newline character,
and @code{'\\'} to encode a backslash.
If there was an error or the end of the file is reached the return value
is @code{NULL}.

View file

@ -84,12 +84,18 @@ decode_name (char *buf)
*wp++ = ' ';
rp += 3;
}
else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1' && rp[3] == '2')
else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1' && rp[3] == '1')
{
/* \012 is a TAB. */
*wp++ = '\t';
rp += 3;
}
else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1' && rp[3] == '2')
{
/* \012 is a NEWLINE. */
*wp++ = '\n';
rp += 3;
}
else if (rp[0] == '\\' && rp[1] == '\\')
{
/* We have to escape \\ to be able to represent all characters. */

View file

@ -62,17 +62,14 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, dfa)
init_len = dfa->mb_cur_max;
init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
re_string_construct_common (str, len, pstr, trans, icase, dfa);
pstr->stop = pstr->len;
ret = re_string_realloc_buffers (pstr, init_buf_len);
if (BE (ret != REG_NOERROR, 0))
return ret;
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
: (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr)
|| dfa->mb_cur_max > 1) ? pstr->valid_len : len;
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
pstr->valid_raw_len = pstr->valid_len;
return REG_NOERROR;
}
@ -88,9 +85,6 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
{
reg_errcode_t ret;
re_string_construct_common (str, len, pstr, trans, icase, dfa);
pstr->stop = pstr->len;
/* Set 0 so that this function can initialize whole buffers. */
pstr->valid_len = 0;
if (len > 0)
{
@ -98,15 +92,27 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
if (BE (ret != REG_NOERROR, 0))
return ret;
}
pstr->mbs_case = (MBS_CASE_ALLOCATED (pstr) ? pstr->mbs_case
: (unsigned char *) str);
pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case;
pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
if (icase)
{
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
build_wcs_upper_buffer (pstr);
{
while (1)
{
ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
if (pstr->valid_raw_len >= len)
break;
if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
break;
ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
}
else
#endif /* RE_ENABLE_I18N */
build_upper_buffer (pstr);
@ -122,12 +128,13 @@ re_string_construct (pstr, str, len, trans, icase, dfa)
if (trans != NULL)
re_string_translate_buffer (pstr);
else
pstr->valid_len = len;
{
pstr->valid_len = pstr->bufs_len;
pstr->valid_raw_len = pstr->bufs_len;
}
}
}
/* Initialized whole buffers, then valid_len == bufs_len. */
pstr->valid_len = pstr->bufs_len;
return REG_NOERROR;
}
@ -145,9 +152,16 @@ re_string_realloc_buffers (pstr, new_buf_len)
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->wcs = new_array;
if (pstr->offsets != NULL)
{
int *new_array = re_realloc (pstr->offsets, int, new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->offsets = new_array;
}
}
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
if (pstr->mbs_allocated)
{
unsigned char *new_array = re_realloc (pstr->mbs, unsigned char,
new_buf_len);
@ -155,16 +169,6 @@ re_string_realloc_buffers (pstr, new_buf_len)
return REG_ESPACE;
pstr->mbs = new_array;
}
if (MBS_CASE_ALLOCATED (pstr))
{
unsigned char *new_array = re_realloc (pstr->mbs_case, unsigned char,
new_buf_len);
if (BE (new_array == NULL, 0))
return REG_ESPACE;
pstr->mbs_case = new_array;
if (!MBS_ALLOCATED (pstr))
pstr->mbs = pstr->mbs_case;
}
pstr->bufs_len = new_buf_len;
return REG_NOERROR;
}
@ -182,11 +186,15 @@ re_string_construct_common (str, len, pstr, trans, icase, dfa)
memset (pstr, '\0', sizeof (re_string_t));
pstr->raw_mbs = (const unsigned char *) str;
pstr->len = len;
pstr->raw_len = len;
pstr->trans = trans;
pstr->icase = icase ? 1 : 0;
pstr->mbs_allocated = (trans != NULL || icase);
pstr->mb_cur_max = dfa->mb_cur_max;
pstr->is_utf8 = dfa->is_utf8;
pstr->map_notascii = dfa->map_notascii;
pstr->stop = pstr->len;
pstr->raw_stop = pstr->stop;
}
#ifdef RE_ENABLE_I18N
@ -206,18 +214,39 @@ static void
build_wcs_buffer (pstr)
re_string_t *pstr;
{
#ifdef _LIBC
unsigned char buf[pstr->mb_cur_max];
#else
unsigned char buf[64];
#endif
mbstate_t prev_st;
int byte_idx, end_idx, mbclen, remain_len;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0))
{
int i, ch;
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
{
ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
buf[i] = pstr->trans[ch];
}
p = (const char *) buf;
}
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen == (size_t) -2, 0))
{
/* The buffer doesn't have enough space, finish to build. */
@ -229,15 +258,11 @@ build_wcs_buffer (pstr)
/* We treat these cases as a singlebyte character. */
mbclen = 1;
wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
if (BE (pstr->trans != NULL, 0))
wc = pstr->trans[wc];
pstr->cur_state = prev_st;
}
/* Apply the translation if we need. */
if (pstr->trans != NULL && mbclen == 1)
{
int ch = pstr->trans[pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]];
pstr->mbs_case[byte_idx] = ch;
}
/* Write wide character and padding. */
pstr->wcs[byte_idx++] = wc;
/* Write paddings. */
@ -245,61 +270,83 @@ build_wcs_buffer (pstr)
pstr->wcs[byte_idx++] = WEOF;
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = byte_idx;
}
/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
but for REG_ICASE. */
static void
static int
build_wcs_upper_buffer (pstr)
re_string_t *pstr;
{
mbstate_t prev_st;
int byte_idx, end_idx, mbclen, remain_len;
/* Build the buffers from pstr->valid_len to either pstr->len or
pstr->bufs_len. */
end_idx = (pstr->bufs_len > pstr->len)? pstr->len : pstr->bufs_len;
int src_idx, byte_idx, end_idx, mbclen, remain_len;
#ifdef _LIBC
unsigned char buf[pstr->mb_cur_max];
#else
unsigned char buf[64];
#endif
byte_idx = pstr->valid_len;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
#ifdef _LIBC
/* The following optimization assumes that the wchar_t encoding is
always ISO 10646. */
if (! pstr->map_notascii && pstr->trans == NULL)
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
&& mbsinit (&pstr->cur_state))
{
/* In case of a singlebyte character. */
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
with ISO 10646: all ASCII values can be converted like this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
}
else
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
{
while (byte_idx < end_idx)
{
wchar_t wc;
if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
&& mbsinit (&pstr->cur_state))
{
/* In case of a singlebyte character. */
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
with ISO 10646: all ASCII values can be converted like
this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
continue;
}
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (BE (mbclen > 1, 1))
if (BE (mbclen > 0, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc),
&prev_st);
{
int mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
else
{
src_idx = byte_idx;
goto offsets_needed;
}
}
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = towupper (wc);
pstr->wcs[byte_idx++] = wcu;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* It is an invalid character. Just use the byte. */
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
pstr->mbs[byte_idx] = ch;
/* And also cast it to wide char. */
@ -314,48 +361,116 @@ build_wcs_upper_buffer (pstr)
break;
}
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = byte_idx;
return REG_NOERROR;
}
else
#endif
for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
offsets_needed:
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
mbclen = mbrtowc (&wc,
((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
+ byte_idx), remain_len, &pstr->cur_state);
if (mbclen == 1)
if (BE (pstr->trans != NULL, 0))
{
/* In case of a singlebyte character. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* Apply the translation if we need. */
if (BE (pstr->trans != NULL, 0) && mbclen == 1)
int i, ch;
for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
{
ch = pstr->trans[ch];
pstr->mbs_case[byte_idx] = ch;
ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
buf[i] = pstr->trans[ch];
}
pstr->wcs[byte_idx] = towupper (wc);
pstr->mbs[byte_idx++] = toupper (ch);
if (BE (mbclen == (size_t) -1, 0))
pstr->cur_state = prev_st;
p = (const char *) buf;
}
else if (BE (mbclen != (size_t) -2, 1))
else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
mbclen = mbrtowc (&wc, p, remain_len, &pstr->cur_state);
if (BE (mbclen > 0, 1))
{
wchar_t wcu = wc;
if (iswlower (wc))
wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st);
{
int mbcdlen;
wcu = towupper (wc);
mbcdlen = wcrtomb (buf, wcu, &prev_st);
if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
else
{
int i;
if (byte_idx + mbcdlen > pstr->bufs_len)
{
pstr->cur_state = prev_st;
break;
}
if (pstr->offsets == NULL)
{
pstr->offsets = re_malloc (int, pstr->bufs_len);
if (pstr->offsets == NULL)
return REG_ESPACE;
}
if (!pstr->offsets_needed)
{
for (i = 0; i < byte_idx; ++i)
pstr->offsets[i] = i;
pstr->offsets_needed = 1;
}
memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
pstr->wcs[byte_idx] = wcu;
pstr->offsets[byte_idx] = src_idx;
for (i = 1; i < mbcdlen; ++i)
{
pstr->offsets[byte_idx + i]
= src_idx + (i < mbclen ? i : mbclen - 1);
pstr->wcs[byte_idx + i] = WEOF;
}
pstr->len += mbcdlen - mbclen;
if (pstr->raw_stop > src_idx)
pstr->stop += mbcdlen - mbclen;
end_idx = (pstr->bufs_len > pstr->len)
? pstr->len : pstr->bufs_len;
byte_idx += mbcdlen;
src_idx += mbclen;
continue;
}
}
else
memcpy (pstr->mbs + byte_idx,
pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
pstr->wcs[byte_idx++] = towupper (wc);
memcpy (pstr->mbs + byte_idx, p, mbclen);
if (BE (pstr->offsets_needed != 0, 0))
{
int i;
for (i = 0; i < mbclen; ++i)
pstr->offsets[byte_idx + i] = src_idx + i;
}
src_idx += mbclen;
pstr->wcs[byte_idx++] = wcu;
/* Write paddings. */
for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
else if (mbclen == (size_t) -1 || mbclen == 0)
{
/* It is an invalid character. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
/* It is an invalid character or '\0'. Just use the byte. */
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans [ch];
pstr->mbs[byte_idx] = ch;
if (BE (pstr->offsets_needed != 0, 0))
pstr->offsets[byte_idx] = src_idx;
++src_idx;
/* And also cast it to wide char. */
pstr->wcs[byte_idx++] = (wchar_t) ch;
if (BE (mbclen == (size_t) -1, 0))
@ -369,6 +484,8 @@ build_wcs_upper_buffer (pstr)
}
}
pstr->valid_len = byte_idx;
pstr->valid_raw_len = src_idx;
return REG_NOERROR;
}
/* Skip characters until the index becomes greater than NEW_RAW_IDX.
@ -385,7 +502,7 @@ re_string_skip_chars (pstr, new_raw_idx, last_wc)
wchar_t wc = 0;
/* Skip the characters which are not necessary to check. */
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_len;
for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
rawbuf_idx < new_raw_idx;)
{
int remain_len;
@ -420,17 +537,15 @@ build_upper_buffer (pstr)
for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
{
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
if (pstr->trans != NULL)
{
ch = pstr->trans[ch];
pstr->mbs_case[char_idx] = ch;
}
if (BE (pstr->trans != NULL, 0))
ch = pstr->trans[ch];
if (islower (ch))
pstr->mbs[char_idx] = toupper (ch);
else
pstr->mbs[char_idx] = ch;
}
pstr->valid_len = char_idx;
pstr->valid_raw_len = char_idx;
}
/* Apply TRANS to the buffer in PSTR. */
@ -445,10 +560,11 @@ re_string_translate_buffer (pstr)
for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
{
int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
pstr->mbs_case[buf_idx] = pstr->trans[ch];
pstr->mbs[buf_idx] = pstr->trans[ch];
}
pstr->valid_len = buf_idx;
pstr->valid_raw_len = buf_idx;
}
/* This function re-construct the buffers.
@ -468,14 +584,15 @@ re_string_reconstruct (pstr, idx, eflags, newline)
if (pstr->mb_cur_max > 1)
memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
#endif /* RE_ENABLE_I18N */
pstr->len += pstr->raw_mbs_idx;
pstr->stop += pstr->raw_mbs_idx;
pstr->valid_len = pstr->raw_mbs_idx = 0;
pstr->len = pstr->raw_len;
pstr->stop = pstr->raw_stop;
pstr->valid_len = 0;
pstr->raw_mbs_idx = 0;
pstr->valid_raw_len = 0;
pstr->offsets_needed = 0;
pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
: CONTEXT_NEWLINE | CONTEXT_BEGBUF);
if (!MBS_CASE_ALLOCATED (pstr))
pstr->mbs_case = (unsigned char *) pstr->raw_mbs;
if (!MBS_ALLOCATED (pstr) && !MBS_CASE_ALLOCATED (pstr))
if (!pstr->mbs_allocated)
pstr->mbs = (unsigned char *) pstr->raw_mbs;
offset = idx;
}
@ -483,7 +600,13 @@ re_string_reconstruct (pstr, idx, eflags, newline)
if (offset != 0)
{
/* Are the characters which are already checked remain? */
if (offset < pstr->valid_len)
if (offset < pstr->valid_raw_len
#ifdef RE_ENABLE_I18N
/* Handling this would enlarge the code too much.
Accept a slowdown in that case. */
&& pstr->offsets_needed == 0
#endif
)
{
/* Yes, move them to the front of the buffer. */
pstr->tip_context = re_string_context_at (pstr, offset - 1, eflags,
@ -493,13 +616,11 @@ re_string_reconstruct (pstr, idx, eflags, newline)
memmove (pstr->wcs, pstr->wcs + offset,
(pstr->valid_len - offset) * sizeof (wint_t));
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
if (pstr->mbs_allocated)
memmove (pstr->mbs, pstr->mbs + offset,
pstr->valid_len - offset);
if (MBS_CASE_ALLOCATED (pstr))
memmove (pstr->mbs_case, pstr->mbs_case + offset,
pstr->valid_len - offset);
pstr->valid_len -= offset;
pstr->valid_raw_len -= offset;
#if DEBUG
assert (pstr->valid_len > 0);
#endif
@ -507,16 +628,26 @@ re_string_reconstruct (pstr, idx, eflags, newline)
else
{
/* No, skip all characters until IDX. */
#ifdef RE_ENABLE_I18N
if (BE (pstr->offsets_needed, 0))
{
pstr->len = pstr->raw_len - idx + offset;
pstr->stop = pstr->raw_stop - idx + offset;
pstr->offsets_needed = 0;
}
#endif
pstr->valid_len = 0;
pstr->valid_raw_len = 0;
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
{
int wcs_idx;
wint_t wc = WEOF;
#ifdef _LIBC
if (pstr->is_utf8)
{
const unsigned char *raw, *p, *end;
const unsigned char *raw, *p, *q, *end;
/* Special case UTF-8. Multi-byte chars start with any
byte other than 0x80 - 0xbf. */
@ -527,13 +658,22 @@ re_string_reconstruct (pstr, idx, eflags, newline)
{
mbstate_t cur_state;
wchar_t wc2;
int mlen;
int mlen = raw + pstr->len - p;
unsigned char buf[6];
q = p;
if (BE (pstr->trans != NULL, 0))
{
int i = mlen < 6 ? mlen : 6;
while (--i >= 0)
buf[i] = pstr->trans[p[i]];
q = buf;
}
/* XXX Don't use mbrtowc, we know which conversion
to use (UTF-8 -> UCS4). */
memset (&cur_state, 0, sizeof (cur_state));
mlen = mbrtowc (&wc2, p, raw + pstr->len - p,
&cur_state) - (raw + offset - p);
mlen = mbrtowc (&wc2, p, mlen, &cur_state)
- (raw + offset - p);
if (mlen >= 0)
{
memset (&pstr->cur_state, '\0',
@ -544,12 +684,17 @@ re_string_reconstruct (pstr, idx, eflags, newline)
break;
}
}
#endif
if (wc == WEOF)
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
if (pstr->trans && wc <= 0xff)
wc = pstr->trans[wc];
if (BE (pstr->valid_len, 0))
{
for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
pstr->wcs[wcs_idx] = WEOF;
if (pstr->mbs_allocated)
memset (pstr->mbs, 255, pstr->valid_len);
}
pstr->valid_raw_len = pstr->valid_len;
pstr->tip_context = (IS_WIDE_WORD_CHAR (wc) ? CONTEXT_WORD
: ((newline && IS_WIDE_NEWLINE (wc))
? CONTEXT_NEWLINE : 0));
@ -565,13 +710,8 @@ re_string_reconstruct (pstr, idx, eflags, newline)
? CONTEXT_NEWLINE : 0));
}
}
if (!MBS_CASE_ALLOCATED (pstr))
{
pstr->mbs_case += offset;
/* In case of !MBS_ALLOCATED && !MBS_CASE_ALLOCATED. */
if (!MBS_ALLOCATED (pstr))
pstr->mbs += offset;
}
if (!pstr->mbs_allocated)
pstr->mbs += offset;
}
pstr->raw_mbs_idx = idx;
pstr->len -= offset;
@ -582,7 +722,11 @@ re_string_reconstruct (pstr, idx, eflags, newline)
if (pstr->mb_cur_max > 1)
{
if (pstr->icase)
build_wcs_upper_buffer (pstr);
{
int ret = build_wcs_upper_buffer (pstr);
if (BE (ret != REG_NOERROR, 0))
return ret;
}
else
build_wcs_buffer (pstr);
}
@ -601,17 +745,95 @@ re_string_reconstruct (pstr, idx, eflags, newline)
return REG_NOERROR;
}
static unsigned char
re_string_peek_byte_case (const re_string_t *pstr,
int idx)
{
int ch, off;
/* Handle the common (easiest) cases first. */
if (BE (!pstr->icase, 1))
return re_string_peek_byte (pstr, idx);
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1
&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
return re_string_peek_byte (pstr, idx);
#endif
off = pstr->cur_idx + idx;
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
off = pstr->offsets[off];
#endif
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
if (pstr->trans)
ch = pstr->trans[ch];
#ifdef RE_ENABLE_I18N
/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
this function returns CAPITAL LETTER I instead of first byte of
DOTLESS SMALL LETTER I. The latter would confuse the parser,
since peek_byte_case doesn't advance cur_idx in any way. */
if (pstr->offsets_needed && !isascii (ch))
return re_string_peek_byte (pstr, idx);
#endif
return ch;
}
static unsigned char
re_string_fetch_byte_case (re_string_t *pstr)
{
int ch, off;
if (BE (!pstr->icase, 1))
return re_string_fetch_byte (pstr);
#ifdef RE_ENABLE_I18N
if (pstr->offsets_needed)
{
/* For tr_TR.UTF-8 [[:islower:]] there is
[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
in that case the whole multi-byte character and return
the original letter. On the other side, with
[[: DOTLESS SMALL LETTER I return [[:I, as doing
anything else would complicate things too much. */
if (!re_string_first_byte (pstr, pstr->cur_idx))
return re_string_fetch_byte (pstr);
off = pstr->offsets[pstr->cur_idx];
ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
if (pstr->trans)
ch = pstr->trans[ch];
if (! isascii (ch))
return re_string_fetch_byte (pstr);
re_string_skip_bytes (pstr,
re_string_char_size_at (pstr, pstr->cur_idx));
return ch;
}
#endif
ch = pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
if (pstr->trans)
ch = pstr->trans[ch];
return ch;
}
static void
re_string_destruct (pstr)
re_string_t *pstr;
{
#ifdef RE_ENABLE_I18N
re_free (pstr->wcs);
re_free (pstr->offsets);
#endif /* RE_ENABLE_I18N */
if (MBS_ALLOCATED (pstr))
if (pstr->mbs_allocated)
re_free (pstr->mbs);
if (MBS_CASE_ALLOCATED (pstr))
re_free (pstr->mbs_case);
}
/* Return the context at IDX in INPUT. */