regex: copy back from Gnulib

Copy regex-related files back from Gnulib, to fix a problem with
static checking of regex calls noted by Martin Sebor.  This merges the
following changes:

* New macro __attribute_nonnull__ in misc/sys/cdefs.h, for use later
when copying other files back from Gnulib.

* Use __GNULIB_CDEFS instead of __GLIBC__ when deciding
whether to include bits/wordsize.h etc.

* Avoid duplicate entries in epsilon closure table.

* New regex.h macro _REGEX_NELTS to let regexec declare that its pmatch
argument should contain nmatch elements.  Use that for regexec, instead
of __attr_access (whose write-only annotation of pmatch is incorrect).

* New regex.h macro _Attr_access_, which is like __attr_access except
that it is portable to non-glibc platforms.

* Add some DEBUG_ASSERTs to pacify gcc -fanalyzer and to catch
recently-fixed performance bugs if they recur.

* Add Gnulib-specific stuff to port the dynarray- and lock-using parts
of regex code to non-glibc platforms.

* Fix glibc bug 11053.

* Avoid some undefined behavior when popping an empty fail stack.
This commit is contained in:
Paul Eggert 2021-09-21 07:47:45 -07:00
parent f3e6645633
commit 0b5ca7c3e5
9 changed files with 142 additions and 78 deletions

View File

@ -132,7 +132,8 @@
operators might not yield numerically correct answers due to operators might not yield numerically correct answers due to
arithmetic overflow. They do not rely on undefined or arithmetic overflow. They do not rely on undefined or
implementation-defined behavior. Their implementations are simple implementation-defined behavior. Their implementations are simple
and straightforward, but they are a bit harder to use than the and straightforward, but they are harder to use and may be less
efficient than the INT_<op>_WRAPV, INT_<op>_OK, and
INT_<op>_OVERFLOW macros described below. INT_<op>_OVERFLOW macros described below.
Example usage: Example usage:
@ -157,6 +158,9 @@
must have minimum value MIN and maximum MAX. Unsigned types should must have minimum value MIN and maximum MAX. Unsigned types should
use a zero MIN of the proper type. use a zero MIN of the proper type.
Because all arguments are subject to integer promotions, these
macros typically do not work on types narrower than 'int'.
These macros are tuned for constant MIN and MAX. For commutative These macros are tuned for constant MIN and MAX. For commutative
operations such as A + B, they are also tuned for constant B. */ operations such as A + B, they are also tuned for constant B. */
@ -338,9 +342,15 @@
arguments should not have side effects. arguments should not have side effects.
The WRAPV macros are not constant expressions. They support only The WRAPV macros are not constant expressions. They support only
+, binary -, and *. Because the WRAPV macros convert the result, +, binary -, and *.
they report overflow in different circumstances than the OVERFLOW
macros do. Because the WRAPV macros convert the result, they report overflow
in different circumstances than the OVERFLOW macros do. For
example, in the typical case with 16-bit 'short' and 32-bit 'int',
if A, B and R are all of type 'short' then INT_ADD_OVERFLOW (A, B)
returns false because the addition cannot overflow after A and B
are converted to 'int', whereas INT_ADD_WRAPV (A, B, &R) returns
true or false depending on whether the sum fits into 'short'.
These macros are tuned for their last input argument being a constant. These macros are tuned for their last input argument being a constant.

View File

@ -37,7 +37,8 @@ extern int __regcomp (regex_t *__preg, const char *__pattern, int __cflags);
libc_hidden_proto (__regcomp) libc_hidden_proto (__regcomp)
extern int __regexec (const regex_t *__preg, const char *__string, extern int __regexec (const regex_t *__preg, const char *__string,
size_t __nmatch, regmatch_t __pmatch[], int __eflags); size_t __nmatch, regmatch_t __pmatch[__nmatch],
int __eflags);
libc_hidden_proto (__regexec) libc_hidden_proto (__regexec)
extern size_t __regerror (int __errcode, const regex_t *__preg, extern size_t __regerror (int __errcode, const regex_t *__preg,

View File

@ -318,16 +318,18 @@
#endif #endif
/* The nonnull function attribute marks pointer parameters that /* The nonnull function attribute marks pointer parameters that
must not be NULL. */ must not be NULL. This has the name __nonnull in glibc,
#ifndef __nonnull and __attribute_nonnull__ in files shared with Gnulib to avoid
collision with a different __nonnull in DragonFlyBSD 5.9. */
#ifndef __attribute_nonnull__
# if __GNUC_PREREQ (3,3) || __glibc_has_attribute (__nonnull__) # if __GNUC_PREREQ (3,3) || __glibc_has_attribute (__nonnull__)
# define __nonnull(params) __attribute__ ((__nonnull__ params)) # define __attribute_nonnull__(params) __attribute__ ((__nonnull__ params))
# else # else
# define __nonnull(params) # define __attribute_nonnull__(params)
# endif # endif
#elif !defined __GLIBC__ #endif
# undef __nonnull #ifndef __nonnull
# define __nonnull(params) _GL_ATTRIBUTE_NONNULL (params) # define __nonnull(params) __attribute_nonnull__ (params)
#endif #endif
/* The returns_nonnull function attribute marks the return type of the function /* The returns_nonnull function attribute marks the return type of the function
@ -493,9 +495,9 @@
[!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })] [!!sizeof (struct { int __error_if_negative: (expr) ? 2 : -1; })]
#endif #endif
/* The #ifndef lets Gnulib avoid including these on non-glibc /* Gnulib avoids including these, as they don't work on non-glibc or
platforms, where the includes typically do not exist. */ older glibc platforms. */
#ifdef __GLIBC__ #ifndef __GNULIB_CDEFS
# include <bits/wordsize.h> # include <bits/wordsize.h>
# include <bits/long-double.h> # include <bits/long-double.h>
#endif #endif

View File

@ -1695,12 +1695,14 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
reg_errcode_t err; reg_errcode_t err;
Idx i; Idx i;
re_node_set eclosure; re_node_set eclosure;
bool ok;
bool incomplete = false; bool incomplete = false;
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1); err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (__glibc_unlikely (err != REG_NOERROR)) if (__glibc_unlikely (err != REG_NOERROR))
return err; return err;
/* An epsilon closure includes itself. */
eclosure.elems[eclosure.nelem++] = node;
/* This indicates that we are calculating this node now. /* This indicates that we are calculating this node now.
We reference this value to avoid infinite loop. */ We reference this value to avoid infinite loop. */
dfa->eclosures[node].nelem = -1; dfa->eclosures[node].nelem = -1;
@ -1753,10 +1755,6 @@ calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa, Idx node, bool root)
} }
} }
/* An epsilon closure includes itself. */
ok = re_node_set_insert (&eclosure, node);
if (__glibc_unlikely (! ok))
return REG_ESPACE;
if (incomplete && !root) if (incomplete && !root)
dfa->eclosures[node].nelem = 0; dfa->eclosures[node].nelem = 0;
else else

View File

@ -24,6 +24,7 @@
# if __GNUC_PREREQ (4, 6) # if __GNUC_PREREQ (4, 6)
# pragma GCC diagnostic ignored "-Wsuggest-attribute=pure" # pragma GCC diagnostic ignored "-Wsuggest-attribute=pure"
# pragma GCC diagnostic ignored "-Wvla"
# endif # endif
# if __GNUC_PREREQ (4, 3) # if __GNUC_PREREQ (4, 3)
# pragma GCC diagnostic ignored "-Wold-style-definition" # pragma GCC diagnostic ignored "-Wold-style-definition"

View File

@ -522,6 +522,30 @@ typedef struct
/* Declarations for routines. */ /* Declarations for routines. */
#ifndef _REGEX_NELTS
# if (defined __STDC_VERSION__ && 199901L <= __STDC_VERSION__ \
&& !defined __STDC_NO_VLA__)
# define _REGEX_NELTS(n) n
# else
# define _REGEX_NELTS(n)
# endif
#endif
#if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wvla"
#endif
#ifndef _Attr_access_
# ifdef __attr_access
# define _Attr_access_(arg) __attr_access (arg)
# elif defined __GNUC__ && 10 <= __GNUC__
# define _Attr_access_(x) __attribute__ ((__access__ x))
# else
# define _Attr_access_(x)
# endif
#endif
#ifdef __USE_GNU #ifdef __USE_GNU
/* Sets the current default syntax to SYNTAX, and return the old syntax. /* Sets the current default syntax to SYNTAX, and return the old syntax.
You can also simply assign to the 're_syntax_options' variable. */ You can also simply assign to the 're_syntax_options' variable. */
@ -537,7 +561,7 @@ extern reg_syntax_t re_set_syntax (reg_syntax_t __syntax);
'regfree'. */ 'regfree'. */
extern const char *re_compile_pattern (const char *__pattern, size_t __length, extern const char *re_compile_pattern (const char *__pattern, size_t __length,
struct re_pattern_buffer *__buffer) struct re_pattern_buffer *__buffer)
__attr_access ((__read_only__, 1, 2)); _Attr_access_ ((__read_only__, 1, 2));
/* Compile a fastmap for the compiled pattern in BUFFER; used to /* Compile a fastmap for the compiled pattern in BUFFER; used to
@ -555,7 +579,7 @@ extern regoff_t re_search (struct re_pattern_buffer *__buffer,
const char *__String, regoff_t __length, const char *__String, regoff_t __length,
regoff_t __start, regoff_t __range, regoff_t __start, regoff_t __range,
struct re_registers *__regs) struct re_registers *__regs)
__attr_access ((__read_only__, 2, 3)); _Attr_access_ ((__read_only__, 2, 3));
/* Like 're_search', but search in the concatenation of STRING1 and /* Like 're_search', but search in the concatenation of STRING1 and
@ -566,8 +590,8 @@ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
regoff_t __start, regoff_t __range, regoff_t __start, regoff_t __range,
struct re_registers *__regs, struct re_registers *__regs,
regoff_t __stop) regoff_t __stop)
__attr_access ((__read_only__, 2, 3)) _Attr_access_ ((__read_only__, 2, 3))
__attr_access ((__read_only__, 4, 5)); _Attr_access_ ((__read_only__, 4, 5));
/* Like 're_search', but return how many characters in STRING the regexp /* Like 're_search', but return how many characters in STRING the regexp
@ -575,7 +599,7 @@ extern regoff_t re_search_2 (struct re_pattern_buffer *__buffer,
extern regoff_t re_match (struct re_pattern_buffer *__buffer, extern regoff_t re_match (struct re_pattern_buffer *__buffer,
const char *__String, regoff_t __length, const char *__String, regoff_t __length,
regoff_t __start, struct re_registers *__regs) regoff_t __start, struct re_registers *__regs)
__attr_access ((__read_only__, 2, 3)); _Attr_access_ ((__read_only__, 2, 3));
/* Relates to 're_match' as 're_search_2' relates to 're_search'. */ /* Relates to 're_match' as 're_search_2' relates to 're_search'. */
@ -584,8 +608,8 @@ extern regoff_t re_match_2 (struct re_pattern_buffer *__buffer,
const char *__string2, regoff_t __length2, const char *__string2, regoff_t __length2,
regoff_t __start, struct re_registers *__regs, regoff_t __start, struct re_registers *__regs,
regoff_t __stop) regoff_t __stop)
__attr_access ((__read_only__, 2, 3)) _Attr_access_ ((__read_only__, 2, 3))
__attr_access ((__read_only__, 4, 5)); _Attr_access_ ((__read_only__, 4, 5));
/* Set REGS to hold NUM_REGS registers, storing them in STARTS and /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
@ -654,16 +678,19 @@ extern int regcomp (regex_t *_Restrict_ __preg,
extern int regexec (const regex_t *_Restrict_ __preg, extern int regexec (const regex_t *_Restrict_ __preg,
const char *_Restrict_ __String, size_t __nmatch, const char *_Restrict_ __String, size_t __nmatch,
regmatch_t __pmatch[_Restrict_arr_], regmatch_t __pmatch[_Restrict_arr_
int __eflags) _REGEX_NELTS (__nmatch)],
__attr_access ((__write_only__, 4, 3)); int __eflags);
extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg, extern size_t regerror (int __errcode, const regex_t *_Restrict_ __preg,
char *_Restrict_ __errbuf, size_t __errbuf_size) char *_Restrict_ __errbuf, size_t __errbuf_size)
__attr_access ((__write_only__, 3, 4)); _Attr_access_ ((__write_only__, 3, 4));
extern void regfree (regex_t *__preg); extern void regfree (regex_t *__preg);
#if defined __GNUC__ && 4 < __GNUC__ + (6 <= __GNUC_MINOR__)
# pragma GCC diagnostic pop
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -1211,6 +1211,10 @@ re_node_set_merge (re_node_set *dest, const re_node_set *src)
if (__glibc_unlikely (dest->nelem == 0)) if (__glibc_unlikely (dest->nelem == 0))
{ {
/* Although we already guaranteed above that dest->alloc != 0 and
therefore dest->elems != NULL, add a debug assertion to pacify
GCC 11.2.1's -fanalyzer. */
DEBUG_ASSERT (dest->elems);
dest->nelem = src->nelem; dest->nelem = src->nelem;
memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
return REG_NOERROR; return REG_NOERROR;
@ -1286,7 +1290,10 @@ re_node_set_insert (re_node_set *set, Idx elem)
if (__glibc_unlikely (set->nelem) == 0) if (__glibc_unlikely (set->nelem) == 0)
{ {
/* We already guaranteed above that set->alloc != 0. */ /* Although we already guaranteed above that set->alloc != 0 and
therefore set->elems != NULL, add a debug assertion to pacify
GCC 11.2 -fanalyzer. */
DEBUG_ASSERT (set->elems);
set->elems[0] = elem; set->elems[0] = elem;
++set->nelem; ++set->nelem;
return true; return true;
@ -1314,6 +1321,7 @@ re_node_set_insert (re_node_set *set, Idx elem)
{ {
for (idx = set->nelem; set->elems[idx - 1] > elem; idx--) for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
set->elems[idx] = set->elems[idx - 1]; set->elems[idx] = set->elems[idx - 1];
DEBUG_ASSERT (set->elems[idx - 1] < elem);
} }
/* Insert the new element. */ /* Insert the new element. */

View File

@ -32,6 +32,10 @@
#include <stdbool.h> #include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#ifndef _LIBC
# include <dynarray.h>
#endif
#include <intprops.h> #include <intprops.h>
#include <verify.h> #include <verify.h>
@ -49,14 +53,14 @@
# define lock_fini(lock) ((void) 0) # define lock_fini(lock) ((void) 0)
# define lock_lock(lock) __libc_lock_lock (lock) # define lock_lock(lock) __libc_lock_lock (lock)
# define lock_unlock(lock) __libc_lock_unlock (lock) # define lock_unlock(lock) __libc_lock_unlock (lock)
#elif defined GNULIB_LOCK && !defined USE_UNLOCKED_IO #elif defined GNULIB_LOCK && !defined GNULIB_REGEX_SINGLE_THREAD
# include "glthread/lock.h" # include "glthread/lock.h"
# define lock_define(name) gl_lock_define (, name) # define lock_define(name) gl_lock_define (, name)
# define lock_init(lock) glthread_lock_init (&(lock)) # define lock_init(lock) glthread_lock_init (&(lock))
# define lock_fini(lock) glthread_lock_destroy (&(lock)) # define lock_fini(lock) glthread_lock_destroy (&(lock))
# define lock_lock(lock) glthread_lock_lock (&(lock)) # define lock_lock(lock) glthread_lock_lock (&(lock))
# define lock_unlock(lock) glthread_lock_unlock (&(lock)) # define lock_unlock(lock) glthread_lock_unlock (&(lock))
#elif defined GNULIB_PTHREAD && !defined USE_UNLOCKED_IO #elif defined GNULIB_PTHREAD && !defined GNULIB_REGEX_SINGLE_THREAD
# include <pthread.h> # include <pthread.h>
# define lock_define(name) pthread_mutex_t name; # define lock_define(name) pthread_mutex_t name;
# define lock_init(lock) pthread_mutex_init (&(lock), 0) # define lock_init(lock) pthread_mutex_init (&(lock), 0)

View File

@ -59,7 +59,7 @@ static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
Idx cur_idx, Idx nmatch); Idx cur_idx, Idx nmatch);
static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs, static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
Idx str_idx, Idx dest_node, Idx nregs, Idx str_idx, Idx dest_node, Idx nregs,
regmatch_t *regs, regmatch_t *regs, regmatch_t *prevregs,
re_node_set *eps_via_nodes); re_node_set *eps_via_nodes);
static reg_errcode_t set_regs (const regex_t *preg, static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx, const re_match_context_t *mctx,
@ -186,11 +186,12 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
REG_NOTBOL is set, then ^ does not match at the beginning of the REG_NOTBOL is set, then ^ does not match at the beginning of the
string; if REG_NOTEOL is set, then $ does not match at the end. string; if REG_NOTEOL is set, then $ does not match at the end.
We return 0 if we find a match and REG_NOMATCH if not. */ Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
EFLAGS is invalid. */
int int
regexec (const regex_t *__restrict preg, const char *__restrict string, regexec (const regex_t *__restrict preg, const char *__restrict string,
size_t nmatch, regmatch_t pmatch[], int eflags) size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
{ {
reg_errcode_t err; reg_errcode_t err;
Idx start, length; Idx start, length;
@ -234,7 +235,7 @@ int
attribute_compat_text_section attribute_compat_text_section
__compat_regexec (const regex_t *__restrict preg, __compat_regexec (const regex_t *__restrict preg,
const char *__restrict string, size_t nmatch, const char *__restrict string, size_t nmatch,
regmatch_t pmatch[], int eflags) regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
{ {
return regexec (preg, string, nmatch, pmatch, return regexec (preg, string, nmatch, pmatch,
eflags & (REG_NOTBOL | REG_NOTEOL)); eflags & (REG_NOTBOL | REG_NOTEOL));
@ -269,8 +270,8 @@ compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
strings.) strings.)
On success, re_match* functions return the length of the match, re_search* On success, re_match* functions return the length of the match, re_search*
return the position of the start of the match. Return value -1 means no return the position of the start of the match. They return -1 on
match was found and -2 indicates an internal error. */ match failure, -2 on error. */
regoff_t regoff_t
re_match (struct re_pattern_buffer *bufp, const char *string, Idx length, re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
@ -1206,27 +1207,30 @@ check_halt_state_context (const re_match_context_t *mctx,
/* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
corresponding to the DFA). corresponding to the DFA).
Return the destination node, and update EPS_VIA_NODES; Return the destination node, and update EPS_VIA_NODES;
return -1 in case of errors. */ return -1 on match failure, -2 on error. */
static Idx static Idx
proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs, proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
regmatch_t *prevregs,
Idx *pidx, Idx node, re_node_set *eps_via_nodes, Idx *pidx, Idx node, re_node_set *eps_via_nodes,
struct re_fail_stack_t *fs) struct re_fail_stack_t *fs)
{ {
const re_dfa_t *const dfa = mctx->dfa; const re_dfa_t *const dfa = mctx->dfa;
Idx i;
bool ok;
if (IS_EPSILON_NODE (dfa->nodes[node].type)) if (IS_EPSILON_NODE (dfa->nodes[node].type))
{ {
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes; re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
re_node_set *edests = &dfa->edests[node]; re_node_set *edests = &dfa->edests[node];
Idx dest_node;
ok = re_node_set_insert (eps_via_nodes, node); if (! re_node_set_contains (eps_via_nodes, node))
if (__glibc_unlikely (! ok)) {
return -2; bool ok = re_node_set_insert (eps_via_nodes, node);
/* Pick up a valid destination, or return -1 if none if (__glibc_unlikely (! ok))
is found. */ return -2;
for (dest_node = -1, i = 0; i < edests->nelem; ++i) }
/* Pick a valid destination, or return -1 if none is found. */
Idx dest_node = -1;
for (Idx i = 0; i < edests->nelem; i++)
{ {
Idx candidate = edests->elems[i]; Idx candidate = edests->elems[i];
if (!re_node_set_contains (cur_nodes, candidate)) if (!re_node_set_contains (cur_nodes, candidate))
@ -1244,7 +1248,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
/* Otherwise, push the second epsilon-transition on the fail stack. */ /* Otherwise, push the second epsilon-transition on the fail stack. */
else if (fs != NULL else if (fs != NULL
&& push_fail_stack (fs, *pidx, candidate, nregs, regs, && push_fail_stack (fs, *pidx, candidate, nregs, regs,
eps_via_nodes)) prevregs, eps_via_nodes))
return -2; return -2;
/* We know we are going to exit. */ /* We know we are going to exit. */
@ -1288,7 +1292,7 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
if (naccepted == 0) if (naccepted == 0)
{ {
Idx dest_node; Idx dest_node;
ok = re_node_set_insert (eps_via_nodes, node); bool ok = re_node_set_insert (eps_via_nodes, node);
if (__glibc_unlikely (! ok)) if (__glibc_unlikely (! ok))
return -2; return -2;
dest_node = dfa->edests[node].elems[0]; dest_node = dfa->edests[node].elems[0];
@ -1317,7 +1321,8 @@ proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
static reg_errcode_t static reg_errcode_t
__attribute_warn_unused_result__ __attribute_warn_unused_result__
push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node, push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes) Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
re_node_set *eps_via_nodes)
{ {
reg_errcode_t err; reg_errcode_t err;
Idx num = fs->num++; Idx num = fs->num++;
@ -1333,25 +1338,30 @@ push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
} }
fs->stack[num].idx = str_idx; fs->stack[num].idx = str_idx;
fs->stack[num].node = dest_node; fs->stack[num].node = dest_node;
fs->stack[num].regs = re_malloc (regmatch_t, nregs); fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
if (fs->stack[num].regs == NULL) if (fs->stack[num].regs == NULL)
return REG_ESPACE; return REG_ESPACE;
memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs); memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes); err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
return err; return err;
} }
static Idx static Idx
pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs, pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
regmatch_t *regs, re_node_set *eps_via_nodes) regmatch_t *regs, regmatch_t *prevregs,
re_node_set *eps_via_nodes)
{ {
if (fs == NULL || fs->num == 0)
return -1;
Idx num = --fs->num; Idx num = --fs->num;
DEBUG_ASSERT (num >= 0);
*pidx = fs->stack[num].idx; *pidx = fs->stack[num].idx;
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs); memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
re_node_set_free (eps_via_nodes); re_node_set_free (eps_via_nodes);
re_free (fs->stack[num].regs); re_free (fs->stack[num].regs);
*eps_via_nodes = fs->stack[num].eps_via_nodes; *eps_via_nodes = fs->stack[num].eps_via_nodes;
DEBUG_ASSERT (0 <= fs->stack[num].node);
return fs->stack[num].node; return fs->stack[num].node;
} }
@ -1407,33 +1417,32 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
{ {
update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch); update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node) if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
|| (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
{ {
Idx reg_idx; Idx reg_idx;
cur_node = -1;
if (fs) if (fs)
{ {
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1) if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
break; {
if (reg_idx == nmatch) cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
{ prev_idx_match, &eps_via_nodes);
re_node_set_free (&eps_via_nodes); break;
regmatch_list_free (&prev_match); }
return free_fail_stack_return (fs);
}
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
&eps_via_nodes);
} }
else if (cur_node < 0)
{ {
re_node_set_free (&eps_via_nodes); re_node_set_free (&eps_via_nodes);
regmatch_list_free (&prev_match); regmatch_list_free (&prev_match);
return REG_NOERROR; return free_fail_stack_return (fs);
} }
} }
/* Proceed to next node. */ /* Proceed to next node. */
cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node, cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
&idx, cur_node,
&eps_via_nodes, fs); &eps_via_nodes, fs);
if (__glibc_unlikely (cur_node < 0)) if (__glibc_unlikely (cur_node < 0))
@ -1445,13 +1454,13 @@ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
free_fail_stack_return (fs); free_fail_stack_return (fs);
return REG_ESPACE; return REG_ESPACE;
} }
if (fs) cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch, prev_idx_match, &eps_via_nodes);
&eps_via_nodes); if (cur_node < 0)
else
{ {
re_node_set_free (&eps_via_nodes); re_node_set_free (&eps_via_nodes);
regmatch_list_free (&prev_match); regmatch_list_free (&prev_match);
free_fail_stack_return (fs);
return REG_NOMATCH; return REG_NOMATCH;
} }
} }
@ -1495,10 +1504,10 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
} }
else if (type == OP_CLOSE_SUBEXP) else if (type == OP_CLOSE_SUBEXP)
{ {
/* We are at the last node of this sub expression. */
Idx reg_num = dfa->nodes[cur_node].opr.idx + 1; Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
if (reg_num < nmatch) if (reg_num < nmatch)
{ {
/* We are at the last node of this sub expression. */
if (pmatch[reg_num].rm_so < cur_idx) if (pmatch[reg_num].rm_so < cur_idx)
{ {
pmatch[reg_num].rm_eo = cur_idx; pmatch[reg_num].rm_eo = cur_idx;
@ -2195,6 +2204,7 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
/* Return the next state to which the current state STATE will transit by /* Return the next state to which the current state STATE will transit by
accepting the current input byte, and update STATE_LOG if necessary. accepting the current input byte, and update STATE_LOG if necessary.
Return NULL on failure.
If STATE can accept a multibyte char/collating element/back reference If STATE can accept a multibyte char/collating element/back reference
update the destination of STATE_LOG. */ update the destination of STATE_LOG. */
@ -2395,7 +2405,7 @@ check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
#if 0 #if 0
/* Return the next state to which the current state STATE will transit by /* Return the next state to which the current state STATE will transit by
accepting the current input byte. */ accepting the current input byte. Return NULL on failure. */
static re_dfastate_t * static re_dfastate_t *
transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx, transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
@ -2817,7 +2827,8 @@ find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
/* Check whether the node TOP_NODE at TOP_STR can arrive to the node /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
LAST_NODE at LAST_STR. We record the path onto PATH since it will be LAST_NODE at LAST_STR. We record the path onto PATH since it will be
heavily reused. heavily reused.
Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. */ Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
REG_ESPACE if memory is exhausted. */
static reg_errcode_t static reg_errcode_t
__attribute_warn_unused_result__ __attribute_warn_unused_result__
@ -3433,7 +3444,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
/* Group all nodes belonging to STATE into several destinations. /* Group all nodes belonging to STATE into several destinations.
Then for all destinations, set the nodes belonging to the destination Then for all destinations, set the nodes belonging to the destination
to DESTS_NODE[i] and set the characters accepted by the destination to DESTS_NODE[i] and set the characters accepted by the destination
to DEST_CH[i]. This function return the number of destinations. */ to DEST_CH[i]. Return the number of destinations if successful,
-1 on internal error. */
static Idx static Idx
group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
@ -4211,7 +4223,8 @@ match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
} }
/* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. */ at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
Return the new entry if successful, NULL if memory is exhausted. */
static re_sub_match_last_t * static re_sub_match_last_t *
match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx) match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)