2002-10-11  Isamu Hasegawa  <isamu@yamato.ibm.com>

	* posix/regcomp.c (re_compile_fastmap_iter): Remove the handling
	OP_CONTEXT_NODE.
	(regfree): Likewise.
	(create_initial_state): Likewise.
	(analyze): Remove the substitutions which became useless.
	(calc_first): Likewise.
	(calc_epsdest): Use edests of OP_BACK_REF in case that it has
	epsilon destination.
	(duplicate_node_closure): New function.
	(duplicate_node): Remove the handling OP_CONTEXT_NODE.
	(calc_inveclosure): Likewise.
	(calc_eclosure): Likewise.
	(calc_eclosure_iter): Invoke duplicate_node_closure instead of
	direct invocation of duplicate_node.
	(parse): Don't use comma operator in the return to avoid compiler
	warning.
	(parse_reg_exp): Likewise.
	(parse_branch): Likewise.
	(parse_expression): Likewise.
	(parse_sub_exp): Likewise.
	(parse_dup_op): Likewise.
	* posix/regex_internal.c (re_dfa_add_node): Remove the substitutions
	which became useless.
	(create_ci_newstate): Remove the handling OP_CONTEXT_NODE.
	(create_cd_newstate): Likewise.
	* posix/regex_internal.h (re_token_type_t): Remove the obsolete type.
	(re_token_t): Likewise.
	(re_dfa_t): Likewise.
	(re_node_set_remove): New macro.
	* posix/regexec.c (check_matching): Remove the handling
	OP_CONTEXT_NODE.
	(check_halt_node_context): Likewise.
	(proceed_next_node): Likewise.
	(pop_fail_stack): Fix the memory leak.
	(set_regs): Likewise.
	(free_fail_stack_return): New function.
	(sift_states_backward): Fix the memory leak.  Remove the handling
	OP_CONTEXT_NODE.
	(update_cur_sifted_state): Append some if clause to avoid redundant
	call.
	(sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a
	back reference.
	(check_dst_limits): Remove the handling OP_CONTEXT_NODE.
	(check_subexp_limits): Likewise.
	(search_subexp): Likewise.
	(sift_states_bkref): Likewise.
	(transit_state_mb): Likewise.
	(transit_state_bkref_loop): Likewise.
	(transit_state_bkref_loop): Likewise.
	(group_nodes_into_DFAstates): Likewise.
	(check_node_accept): Likewise.
	(sift_ctx_init): Add initializing.

2002-10-12  Ulrich Drepper  <drepper@redhat.com>

	* sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use
	__builtin_expect.
This commit is contained in:
Ulrich Drepper 2002-10-12 08:34:26 +00:00
parent cc12f2a442
commit 485d775dd5
6 changed files with 365 additions and 347 deletions

View file

@ -1,3 +1,63 @@
2002-10-11 Isamu Hasegawa <isamu@yamato.ibm.com>
* posix/regcomp.c (re_compile_fastmap_iter): Remove the handling
OP_CONTEXT_NODE.
(regfree): Likewise.
(create_initial_state): Likewise.
(analyze): Remove the substitutions which became useless.
(calc_first): Likewise.
(calc_epsdest): Use edests of OP_BACK_REF in case that it has
epsilon destination.
(duplicate_node_closure): New function.
(duplicate_node): Remove the handling OP_CONTEXT_NODE.
(calc_inveclosure): Likewise.
(calc_eclosure): Likewise.
(calc_eclosure_iter): Invoke duplicate_node_closure instead of
direct invocation of duplicate_node.
(parse): Don't use comma operator in the return to avoid compiler
warning.
(parse_reg_exp): Likewise.
(parse_branch): Likewise.
(parse_expression): Likewise.
(parse_sub_exp): Likewise.
(parse_dup_op): Likewise.
* posix/regex_internal.c (re_dfa_add_node): Remove the substitutions
which became useless.
(create_ci_newstate): Remove the handling OP_CONTEXT_NODE.
(create_cd_newstate): Likewise.
* posix/regex_internal.h (re_token_type_t): Remove the obsolete type.
(re_token_t): Likewise.
(re_dfa_t): Likewise.
(re_node_set_remove): New macro.
* posix/regexec.c (check_matching): Remove the handling
OP_CONTEXT_NODE.
(check_halt_node_context): Likewise.
(proceed_next_node): Likewise.
(pop_fail_stack): Fix the memory leak.
(set_regs): Likewise.
(free_fail_stack_return): New function.
(sift_states_backward): Fix the memory leak. Remove the handling
OP_CONTEXT_NODE.
(update_cur_sifted_state): Append some if clause to avoid redundant
call.
(sub_epsilon_src_nodes): Use IS_EPSILON_NODE since it might be a
back reference.
(check_dst_limits): Remove the handling OP_CONTEXT_NODE.
(check_subexp_limits): Likewise.
(search_subexp): Likewise.
(sift_states_bkref): Likewise.
(transit_state_mb): Likewise.
(transit_state_bkref_loop): Likewise.
(transit_state_bkref_loop): Likewise.
(group_nodes_into_DFAstates): Likewise.
(check_node_accept): Likewise.
(sift_ctx_init): Add initializing.
2002-10-12 Ulrich Drepper <drepper@redhat.com>
* sysdeps/unix/sysv/linux/i386/sysdep.h (INLINE_SYSCALL): Use
__builtin_expect.
2002-10-11 Ulrich Drepper <drepper@redhat.com>
* elf/dl-load.c (_dl_map_object_from_fd): Remove unnecessarily

View file

@ -85,6 +85,9 @@ static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
int top_clone_node, int root_node,
unsigned int constraint);
static reg_errcode_t duplicate_node (int *new_idx, re_dfa_t *dfa, int org_idx,
unsigned int constraint);
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
@ -351,11 +354,6 @@ re_compile_fastmap_iter (bufp, init_state, fastmap)
{
int node = init_state->nodes.elems[node_cnt];
re_token_type_t type = dfa->nodes[node].type;
if (type == OP_CONTEXT_NODE)
{
node = dfa->nodes[node].opr.ctx_info->entity;
type = dfa->nodes[node].type;
}
if (type == CHARACTER)
fastmap[dfa->nodes[node].opr.c] = 1;
@ -587,18 +585,7 @@ regfree (preg)
#endif /* RE_ENABLE_I18N */
if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
re_free (node->opr.sbcset);
else if (node->type == OP_CONTEXT_NODE)
{
if (dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
{
if (node->opr.ctx_info->bkref_eclosure != NULL)
re_node_set_free (node->opr.ctx_info->bkref_eclosure);
re_free (node->opr.ctx_info->bkref_eclosure);
}
re_free (node->opr.ctx_info);
}
}
re_free (dfa->firsts);
re_free (dfa->nexts);
for (i = 0; i < dfa->nodes_len; ++i)
{
@ -883,39 +870,25 @@ create_initial_state (dfa)
re_token_type_t type = dfa->nodes[node_idx].type;
int clexp_idx;
int entity = (type != OP_CONTEXT_NODE ? node_idx
: dfa->nodes[node_idx].opr.ctx_info->entity);
if ((type != OP_CONTEXT_NODE
|| (dfa->nodes[entity].type != OP_BACK_REF))
&& (type != OP_BACK_REF))
if (type != OP_BACK_REF)
continue;
for (clexp_idx = 0; clexp_idx < init_nodes.nelem; ++clexp_idx)
{
re_token_t *clexp_node;
clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
if (clexp_node->type == OP_CLOSE_SUBEXP
&& clexp_node->opr.idx + 1 == dfa->nodes[entity].opr.idx)
&& clexp_node->opr.idx + 1 == dfa->nodes[node_idx].opr.idx)
break;
}
if (clexp_idx == init_nodes.nelem)
continue;
if (type == OP_CONTEXT_NODE
&& (dfa->nodes[dfa->nodes[node_idx].opr.ctx_info->entity].type
== OP_BACK_REF))
if (type == OP_BACK_REF)
{
int prev_nelem = init_nodes.nelem;
re_node_set_merge (&init_nodes,
dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure);
if (prev_nelem < init_nodes.nelem)
i = 0;
}
else if (type == OP_BACK_REF)
{
int next_idx = dfa->nexts[node_idx];
if (!re_node_set_contains (&init_nodes, next_idx))
int dest_idx = dfa->edests[node_idx].elems[0];
if (!re_node_set_contains (&init_nodes, dest_idx))
{
re_node_set_merge (&init_nodes, dfa->eclosures + next_idx);
re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
i = 0;
}
}
@ -959,18 +932,16 @@ analyze (dfa)
reg_errcode_t ret;
/* Allocate arrays. */
dfa->firsts = re_malloc (int, dfa->nodes_alloc);
dfa->nexts = re_malloc (int, dfa->nodes_alloc);
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
if (BE (dfa->firsts == NULL || dfa->nexts == NULL || dfa->edests == NULL
if (BE (dfa->nexts == NULL || dfa->edests == NULL
|| dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
return REG_ESPACE;
/* Initialize them. */
for (i = 0; i < dfa->nodes_len; ++i)
{
dfa->firsts[i] = -1;
dfa->nexts[i] = -1;
re_node_set_init_empty (dfa->edests + i);
re_node_set_init_empty (dfa->eclosures + i);
@ -1083,8 +1054,6 @@ calc_first (dfa, node)
node->first = node->left->first;
break;
}
if (node->type == 0)
dfa->firsts[idx] = node->first;
}
/* Calculate "next" for the node NODE. */
@ -1187,11 +1156,114 @@ calc_epsdest (dfa, node)
}
else if (dfa->nodes[idx].type == ANCHOR
|| dfa->nodes[idx].type == OP_OPEN_SUBEXP
|| dfa->nodes[idx].type == OP_CLOSE_SUBEXP)
|| dfa->nodes[idx].type == OP_CLOSE_SUBEXP
|| dfa->nodes[idx].type == OP_BACK_REF)
re_node_set_init_1 (dfa->edests + idx, node->next);
}
}
/* Duplicate the epsilon closure of the node ROOT_NODE.
Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
to their own constraint. */
static reg_errcode_t
duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
init_constraint)
re_dfa_t *dfa;
int top_org_node, top_clone_node, root_node;
unsigned int init_constraint;
{
reg_errcode_t err;
int org_node, clone_node, ret;
unsigned int constraint = init_constraint;
for (org_node = top_org_node, clone_node = top_clone_node;;)
{
int org_dest, clone_dest;
if (dfa->nodes[org_node].type == OP_BACK_REF)
{
/* If the back reference epsilon-transit, its destination must
also have the constraint. Then duplicate the epsilon closure
of the destination of the back reference, and store it in
edests of the back reference. */
org_dest = dfa->nexts[org_node];
re_node_set_empty (dfa->edests + clone_node);
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
dfa->nexts[clone_node] = dfa->nexts[org_node];
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
}
else if (dfa->edests[org_node].nelem == 0)
{
/* In case of the node can't epsilon-transit, don't duplicate the
destination and store the original destination as the
destination of the node. */
dfa->nexts[clone_node] = dfa->nexts[org_node];
break;
}
else if (dfa->edests[org_node].nelem == 1)
{
/* In case of the node can epsilon-transit, and it has only one
destination. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
if (dfa->nodes[org_node].type == ANCHOR)
{
/* In case of the node has another constraint, append it. */
if (org_node == root_node && clone_node != org_node)
{
/* ...but if the node is root_node itself, it means the
epsilon closure have a loop, then tie it to the
destination of the root_node. */
ret = re_node_set_insert (dfa->edests + clone_node,
org_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
break;
}
constraint |= dfa->nodes[org_node].opr.ctx_type;
}
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
}
else /* dfa->edests[org_node].nelem == 2 */
{
/* In case of the node can epsilon-transit, and it has two
destinations. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
err = duplicate_node_closure (dfa, org_dest, clone_dest, root_node,
constraint);
if (BE (err != REG_NOERROR, 0))
return err;
org_dest = dfa->edests[org_node].elems[1];
err = duplicate_node (&clone_dest, dfa, org_dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
if (BE (ret < 0, 0))
return REG_ESPACE;
}
org_node = org_dest;
clone_node = clone_dest;
}
return REG_NOERROR;
}
/* Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.
The new index will be stored in NEW_IDX and return REG_NOERROR if succeeded,
otherwise return the error code. */
@ -1204,50 +1276,18 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
{
re_token_t dup;
int dup_idx;
reg_errcode_t err;
dup.type = OP_CONTEXT_NODE;
if (dfa->nodes[org_idx].type == OP_CONTEXT_NODE)
{
/* If the node whose index is ORG_IDX is the same as the intended
node, use it. */
if (dfa->nodes[org_idx].constraint == constraint)
{
*new_idx = org_idx;
return REG_NOERROR;
}
dup.constraint = constraint |
dfa->nodes[org_idx].constraint;
}
else
dup.constraint = constraint;
/* In case that `entity' points OP_CONTEXT_NODE,
we correct `entity' to real entity in calc_inveclosures(). */
dup.opr.ctx_info = malloc (sizeof (*dup.opr.ctx_info));
dup = dfa->nodes[org_idx];
dup_idx = re_dfa_add_node (dfa, dup, 1);
if (BE (dup.opr.ctx_info == NULL || dup_idx == -1, 0))
if (BE (dup_idx == -1, 0))
return REG_ESPACE;
dup.opr.ctx_info->entity = org_idx;
dup.opr.ctx_info->bkref_eclosure = NULL;
dfa->nodes[dup_idx].constraint = constraint;
if (dfa->nodes[org_idx].type == ANCHOR)
dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
dfa->nodes[dup_idx].duplicated = 1;
dfa->firsts[dup_idx] = dfa->firsts[org_idx];
dfa->nexts[dup_idx] = dfa->nexts[org_idx];
err = re_node_set_init_copy (dfa->edests + dup_idx, dfa->edests + org_idx);
if (BE (err != REG_NOERROR, 0))
return err;
/* Since we don't duplicate epsilon nodes, epsilon closure have
only itself. */
err = re_node_set_init_1 (dfa->eclosures + dup_idx, dup_idx);
if (BE (err != REG_NOERROR, 0))
return err;
err = re_node_set_init_1 (dfa->inveclosures + dup_idx, dup_idx);
if (BE (err != REG_NOERROR, 0))
return err;
/* Then we must update inveclosure for this node.
We process them at last part of calc_eclosure(),
since we don't complete to calculate them here. */
re_node_set_init_empty (dfa->edests + dup_idx);
re_node_set_init_empty (dfa->eclosures + dup_idx);
re_node_set_init_empty (dfa->inveclosures + dup_idx);
*new_idx = dup_idx;
return REG_NOERROR;
@ -1257,7 +1297,7 @@ static void
calc_inveclosure (dfa)
re_dfa_t *dfa;
{
int src, idx, dest, entity;
int src, idx, dest;
for (src = 0; src < dfa->nodes_len; ++src)
{
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
@ -1265,15 +1305,6 @@ calc_inveclosure (dfa)
dest = dfa->eclosures[src].elems[idx];
re_node_set_insert (dfa->inveclosures + dest, src);
}
entity = src;
while (dfa->nodes[entity].type == OP_CONTEXT_NODE)
{
entity = dfa->nodes[entity].opr.ctx_info->entity;
re_node_set_merge (dfa->inveclosures + src,
dfa->inveclosures + entity);
dfa->nodes[src].opr.ctx_info->entity = entity;
}
}
}
@ -1283,16 +1314,17 @@ static reg_errcode_t
calc_eclosure (dfa)
re_dfa_t *dfa;
{
int idx, node_idx, max, incomplete = 0;
int node_idx, incomplete;
#ifdef DEBUG
assert (dfa->nodes_len > 0);
#endif
incomplete = 0;
/* For each nodes, calculate epsilon closure. */
for (node_idx = 0, max = dfa->nodes_len; ; ++node_idx)
for (node_idx = 0; ; ++node_idx)
{
reg_errcode_t err;
re_node_set eclosure_elem;
if (node_idx == max)
if (node_idx == dfa->nodes_len)
{
if (!incomplete)
break;
@ -1301,7 +1333,6 @@ calc_eclosure (dfa)
}
#ifdef DEBUG
assert (dfa->nodes[node_idx].type != OP_CONTEXT_NODE);
assert (dfa->eclosures[node_idx].nelem != -1);
#endif
/* If we have already calculated, skip it. */
@ -1318,41 +1349,6 @@ calc_eclosure (dfa)
re_node_set_free (&eclosure_elem);
}
}
/* for duplicated nodes. */
for (idx = max; idx < dfa->nodes_len; ++idx)
{
int entity, i, constraint;
re_node_set *bkref_eclosure;
entity = dfa->nodes[idx].opr.ctx_info->entity;
re_node_set_merge (dfa->inveclosures + idx, dfa->inveclosures + entity);
if (dfa->nodes[entity].type != OP_BACK_REF)
continue;
/* If the node is backreference, duplicate the epsilon closure of
the next node. Since it may epsilon transit. */
/* Note: duplicate_node() may realloc dfa->eclosures, etc. */
bkref_eclosure = re_malloc (re_node_set, 1);
if (BE (bkref_eclosure == NULL, 0))
return REG_ESPACE;
re_node_set_init_empty (bkref_eclosure);
constraint = dfa->nodes[idx].constraint;
for (i = 0; i < dfa->eclosures[dfa->nexts[idx]].nelem; ++i)
{
int dest_node_idx = dfa->eclosures[dfa->nexts[idx]].elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest_node_idx].type))
{
reg_errcode_t err;
err = duplicate_node (&dest_node_idx, dfa, dest_node_idx,
constraint);
if (BE (err != REG_NOERROR, 0))
return err;
}
re_node_set_insert (bkref_eclosure, dest_node_idx);
}
dfa->nodes[idx].opr.ctx_info->bkref_eclosure = bkref_eclosure;
}
return REG_NOERROR;
}
@ -1366,8 +1362,9 @@ calc_eclosure_iter (new_set, dfa, node, root)
{
reg_errcode_t err;
unsigned int constraint;
int i, max, incomplete = 0;
int i, incomplete;
re_node_set eclosure;
incomplete = 0;
err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + 1);
if (BE (err != REG_NOERROR, 0))
return err;
@ -1378,9 +1375,19 @@ calc_eclosure_iter (new_set, dfa, node, root)
constraint = ((dfa->nodes[node].type == ANCHOR)
? dfa->nodes[node].opr.ctx_type : 0);
/* If the current node has constraints, duplicate all nodes.
Since they must inherit the constraints. */
if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
{
int org_node, cur_node;
org_node = cur_node = node;
err = duplicate_node_closure (dfa, node, node, node, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
}
/* Expand each epsilon destination nodes. */
if (dfa->edests[node].nelem != 0)
if (IS_EPSILON_NODE(dfa->nodes[node].type))
for (i = 0; i < dfa->edests[node].nelem; ++i)
{
re_node_set eclosure_elem;
@ -1413,28 +1420,6 @@ calc_eclosure_iter (new_set, dfa, node, root)
}
}
/* If the current node has constraints, duplicate all non-epsilon nodes.
Since they must inherit the constraints. */
if (constraint)
for (i = 0, max = eclosure.nelem; i < max; ++i)
{
int dest = eclosure.elems[i];
if (!IS_EPSILON_NODE (dfa->nodes[dest].type))
{
int dup_dest;
reg_errcode_t err;
err = duplicate_node (&dup_dest, dfa, dest, constraint);
if (BE (err != REG_NOERROR, 0))
return err;
if (dest != dup_dest)
{
re_node_set_remove_at (&eclosure, i--);
re_node_set_insert (&eclosure, dup_dest);
--max;
}
}
}
/* Epsilon closures include itself. */
re_node_set_insert (&eclosure, node);
if (incomplete && !root)
@ -1793,7 +1778,10 @@ parse (regexp, preg, syntax, err)
else
root = eor;
if (BE (new_idx == -1 || eor == NULL || root == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
return root;
}
@ -1841,7 +1829,10 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
branch = NULL;
tree = create_tree (tree, branch, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
dfa->has_plural_match = 1;
}
return tree;
@ -1883,7 +1874,10 @@ parse_branch (regexp, preg, token, syntax, nest, err)
{
tree = create_tree (tree, exp, CONCAT, 0);
if (tree == NULL)
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
}
else if (tree == NULL)
tree = exp;
@ -1916,7 +1910,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
#ifdef RE_ENABLE_I18N
if (MB_CUR_MAX > 1)
{
@ -1954,7 +1951,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
++dfa->nbackref;
dfa->has_mb_node = 1;
break;
@ -1963,7 +1963,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
case OP_DUP_QUESTION:
case OP_OPEN_DUP_NUM:
if (syntax & RE_CONTEXT_INVALID_OPS)
return *err = REG_BADRPT, NULL;
{
*err = REG_BADRPT;
return NULL;
}
else if (syntax & RE_CONTEXT_INDEP_OPS)
{
*token = fetch_token (regexp, syntax);
@ -1973,7 +1976,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
case OP_CLOSE_SUBEXP:
if ((token->type == OP_CLOSE_SUBEXP) &&
!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
return *err = REG_ERPAREN, NULL;
{
*err = REG_ERPAREN;
return NULL;
}
/* else fall through */
case OP_CLOSE_DUP_NUM:
/* We treat it as a normal character. */
@ -1983,7 +1989,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
break;
case ANCHOR:
if (dfa->word_char == NULL)
@ -2008,7 +2017,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
if (BE (idx_first == -1 || idx_last == -1 || new_idx == -1
|| tree_first == NULL || tree_last == NULL
|| tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
}
else
{
@ -2027,7 +2039,10 @@ parse_expression (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
if (MB_CUR_MAX > 1)
dfa->has_mb_node = 1;
break;
@ -2108,7 +2123,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
left_par = create_tree (NULL, NULL, 0, new_idx);
if (BE (new_idx == -1 || left_par == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
dfa->nodes[new_idx].opr.idx = cur_nsub;
*token = fetch_token (regexp, syntax);
@ -2134,7 +2152,10 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
: create_tree (tree, right_par, CONCAT, 0));
tree = create_tree (left_par, tree, CONCAT, 0);
if (BE (new_idx == -1 || right_par == NULL || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
dfa->nodes[new_idx].opr.idx = cur_nsub;
return tree;
@ -2252,7 +2273,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
work_tree = duplicate_tree (elem, dfa);
tree = create_tree (tree, work_tree, CONCAT, 0);
if (BE (work_tree == NULL || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
}
}
}
@ -2261,7 +2285,10 @@ parse_dup_op (dup_elem, regexp, dfa, token, syntax, err)
new_idx = re_dfa_add_node (dfa, *token, 0);
tree = create_tree (tree, NULL, 0, new_idx);
if (BE (new_idx == -1 || tree == NULL, 0))
return *err = REG_ESPACE, NULL;
{
*err = REG_ESPACE;
return NULL;
}
}
*token = fetch_token (regexp, syntax);
return tree;

View file

@ -929,20 +929,18 @@ re_dfa_add_node (dfa, token, mode)
dfa->nodes = new_array;
if (mode)
{
int *new_firsts, *new_nexts;
int *new_nexts;
re_node_set *new_edests, *new_eclosures, *new_inveclosures;
new_firsts = re_realloc (dfa->firsts, int, dfa->nodes_alloc);
new_nexts = re_realloc (dfa->nexts, int, dfa->nodes_alloc);
new_edests = re_realloc (dfa->edests, re_node_set, dfa->nodes_alloc);
new_eclosures = re_realloc (dfa->eclosures, re_node_set,
dfa->nodes_alloc);
new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
dfa->nodes_alloc);
if (BE (new_firsts == NULL || new_nexts == NULL || new_edests == NULL
if (BE (new_nexts == NULL || new_edests == NULL
|| new_eclosures == NULL || new_inveclosures == NULL, 0))
return -1;
dfa->firsts = new_firsts;
dfa->nexts = new_nexts;
dfa->edests = new_edests;
dfa->eclosures = new_eclosures;
@ -951,6 +949,7 @@ re_dfa_add_node (dfa, token, mode)
}
dfa->nodes[dfa->nodes_len] = token;
dfa->nodes[dfa->nodes_len].duplicated = 0;
dfa->nodes[dfa->nodes_len].constraint = 0;
return dfa->nodes_len++;
}
@ -1126,7 +1125,7 @@ create_ci_newstate (dfa, nodes, hash)
{
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
if (type == CHARACTER)
if (type == CHARACTER && !node->constraint)
continue;
/* If the state has the halt node, the state is a halt state. */
@ -1139,13 +1138,8 @@ create_ci_newstate (dfa, nodes, hash)
#endif /* RE_ENABLE_I18N */
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR || OP_CONTEXT_NODE)
{
newstate->has_constraint = 1;
if (type == OP_CONTEXT_NODE
&& dfa->nodes[node->opr.ctx_info->entity].type == END_OF_RE)
newstate->halt = 1;
}
else if (type == ANCHOR || node->constraint)
newstate->has_constraint = 1;
}
err = register_state (dfa, newstate, hash);
return (err != REG_NOERROR) ? NULL : newstate;
@ -1175,9 +1169,11 @@ create_cd_newstate (dfa, nodes, context, hash)
unsigned int constraint = 0;
re_token_t *node = dfa->nodes + nodes->elems[i];
re_token_type_t type = node->type;
if (type == CHARACTER)
continue;
if (node->constraint)
constraint = node->constraint;
if (type == CHARACTER && !constraint)
continue;
/* If the state has the halt node, the state is a halt state. */
else if (type == END_OF_RE)
newstate->halt = 1;
@ -1190,20 +1186,6 @@ create_cd_newstate (dfa, nodes, context, hash)
newstate->has_backref = 1;
else if (type == ANCHOR)
constraint = node->opr.ctx_type;
else if (type == OP_CONTEXT_NODE)
{
re_token_type_t ctype = dfa->nodes[node->opr.ctx_info->entity].type;
constraint = node->constraint;
if (ctype == END_OF_RE)
newstate->halt = 1;
else if (ctype == OP_BACK_REF)
newstate->has_backref = 1;
#ifdef RE_ENABLE_I18N
else if (ctype == COMPLEX_BRACKET
|| (type == OP_PERIOD && MB_CUR_MAX > 1))
newstate->accept_mb = 1;
#endif /* RE_ENABLE_I18N */
}
if (constraint)
{

View file

@ -133,7 +133,6 @@ typedef enum
OP_DUP_QUESTION,
OP_BACK_REF,
ANCHOR,
OP_CONTEXT_NODE,
/* Dummy marker. */
END_OF_RE_TOKEN_T
@ -198,11 +197,6 @@ typedef struct
#endif /* RE_ENABLE_I18N */
int idx; /* for BACK_REF */
re_context_type ctx_type; /* for ANCHOR */
struct
{
int entity; /* for OP_CONTEXT_NODE, index of the entity */
re_node_set *bkref_eclosure;
} *ctx_info;
} opr;
#if __GNUC__ >= 2
re_token_type_t type : 8;
@ -474,7 +468,6 @@ struct re_dfa_t
int nodes_alloc;
int nodes_len;
bin_tree_t *str_tree;
int *firsts;
int *nexts;
re_node_set *edests;
re_node_set *eclosures;
@ -519,6 +512,8 @@ static int re_node_set_compare (const re_node_set *set1,
const re_node_set *set2);
static int re_node_set_contains (const re_node_set *set, int elem);
static void re_node_set_remove_at (re_node_set *set, int idx);
#define re_node_set_remove(set,id) \
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode);

View file

@ -95,6 +95,8 @@ static reg_errcode_t set_regs (const regex_t *preg,
const re_match_context_t *mctx,
size_t nmatch, regmatch_t *pmatch,
int fl_backtrack);
static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
#ifdef RE_ENABLE_I18N
static int sift_states_iter_mb (const regex_t *preg,
const re_match_context_t *mctx,
@ -845,11 +847,8 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
for (i = 0; i < cur_state->nodes.nelem; ++i)
{
re_token_type_t type;
int node = cur_state->nodes.elems[i];
int entity = (dfa->nodes[node].type != OP_CONTEXT_NODE ? node
: dfa->nodes[node].opr.ctx_info->entity);
type = dfa->nodes[entity].type;
re_token_type_t type = dfa->nodes[node].type;
if (type == OP_BACK_REF)
{
int clexp_idx;
@ -859,7 +858,7 @@ check_matching (preg, mctx, fl_search, fl_longest_match)
re_token_t *clexp_node;
clexp_node = dfa->nodes + cur_state->nodes.elems[clexp_idx];
if (clexp_node->type == OP_CLOSE_SUBEXP
&& clexp_node->opr.idx + 1== dfa->nodes[entity].opr.idx)
&& clexp_node->opr.idx + 1== dfa->nodes[node].opr.idx)
{
err = match_ctx_add_entry (mctx, node, 0, 0, 0);
if (BE (err != REG_NOERROR, 0))
@ -955,15 +954,13 @@ static int check_halt_node_context (dfa, node, context)
int node;
unsigned int context;
{
int entity;
re_token_type_t type = dfa->nodes[node].type;
if (type == END_OF_RE)
return 1;
if (type != OP_CONTEXT_NODE)
unsigned int constraint = dfa->nodes[node].constraint;
if (type != END_OF_RE)
return 0;
entity = dfa->nodes[node].opr.ctx_info->entity;
if (dfa->nodes[entity].type != END_OF_RE
|| NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[node].constraint, context))
if (!constraint)
return 1;
if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
return 0;
return 1;
}
@ -1008,39 +1005,27 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
struct re_fail_stack_t *fs;
{
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
int i, err, dest_node, cur_entity;
int i, err, dest_node;
dest_node = -1;
cur_entity = ((dfa->nodes[node].type == OP_CONTEXT_NODE)
? dfa->nodes[node].opr.ctx_info->entity : node);
if (IS_EPSILON_NODE (dfa->nodes[node].type))
{
int ndest, dest_nodes[2], dest_entities[2];
re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
int ndest, dest_nodes[2];
err = re_node_set_insert (eps_via_nodes, node);
if (BE (err < 0, 0))
return -1;
/* Pick up valid destinations. */
for (ndest = 0, i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
for (ndest = 0, i = 0; i < dfa->edests[node].nelem; ++i)
{
int candidate = mctx->state_log[*pidx]->nodes.elems[i];
int entity;
entity = ((dfa->nodes[candidate].type == OP_CONTEXT_NODE)
? dfa->nodes[candidate].opr.ctx_info->entity : candidate);
if (!re_node_set_contains (dfa->edests + node, entity))
int candidate = dfa->edests[node].elems[i];
if (!re_node_set_contains (cur_nodes, candidate))
continue;
dest_nodes[0] = (ndest == 0) ? candidate : dest_nodes[0];
dest_entities[0] = (ndest == 0) ? entity : dest_entities[0];
dest_nodes[1] = (ndest == 1) ? candidate : dest_nodes[1];
dest_entities[1] = (ndest == 1) ? entity : dest_entities[1];
++ndest;
}
if (ndest <= 1)
return ndest == 0 ? -1 : (ndest == 1 ? dest_nodes[0] : 0);
if (dest_entities[0] > dest_entities[1])
{
int swap_work = dest_nodes[0];
dest_nodes[0] = dest_nodes[1];
dest_nodes[1] = swap_work;
}
/* In order to avoid infinite loop like "(a*)*". */
if (re_node_set_contains (eps_via_nodes, dest_nodes[0]))
return dest_nodes[1];
@ -1050,22 +1035,17 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
}
else
{
int naccepted = 0, entity = node;
int naccepted = 0;
re_token_type_t type = dfa->nodes[node].type;
if (type == OP_CONTEXT_NODE)
{
entity = dfa->nodes[node].opr.ctx_info->entity;
type = dfa->nodes[entity].type;
}
#ifdef RE_ENABLE_I18N
if (ACCEPT_MB_NODE (type))
naccepted = check_node_accept_bytes (preg, entity, mctx->input, *pidx);
naccepted = check_node_accept_bytes (preg, node, mctx->input, *pidx);
else
#endif /* RE_ENABLE_I18N */
if (type == OP_BACK_REF)
{
int subexp_idx = dfa->nodes[entity].opr.idx;
int subexp_idx = dfa->nodes[node].opr.idx;
naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
if (fs != NULL)
{
@ -1085,18 +1065,10 @@ proceed_next_node (preg, nregs, regs, mctx, pidx, node, eps_via_nodes, fs)
err = re_node_set_insert (eps_via_nodes, node);
if (BE (err < 0, 0))
return -2;
dest_node = dfa->nexts[node];
dest_node = dfa->edests[node].elems[0];
if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
dest_node))
return dest_node;
for (i = 0; i < mctx->state_log[*pidx]->nodes.nelem; ++i)
{
dest_node = mctx->state_log[*pidx]->nodes.elems[i];
if ((dfa->nodes[dest_node].type == OP_CONTEXT_NODE
&& (dfa->nexts[node]
== dfa->nodes[dest_node].opr.ctx_info->entity)))
return dest_node;
}
}
}
@ -1153,6 +1125,7 @@ pop_fail_stack (fs, pidx, nregs, regs, eps_via_nodes)
*pidx = fs->stack[num].idx;
memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
re_node_set_free (eps_via_nodes);
re_free (fs->stack[num].regs);
*eps_via_nodes = fs->stack[num].eps_via_nodes;
return fs->stack[num].node;
}
@ -1201,12 +1174,18 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
break;
if (reg_idx == nmatch)
return REG_NOERROR;
{
re_node_set_free (&eps_via_nodes);
return free_fail_stack_return (fs);
}
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
&eps_via_nodes);
}
else
return REG_NOERROR;
{
re_node_set_free (&eps_via_nodes);
return REG_NOERROR;
}
}
/* Proceed to next node. */
@ -1221,10 +1200,30 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
&eps_via_nodes);
else
return REG_NOMATCH;
{
re_node_set_free (&eps_via_nodes);
return REG_NOMATCH;
}
}
}
re_node_set_free (&eps_via_nodes);
return free_fail_stack_return (fs);
}
static reg_errcode_t
free_fail_stack_return (fs)
struct re_fail_stack_t *fs;
{
if (fs)
{
int fs_idx;
for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
{
re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
re_free (fs->stack[fs_idx].regs);
}
re_free (fs->stack);
}
return REG_NOERROR;
}
@ -1314,6 +1313,7 @@ sift_states_backward (preg, mctx, sctx)
{
memset (sctx->sifted_states, '\0',
sizeof (re_dfastate_t *) * str_idx);
re_node_set_free (&cur_dest);
return REG_NOERROR;
}
re_node_set_empty (&cur_dest);
@ -1330,22 +1330,16 @@ sift_states_backward (preg, mctx, sctx)
for (i = 0; i < cur_src->nelem; i++)
{
int prev_node = cur_src->elems[i];
int entity = prev_node;
int naccepted = 0;
re_token_type_t type = dfa->nodes[prev_node].type;
if (IS_EPSILON_NODE(type))
continue;
if (type == OP_CONTEXT_NODE)
{
entity = dfa->nodes[prev_node].opr.ctx_info->entity;
type = dfa->nodes[entity].type;
}
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
if (ACCEPT_MB_NODE (type))
naccepted = sift_states_iter_mb (preg, mctx, sctx, entity, str_idx,
sctx->last_str_idx);
naccepted = sift_states_iter_mb (preg, mctx, sctx, prev_node,
str_idx, sctx->last_str_idx);
#endif /* RE_ENABLE_I18N */
/* We don't check backreferences here.
@ -1459,12 +1453,15 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
/* At first, add the nodes which can epsilon transit to a node in
DEST_NODE. */
err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
if (BE (err != REG_NOERROR, 0))
return err;
if (dest_nodes->nelem)
{
err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
if (BE (err != REG_NOERROR, 0))
return err;
}
/* Then, check the limitations in the current sift_context. */
if (sctx->limits.nelem)
if (dest_nodes->nelem && sctx->limits.nelem)
{
err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
mctx->bkref_ents, str_idx);
@ -1479,7 +1476,7 @@ update_cur_sifted_state (preg, mctx, sctx, str_idx, dest_nodes)
/* If we are searching for the subexpression candidates.
Note that we were from transit_state_bkref_loop() in this case. */
if (sctx->check_subexp)
if (dest_nodes->nelem && sctx->check_subexp)
{
err = search_subexp (preg, mctx, sctx, str_idx, dest_nodes);
if (BE (err != REG_NOERROR, 0))
@ -1538,7 +1535,7 @@ sub_epsilon_src_nodes (dfa, node, dest_nodes, candidates)
int cur_node = inv_eclosure->elems[ecl_idx];
if (cur_node == node)
continue;
if (dfa->edests[cur_node].nelem)
if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
{
int edst1 = dfa->edests[cur_node].elems[0];
int edst2 = ((dfa->edests[cur_node].nelem > 1)
@ -1580,12 +1577,10 @@ check_dst_limits (dfa, limits, mctx, dst_node, dst_idx, src_node, src_idx)
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
{
int bkref, subexp_idx/*, node_idx, cls_node*/;
int subexp_idx;
struct re_backref_cache_entry *ent;
ent = mctx->bkref_ents + limits->elems[lim_idx];
bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
subexp_idx = dfa->nodes[bkref].opr.idx - 1;
subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
dst_pos = check_dst_limits_calc_pos (dfa, mctx, limits->elems[lim_idx],
dfa->eclosures + dst_node,
@ -1624,13 +1619,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
{
int node = eclosures->elems[node_idx];
int entity = node;
re_token_type_t type= dfa->nodes[node].type;
if (type == OP_CONTEXT_NODE)
{
entity = dfa->nodes[node].opr.ctx_info->entity;
type = dfa->nodes[entity].type;
}
if (type == OP_BACK_REF)
{
int bi;
@ -1641,7 +1630,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, node,
&& ent->str_idx == str_idx)
{
int cpos, dst;
dst = dfa->nexts[node];
dst = dfa->edests[node].elems[0];
cpos = check_dst_limits_calc_pos (dfa, mctx, limit,
dfa->eclosures + dst,
subexp_idx, dst,
@ -1685,17 +1674,14 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
{
int bkref, subexp_idx;
int subexp_idx;
struct re_backref_cache_entry *ent;
ent = bkref_ents + limits->elems[lim_idx];
if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
continue; /* This is unrelated limitation. */
bkref = (dfa->nodes[ent->node].type == OP_CONTEXT_NODE
? dfa->nodes[ent->node].opr.ctx_info->entity : ent->node);
subexp_idx = dfa->nodes[bkref].opr.idx - 1;
subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
if (ent->subexp_to == str_idx)
{
int ops_node = -1;
@ -1790,12 +1776,8 @@ search_subexp (preg, mctx, sctx, str_idx, dest_nodes)
for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
{
re_token_type_t type;
int entity;
node = dest_nodes->elems[node_idx];
type = dfa->nodes[node].type;
entity = (type != OP_CONTEXT_NODE ? node
: dfa->nodes[node].opr.ctx_info->entity);
type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
if (type == OP_CLOSE_SUBEXP
&& sctx->check_subexp == dfa->nodes[node].opr.idx + 1)
@ -1933,14 +1915,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
{
int entity;
int cur_bkref_idx = re_string_cur_idx (mctx->input);
re_token_type_t type;
node = candidates->elems[node_idx];
type = dfa->nodes[node].type;
entity = (type != OP_CONTEXT_NODE ? node
: dfa->nodes[node].opr.ctx_info->entity);
type = (type != OP_CONTEXT_NODE ? type : dfa->nodes[entity].type);
if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
continue;
/* Avoid infinite loop for the REs like "()\1+". */
@ -1951,37 +1929,25 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
int enabled_idx;
for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
{
int disabled_idx, subexp_len, to_idx;
int disabled_idx, subexp_len, to_idx, dst_node;
struct re_backref_cache_entry *entry;
entry = mctx->bkref_ents + enabled_idx;
subexp_len = entry->subexp_to - entry->subexp_from;
to_idx = str_idx + subexp_len;
dst_node = (subexp_len ? dfa->nexts[node]
: dfa->edests[node].elems[0]);
if (entry->node != node || entry->str_idx != str_idx
|| to_idx > sctx->last_str_idx
|| sctx->sifted_states[to_idx] == NULL)
continue;
if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
dfa->nexts[node]))
{
int dst_idx;
re_node_set *dsts = &sctx->sifted_states[to_idx]->nodes;
for (dst_idx = 0; dst_idx < dsts->nelem; ++dst_idx)
{
int dst_node = dsts->elems[dst_idx];
if (dfa->nodes[dst_node].type == OP_CONTEXT_NODE
&& (dfa->nodes[dst_node].opr.ctx_info->entity
== dfa->nexts[node]))
break;
}
if (dst_idx == dsts->nelem)
continue;
}
if (!STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node))
continue;
if (check_dst_limits (dfa, &sctx->limits, mctx, node,
str_idx, dfa->nexts[node], to_idx))
continue;
if (sctx->check_subexp == dfa->nodes[entity].opr.idx)
if (sctx->check_subexp == dfa->nodes[node].opr.idx)
{
char *buf;
buf = (char *) re_string_get_buffer (mctx->input);
@ -2038,9 +2004,10 @@ sift_states_bkref (preg, mctx, sctx, str_idx, dest_nodes)
return err;
}
local_sctx.sifted_states[str_idx] = cur_state;
re_node_set_remove_at (&local_sctx.limits,
local_sctx.limits.nelem - 1);
entry->flag = 1;
re_node_set_remove (&local_sctx.limits, enabled_idx);
/* We must not use the variable entry here, since
mctx->bkref_ents might be realloced. */
mctx->bkref_ents[enabled_idx].flag = 1;
}
}
for (enabled_idx = 0; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
@ -2301,15 +2268,14 @@ transit_state_mb (preg, pstate, mctx)
unsigned int context;
re_dfastate_t *dest_state;
if (dfa->nodes[cur_node_idx].type == OP_CONTEXT_NODE)
if (dfa->nodes[cur_node_idx].constraint)
{
context = re_string_context_at (mctx->input,
re_string_cur_idx (mctx->input),
mctx->eflags, preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
context))
context))
continue;
cur_node_idx = dfa->nodes[cur_node_idx].opr.ctx_info->entity;
}
/* How many bytes the node can accepts? */
@ -2404,17 +2370,16 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
/* Check whether `node' is a backreference or not. */
if (node->type == OP_BACK_REF)
subexp_idx = node->opr.idx;
else if (node->type == OP_CONTEXT_NODE &&
dfa->nodes[node->opr.ctx_info->entity].type == OP_BACK_REF)
else
continue;
if (node->constraint)
{
context = re_string_context_at (mctx->input, cur_str_idx,
mctx->eflags, preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
continue;
subexp_idx = dfa->nodes[node->opr.ctx_info->entity].opr.idx;
}
else
continue;
/* `node' is a backreference.
Check the substring which the substring matched. */
@ -2440,8 +2405,8 @@ transit_state_bkref_loop (preg, nodes, work_state_log, mctx)
if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
continue;
subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
new_dest_nodes = ((node->type == OP_CONTEXT_NODE && subexp_len == 0)
? dfa->nodes[node_idx].opr.ctx_info->bkref_eclosure
new_dest_nodes = (subexp_len == 0
? dfa->eclosures + dfa->edests[node_idx].elems[0]
: dfa->eclosures + dfa->nexts[node_idx]);
dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
- bkref_ent->subexp_from);
@ -2688,16 +2653,9 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
/* For all the nodes belonging to `state', */
for (i = 0; i < cur_nodes->nelem; ++i)
{
unsigned int constraint = 0;
re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
re_token_type_t type = node->type;
if (type == OP_CONTEXT_NODE)
{
constraint = node->constraint;
node = dfa->nodes + node->opr.ctx_info->entity;
type = node->type;
}
unsigned int constraint = node->constraint;
/* Enumerate all single byte character this node can accept. */
if (type == CHARACTER)
@ -2724,10 +2682,10 @@ group_nodes_into_DFAstates (preg, state, dests_node, dests_ch)
if (constraint & NEXT_WORD_CONSTRAINT)
for (j = 0; j < BITSET_UINTS; ++j)
accepts[j] &= dfa->word_char[j];
else if (constraint & NEXT_NOTWORD_CONSTRAINT)
if (constraint & NEXT_NOTWORD_CONSTRAINT)
for (j = 0; j < BITSET_UINTS; ++j)
accepts[j] &= ~dfa->word_char[j];
else if (constraint & NEXT_NEWLINE_CONSTRAINT)
if (constraint & NEXT_NEWLINE_CONSTRAINT)
{
int accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
bitset_empty (accepts);
@ -3058,10 +3016,8 @@ check_node_accept (preg, node, mctx, idx)
const re_match_context_t *mctx;
int idx;
{
const re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
const re_token_t *cur_node;
unsigned char ch;
if (node->type == OP_CONTEXT_NODE)
if (node->constraint)
{
/* The node has constraints. Check whether the current context
satisfies the constraints. */
@ -3070,17 +3026,13 @@ check_node_accept (preg, node, mctx, idx)
preg->newline_anchor);
if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
return 0;
cur_node = dfa->nodes + node->opr.ctx_info->entity;
}
else
cur_node = node;
ch = re_string_byte_at (mctx->input, idx);
if (cur_node->type == CHARACTER)
return cur_node->opr.c == ch;
else if (cur_node->type == SIMPLE_BRACKET)
return bitset_contain (cur_node->opr.sbcset, ch);
else if (cur_node->type == OP_PERIOD)
if (node->type == CHARACTER)
return node->opr.c == ch;
else if (node->type == SIMPLE_BRACKET)
return bitset_contain (node->opr.sbcset, ch);
else if (node->type == OP_PERIOD)
return !((ch == '\n' && !(preg->syntax & RE_DOT_NEWLINE))
|| (ch == '\0' && (preg->syntax & RE_DOT_NOT_NULL)));
else
@ -3221,5 +3173,7 @@ sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
sctx->last_node = last_node;
sctx->last_str_idx = last_str_idx;
sctx->check_subexp = check_subexp;
sctx->cur_bkref = -1;
sctx->cls_subexp_idx = -1;
re_node_set_init_empty (&sctx->limits);
}

View file

@ -290,8 +290,8 @@ asm (".L__X'%ebx = 1\n\t"
#undef INLINE_SYSCALL
#define INLINE_SYSCALL(name, nr, args...) \
({ \
unsigned int resultvar = INTERNAL_SYSCALL(name, nr, args); \
if (INTERNAL_SYSCALL_ERROR_P (resultvar)) \
unsigned int resultvar = INTERNAL_SYSCALL (name, nr, args); \
if (__builtin_expect (INTERNAL_SYSCALL_ERROR_P (resultvar), 0)) \
{ \
__set_errno (INTERNAL_SYSCALL_ERRNO (resultvar)); \
resultvar = 0xffffffff; \