Skip to content

Commit

Permalink
Move in-state tag operations before YYFILL label if $-rule is used.
Browse files Browse the repository at this point in the history
If the $-rule is used, the lexer may need to jump to the YYFILL label to
rescan the current input character after YYFILL supplies more input.
This may cause re-application of tag operations if they are generated
after the YYFILL label, which may produce incorrect results for non-
idempotent tag operations.

In-state tag operations are used either with sta-DFA (where all tag
operations are in-state and there are no on-transition operations), or
when operations on all transitions agree and can be hoisted out of
transitions into the state body.
  • Loading branch information
skvadrik committed Oct 25, 2020
1 parent 68611a5 commit 56e4654
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 44 deletions.
52 changes: 11 additions & 41 deletions src/adfa/prepare.cc
Original file line number Diff line number Diff line change
Expand Up @@ -326,34 +326,7 @@ void DFA::calc_stats(OutputBlock &out)
}
}

// Check if subsequent application of tag operations produces the same results.
//
// Looks up the command list `dfa->tcpool[tcid]` and walks it through the `next`
// links. Returns true if `tcid` is TCID0, or if the walk finishes without
// finding an offending command. Returns false as soon as it meets an "add"
// command, or a "copy" command whose RHS equals the LHS of some command in the
// same list.
static bool idempotent_tag_operations(const DFA *dfa, tcid_t tcid)
{
// Empty operation sequence has no effect.
if (tcid == TCID0) return true;

// Non-idempotent operations are those that change values of tags used on
// the RHS of one of the "copy"/"add" operations.
const tcmd_t *cmd = dfa->tcpool[tcid];
for (const tcmd_t *p = cmd; p; p = p->next) {
if (tcmd_t::isset(p)) {
// "save" operations are idempotent, as they have no RHS tag
} else if (tcmd_t::isadd(p)) {
// "add" operations are non-idempotent, as they have the same LHS
// and RHS tags, eg 'x = x + 1;'
return false;
} else {
// "copy" operations may be non-idempotent, eg 'x = y; y = z;'
// The inner scan restarts from the head of the list, so the RHS of
// `p` is compared against the LHS of every command: those before
// `p`, `p` itself, and those after it.
for (const tcmd_t *q = cmd; q; q = q->next) {
if (p->rhs == q->lhs) return false;
}
}
}

// No command in the list was found to be non-idempotent.
return true;
}

static bool can_hoist_tags(const DFA *dfa, const State *s, const opt_t *opts)
static bool can_hoist_tags(const State *s, const opt_t *opts)
{
Span *span = s->go.span;
const size_t nspan = s->go.nspans;
Expand All @@ -369,19 +342,16 @@ static bool can_hoist_tags(const DFA *dfa, const State *s, const opt_t *opts)
}
}

// If end-of-input rule $ is used, there are additional restrictions.
if (opts->eof != NOEOF) {
// Check that final/fallback tags agree with other tags: if the end of
// input is reached, the lexer may follow the final/fallback transition.
if (tags != (s->rule == Rule::NONE ? s->fall_tags : s->rule_tags)) {
return false;
}

// Check that tag operations are idempotent, because the lexer may need
// to rescan the current input symbol and re-apply hoisted operations.
if (!idempotent_tag_operations(dfa, tags)) return false;
// If end-of-input rule $ is used, check that final/fallback tags agree with
// other tags, as the lexer may follow the final/fallback transition.
if (opts->eof != NOEOF
&& tags != (s->rule == Rule::NONE ? s->fall_tags : s->rule_tags)) {
return false;
}

// No need to check idempotence of tag operations in case of the end-of-input
// rule $, as they are applied before YYFILL label and there is no risk of
// re-application if the current input character is re-scanned after YYFILL.
return true;
}

Expand Down Expand Up @@ -410,7 +380,7 @@ void DFA::hoist_tags(const opt_t *opts)
const size_t nspan = s->go.nspans;
if (nspan == 0) continue;

if (can_hoist_tags(this, s, opts)) {
if (can_hoist_tags(s, opts)) {
s->go.tags = span[0].tags;
for (uint32_t i = 0; i < nspan; ++i) {
span[i].tags = TCID0;
Expand All @@ -429,7 +399,7 @@ void DFA::hoist_tags_and_skip(const opt_t *opts)
if (nspan == 0) continue;

// check if it is possible to hoist tags and/or skip
bool hoist_tags = can_hoist_tags(this, s, opts);
bool hoist_tags = can_hoist_tags(s, opts);
bool hoist_skip = can_hoist_skip(s, opts);
if (opts->lookahead) {
// skip must go after tags
Expand Down
9 changes: 8 additions & 1 deletion src/codegen/gen_goto.cc
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,14 @@ void gen_go(Output &output, const DFA &dfa, const CodeGo *go, const State *from,
append(stmts, code_skip(alc));
}

gen_settags(output, stmts, dfa, go->tags, opts->stadfa /* delayed */);
DASSERT(consume(from) || go->tags == TCID0);
if (opts->eof == NOEOF) {
// With the end-of-input rule $ tag operations *must* be generated
// before YYFILL label. Without $ rule there are no strict requirements,
// but generating them here (after YYFILL label) allows fusing skip and
// peek into one statement.
gen_settags(output, stmts, dfa, go->tags, opts->stadfa /* delayed */);
}

if (go->skip && opts->lookahead) {
append(stmts, code_skip(alc));
Expand Down
9 changes: 9 additions & 0 deletions src/codegen/gen_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -436,6 +436,15 @@ void gen_fill_and_label(Output &output, CodeList *stmts, const DFA &dfa, const S
gen_fill(output, stmts, dfa, s, NULL);
}

if (opts->eof != NOEOF) {
// If the end-of-input rule $ is used, the lexer may jump to the YYFILL
// label to rescan the current input character. Generate tag operations
// before the label to avoid applying them multiple times in the above
// scenario (re-application may produce incorrect results in case of
// non-idempotent operations).
gen_settags(output, stmts, dfa, s->go.tags, opts->stadfa /* delayed */);
}

if (need_fill_label) {
const char *flabel = gen_fill_label(output, output.block().fill_index - 1);
append(stmts, code_slabel(output.allocator, flabel));
Expand Down
2 changes: 1 addition & 1 deletion test/eof/goto_elision.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@
{ one }
yy8:
++YYCURSOR;
yyt1 = YYCURSOR;
yyFillLabel4:
yych = *YYCURSOR;
yyt1 = YYCURSOR;
if (yych >= 0x01) goto yy13;
if (YYLIMIT <= YYCURSOR) {
if (YYFILL() == 0) goto yyFillLabel4;
Expand Down
2 changes: 1 addition & 1 deletion test/eof/goto_elision_f.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,9 @@
{ one }
yy9:
++YYCURSOR;
yyt1 = YYCURSOR;
yyFillLabel4:
yych = *YYCURSOR;
yyt1 = YYCURSOR;
if (yych >= 0x01) goto yy14;
if (YYLIMIT <= YYCURSOR) {
YYSETSTATE(4);
Expand Down

0 comments on commit 56e4654

Please sign in to comment.