diff --git a/src/skeleton/generate_data.cc b/src/skeleton/generate_data.cc index e09182b55..d3aca1304 100644 --- a/src/skeleton/generate_data.cc +++ b/src/skeleton/generate_data.cc @@ -165,79 +165,94 @@ template static void write_keys(const path_t &path, const Skeleton &skel, size_t width, FILE *file) { + const size_t nver = skel.ntagver; + const size_t offby = skel.opts->lookahead ? 0 : 1; + // find last accepting node size_t f; for (f = path.len(); f > 0 && path.node(skel, f).rule == Rule::NONE; --f); - // calculate tags: start with default and apply commands step by step - const size_t - nver = skel.ntagver, - ntag = width * nver, - offby = skel.opts->lookahead ? 0 : 1; - std::vector *tags = new std::vector[ntag]; + const size_t rule = path.node(skel, f).rule; - // initial tags (TDFA(0)) - for (size_t w = 0; w < width; ++w) { - apply(&tags[w * nver], skel.cmd0, 0); + size_t ltag = 0, htag = 0, trail = 0; + if (rule != Rule::NONE) { + const Rule &r = skel.rules[rule]; + ltag = r.ltag; + htag = r.htag; + trail = r.ttag; } + + // arc iterators and character iterators within each arc + std::vector arcs; + arcs.reserve(f); + std::vector chars(f); for (size_t i = 0; i < f; ++i) { - Node::wciter_t a(path.arc(skel, i)); + arcs.push_back(Node::wciter_t(path.arc(skel, i))); + chars[i] = nsteps(arcs.back()->lower, arcs.back()->upper); + } + + // process each subpath of the multipath separately (rather than iterate + // on all subpaths in lockstep) to reduce the size of storage used for + // intermediate tag values + for (size_t w = 0; w < width; ++w) { - // tags commands in state (staDFA), -1 because of "delayed store" - for (size_t w = 0; w < width; ++w) { - apply(&tags[w * nver], path.node(skel, i).stacmd, i - 1); + // clear buffers for tag values + std::vector *tags = skel.tagvals; + for (size_t i = 0; i < nver; ++i) { + tags[i].clear(); } - // tag commands on transitions (TDFA(0), TDFA(1)) - for (size_t w = 0; w < width; ++a) { - uint32_t n = nsteps(a->lower, a->upper); - for (; n --> 0 && w < width; ++w) { - apply(&tags[w * nver], a->cmd, i + offby); + // initial tags (TDFA(0)) + apply(tags, skel.cmd0, 0); + + for (size_t i = 0; i < f; ++i) { + // tags commands in state (staDFA), -1 because of "delayed store" + apply(tags, path.node(skel, i).stacmd, i - 1); + + // tag commands on transitions (TDFA(0), TDFA(1)) + Node::wciter_t &a = arcs[i]; + apply(tags, a->cmd, i + offby); + + // advance character iterator + // if it's the last one, then switch to the next arc + if (--chars[i] == 0) { + ++a; + chars[i] = nsteps(a->lower, a->upper); } } - } - // tag commands in final states - const tcmd_t *fcmd = path.node(skel, f).cmd; - for (size_t w = 0; w < width; ++w) { + + // tag commands in final states + const tcmd_t *fcmd = path.node(skel, f).cmd; // staDFA, -1 because of "delayed store" - apply(&tags[w * nver], path.node(skel, f).stacmd, f - 1); + apply(tags, path.node(skel, f).stacmd, f - 1); // TDFA(1) - apply(&tags[w * nver], fcmd, f); - } - - const size_t rule = path.node(skel, f).rule; - size_t matched = 0, ltag = 0, htag = 0, trail = 0; - if (rule != Rule::NONE) { + apply(tags, fcmd, f); - const Rule &r = skel.rules[rule]; - ltag = r.ltag; - htag = r.htag; - trail = r.ttag; - - if (trail == htag) { - // no trailing context - matched = f; - } else { - const Tag &tag = skel.tags[trail]; - if (!fixed(tag)) { - // variable-length trailing context - matched = tags[skel.finvers[trail]].back(); - } else if (tag.base != Tag::RIGHTMOST) { - // fixed-length trailing context based on tag - matched = tags[skel.finvers[tag.base]].back() - tag.dist; + size_t matched = 0; + if (rule != Rule::NONE) { + if (trail == htag) { + // no trailing context + matched = f; } else { - // fixed-length trailing context based on cursor - matched = f - tag.dist; + const Tag &tag = skel.tags[trail]; + if (!fixed(tag)) { + // variable-length trailing context + matched = tags[skel.finvers[trail]].back(); + } else if (tag.base != Tag::RIGHTMOST) { + // fixed-length trailing context based on tag + matched = tags[skel.finvers[tag.base]].back() - tag.dist; + } else { + // fixed-length trailing context based on cursor + matched = f - tag.dist; + } } + DASSERT(matched != Skeleton::DEFTAG); } - DASSERT(matched != Skeleton::DEFTAG); - } - // count keys - size_t nkey = 0; - for (size_t w = 0; w < width; ++w) { + // count keys + size_t nkey = 0; nkey += 3; for (size_t t = ltag; t < htag; ++t) { const Tag &tag = skel.tags[t]; @@ -245,14 +260,13 @@ static void write_keys(const path_t &path, const Skeleton &skel, const size_t base = fixed(tag) ? tag.base : t, bver = static_cast(skel.finvers[base]); - if (history(tag)) nkey += tags[w * nver + bver].size(); + if (history(tag)) nkey += tags[bver].size(); ++nkey; } - } - // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags - key_t *keys = new key_t[nkey], *k = keys; - for (size_t w = 0; w < width; ++w) { + // keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags + key_t *keys = new key_t[nkey], *k = keys; + *k++ = to_le(static_cast(path.len())); *k++ = to_le(static_cast(matched)); *k++ = to_le(rule2key(rule, skel.def_rule)); @@ -265,7 +279,7 @@ static void write_keys(const path_t &path, const Skeleton &skel, DASSERT(!fixed(tag)); // variable-length tag const size_t tver = static_cast(skel.finvers[t]); - const std::vector &h = tags[w * nver + tver]; + const std::vector &h = tags[tver]; const size_t hlen = h.size(); // Abort if history length exceeds maximum value of key type. @@ -287,11 +301,11 @@ static void write_keys(const path_t &path, const Skeleton &skel, if (!fixed(tag)) { // variable-length tag const size_t tver = static_cast(skel.finvers[t]); - tval = tags[w * nver + tver].back(); + tval = tags[tver].back(); } else if (tag.base != Tag::RIGHTMOST) { // fixed-length tag based on another tag const size_t tver = static_cast(skel.finvers[tag.base]); - tval = tags[w * nver + tver].back(); + tval = tags[tver].back(); if (tval != Skeleton::DEFTAG) tval -= tag.dist; } else { // fixed-length tag based on cursor @@ -300,13 +314,12 @@ static void write_keys(const path_t &path, const Skeleton &skel, *k++ = to_le(static_cast(tval)); } } - } - // dump to file - fwrite(keys, sizeof(key_t), nkey, file); + // dump to file + fwrite(keys, sizeof(key_t), nkey, file); - delete[] tags; - delete[] keys; + delete[] keys; + } } template diff --git a/src/skeleton/skeleton.cc b/src/skeleton/skeleton.cc index 2c27fe2ec..4e4977ecf 100644 --- a/src/skeleton/skeleton.cc +++ b/src/skeleton/skeleton.cc @@ -67,6 +67,7 @@ Skeleton::Skeleton(const dfa_t &dfa, const opt_t *opts, const std::string &name, , rules(dfa.rules) , tags(dfa.tags) , finvers(dfa.finvers) + , tagvals(new std::vector[ntagver]) { // initialize nodes const size_t nil = nodes_count - 1; @@ -89,6 +90,7 @@ Skeleton::Skeleton(const dfa_t &dfa, const opt_t *opts, const std::string &name, Skeleton::~Skeleton() { + delete[] tagvals; delete[] nodes; } diff --git a/src/skeleton/skeleton.h b/src/skeleton/skeleton.h index 1542f8fd9..29583756d 100644 --- a/src/skeleton/skeleton.h +++ b/src/skeleton/skeleton.h @@ -86,6 +86,8 @@ struct Skeleton const std::vector &tags; const tagver_t *finvers; + std::vector *tagvals; + Skeleton(const dfa_t &dfa, const opt_t *opts, const std::string &name, const std::string &cond, const loc_t &loc, Msg &msg); ~Skeleton ();