Skip to content

Commit

Permalink
Skeleton: reorder loops to speed up data generation.
Browse files Browse the repository at this point in the history
Process each subpath of the multipath separately (instead of iterating on all
subpaths in lockstep) in order to reduce the size of storage used for the
intermediate tag values. Previously the required storage was a matrix of size
M x N, where M is the width of the multipath and N is the maximum tag variable
number (prior to tag optimizations). Matrix elements are vectors, because some
tags are m-tags (they need to record the full history, not just the last value).
Allocating and initializing the matrix takes a long time. Now the required
storage is a vector of length N. The same storage is reused for all subpaths.

This partially fixes #331.
  • Loading branch information
skvadrik committed Oct 30, 2020
1 parent 56e4654 commit e8cdf1d
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 65 deletions.
143 changes: 78 additions & 65 deletions src/skeleton/generate_data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -165,94 +165,108 @@ template<typename key_t>
static void write_keys(const path_t &path, const Skeleton &skel,
size_t width, FILE *file)
{
const size_t nver = skel.ntagver;
const size_t offby = skel.opts->lookahead ? 0 : 1;

// find last accepting node
size_t f;
for (f = path.len(); f > 0 && path.node(skel, f).rule == Rule::NONE; --f);

// calculate tags: start with default and apply commands step by step
const size_t
nver = skel.ntagver,
ntag = width * nver,
offby = skel.opts->lookahead ? 0 : 1;
std::vector<size_t> *tags = new std::vector<size_t>[ntag];
const size_t rule = path.node(skel, f).rule;

// initial tags (TDFA(0))
for (size_t w = 0; w < width; ++w) {
apply(&tags[w * nver], skel.cmd0, 0);
size_t ltag = 0, htag = 0, trail = 0;
if (rule != Rule::NONE) {
const Rule &r = skel.rules[rule];
ltag = r.ltag;
htag = r.htag;
trail = r.ttag;
}

// arc iterators and character iterators within each arc
std::vector<Node::wciter_t> arcs;
arcs.reserve(f);
std::vector<size_t> chars(f);
for (size_t i = 0; i < f; ++i) {
Node::wciter_t a(path.arc(skel, i));
arcs.push_back(Node::wciter_t(path.arc(skel, i)));
chars[i] = nsteps(arcs.back()->lower, arcs.back()->upper);
}

// process each subpath of the multipath separately (rather than iterate
// on all subpaths in lockstep) to reduce the size of storage used for
// intermediate tag values
for (size_t w = 0; w < width; ++w) {

// tags commands in state (staDFA), -1 because of "delayed store"
for (size_t w = 0; w < width; ++w) {
apply(&tags[w * nver], path.node(skel, i).stacmd, i - 1);
// clear buffers for tag values
std::vector<size_t> *tags = skel.tagvals;
for (size_t i = 0; i < nver; ++i) {
tags[i].clear();
}

// tag commands on transitions (TDFA(0), TDFA(1))
for (size_t w = 0; w < width; ++a) {
uint32_t n = nsteps(a->lower, a->upper);
for (; n --> 0 && w < width; ++w) {
apply(&tags[w * nver], a->cmd, i + offby);
// initial tags (TDFA(0))
apply(tags, skel.cmd0, 0);

for (size_t i = 0; i < f; ++i) {
// tags commands in state (staDFA), -1 because of "delayed store"
apply(tags, path.node(skel, i).stacmd, i - 1);

// tag commands on transitions (TDFA(0), TDFA(1))
Node::wciter_t &a = arcs[i];
apply(tags, a->cmd, i + offby);

// advance character iterator
// if it's the last one, then switch to the next arc
if (--chars[i] == 0) {
++a;
chars[i] = nsteps(a->lower, a->upper);
}
}
}
// tag commands in final states
const tcmd_t *fcmd = path.node(skel, f).cmd;
for (size_t w = 0; w < width; ++w) {

// tag commands in final states
const tcmd_t *fcmd = path.node(skel, f).cmd;

// staDFA, -1 because of "delayed store"
apply(&tags[w * nver], path.node(skel, f).stacmd, f - 1);
apply(tags, path.node(skel, f).stacmd, f - 1);

// TDFA(1)
apply(&tags[w * nver], fcmd, f);
}

const size_t rule = path.node(skel, f).rule;
size_t matched = 0, ltag = 0, htag = 0, trail = 0;
if (rule != Rule::NONE) {
apply(tags, fcmd, f);

const Rule &r = skel.rules[rule];
ltag = r.ltag;
htag = r.htag;
trail = r.ttag;

if (trail == htag) {
// no trailing context
matched = f;
} else {
const Tag &tag = skel.tags[trail];
if (!fixed(tag)) {
// variable-length trailing context
matched = tags[skel.finvers[trail]].back();
} else if (tag.base != Tag::RIGHTMOST) {
// fixed-length trailing context based on tag
matched = tags[skel.finvers[tag.base]].back() - tag.dist;
size_t matched = 0;
if (rule != Rule::NONE) {
if (trail == htag) {
// no trailing context
matched = f;
} else {
// fixed-length trailing context based on cursor
matched = f - tag.dist;
const Tag &tag = skel.tags[trail];
if (!fixed(tag)) {
// variable-length trailing context
matched = tags[skel.finvers[trail]].back();
} else if (tag.base != Tag::RIGHTMOST) {
// fixed-length trailing context based on tag
matched = tags[skel.finvers[tag.base]].back() - tag.dist;
} else {
// fixed-length trailing context based on cursor
matched = f - tag.dist;
}
}
DASSERT(matched != Skeleton::DEFTAG);
}
DASSERT(matched != Skeleton::DEFTAG);
}

// count keys
size_t nkey = 0;
for (size_t w = 0; w < width; ++w) {
// count keys
size_t nkey = 0;
nkey += 3;
for (size_t t = ltag; t < htag; ++t) {
const Tag &tag = skel.tags[t];
if (t == trail || fictive(tag)) continue;
const size_t
base = fixed(tag) ? tag.base : t,
bver = static_cast<size_t>(skel.finvers[base]);
if (history(tag)) nkey += tags[w * nver + bver].size();
if (history(tag)) nkey += tags[bver].size();
++nkey;
}
}

// keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags
key_t *keys = new key_t[nkey], *k = keys;
for (size_t w = 0; w < width; ++w) {
// keys: 1 - scanned length, 2 - matched length, 3 - matched rule, the rest - tags
key_t *keys = new key_t[nkey], *k = keys;

*k++ = to_le(static_cast<key_t>(path.len()));
*k++ = to_le(static_cast<key_t>(matched));
*k++ = to_le(rule2key<key_t>(rule, skel.def_rule));
Expand All @@ -265,7 +279,7 @@ static void write_keys(const path_t &path, const Skeleton &skel,
DASSERT(!fixed(tag));
// variable-length tag
const size_t tver = static_cast<size_t>(skel.finvers[t]);
const std::vector<size_t> &h = tags[w * nver + tver];
const std::vector<size_t> &h = tags[tver];
const size_t hlen = h.size();

// Abort if history length exceeds maximum value of key type.
Expand All @@ -287,11 +301,11 @@ static void write_keys(const path_t &path, const Skeleton &skel,
if (!fixed(tag)) {
// variable-length tag
const size_t tver = static_cast<size_t>(skel.finvers[t]);
tval = tags[w * nver + tver].back();
tval = tags[tver].back();
} else if (tag.base != Tag::RIGHTMOST) {
// fixed-length tag based on another tag
const size_t tver = static_cast<size_t>(skel.finvers[tag.base]);
tval = tags[w * nver + tver].back();
tval = tags[tver].back();
if (tval != Skeleton::DEFTAG) tval -= tag.dist;
} else {
// fixed-length tag based on cursor
Expand All @@ -300,13 +314,12 @@ static void write_keys(const path_t &path, const Skeleton &skel,
*k++ = to_le(static_cast<key_t>(tval));
}
}
}

// dump to file
fwrite(keys, sizeof(key_t), nkey, file);
// dump to file
fwrite(keys, sizeof(key_t), nkey, file);

delete[] tags;
delete[] keys;
delete[] keys;
}
}

template<typename cunit_t, typename key_t>
Expand Down
2 changes: 2 additions & 0 deletions src/skeleton/skeleton.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ Skeleton::Skeleton(const dfa_t &dfa, const opt_t *opts, const std::string &name,
, rules(dfa.rules)
, tags(dfa.tags)
, finvers(dfa.finvers)
, tagvals(new std::vector<size_t>[ntagver])
{
// initialize nodes
const size_t nil = nodes_count - 1;
Expand All @@ -89,6 +90,7 @@ Skeleton::Skeleton(const dfa_t &dfa, const opt_t *opts, const std::string &name,

// Destructor: frees the heap arrays held by the skeleton.
Skeleton::~Skeleton()
{
// buffers for tag values, allocated with new[] in the constructor's
// initializer list (one vector per tag version)
delete[] tagvals;
// node array; NOTE(review): its allocation is not visible in this hunk,
// presumably new[] in the constructor -- confirm
delete[] nodes;
}

Expand Down
2 changes: 2 additions & 0 deletions src/skeleton/skeleton.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ struct Skeleton
const std::vector<Tag> &tags;
const tagver_t *finvers;

std::vector<size_t> *tagvals;

Skeleton(const dfa_t &dfa, const opt_t *opts, const std::string &name,
const std::string &cond, const loc_t &loc, Msg &msg);
~Skeleton ();
Expand Down

0 comments on commit e8cdf1d

Please sign in to comment.