Skip to content

Commit

Permalink
Restructure benchmark layout (prepare to add new benchmarks).
Browse files Browse the repository at this point in the history
Update pre-generated benchmark files (trivial line directive changes).

Disable regeneration of Kleenex files, as Kleenex is not actively maintained and
fails to build with recent versions of cabal and the Haskell ecosystem.
  • Loading branch information
skvadrik committed Dec 6, 2024
1 parent e5cb9a1 commit 4ff4851
Show file tree
Hide file tree
Showing 479 changed files with 95,184 additions and 54,823 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,20 +88,20 @@ jobs:
run: cmake --build .build

- name: Run submatch DFA AOT benchmark
working-directory: .build/benchmarks/submatch_dfa_aot
working-directory: .build/benchmarks/c
run: |
./run.py --quickverify \
&& ./run.py --repetitions 5 --output results.json \
&& cat results.json
- name: Run submatch DFA JIT benchmark
working-directory: .build/benchmarks/submatch_dfa_jit
run: ./bench_submatch_dfa_jit
working-directory: .build/benchmarks/c/libre2c/jit
run: ./bench_submatch_jit

- name: Run submatch Java benchmark (ReTdfa)
working-directory: .build/benchmarks/submatch_java
working-directory: .build/benchmarks/c/libre2c/java
run: ./run.py

- name: Run submatch NFA benchmark
working-directory: .build/benchmarks/submatch_nfa
working-directory: .build/benchmarks/c/libre2c/nfa
run: ./bench_submatch_nfa
4 changes: 2 additions & 2 deletions .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "benchmarks/submatch_java/REgen"]
path = benchmarks/submatch_java/REgen
[submodule "benchmarks/c/libre2c/java/REgen"]
path = benchmarks/c/libre2c/java/REgen
url = https://github.com/skvadrik/REgen.git
8 changes: 4 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -633,9 +633,9 @@ if (RE2C_BUILD_LIBS)
endif()

if(RE2C_BUILD_BENCHMARKS)
add_subdirectory(benchmarks/submatch_nfa)
add_subdirectory(benchmarks/submatch_dfa_jit)
add_subdirectory(benchmarks/submatch_java)
add_subdirectory(benchmarks/c/libre2c/nfa)
add_subdirectory(benchmarks/c/libre2c/jit)
add_subdirectory(benchmarks/c/libre2c/java)
endif()
else()
# empty check target
Expand All @@ -645,7 +645,7 @@ else()
endif()

if(RE2C_BUILD_BENCHMARKS)
add_subdirectory(benchmarks/submatch_dfa_aot)
add_subdirectory(benchmarks/c)
endif()

if(RE2C_BUILD_TESTS)
Expand Down
4 changes: 2 additions & 2 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -761,8 +761,8 @@ TESTS = \

# benchmarks
if WITH_BENCHMARKS
SUBDIRS += benchmarks/submatch_dfa_aot
EXTRA_DIST += benchmarks/submatch_dfa_aot
SUBDIRS += benchmarks/c
EXTRA_DIST += benchmarks/c
endif

# libre2c
Expand Down
12 changes: 6 additions & 6 deletions Makefile.lib.am
Original file line number Diff line number Diff line change
Expand Up @@ -169,12 +169,12 @@ check_PROGRAMS += test_libre2c

# benchmarks
if WITH_BENCHMARKS
SUBDIRS += benchmarks/submatch_nfa
SUBDIRS += benchmarks/submatch_dfa_jit
EXTRA_DIST += benchmarks/submatch_nfa
EXTRA_DIST += benchmarks/submatch_dfa_jit
SUBDIRS += benchmarks/c/libre2c/nfa
SUBDIRS += benchmarks/c/libre2c/jit
EXTRA_DIST += benchmarks/c/libre2c/nfa
EXTRA_DIST += benchmarks/c/libre2c/jit
if WITH_JAVA
SUBDIRS += benchmarks/submatch_java
EXTRA_DIST += benchmarks/submatch_java
SUBDIRS += benchmarks/c/libre2c/java
EXTRA_DIST += benchmarks/c/libre2c/java
endif # WITH_JAVA
endif # WITH_BENCHMARKS
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@
"""


VARIANTS = ['dfa_aot', 'dfa_jit', 'nfa']
VARIANTS = ['aot', 'jit', 'nfa']


def parse_args():
Expand Down Expand Up @@ -192,10 +192,10 @@ class Params:
def __init__(self, args):
self.barwidth = 0.08

if args.variant == 'dfa_aot':
self.override_params_dfa_aot()
elif args.variant == 'dfa_jit':
self.override_params_dfa_jit()
if args.variant == 'aot':
self.override_params_aot()
elif args.variant == 'jit':
self.override_params_jit()
elif args.variant == 'nfa':
self.override_params_nfa()

Expand All @@ -210,7 +210,7 @@ def __init__(self, args):
if args.relative_to: # override only if set
self.relative_to = args.relative_to

def override_params_dfa_aot(self):
def override_params_aot(self):
self.plotwidth = 1.5
self.xshift = 2 * self.plotwidth
self.header = """
Expand All @@ -225,7 +225,7 @@ def override_params_dfa_aot(self):
self.global_styles = ""
self.local_styles = plot_labels_and_title % 'left'

def override_params_dfa_jit(self):
def override_params_jit(self):
self.plotwidth = 2.7
self.xshift = self.plotwidth
self.header = """
Expand Down
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -1,42 +1,42 @@
set(BENCHMARKS
"00__http_rfc7230"
"01__http_simple"
"02__uri_rfc3986"
"03__uri_simple"
"04__apache_log"
"05__datetime"
"06__email"
"07__ipv4"
"submatch_00__http_rfc7230"
"submatch_01__http_simple"
"submatch_02__uri_rfc3986"
"submatch_03__uri_simple"
"submatch_04__apache_log"
"submatch_05__datetime"
"submatch_06__email"
"submatch_07__ipv4"

"10__alt1_2"
"11__alt1_4"
"12__alt1_8"
"13__alt2_2"
"14__alt2_4"
"15__alt2_8"
"16__alt4_2"
"17__alt4_4"
"18__alt4_8"
"submatch_10__alt1_2"
"submatch_11__alt1_4"
"submatch_12__alt1_8"
"submatch_13__alt2_2"
"submatch_14__alt2_4"
"submatch_15__alt2_8"
"submatch_16__alt4_2"
"submatch_17__alt4_4"
"submatch_18__alt4_8"

"20__cat2_0"
"21__cat2_4"
"22__cat2_8"
"23__cat4_0"
"24__cat4_2"
"25__cat4_4"
"26__cat8_0"
"27__cat8_1"
"28__cat8_2"
"submatch_20__cat2_0"
"submatch_21__cat2_4"
"submatch_22__cat2_8"
"submatch_23__cat4_0"
"submatch_24__cat4_2"
"submatch_25__cat4_4"
"submatch_26__cat8_0"
"submatch_27__cat8_1"
"submatch_28__cat8_2"

"30__rep_cat_5_3_2"
"31__rep_cat_13_11_7"
"32__rep_cat_23_19_17"
"33__rep_alt_5_3_2"
"34__rep_alt_13_11_7"
"35__rep_alt_23_19_17"
"36__rep_5_rep_3_rep_2"
"37__rep_13_rep_11_rep_7"
"38__rep_23_rep_19_rep_17"
"submatch_30__rep_cat_5_3_2"
"submatch_31__rep_cat_13_11_7"
"submatch_32__rep_cat_23_19_17"
"submatch_33__rep_alt_5_3_2"
"submatch_34__rep_alt_13_11_7"
"submatch_35__rep_alt_23_19_17"
"submatch_36__rep_5_rep_3_rep_2"
"submatch_37__rep_13_rep_11_rep_7"
"submatch_38__rep_23_rep_19_rep_17"
)

set(SRC_DIR "${CMAKE_CURRENT_SOURCE_DIR}/src")
Expand All @@ -53,16 +53,16 @@ file(MAKE_DIRECTORY "${GEN_DIR}/kleenex/")
set(KLEENEX "${ENG_DIR}/kleenex/kexc")
add_custom_command(
OUTPUT "${KLEENEX}"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/engines/kleenex/getkleenex.sh"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/../_engines/kleenex/getkleenex.sh"
WORKING_DIRECTORY "${ENG_DIR}/kleenex/"
)
# Masked benchmarks: those for which Kleenex either generates very large output
# (tens of megabytes of C code) or runs out of memory.
set(KLEENEX_MASKED
"34__rep_alt_13_11_7"
"35__rep_alt_23_19_17"
"37__rep_13_rep_11_rep_7"
"38__rep_23_rep_19_rep_17"
"submatch_34__rep_alt_13_11_7"
"submatch_35__rep_alt_23_19_17"
"submatch_37__rep_13_rep_11_rep_7"
"submatch_38__rep_23_rep_19_rep_17"
)
list(JOIN KLEENEX_MASKED "|" KLEENEX_MASKED_REGEXP)
set(KLEENEX_BENCHMARKS ${BENCHMARKS})
Expand All @@ -71,23 +71,24 @@ foreach(bench ${KLEENEX_BENCHMARKS})
set(src_file "${SRC_DIR}/kleenex/${bench}.kex")
set(gen_file "${GEN_DIR}/kleenex/${bench}.c")
set(pregen_file "${PREGEN_DIR}/kleenex/${bench}.c")
# optionally regenerate kleenex benchmarks if RE2C_REGEN_BENCHMARKS is set
if(RE2C_REGEN_BENCHMARKS)
file(RELATIVE_PATH rel_src_file "${CMAKE_CURRENT_BINARY_DIR}" "${src_file}")
file(RELATIVE_PATH rel_gen_file "${CMAKE_CURRENT_BINARY_DIR}" "${gen_file}")
add_custom_command(
OUTPUT "${gen_file}"
COMMAND "${KLEENEX}" compile "${rel_src_file}" --srcout "${rel_gen_file}" --act=false --la=true
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${gen_file}" "${pregen_file}"
DEPENDS "${src_file}" "${KLEENEX}"
)
else()
# Always copy the pre-generated Kleenex benchmarks instead of regenerating them:
# Kleenex is not actively developed and cannot be built with recent versions of
# cabal / the Haskell ecosystem. The regeneration branch is kept commented out.
# if(RE2C_REGEN_BENCHMARKS)
# file(RELATIVE_PATH rel_src_file "${CMAKE_CURRENT_BINARY_DIR}" "${src_file}")
# file(RELATIVE_PATH rel_gen_file "${CMAKE_CURRENT_BINARY_DIR}" "${gen_file}")
# add_custom_command(
# OUTPUT "${gen_file}"
# COMMAND "${KLEENEX}" compile "${rel_src_file}" --srcout "${rel_gen_file}" --act=false --la=true
# COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${gen_file}" "${pregen_file}"
# DEPENDS "${src_file}" "${KLEENEX}"
# )
# else()
add_custom_command(
OUTPUT "${gen_file}"
COMMAND "${CMAKE_COMMAND}" -E copy "${pregen_file}" "${gen_file}"
DEPENDS "${pregen_file}"
)
endif()
# endif()
list(APPEND GEN "${gen_file}")
endforeach()

Expand All @@ -97,7 +98,7 @@ file(MAKE_DIRECTORY "${GEN_DIR}/ragel/")
set(RAGEL "${ENG_DIR}/ragel/ragel7")
add_custom_command(
OUTPUT "${RAGEL}"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/engines/ragel/getragel7.sh"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/../_engines/ragel/getragel7.sh"
WORKING_DIRECTORY "${ENG_DIR}/ragel/"
)
foreach(bench ${BENCHMARKS})
Expand Down Expand Up @@ -131,7 +132,7 @@ set(RE2C "${CMAKE_BINARY_DIR}/re2c")
set(RE2C3 "${ENG_DIR}/re2c/re2c3")
add_custom_command(
OUTPUT "${RE2C3}"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/engines/re2c/getre2c3.sh"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/../_engines/re2c/getre2c3.sh"
WORKING_DIRECTORY "${ENG_DIR}/re2c/"
)
set(RE2C_FLAGS "--reusable" "--tags" "--no-generation-date" "--no-version")
Expand Down Expand Up @@ -220,13 +221,13 @@ endfunction()

# input data
set(DAT)
# Some benchmarks share the same data, like 03__uri_simple and 02__uri_rfc3986.
regex_replace_list("^[0-9]+__([^_]+).*" "\\1" INPUTS ${BENCHMARKS})
# Some benchmarks share the same input data (e.g. submatch_02__uri_rfc3986 and
# submatch_03__uri_simple both map to "uri"), so duplicate input names are removed below.
regex_replace_list("^submatch_[0-9]+__([^_]+).*" "\\1" INPUTS ${BENCHMARKS})
list(REMOVE_DUPLICATES INPUTS)
foreach(input ${INPUTS})
set(src_dir "${CMAKE_CURRENT_SOURCE_DIR}/data/${input}")
set(src_dir "${CMAKE_CURRENT_SOURCE_DIR}/../_data/${input}")
set(dst_dir "${CMAKE_CURRENT_BINARY_DIR}/data/${input}")
file(GLOB script "${CMAKE_CURRENT_SOURCE_DIR}/data/gen.py")
file(GLOB script "${CMAKE_CURRENT_SOURCE_DIR}/../_data/gen.py")
add_custom_command(
OUTPUT "${dst_dir}/small"
COMMAND "${CMAKE_COMMAND}" -E copy "${src_dir}/small" "${dst_dir}/small"
Expand All @@ -241,7 +242,7 @@ foreach(input ${INPUTS})
list(APPEND DAT "${dst_dir}/small" "${dst_dir}/big")
endforeach()

add_custom_target(bench_submatch_dfa_aot ALL DEPENDS ${BIN} ${DAT})
add_custom_target(bench_submatch_aot ALL DEPENDS ${BIN} ${DAT})

# benchmark scripts
set(srcdir "${CMAKE_CURRENT_SOURCE_DIR}")
Expand Down
Loading

0 comments on commit 4ff4851

Please sign in to comment.