Skip to content

Commit

Permalink
Benchmarks (C/C++): remove deprecated TDFA(0) and StaDFA algorithms.
Browse files Browse the repository at this point in the history
For anyone interested, see the paper by Borsotti and Trafimovich
"A Closer Look at TDFA" (https://arxiv.org/abs/2206.01398). It has benchmark
results that cover the removed algorithms.
  • Loading branch information
skvadrik committed Dec 6, 2024
1 parent 4ff4851 commit 96e06c7
Show file tree
Hide file tree
Showing 217 changed files with 1,228 additions and 663,351 deletions.
22 changes: 0 additions & 22 deletions benchmarks/_engines/re2c/getre2c3.sh

This file was deleted.

65 changes: 18 additions & 47 deletions benchmarks/c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,62 +129,33 @@ endforeach()
file(MAKE_DIRECTORY "${ENG_DIR}/re2c/")
file(MAKE_DIRECTORY "${GEN_DIR}/re2c/")
set(RE2C "${CMAKE_BINARY_DIR}/re2c")
set(RE2C3 "${ENG_DIR}/re2c/re2c3")
add_custom_command(
OUTPUT "${RE2C3}"
COMMAND "${CMAKE_CURRENT_SOURCE_DIR}/../_engines/re2c/getre2c3.sh"
WORKING_DIRECTORY "${ENG_DIR}/re2c/"
)
set(RE2C_FLAGS "--reusable" "--tags" "--no-generation-date" "--no-version")
set(COMMON_RE2C
"${SRC_DIR}/re2c/common.re"
"${SRC_DIR}/re2c/include/fill.re"
"${SRC_DIR}/re2c/include/fill_email.re"
"${SRC_DIR}/re2c/include-eof/fill.re"
"${SRC_DIR}/re2c/include-eof/fill_email.re"
"${SRC_DIR}/re2c/include-padding/fill.re"
"${SRC_DIR}/re2c/include-padding/fill_email.re"
"${SRC_DIR}/re2c/include-eofrule/fill.re"
"${SRC_DIR}/re2c/include-eofrule/fill_email.re"
)
# deprecated algorithms were removed after re2c-3.0
set(DEPRECATED_ALGS "tdfa0" "stadfa")
# always regenerate re2c benchmarks
foreach(bench ${BENCHMARKS})
foreach(eof "" "-eof")
foreach(alg "tdfa1" "tdfa0" "stadfa")
foreach(eof "-padding" "-eofrule")
set(src_file "${SRC_DIR}/re2c/${bench}.re")
set(gen_file "${GEN_DIR}/re2c/${bench}${eof}-${alg}.c")
set(pregen_file "${PREGEN_DIR}/re2c/${bench}${eof}-${alg}.c")
# always regenerate re2c benchmarks, except for deprecated algorithms
if(RE2C_REGEN_BENCHMARKS OR NOT (alg IN_LIST DEPRECATED_ALGS))
# for deprecated algorithms use re2c-3.0
if(alg IN_LIST DEPRECATED_ALGS)
set(re2c_for_gen "${RE2C3}")
else()
set(re2c_for_gen "${RE2C}")
endif()
file(RELATIVE_PATH rel_src_file "${CMAKE_CURRENT_BINARY_DIR}" "${src_file}")
file(RELATIVE_PATH rel_gen_file "${CMAKE_CURRENT_BINARY_DIR}" "${gen_file}")
file(RELATIVE_PATH rel_inc_path "${CMAKE_CURRENT_BINARY_DIR}" "${SRC_DIR}/re2c/include${eof}")
set(re2c_flags ${RE2C_FLAGS} "-I" "${rel_inc_path}")
if("${alg}" STREQUAL "tdfa0")
set(re2c_flags ${re2c_flags} "--no-lookahead")
elseif("${alg}" STREQUAL "stadfa")
set(re2c_flags ${re2c_flags} "--stadfa")
endif()
add_custom_command(
OUTPUT "${gen_file}"
COMMAND "${re2c_for_gen}" ${re2c_flags} "${rel_src_file}" -o "${rel_gen_file}"
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${gen_file}" "${pregen_file}"
DEPENDS "${src_file}" ${COMMON_RE2C} "${re2c_for_gen}"
)
else()
add_custom_command(
OUTPUT "${gen_file}"
COMMAND "${CMAKE_COMMAND}" -E copy "${pregen_file}" "${gen_file}"
DEPENDS "${pregen_file}"
)
endif()
set(gen_file "${GEN_DIR}/re2c/${bench}${eof}.c")
set(pregen_file "${PREGEN_DIR}/re2c/${bench}${eof}.c")
file(RELATIVE_PATH rel_src_file "${CMAKE_CURRENT_BINARY_DIR}" "${src_file}")
file(RELATIVE_PATH rel_gen_file "${CMAKE_CURRENT_BINARY_DIR}" "${gen_file}")
file(RELATIVE_PATH rel_inc_path "${CMAKE_CURRENT_BINARY_DIR}" "${SRC_DIR}/re2c/include${eof}")
set(re2c_flags ${RE2C_FLAGS} "-I" "${rel_inc_path}")
add_custom_command(
OUTPUT "${gen_file}"
COMMAND "${RE2C}" ${re2c_flags} "${rel_src_file}" -o "${rel_gen_file}"
COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${gen_file}" "${pregen_file}"
DEPENDS "${src_file}" ${COMMON_RE2C} "${RE2C}"
)
list(APPEND GEN "${gen_file}")
endforeach()
endforeach()
endforeach()

# binaries
set(OBJ "")
Expand Down
72 changes: 18 additions & 54 deletions benchmarks/c/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,10 @@ DAT_DIR = data
CFLAGS = -O3 -I $(SRC_DIR)
RAGEL = $(ENG_DIR)/ragel/ragel7
KLEENEX = $(ENG_DIR)/kleenex/kexc
RE2C3 = $(ENG_DIR)/re2c/re2c3
RE2C = $(top_builddir)/re2c
RE2C_FLAGS_COMMON = --reusable --tags --no-generation-date --no-version
RE2C_FLAGS = $(RE2C_FLAGS_COMMON) -I $(SRC_DIR)/re2c/include
RE2C_FLAGS_EOF = $(RE2C_FLAGS_COMMON) -I $(SRC_DIR)/re2c/include-eof
RE2C_FLAGS_PADDING = $(RE2C_FLAGS_COMMON) -I $(SRC_DIR)/re2c/include-padding
RE2C_FLAGS_EOFRULE = $(RE2C_FLAGS_COMMON) -I $(SRC_DIR)/re2c/include-eofrule

BENCHMARKS = \
submatch_00__http_rfc7230 \
Expand Down Expand Up @@ -61,31 +60,21 @@ BIN_RAGEL = $(patsubst $(GEN_DIR)%.c, $(BIN_DIR)%, $(GEN_RAGEL))

COMMON_RAGEL = $(COMMON_SRC) $(SRC_DIR)/ragel/common.c

GEN_RE2C_TDFA1 = $(patsubst %, $(GEN_DIR)/re2c/%-tdfa1.c, $(BENCHMARKS))
GEN_RE2C_EOF_TDFA1 = $(patsubst %, $(GEN_DIR)/re2c/%-eof-tdfa1.c, $(BENCHMARKS))
GEN_RE2C_PADDING = $(patsubst %, $(GEN_DIR)/re2c/%-padding.c, $(BENCHMARKS))
GEN_RE2C_EOFRULE = $(patsubst %, $(GEN_DIR)/re2c/%-eofrule.c, $(BENCHMARKS))
GEN_RE2C = \
$(GEN_RE2C_TDFA1) \
$(GEN_RE2C_EOF_TDFA1)
$(GEN_RE2C_PADDING) \
$(GEN_RE2C_EOFRULE)

GEN_RE2C3_TDFA0 = $(patsubst %, $(GEN_DIR)/re2c/%-tdfa0.c, $(BENCHMARKS))
GEN_RE2C3_EOF_TDFA0 = $(patsubst %, $(GEN_DIR)/re2c/%-eof-tdfa0.c, $(BENCHMARKS))
GEN_RE2C3_STADFA = $(patsubst %, $(GEN_DIR)/re2c/%-stadfa.c, $(BENCHMARKS))
GEN_RE2C3_EOF_STADFA = $(patsubst %, $(GEN_DIR)/re2c/%-eof-stadfa.c, $(BENCHMARKS))
GEN_RE2C3 = \
$(GEN_RE2C_TDFA0) \
$(GEN_RE2C_EOF_TDFA0) \
$(GEN_RE2C3_STADFA) \
$(GEN_RE2C3_EOF_STADFA)

BIN_RE2C = $(patsubst $(GEN_DIR)%.c, $(BIN_DIR)%, $(GEN_RE2C) $(GEN_RE2C3))
BIN_RE2C = $(patsubst $(GEN_DIR)%.c, $(BIN_DIR)%, $(GEN_RE2C))

COMMON_RE2C = \
$(COMMON_SRC) \
$(SRC_DIR)/re2c/common.re \
$(SRC_DIR)/re2c/include/fill.re \
$(SRC_DIR)/re2c/include/fill_email.re \
$(SRC_DIR)/re2c/include-eof/fill.re \
$(SRC_DIR)/re2c/include-eof/fill_email.re
$(SRC_DIR)/re2c/include-padding/fill.re \
$(SRC_DIR)/re2c/include-padding/fill_email.re \
$(SRC_DIR)/re2c/include-eofrule/fill.re \
$(SRC_DIR)/re2c/include-eofrule/fill_email.re

# Masked benchmarks for which Kleenex either generates very large output
# (tens of megabytes of C code), or even causes out of memory condition.
Expand All @@ -100,7 +89,7 @@ GEN_KLEENEX = $(patsubst %, $(GEN_DIR)/kleenex/%.c, \

BIN_KLEENEX = $(patsubst $(GEN_DIR)%.c, $(BIN_DIR)%, $(GEN_KLEENEX))

GEN = $(GEN_RAGEL) $(GEN_RE2C) $(GEN_RE2C3) $(GEN_KLEENEX)
GEN = $(GEN_RAGEL) $(GEN_RE2C) $(GEN_KLEENEX)

BIN_ = $(BIN_RAGEL) $(BIN_RE2C) $(BIN_KLEENEX)
BIN_GCC = $(patsubst %, %-gcc, $(BIN_))
Expand All @@ -118,42 +107,21 @@ all-local: $(BIN) $(DAT)

# always regenerate re2c benchmarks

$(GEN_RE2C_TDFA1): $(GEN_DIR)/%-tdfa1.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C)
$(GEN_RE2C_PADDING): $(GEN_DIR)/%-padding.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RE2C) $(RE2C_FLAGS) $< -o $@
$(AM_V_GEN)$(RE2C) $(RE2C_FLAGS_PADDING) $< -o $@
$(AM_V_at)if ! cmp -s $@ $(PREGEN_DIR)/re2c/$(@F) ; then cp -f $@ $(PREGEN_DIR)/re2c/$(@F) ; fi

$(GEN_RE2C_EOF_TDFA1): $(GEN_DIR)/%-eof-tdfa1.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C)
$(GEN_RE2C_EOFRULE): $(GEN_DIR)/%-eofrule.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RE2C) $(RE2C_FLAGS_EOF) $< -o $@
$(AM_V_GEN)$(RE2C) $(RE2C_FLAGS_EOFRULE) $< -o $@
$(AM_V_at)if ! cmp -s $@ $(PREGEN_DIR)/re2c/$(@F) ; then cp -f $@ $(PREGEN_DIR)/re2c/$(@F) ; fi


# optionally regenerate ragel, kleenex and re2c-3.0 benchmarks
# (TDFA(0) and staDFA algorithms were removed in re2c versions after 3.0)
# optionally regenerate ragel benchmarks

if REGEN_BENCHMARKS

$(GEN_RE2C3_TDFA0): $(GEN_DIR)/%-tdfa0.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C3)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RE2C3) $(RE2C_FLAGS) --no-lookahead $< -o $@
$(AM_V_at)if ! cmp -s $@ $(PREGEN_DIR)/re2c/$(@F) ; then cp -f $@ $(PREGEN_DIR)/re2c/$(@F) ; fi

$(GEN_RE2C3_EOF_TDFA0): $(GEN_DIR)/%-eof-tdfa0.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C3)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RE2C3) $(RE2C_FLAGS_EOF) --no-lookahead $< -o $@
$(AM_V_at)if ! cmp -s $@ $(PREGEN_DIR)/re2c/$(@F) ; then cp -f $@ $(PREGEN_DIR)/re2c/$(@F) ; fi

$(GEN_RE2C3_STADFA): $(GEN_DIR)/%-stadfa.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C3)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RE2C3) $(RE2C_FLAGS) --stadfa $< -o $@
$(AM_V_at)if ! cmp -s $@ $(PREGEN_DIR)/re2c/$(@F) ; then cp -f $@ $(PREGEN_DIR)/re2c/$(@F) ; fi

$(GEN_RE2C3_EOF_STADFA): $(GEN_DIR)/%-eof-stadfa.c: $(SRC_DIR)/%.re $(COMMON_RE2C) $(RE2C3)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RE2C3) $(RE2C_FLAGS_EOF) --stadfa $< -o $@
$(AM_V_at)if ! cmp -s $@ $(PREGEN_DIR)/re2c/$(@F) ; then cp -f $@ $(PREGEN_DIR)/re2c/$(@F) ; fi

$(GEN_RAGEL): $(GEN_DIR)/%.c: $(SRC_DIR)/%.rl $(COMMON_RAGEL) $(RAGEL)
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)$(RAGEL) -G2 $< -o $@
Expand All @@ -171,7 +139,7 @@ $(GEN_KLEENEX): $(GEN_DIR)/%: $(PREGEN_DIR)/%

else

$(GEN_RAGEL) $(GEN_RE2C3) $(GEN_KLEENEX): $(GEN_DIR)/%: $(PREGEN_DIR)/%
$(GEN_RAGEL) $(GEN_KLEENEX): $(GEN_DIR)/%: $(PREGEN_DIR)/%
$(AM_V_at)mkdir -p $(@D)
$(AM_V_GEN)cp -f $< $@

Expand Down Expand Up @@ -199,9 +167,5 @@ $(RAGEL):
$(AM_V_at)mkdir -p $(@D) && cp $(srcdir)/../_engines/ragel/getragel7.sh $(@D)
$(AM_V_GEN)( cd $(@D) && ./getragel7.sh )

$(RE2C3):
$(AM_V_at)mkdir -p $(@D) && cp $(srcdir)/../_engines/re2c/getre2c3.sh $(@D)
$(AM_V_GEN)( cd $(@D) && ./getre2c3.sh )

clean-local:
$(AM_V_at)rm -f $(GEN) $(OBJ) $(BIN) $(DAT)
Loading

0 comments on commit 96e06c7

Please sign in to comment.