Extra benchmark & note about the trie building
kmike committed Jul 30, 2012
1 parent 83af0e5 commit 9b08ebd
Showing 2 changed files with 20 additions and 16 deletions.
25 changes: 14 additions & 11 deletions README.rst
@@ -223,17 +223,20 @@ unicode)::
trie.keys(prefix="xxx"), NON_EXISTING: 1857.531K ops/sec
trie.values(prefix="xxx"), NON_EXISTING: 1822.818K ops/sec

Insert time is very slow compared to dict, this is the limitation
of double-array tries; updates are quite fast::

dict __setitem__ (updates): 3.489M ops/sec
trie __setitem__ (updates): 1.862M ops/sec
dict __setitem__ (inserts): 3.628M ops/sec
trie __setitem__ (inserts): 0.050M ops/sec
dict setdefault (updates): 2.575M ops/sec
trie setdefault (updates): 1.600M ops/sec
dict setdefault (inserts): 2.596M ops/sec
trie setdefault (inserts): 0.050M ops/sec
Random insert time is very slow compared to dict; this is a limitation
of double-array tries. Updates are quite fast. If you want to build a trie,
consider sorting the keys before insertion (a short sketch follows the numbers below)::

dict __setitem__ (updates): 3.489M ops/sec
trie __setitem__ (updates): 1.862M ops/sec
dict __setitem__ (inserts, random): 3.628M ops/sec
trie __setitem__ (inserts, random): 0.050M ops/sec
dict __setitem__ (inserts, sorted): 3.272M ops/sec
trie __setitem__ (inserts, sorted): 0.585M ops/sec
dict setdefault (updates): 2.575M ops/sec
trie setdefault (updates): 1.600M ops/sec
dict setdefault (inserts): 2.596M ops/sec
trie setdefault (inserts): 0.050M ops/sec
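
For reference, a minimal sketch of the sorted-insertion build pattern
(the ``ranges`` below cover lowercase ASCII only and the word list is
made up; adjust both to your own data, and remember that keys must be
unicode)::

    import datrie

    # Example alphabet: lowercase ASCII. Use ranges that cover the
    # characters your keys actually contain.
    trie = datrie.Trie(ranges=[(u'a', u'z')])

    words = [u'banana', u'apple', u'cherry']

    # Double-array tries handle keys inserted in sorted order much
    # faster than keys inserted in random order.
    for word in sorted(words):
        trie[word] = 1

    assert trie[u'apple'] == 1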

Other results (note that ``len(trie)`` is currently implemented
using trie traversal)::
11 changes: 6 additions & 5 deletions bench/speed.py
@@ -86,8 +86,9 @@ def benchmark():
('__contains__ (hits)', "for word in words: word in data", 'M ops/sec', 0.1, 3),
('__contains__ (misses)', "for word in NON_WORDS100k: word in data", 'M ops/sec', 0.1, 3),
('__len__', 'len(data)', ' ops/sec', 1, 1),
('__setitem__ (updates)', 'for word in words: data[word]=1', 'M ops/sec',0.1, 3),
('__setitem__ (inserts)', 'for word in NON_WORDS_10k: data[word]=1', 'M ops/sec',0.01, 3),
('__setitem__ (updates)', 'for word in words: data[word]=1', 'M ops/sec', 0.1, 3),
('__setitem__ (inserts, random)', 'for word in NON_WORDS_10k: data[word]=1', 'M ops/sec',0.01, 3),
('__setitem__ (inserts, sorted)', 'for word in words: empty_data[word]=1', 'M ops/sec', 0.1, 3),
('setdefault (updates)', 'for word in words: data.setdefault(word, 1)', 'M ops/sec', 0.1, 3),
('setdefault (inserts)', 'for word in NON_WORDS_10k: data.setdefault(word, 1)', 'M ops/sec', 0.01, 3),
('values()', 'list(data.values())', ' ops/sec', 1, 1),
@@ -96,14 +97,14 @@ def benchmark():
]

common_setup = """
from __main__ import create_trie, WORDS100k, NON_WORDS100k, MIXED_WORDS100k
from __main__ import create_trie, WORDS100k, NON_WORDS100k, MIXED_WORDS100k, datrie
from __main__ import PREFIXES_3_1k, PREFIXES_5_1k, PREFIXES_8_1k, PREFIXES_15_1k
words = WORDS100k
NON_WORDS_10k = NON_WORDS100k[:10000]
NON_WORDS_1k = ['ыва', 'xyz', 'соы', 'Axx', 'avы']*200
"""
dict_setup = common_setup + 'data = dict((word, 1) for word in words);'
trie_setup = common_setup + 'data = create_trie();'
dict_setup = common_setup + 'data = dict((word, 1) for word in words); empty_data=dict()'
trie_setup = common_setup + 'data = create_trie(); empty_data = datrie.Trie(ranges=[("\'", "\'"), ("A", "z"), ("А", "я")])'

for test_name, test, descr, op_count, repeats in tests:
t_dict = timeit.Timer(test, dict_setup)
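
The new ``(inserts, sorted)`` row reuses this harness. A standalone sketch of
the same sorted-versus-random comparison, using a made-up digits-only key set
instead of the benchmark's real word list, could look like this::

    import timeit

    setup = """
    import datrie, random
    words = [u'%04d' % i for i in range(10000)]   # toy key set, already sorted
    shuffled = list(words)
    random.shuffle(shuffled)
    """

    # Rebuild the trie inside each timed statement so every run measures
    # inserts into an empty trie rather than updates of existing keys.
    stmt = """
    trie = datrie.Trie(ranges=[(u'0', u'9')])
    for w in {order}: trie[w] = 1
    """

    sorted_time = min(timeit.repeat(stmt.format(order='words'), setup, repeat=3, number=1))
    random_time = min(timeit.repeat(stmt.format(order='shuffled'), setup, repeat=3, number=1))
    print('sorted: %.3fs  shuffled: %.3fs' % (sorted_time, random_time))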
