diff --git a/textsum_data_convert.py b/textsum_data_convert.py index 73f0397..7d40154 100644 --- a/textsum_data_convert.py +++ b/textsum_data_convert.py @@ -59,6 +59,8 @@ def _text_to_vocabulary(input_directories, vocabulary_filename, max_words=200000 with open(vocabulary_filename, 'w') as writer: for word, count in counter.most_common(max_words - 2): writer.write(word + ' ' + str(count) + '\n') + writer.write(' 0\n') + writer.write(' 0\n') writer.write(' 0\n') writer.write(' 0\n')