Skip to content

Commit

Permalink
Add sentence start and sentence end tokens
Browse files: browse the repository at this point in the history
  • Loading branch information
surmenok committed Feb 21, 2017
1 parent f5e4c59 commit 967b935
Showing 1 changed file with 2 additions and 0 deletions.
2 changes: 2 additions & 0 deletions textsum_data_convert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -59,6 +59,8 @@ def _text_to_vocabulary(input_directories, vocabulary_filename, max_words=200000
with open(vocabulary_filename, 'w') as writer:
for word, count in counter.most_common(max_words - 2):
writer.write(word + ' ' + str(count) + '\n')
writer.write('<s> 0\n')
writer.write('</s> 0\n')
writer.write('<UNK> 0\n')
writer.write('<PAD> 0\n')

Expand Down

0 comments on commit 967b935

Please sign in to comment.