Skip to content

Commit

Permalink
Add random shuffling to dataset generation
Browse files (browse the repository at this point in the history)
  • Loading branch information
surmenok committed Feb 12, 2017
1 parent d8ac7a0 commit f5e4c59
Showing 1 changed file with 6 additions and 0 deletions.
6 changes: 6 additions & 0 deletions textsum_data_convert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -16,6 +16,10 @@
import tensorflow as tf
from tensorflow.core.example import example_pb2

from numpy.random import seed as random_seed
from numpy.random import shuffle as random_shuffle

# Seed NumPy's global random number generator with a fixed value so that the
# random_shuffle(filenames) call in _text_to_binary yields the same ordering
# (and therefore the same split across output files) on every run, given the
# same input file list.
random_seed(123) # Reproducibility

FLAGS = tf.app.flags.FLAGS
tf.app.flags.DEFINE_string('command', 'text_to_binary',
Expand All @@ -28,6 +32,8 @@
def _text_to_binary(input_directories, output_filenames, split_fractions):
filenames = _get_filenames(input_directories)

random_shuffle(filenames)

start_from_index = 0
for index, output_filename in enumerate(output_filenames):
sample_count = int(len(filenames) * split_fractions[index])
Expand Down

0 comments on commit f5e4c59

Please sign in to comment.