diff --git a/make_data_files.py b/make_data_files.py index b22ff38..bea6465 100644 --- a/make_data_files.py +++ b/make_data_files.py @@ -56,8 +56,8 @@ def write_to_bin(article_path, abstract_path, out_file, vocab_counter = None): abstract = next(abstract_itr).strip() tf_example = example_pb2.Example() - tf_example.features.feature['article'].bytes_list.value.extend([article]) - tf_example.features.feature['abstract'].bytes_list.value.extend([abstract]) + tf_example.features.feature['article'].bytes_list.value.extend([bytes(article, 'utf-8')]) + tf_example.features.feature['abstract'].bytes_list.value.extend([bytes(abstract, 'utf-8')]) tf_example_str = tf_example.SerializeToString() str_len = len(tf_example_str) writer.write(struct.pack('q', str_len))