Skip to content

Commit 15c92de

Browse files
tomvdw authored and The TensorFlow Datasets Authors committed
Add method to the dataset builder base class to read a text file, which records lineage
PiperOrigin-RevId: 523331131
1 parent 4b8fe19 commit 15c92de

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

tensorflow_datasets/core/dataset_builder.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1540,6 +1540,12 @@ def _download_and_prepare( # pytype: disable=signature-mismatch # overriding-p
1540 1540
split_dict = splits_lib.SplitDict(split_infos)
1541 1541
self.info.set_splits(split_dict)
1542 1542

1543+
def read_text_file(self, filename: epath.PathLike) -> str:
1544+
"""Returns the text in the given file and records the lineage."""
1545+
filename = epath.Path(filename)
1546+
self.info.add_file_data_source_access(filename)
1547+
return filename.read_text()
1548+
1543 1549
def read_tfrecord_as_dataset(
1544 1550
self, filenames: Union[str, Sequence[str]]
1545 1551
) -> tf.data.Dataset:

0 commit comments

Comments
 (0)