# coding=utf-8
# Copyright 2020 The TensorFlow Datasets Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""LibriTTS dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

import tensorflow.compat.v2 as tf

import tensorflow_datasets.public_api as tfds

_CITATION = """\
@inproceedings{zen2019libritts,
  title = {LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech},
  author = {H. Zen and V. Dang and R. Clark and Y. Zhang and R. J. Weiss and Y. Jia and Z. Chen and Y. Wu},
  booktitle = {Proc. Interspeech},
  month = sep,
  year = {2019},
  doi = {10.21437/Interspeech.2019-2441},
}
"""

_DESCRIPTION = """\
LibriTTS is a multi-speaker English corpus of approximately 585 hours of read
English speech at a 24kHz sampling rate, prepared by Heiga Zen with the
assistance of Google Speech and Google Brain team members. The LibriTTS corpus
is designed for TTS research. It is derived from the original materials (mp3
audio files from LibriVox and text files from Project Gutenberg) of the
LibriSpeech corpus. The main differences from the LibriSpeech corpus are
listed below:

1. The audio files are at a 24kHz sampling rate.
2. The speech is split at sentence breaks.
3. Both original and normalized texts are included.
4. Contextual information (e.g., neighbouring sentences) can be extracted.
5. Utterances with significant background noise are excluded.
"""

_URL = "http://www.openslr.org/60"
_DL_URL = "http://www.openslr.org/resources/60/"
_DL_URLS = {
    "dev_clean": _DL_URL + "dev-clean.tar.gz",
    "dev_other": _DL_URL + "dev-other.tar.gz",
    "test_clean": _DL_URL + "test-clean.tar.gz",
    "test_other": _DL_URL + "test-other.tar.gz",
    "train_clean100": _DL_URL + "train-clean-100.tar.gz",
    "train_clean360": _DL_URL + "train-clean-360.tar.gz",
    "train_other500": _DL_URL + "train-other-500.tar.gz",
}


class Libritts(tfds.core.BeamBasedBuilder):
  """LibriTTS dataset."""

  VERSION = tfds.core.Version("1.0.0")
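
  # A minimal usage sketch for the prepared dataset (assumes the standard TFDS
  # snake_case name derived from this class, "libritts", and that
  # `download_and_prepare` has already been run):
  #
  #   import tensorflow_datasets as tfds
  #   ds = tfds.load("libritts", split="train_clean100")
  #   for example in ds.take(1):
  #     audio = example["speech"]          # 1-D tensor of audio samples.
  #     text = example["text_normalized"]  # Normalized transcript string.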

  def _info(self):
    return tfds.core.DatasetInfo(
        builder=self,
        description=_DESCRIPTION,
        features=tfds.features.FeaturesDict({
            "speech": tfds.features.Audio(),
            "text_original": tfds.features.Text(),
            "text_normalized": tfds.features.Text(),
            "speaker_id": tf.int64,
            "chapter_id": tf.int64,
            "id": tf.string,
        }),
        supervised_keys=("text_normalized", "speech"),
        homepage=_URL,
        citation=_CITATION,
        metadata=tfds.core.MetadataDict(sample_rate=24000,),
    )

  def _populate_metadata(self, dirs):
    # All dirs contain the same metadata.
    directory = list(dirs.values())[0]

    speaker_info = {}
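    # speakers.tsv is expected to be tab-separated, with a single header row
    # followed by rows of the form (layout inferred from the parsing below):
    #   <reader id>\t<gender>\t<subset>\t<name>
    # where the trailing <name> field may be missing.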
    path = os.path.join(directory, "LibriTTS/speakers.tsv")
    with tf.io.gfile.GFile(path) as f:
      for n, line in enumerate(f):
        # Skip the first line which is just a header.
        if n == 0:
          continue
        fields = line.strip().split("\t")
        if len(fields) == 3:
          # Some lines are missing the final field, so leave it blank.
          fields.append("")
        id_str, gender, subset, name = fields
        speaker_info[int(id_str)] = {
            "gender": gender,
            "subset": subset,
            "name": name,
        }
    self.info.metadata["speakers"] = speaker_info

  def _split_generators(self, dl_manager):
    extracted_dirs = dl_manager.download_and_extract(_DL_URLS)
    self._populate_metadata(extracted_dirs)
    splits = [tfds.core.SplitGenerator(name=k, gen_kwargs={"directory": v})
              for k, v in extracted_dirs.items()]
    return splits

  def _build_pcollection(self, pipeline, directory):
    """Generates examples as dicts."""
    beam = tfds.core.lazy_imports.apache_beam
    return (pipeline
            | beam.Create([directory])
            | beam.FlatMap(_generate_libritts_examples)
            | beam.Reshuffle())
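
  # NOTE: generating this dataset requires Apache Beam. A rough local-run
  # sketch, assuming `tfds.download.DownloadConfig` accepts `beam_options`
  # (adjust to your TFDS version):
  #
  #   import apache_beam as beam
  #   import tensorflow_datasets as tfds
  #   dl_config = tfds.download.DownloadConfig(
  #       beam_options=beam.options.pipeline_options.PipelineOptions())
  #   tfds.builder("libritts").download_and_prepare(download_config=dl_config)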


def _generate_libritts_examples(directory):
  """Generate examples from a LibriTTS directory."""
  transcripts_glob = os.path.join(directory, "LibriTTS", "*/*/*/*.trans.tsv")
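  # Each *.trans.tsv row is expected to hold three tab-separated fields
  # (layout inferred from the parsing below):
  #   <key>\t<original text>\t<normalized text>
  # where <key> is "<speaker>_<chapter>_..." and also names the "<key>.wav"
  # audio file in the same directory.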
  for transcript_file in tf.io.gfile.glob(transcripts_glob):
    path = os.path.dirname(transcript_file)
    # `transcript_file` is already a full path returned by glob, so open it
    # directly rather than re-joining it with its own directory.
    with tf.io.gfile.GFile(transcript_file) as f:
      for line in f:
        key, text_original, text_normalized = line.strip().split("\t")
        audio_file = "%s.wav" % key
        speaker_id, chapter_id = [int(el) for el in key.split("_")[:2]]
        example = {
            "speech": os.path.join(path, audio_file),
            "text_normalized": text_normalized,
            "text_original": text_original,
            "speaker_id": speaker_id,
            "chapter_id": chapter_id,
            "id": key,
        }
        yield key, example