Add vocoder selection.

niknah · niknah · commit 8ebacb886f26 · 2025-01-30T23:38:36.000+11:00
#24
diff --git a/F5-TTS b/F5-TTS
@@ -1 +1 @@
-Subproject commit 8898d05e374bcb8d3fc0b1286037e95df61f491f
+Subproject commit bebbfbb91650c13574e1423b27769e3b398cba0b
diff --git a/F5TTS.py b/F5TTS.py
@@ -26,6 +26,8 @@
 )
 sys.path.pop()
 
+Install.check_install()
+
 
 class F5TTSCreate:
     voice_reg = re.compile(r"\{([^\}]+)\}")
@@ -299,7 +301,9 @@ def INPUT_TYPES(s):
                     "tooltip": F5TTSCreate.tooltip_seed,
                 }),
                 "model": (model_types,),
-                # "vocoder": (F5TTSCreate.vocoder_types,),
+                "vocoder": (F5TTSCreate.vocoder_types, {
+                    "tooltip": "Most models are usually vocos",
+                }),
             },
         }
 
@@ -342,9 +346,10 @@ def remove_wave_file(self):
                 print(e)
 
     def create(
-        self, sample_audio, sample_text, speech, seed=-1, model="F5"
+        self,
+        sample_audio, sample_text,
+        speech, seed=-1, model="F5", vocoder="vocos"
     ):
-        vocoder = "vocos"
         try:
             main_voice = self.load_voice_from_input(sample_audio, sample_text)
 
@@ -362,13 +367,14 @@ def create(
         return (audio, )
 
     @classmethod
-    def IS_CHANGED(s, sample_audio, sample_text, speech, seed, model):
+    def IS_CHANGED(s, sample_audio, sample_text, speech, seed, model, vocoder):
         m = hashlib.sha256()
         m.update(sample_text)
         m.update(sample_audio)
         m.update(speech)
         m.update(seed)
         m.update(model)
+        m.update(vocoder)
         return m.digest().hex()
 
 
@@ -404,7 +410,9 @@ def INPUT_TYPES(s):
                     "tooltip": F5TTSCreate.tooltip_seed,
                 }),
                 "model": (model_types,),
-                # "vocoder": (F5TTSCreate.vocoder_types,),
+                "vocoder": (F5TTSCreate.vocoder_types, {
+                    "tooltip": "Most models are usually vocos",
+                }),
             }
         }
 
@@ -463,8 +471,8 @@ def load_voices_from_files(self, sample, voice_names):
             voices[voice_name] = self.load_voice_from_file(sample_file)
         return voices
 
-    def create(self, sample, speech, seed=-2, model="F5"):
-        vocoder = "vocos"
+    def create(self, sample, speech, seed=-2, model="F5", vocoder="vocos"):
+        # vocoder = "vocos"
         # Install.check_install()
         main_voice = self.load_voice_from_file(sample)
 
@@ -488,7 +496,7 @@ def create(self, sample, speech, seed=-2, model="F5"):
         return (audio, )
 
     @classmethod
-    def IS_CHANGED(s, sample, speech, seed, model):
+    def IS_CHANGED(s, sample, speech, seed, model, vocoder):
         m = hashlib.sha256()
         audio_path = folder_paths.get_annotated_filepath(sample)
         audio_txt_path = F5TTSCreate.get_txt_file_path(audio_path)
@@ -500,4 +508,5 @@ def IS_CHANGED(s, sample, speech, seed, model):
         m.update(speech)
         m.update(seed)
         m.update(model)
+        m.update(vocoder)
         return m.digest().hex()
diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ You can use the examples here...
 * [Workflow with all features](examples/F5TTS-test-all.json)
 
 
-### Custom models...
+### Other languages / custom models...
 
 You can put the model & vocab txt files into "models/checkpoints/F5-TTS" folder if you have any more models.  Name the .txt vocab file and the .pt model file the same names.  Press "refresh" and it should appear under the "model" selection.
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "comfyui-f5-tts"
 description = "Text to speech with F5-TTS"
-version = "1.0.9"
+version = "1.0.10"
 license = {text = "MIT License"}
 
 [project.urls]