|
29 | 29 |
|
30 | 30 | class F5TTSCreate:
|
31 | 31 | voice_reg = re.compile(r"\{([^\}]+)\}")
|
32 |
| - model_types = ["F5", "F5-JP", "F5-FR", "E2"] |
| 32 | + model_types = ["F5", "F5-HI", "F5-JP", "F5-FR", "E2"] |
33 | 33 | vocoder_types = ["vocos", "bigvgan"]
|
34 | 34 | tooltip_seed = "Seed. -1 = random"
|
35 | 35 |
|
@@ -87,6 +87,7 @@ def load_voice(ref_audio, ref_text):
|
87 | 87 | def get_model_funcs(self):
|
88 | 88 | return {
|
89 | 89 | "F5": self.load_f5_model,
|
| 90 | + "F5-HI": self.load_f5_model_hi, |
90 | 91 | "F5-JP": self.load_f5_model_jp,
|
91 | 92 | "F5-FR": self.load_f5_model_fr,
|
92 | 93 | "E2": self.load_e2_model,
|
@@ -170,13 +171,29 @@ def cached_path(self, url):
|
170 | 171 | return None
|
171 | 172 | return str(cached_path(url)) # noqa E501
|
172 | 173 |
|
173 |
| - def load_f5_model_url(self, url, vocoder_name, vocab_url=None): |
174 |
| - vocoder = self.load_vocoder(vocoder_name) |
175 |
| - model_cls = DiT |
| 174 | + def load_f5_model_hi(self, vocoder): |
176 | 175 | model_cfg = dict(
|
177 |
| - dim=1024, depth=22, heads=16, |
| 176 | + dim=768, depth=18, heads=12, |
178 | 177 | ff_mult=2, text_dim=512, conv_layers=4
|
179 | 178 | )
|
| 179 | + return self.load_f5_model_url( |
| 180 | + "hf://SPRINGLab/F5-Hindi-24KHz/model_2500000.safetensors", |
| 181 | + "vocos", |
| 182 | + "hf://SPRINGLab/F5-Hindi-24KHz/vocab.txt", |
| 183 | + model_cfg=model_cfg, |
| 184 | + ) |
| 185 | + |
| 186 | + def load_f5_model_url( |
| 187 | + self, url, vocoder_name, vocab_url=None, model_cfg=None |
| 188 | + ): |
| 189 | + vocoder = self.load_vocoder(vocoder_name) |
| 190 | + model_cls = DiT |
| 191 | + if model_cfg is None: |
| 192 | + model_cfg = dict( |
| 193 | + dim=1024, depth=22, heads=16, |
| 194 | + ff_mult=2, text_dim=512, conv_layers=4 |
| 195 | + ) |
| 196 | + |
180 | 197 | ckpt_file = str(self.cached_path(url)) # noqa E501
|
181 | 198 |
|
182 | 199 | if vocab_url is None:
|
|
0 commit comments