3
3
from .Install import Install
4
4
import subprocess
5
5
import wave
6
+ import torch
6
7
import torchaudio
7
8
import hashlib
8
9
import folder_paths
27
28
28
29
class F5TTSCreate :
29
30
voice_reg = re .compile (r"\{(\w+)\}" )
31
+ tooltip_seed = "Seed. -1 = random"
30
32
31
33
def is_voice_name(self, word):
    """Check whether *word* starts with a ``{name}`` voice tag.

    Surrounding whitespace is stripped before matching. Because
    ``re.match`` only anchors at the start, any text following the
    closing brace is ignored.

    Returns:
        The ``re.Match`` object (group 1 is the voice name) when the
        stripped word begins with a ``{name}`` tag, otherwise ``None``.
    """
    candidate = word.strip()
    return self.voice_reg.match(candidate)
@@ -68,7 +70,12 @@ def load_model(self):
68
70
ema_model = load_model (model_cls , model_cfg , ckpt_file , vocab_file )
69
71
return ema_model
70
72
71
- def generate_audio (self , voices , model_obj , chunks ):
73
+ def generate_audio (self , voices , model_obj , chunks , seed ):
74
+ if seed >= 0 :
75
+ torch .manual_seed (seed )
76
+ else :
77
+ torch .random .seed ()
78
+
72
79
frame_rate = 44100
73
80
generated_audio_segments = []
74
81
pbar = ProgressBar (len (chunks ))
@@ -110,9 +117,9 @@ def generate_audio(self, voices, model_obj, chunks):
110
117
os .unlink (wave_file .name )
111
118
return audio
112
119
113
def create(self, voices, chunks, seed=-1):
    """Load the model and generate audio for the given text chunks.

    Args:
        voices: Voice data passed straight through to
            ``generate_audio``.
        chunks: Text chunks to synthesize, passed straight through to
            ``generate_audio``.
        seed: Forwarded to ``generate_audio``, which seeds torch with
            it when it is >= 0 and otherwise asks torch for a fresh
            random seed. Defaults to -1 (random).

    Returns:
        Whatever ``generate_audio`` returns for these inputs.
    """
    return self.generate_audio(voices, self.load_model(), chunks, seed)
116
123
117
124
118
125
class F5TTSAudioInputs :
@@ -129,6 +136,11 @@ def INPUT_TYPES(s):
129
136
"multiline" : True ,
130
137
"default" : "This is what I want to say"
131
138
}),
139
+ "seed" : ("INT" , {
140
+ "display" : "number" , "step" : 1 ,
141
+ "default" : 1 , "min" : - 1 ,
142
+ "tooltip" : F5TTSCreate .tooltip_seed ,
143
+ }),
132
144
},
133
145
}
134
146
@@ -162,7 +174,7 @@ def remove_wave_file(self):
162
174
print ("F5TTS: Cannot remove? " + self .wave_file .name )
163
175
print (e )
164
176
165
- def create (self , sample_audio , sample_text , speech ):
177
+ def create (self , sample_audio , sample_text , speech , seed = - 1 ):
166
178
try :
167
179
main_voice = self .load_voice_from_input (sample_audio , sample_text )
168
180
@@ -172,17 +184,18 @@ def create(self, sample_audio, sample_text, speech):
172
184
chunks = f5ttsCreate .split_text (speech )
173
185
voices ['main' ] = main_voice
174
186
175
- audio = f5ttsCreate .create (voices , chunks )
187
+ audio = f5ttsCreate .create (voices , chunks , seed )
176
188
finally :
177
189
self .remove_wave_file ()
178
190
return (audio , )
179
191
180
192
@classmethod
def IS_CHANGED(s, sample_audio, sample_text, speech, seed=-1):
    """Return a cache key that changes whenever the node's inputs change.

    Args:
        sample_audio: Reference audio input. Bytes-like values are
            hashed directly; anything else is hashed through ``str()``.
            NOTE(review): if this is a dict holding a tensor, its
            ``str()`` may be a truncated summary - confirm that is
            precise enough for cache invalidation.
        sample_text: Transcript of the reference audio.
        speech: Text to synthesize.
        seed: Generation seed; a negative value means "random".

    Returns:
        A hex SHA-256 digest of the inputs, or ``float("NaN")`` when
        the seed is negative.
    """
    # A negative seed asks for a fresh random seed on every run, so the
    # output is not reproducible and must not be served from cache.
    # NaN never compares equal to itself, so the value always differs
    # from the previous one (presumably what the host uses to decide
    # on re-execution - confirm against ComfyUI's IS_CHANGED docs).
    if isinstance(seed, int) and seed < 0:
        return float("NaN")

    digest = hashlib.sha256()
    for value in (sample_text, sample_audio, speech, seed):
        # hashlib only accepts bytes-like input; str/int raise TypeError.
        if isinstance(value, (bytes, bytearray, memoryview)):
            payload = bytes(value)
        else:
            payload = str(value).encode("utf-8")
        digest.update(payload)
        # Delimit fields so ("ab", "c") and ("a", "bc") hash differently.
        digest.update(b"\x00")
    return digest.hexdigest()
187
200
188
201
@@ -215,6 +228,11 @@ def INPUT_TYPES(s):
215
228
"multiline" : True ,
216
229
"default" : "This is what I want to say"
217
230
}),
231
+ "seed" : ("INT" , {
232
+ "display" : "number" , "step" : 1 ,
233
+ "default" : 1 , "min" : - 1 ,
234
+ "tooltip" : F5TTSCreate .tooltip_seed ,
235
+ }),
218
236
}
219
237
}
220
238
@@ -271,7 +289,7 @@ def load_voices_from_files(self, sample, voice_names):
271
289
voices [voice_name ] = self .load_voice_from_file (sample_file )
272
290
return voices
273
291
274
- def create (self , sample , speech ):
292
+ def create (self , sample , speech , seed = - 1 ):
275
293
# Install.check_install()
276
294
main_voice = self .load_voice_from_file (sample )
277
295
@@ -291,11 +309,11 @@ def create(self, sample, speech):
291
309
voices = self .load_voices_from_files (sample , voice_names )
292
310
voices ['main' ] = main_voice
293
311
294
- audio = f5ttsCreate .create (voices , chunks )
312
+ audio = f5ttsCreate .create (voices , chunks , seed )
295
313
return (audio , )
296
314
297
315
@classmethod
298
- def IS_CHANGED (s , sample , speech ):
316
+ def IS_CHANGED (s , sample , speech , seed ):
299
317
m = hashlib .sha256 ()
300
318
audio_path = folder_paths .get_annotated_filepath (sample )
301
319
audio_txt_path = F5TTSAudio .get_txt_file_path (audio_path )
@@ -305,4 +323,5 @@ def IS_CHANGED(s, sample, speech):
305
323
m .update (str (last_modified_timestamp ))
306
324
m .update (str (txt_last_modified_timestamp ))
307
325
m .update (speech )
326
+ m .update (seed )
308
327
return m .digest ().hex ()
0 commit comments