Spaces:
Running
on
Zero
Running
on
Zero
Sync from GitHub repo
Browse files
This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions to the Space there.
- app.py +3 -2
- src/f5_tts/infer/utils_infer.py +3 -1
app.py
CHANGED
@@ -42,6 +42,7 @@ from f5_tts.infer.utils_infer import (
|
|
42 |
preprocess_ref_audio_text,
|
43 |
remove_silence_for_generated_wav,
|
44 |
save_spectrogram,
|
|
|
45 |
)
|
46 |
from f5_tts.model import DiT, UNetT
|
47 |
|
@@ -190,7 +191,7 @@ def infer(
|
|
190 |
|
191 |
# Remove silence
|
192 |
if remove_silence:
|
193 |
-
with tempfile.NamedTemporaryFile(suffix=".wav",
|
194 |
temp_path = f.name
|
195 |
try:
|
196 |
sf.write(temp_path, final_wave, final_sample_rate)
|
@@ -201,7 +202,7 @@ def infer(
|
|
201 |
final_wave = final_wave.squeeze().cpu().numpy()
|
202 |
|
203 |
# Save the spectrogram
|
204 |
-
with tempfile.NamedTemporaryFile(suffix=".png",
|
205 |
spectrogram_path = tmp_spectrogram.name
|
206 |
save_spectrogram(combined_spectrogram, spectrogram_path)
|
207 |
|
|
|
42 |
preprocess_ref_audio_text,
|
43 |
remove_silence_for_generated_wav,
|
44 |
save_spectrogram,
|
45 |
+
tempfile_kwargs,
|
46 |
)
|
47 |
from f5_tts.model import DiT, UNetT
|
48 |
|
|
|
191 |
|
192 |
# Remove silence
|
193 |
if remove_silence:
|
194 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", **tempfile_kwargs) as f:
|
195 |
temp_path = f.name
|
196 |
try:
|
197 |
sf.write(temp_path, final_wave, final_sample_rate)
|
|
|
202 |
final_wave = final_wave.squeeze().cpu().numpy()
|
203 |
|
204 |
# Save the spectrogram
|
205 |
+
with tempfile.NamedTemporaryFile(suffix=".png", **tempfile_kwargs) as tmp_spectrogram:
|
206 |
spectrogram_path = tmp_spectrogram.name
|
207 |
save_spectrogram(combined_spectrogram, spectrogram_path)
|
208 |
|
src/f5_tts/infer/utils_infer.py
CHANGED
@@ -45,6 +45,8 @@ device = (
|
|
45 |
else "cpu"
|
46 |
)
|
47 |
|
|
|
|
|
48 |
# -----------------------------------------
|
49 |
|
50 |
target_sample_rate = 24000
|
@@ -306,7 +308,7 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, show_info=print):
|
|
306 |
ref_audio = _ref_audio_cache[audio_hash]
|
307 |
|
308 |
else: # first pass, do preprocess
|
309 |
-
with tempfile.NamedTemporaryFile(
|
310 |
temp_path = f.name
|
311 |
|
312 |
aseg = AudioSegment.from_file(ref_audio_orig)
|
|
|
45 |
else "cpu"
|
46 |
)
|
47 |
|
48 |
+
tempfile_kwargs = {"delete_on_close": False} if sys.version_info >= (3, 12) else {"delete": False}
|
49 |
+
|
50 |
# -----------------------------------------
|
51 |
|
52 |
target_sample_rate = 24000
|
|
|
308 |
ref_audio = _ref_audio_cache[audio_hash]
|
309 |
|
310 |
else: # first pass, do preprocess
|
311 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", **tempfile_kwargs) as f:
|
312 |
temp_path = f.name
|
313 |
|
314 |
aseg = AudioSegment.from_file(ref_audio_orig)
|