批量推理功能说明
本文档介绍了 ReSpark TTS 模型的批量推理功能,该功能可以显著提高多个文本的语音合成效率。
使用方法
基本批量推理
from utilities import generate_embeddings_batch
from tts_batch_infer import generate_speech_batch
# Texts to synthesize; the whole list is handed to one batch call below.
texts = [
    "第一个要合成的文本。",
    "第二个要合成的文本。",
    "第三个要合成的文本。",
]

# Generate speech for all texts in a single call.
# NOTE(review): `model`, `tokenizer`, `audio_tokenizer`, `prompt_audio` and
# `device` are not defined in this snippet; they must be set up beforehand.
wavs = generate_speech_batch(
    model, tokenizer, texts, audio_tokenizer,
    prompt_text="提示文本",
    prompt_audio=prompt_audio,
    device=device,
)

# Write each result to its own file, named by its index in `wavs`.
# NOTE(review): `sf` is presumably the `soundfile` module and `sample_rate`
# the output sampling rate; neither is defined in this snippet - confirm.
for i, wav in enumerate(wavs):
    sf.write(f'output_{i}.wav', wav, sample_rate)