mrfakename committed
Commit 9766167 (verified) · 1 parent: c00ee74

Sync from GitHub repo


This Space is synced from the GitHub repo: https://github.com/SWivid/F5-TTS. Please submit contributions there.

README_REPO.md CHANGED
@@ -112,7 +112,7 @@ docker container run --rm -it --gpus=all --mount 'type=volume,source=f5-tts,targ
@@ -112,7 +112,7 @@ docker container run --rm -it --gpus=all --mount 'type=volume,source=f5-tts,targ
 Deployment solution with Triton and TensorRT-LLM.
 
 #### Benchmark Results
-Decoding on a single L20 GPU, using 26 different prompt_audio & target_text pairs.
+Decoding on a single L20 GPU, using 26 different prompt_audio & target_text pairs, 16 NFE.
 
 | Model | Concurrency | Avg Latency | RTF | Mode |
 |---------------------|----------------|-------------|--------|-----------------|
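In the table above (and its copy in the runtime README below), NFE is the number of function evaluations, i.e. sampling steps of the flow-matching ODE solver, and RTF is the real-time factor. A minimal sketch of RTF, assuming the standard definition (the helper name and numbers are illustrative, not from the repo):

```python
# Sketch only: assumes the table's RTF column uses the standard definition,
# processing time divided by the duration of the synthesized audio.
def real_time_factor(processing_seconds: float, audio_seconds: float) -> float:
    """RTF < 1 means synthesis runs faster than real time."""
    return processing_seconds / audio_seconds

print(real_time_factor(1.2, 6.0))  # e.g. 1.2 s to generate 6 s of speech -> 0.2
```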
pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "f5-tts"
-version = "1.1.1"
+version = "1.1.2"
 description = "F5-TTS: A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching"
 readme = "README.md"
 license = {text = "MIT License"}
src/f5_tts/runtime/triton_trtllm/README.md CHANGED
@@ -57,7 +57,7 @@ benchmark.py --output-dir $log_dir \
 ```
 
 ### Benchmark Results
-Decoding on a single L20 GPU, using 26 different prompt_audio/target_text pairs.
+Decoding on a single L20 GPU, using 26 different prompt_audio & target_text pairs, 16 NFE.
 
 | Model | Concurrency | Avg Latency | RTF | Mode |
 |---------------------|----------------|-------------|--------|-----------------|
src/f5_tts/runtime/triton_trtllm/benchmark.py CHANGED
@@ -168,7 +168,9 @@ def data_collator(batch, vocab_char_map, device="cuda", use_perf=False):
         ref_mel_list.append(ref_mel)
         ref_mel_len_list.append(ref_mel_len)
 
-        estimated_reference_target_mel_len.append(int(ref_mel.shape[0] * (1 + len(target_text) / len(prompt_text))))
+        estimated_reference_target_mel_len.append(
+            int(ref_mel.shape[0] * (1 + len(target_text.encode("utf-8")) / len(prompt_text.encode("utf-8"))))
+        )
 
     max_seq_len = max(estimated_reference_target_mel_len)
     ref_mel_batch = padded_mel_batch(ref_mel_list, max_seq_len)
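This hunk and the model.py hunk below apply the same fix: the combined reference-plus-target mel length is now estimated from UTF-8 byte lengths instead of character counts, which weights multi-byte scripts (e.g. CJK, three bytes per character) more heavily. A standalone sketch of the heuristic, with an illustrative function name:

```python
# Minimal sketch of the length heuristic used above (name is illustrative):
# scale the reference mel length by one plus the byte-length ratio of
# target text to prompt text.
def estimate_total_mel_len(ref_mel_frames: int, prompt_text: str, target_text: str) -> int:
    ratio = len(target_text.encode("utf-8")) / len(prompt_text.encode("utf-8"))
    return int(ref_mel_frames * (1 + ratio))
```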
src/f5_tts/runtime/triton_trtllm/model_repo_f5_tts/f5_tts/1/model.py CHANGED
@@ -219,7 +219,9 @@ class TritonPythonModel:
 
             reference_mel_len.append(mel_features.shape[1])
             estimated_reference_target_mel_len.append(
-                int(mel_features.shape[1] * (1 + len(target_text) / len(reference_text)))
+                int(
+                    mel_features.shape[1] * (1 + len(target_text.encode("utf-8")) / len(reference_text.encode("utf-8")))
+                )
             )
 
         max_seq_len = min(max(estimated_reference_target_mel_len), self.max_mel_len)
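A quick worked comparison with illustrative strings shows why bytes are the safer proxy for spoken duration when prompt and target use different scripts; note the Triton model still clamps the estimate to self.max_mel_len:

```python
prompt = "Hello world"  # 11 characters, 11 UTF-8 bytes
target = "你好世界"      # 4 characters, 12 UTF-8 bytes (3 bytes per CJK character)
ref_frames = 200

old = int(ref_frames * (1 + len(target) / len(prompt)))        # 272 frames: underestimates
new = int(ref_frames * (1 + len(target.encode("utf-8"))
                          / len(prompt.encode("utf-8"))))      # 418 frames
```

The four Chinese characters carry roughly as much speech as the eleven-character English prompt, so the byte-based estimate leaves enough mel frames where the character-based one would truncate.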