yueyulin commited on
Commit
fc99023
·
verified ·
1 Parent(s): ee3b868

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. trained_190k_steps/.gitignore +16 -0
  3. trained_190k_steps/.vscode/launch.json +17 -0
  4. trained_190k_steps/BATCH_INFERENCE_README.md +30 -0
  5. trained_190k_steps/BiCodec/config.yaml +60 -0
  6. trained_190k_steps/BiCodec/model.safetensors +3 -0
  7. trained_190k_steps/Readme.md +130 -0
  8. trained_190k_steps/Readme_zh.md +130 -0
  9. trained_190k_steps/__init__.py +0 -0
  10. trained_190k_steps/__pycache__/spark_llm.cpython-311.pyc +0 -0
  11. trained_190k_steps/__pycache__/utilities.cpython-311.pyc +0 -0
  12. trained_190k_steps/added_tokens.json +3 -0
  13. trained_190k_steps/config.json +66 -0
  14. trained_190k_steps/config.yaml +7 -0
  15. trained_190k_steps/configuration_rwkv7.py +91 -0
  16. trained_190k_steps/generation_config.json +6 -0
  17. trained_190k_steps/hf_rwkv_tokenizer.py +280 -0
  18. trained_190k_steps/kafka.wav +3 -0
  19. trained_190k_steps/model.safetensors +3 -0
  20. trained_190k_steps/modeling_rwkvspeech.py +6 -0
  21. trained_190k_steps/output.wav +3 -0
  22. trained_190k_steps/output_0.wav +3 -0
  23. trained_190k_steps/output_1.wav +3 -0
  24. trained_190k_steps/output_10.wav +3 -0
  25. trained_190k_steps/output_2.wav +3 -0
  26. trained_190k_steps/output_3.wav +3 -0
  27. trained_190k_steps/output_4.wav +3 -0
  28. trained_190k_steps/output_5.wav +3 -0
  29. trained_190k_steps/output_6.wav +3 -0
  30. trained_190k_steps/output_7.wav +3 -0
  31. trained_190k_steps/output_8.wav +3 -0
  32. trained_190k_steps/output_9.wav +3 -0
  33. trained_190k_steps/rtf_test_results/rtf_test_results_20250714_103617.json +206 -0
  34. trained_190k_steps/rtf_test_results/rtf_test_results_20250714_104243.json +206 -0
  35. trained_190k_steps/rtf_test_results/rtf_test_results_20250714_104655.json +206 -0
  36. trained_190k_steps/rtf_test_results/rtf_test_results_20250714_104853.json +206 -0
  37. trained_190k_steps/rtf_test_results/test_001.wav +3 -0
  38. trained_190k_steps/rtf_test_results/test_002.wav +3 -0
  39. trained_190k_steps/rtf_test_results/test_003.wav +3 -0
  40. trained_190k_steps/rtf_test_results/test_004.wav +3 -0
  41. trained_190k_steps/rtf_test_results/test_005.wav +3 -0
  42. trained_190k_steps/rtf_test_results/test_006.wav +3 -0
  43. trained_190k_steps/rtf_test_results/test_007.wav +3 -0
  44. trained_190k_steps/rtf_test_results/test_008.wav +3 -0
  45. trained_190k_steps/rtf_test_results/test_009.wav +3 -0
  46. trained_190k_steps/rtf_test_results/test_010.wav +3 -0
  47. trained_190k_steps/rtf_test_results/test_011.wav +3 -0
  48. trained_190k_steps/rtf_test_results/test_012.wav +3 -0
  49. trained_190k_steps/rtf_test_results/test_013.wav +3 -0
  50. trained_190k_steps/rtf_test_results/test_014.wav +3 -0
.gitattributes CHANGED
@@ -48,3 +48,37 @@ trained_50_percents/output_6.wav filter=lfs diff=lfs merge=lfs -text
48
  trained_50_percents/output_7.wav filter=lfs diff=lfs merge=lfs -text
49
  trained_50_percents/output_8.wav filter=lfs diff=lfs merge=lfs -text
50
  trained_50_percents/output_9.wav filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  trained_50_percents/output_7.wav filter=lfs diff=lfs merge=lfs -text
49
  trained_50_percents/output_8.wav filter=lfs diff=lfs merge=lfs -text
50
  trained_50_percents/output_9.wav filter=lfs diff=lfs merge=lfs -text
51
+ trained_190k_steps/kafka.wav filter=lfs diff=lfs merge=lfs -text
52
+ trained_190k_steps/output.wav filter=lfs diff=lfs merge=lfs -text
53
+ trained_190k_steps/output_0.wav filter=lfs diff=lfs merge=lfs -text
54
+ trained_190k_steps/output_1.wav filter=lfs diff=lfs merge=lfs -text
55
+ trained_190k_steps/output_10.wav filter=lfs diff=lfs merge=lfs -text
56
+ trained_190k_steps/output_2.wav filter=lfs diff=lfs merge=lfs -text
57
+ trained_190k_steps/output_3.wav filter=lfs diff=lfs merge=lfs -text
58
+ trained_190k_steps/output_4.wav filter=lfs diff=lfs merge=lfs -text
59
+ trained_190k_steps/output_5.wav filter=lfs diff=lfs merge=lfs -text
60
+ trained_190k_steps/output_6.wav filter=lfs diff=lfs merge=lfs -text
61
+ trained_190k_steps/output_7.wav filter=lfs diff=lfs merge=lfs -text
62
+ trained_190k_steps/output_8.wav filter=lfs diff=lfs merge=lfs -text
63
+ trained_190k_steps/output_9.wav filter=lfs diff=lfs merge=lfs -text
64
+ trained_190k_steps/rtf_test_results/test_001.wav filter=lfs diff=lfs merge=lfs -text
65
+ trained_190k_steps/rtf_test_results/test_002.wav filter=lfs diff=lfs merge=lfs -text
66
+ trained_190k_steps/rtf_test_results/test_003.wav filter=lfs diff=lfs merge=lfs -text
67
+ trained_190k_steps/rtf_test_results/test_004.wav filter=lfs diff=lfs merge=lfs -text
68
+ trained_190k_steps/rtf_test_results/test_005.wav filter=lfs diff=lfs merge=lfs -text
69
+ trained_190k_steps/rtf_test_results/test_006.wav filter=lfs diff=lfs merge=lfs -text
70
+ trained_190k_steps/rtf_test_results/test_007.wav filter=lfs diff=lfs merge=lfs -text
71
+ trained_190k_steps/rtf_test_results/test_008.wav filter=lfs diff=lfs merge=lfs -text
72
+ trained_190k_steps/rtf_test_results/test_009.wav filter=lfs diff=lfs merge=lfs -text
73
+ trained_190k_steps/rtf_test_results/test_010.wav filter=lfs diff=lfs merge=lfs -text
74
+ trained_190k_steps/rtf_test_results/test_011.wav filter=lfs diff=lfs merge=lfs -text
75
+ trained_190k_steps/rtf_test_results/test_012.wav filter=lfs diff=lfs merge=lfs -text
76
+ trained_190k_steps/rtf_test_results/test_013.wav filter=lfs diff=lfs merge=lfs -text
77
+ trained_190k_steps/rtf_test_results/test_014.wav filter=lfs diff=lfs merge=lfs -text
78
+ trained_190k_steps/rtf_test_results/test_015.wav filter=lfs diff=lfs merge=lfs -text
79
+ trained_190k_steps/rtf_test_results/test_016.wav filter=lfs diff=lfs merge=lfs -text
80
+ trained_190k_steps/rtf_test_results/test_017.wav filter=lfs diff=lfs merge=lfs -text
81
+ trained_190k_steps/rtf_test_results/test_018.wav filter=lfs diff=lfs merge=lfs -text
82
+ trained_190k_steps/rtf_test_results/test_019.wav filter=lfs diff=lfs merge=lfs -text
83
+ trained_190k_steps/rtf_test_results/test_020.wav filter=lfs diff=lfs merge=lfs -text
84
+ trained_190k_steps/test.wav filter=lfs diff=lfs merge=lfs -text
trained_190k_steps/.gitignore ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python build artifacts
2
+ __pycache__/
3
+ *.pyc
4
+
5
+ # Environment variables
6
+ .env
7
+
8
+ # Virtual environment
9
+ venv/
10
+
11
+ # Model backups and outputs
12
+ model.fp32.safetensors
13
+ output.wav
14
+
15
+ # Temporary scripts
16
+ check_dtype.py
trained_190k_steps/.vscode/launch.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ // 使用 IntelliSense 了解相关属性。
3
+ // 悬停以查看现有属性的描述。
4
+ // 欲了解更多信息,请访问: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+
8
+ {
9
+ "name": "Python 调试程序: 当前文件",
10
+ "type": "debugpy",
11
+ "request": "launch",
12
+ "program": "${file}",
13
+ "console": "integratedTerminal",
14
+ "justMyCode": false
15
+ }
16
+ ]
17
+ }
trained_190k_steps/BATCH_INFERENCE_README.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 批量推理功能说明
2
+
3
+ 本文档介绍了 ReSpark TTS 模型的批量推理功能,该功能可以显著提高多个文本的语音合成效率。
4
+
5
+ ## 使用方法
6
+
7
+ ### 基本批量推理
8
+ ```python
9
+ from utilities import generate_embeddings_batch
10
+ from tts_batch_infer import generate_speech_batch
11
+
12
+ # 准备文本列表
13
+ texts = [
14
+ "第一个要合成的文本。",
15
+ "第二个要合成的文本。",
16
+ "第三个要合成的文本。"
17
+ ]
18
+
19
+ # 批量生成语音
20
+ wavs = generate_speech_batch(
21
+ model, tokenizer, texts, audio_tokenizer,
22
+ prompt_text="提示文本",
23
+ prompt_audio=prompt_audio,
24
+ device=device
25
+ )
26
+
27
+ # 保存音频文件
28
+ for i, wav in enumerate(wavs):
29
+ sf.write(f'output_{i}.wav', wav, sample_rate)
30
+ ```
trained_190k_steps/BiCodec/config.yaml ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ audio_tokenizer:
2
+ mel_params:
3
+ sample_rate: 16000
4
+ n_fft: 1024
5
+ win_length: 640
6
+ hop_length: 320
7
+ mel_fmin: 10
8
+ mel_fmax: null
9
+ num_mels: 128
10
+
11
+ encoder:
12
+ input_channels: 1024
13
+ vocos_dim: 384
14
+ vocos_intermediate_dim: 2048
15
+ vocos_num_layers: 12
16
+ out_channels: 1024
17
+ sample_ratios: [1,1]
18
+
19
+ decoder:
20
+ input_channel: 1024
21
+ channels: 1536
22
+ rates: [8, 5, 4, 2]
23
+ kernel_sizes: [16,11,8,4]
24
+
25
+ quantizer:
26
+ input_dim: 1024
27
+ codebook_size: 8192
28
+ codebook_dim: 8
29
+ commitment: 0.25
30
+ codebook_loss_weight: 2.0
31
+ use_l2_normlize: True
32
+ threshold_ema_dead_code: 0.2
33
+
34
+ speaker_encoder:
35
+ input_dim: 128
36
+ out_dim: 1024
37
+ latent_dim: 128
38
+ token_num: 32
39
+ fsq_levels: [4, 4, 4, 4, 4, 4]
40
+ fsq_num_quantizers: 1
41
+
42
+ prenet:
43
+ input_channels: 1024
44
+ vocos_dim: 384
45
+ vocos_intermediate_dim: 2048
46
+ vocos_num_layers: 12
47
+ out_channels: 1024
48
+ condition_dim: 1024
49
+ sample_ratios: [1,1]
50
+ use_tanh_at_final: False
51
+
52
+ postnet:
53
+ input_channels: 1024
54
+ vocos_dim: 384
55
+ vocos_intermediate_dim: 2048
56
+ vocos_num_layers: 6
57
+ out_channels: 1024
58
+ use_tanh_at_final: False
59
+
60
+
trained_190k_steps/BiCodec/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9940cd48d4446e4340ced82d234bf5618350dd9f5db900ebe47a4fdb03867ec
3
+ size 625518756
trained_190k_steps/Readme.md ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ # ReSpark TTS Model
6
+
7
+ This repository contains the ReSpark Text-to-Speech (TTS) model, a powerful and efficient model for generating high-quality speech from text. It is based on the RWKV architecture and utilizes the BiCodec tokenizer for audio processing.
8
+
9
+ ## Installation
10
+
11
+ First, install the required dependencies:
12
+
13
+ ```bash
14
+ pip install transformers rwkv-fla torch torchaudio torchvision soundfile numpy librosa omegaconf soxr einx
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ The `tts.py` script provides a complete example of how to use this model for text-to-speech synthesis with voice cloning.
20
+
21
+ ### Running the Test Script
22
+
23
+ To generate speech, simply run the script:
24
+
25
+ ```bash
26
+ python tts.py
27
+ ```
28
+
29
+ ### How it Works
30
+
31
+ The script performs the following steps:
32
+ 1. Loads the pre-trained `AutoModelForCausalLM` and `AutoTokenizer` from the current directory.
33
+ 2. Initializes the `BiCodecTokenizer` for audio encoding and decoding.
34
+ 3. Loads a reference audio file (`kafka.wav`) and its corresponding transcript (`prompt_text`) to provide a voice prompt.
35
+ 4. Resamples the reference audio to match the model's expected sample rate (24000 Hz).
36
+ 5. Takes a target text (`text`) to be synthesized.
37
+ 6. Calls the `generate_speech` function, which generates audio based on the target text and the voice from the reference audio.
38
+ 7. Saves the generated audio to `output.wav`.
39
+
40
+ You can modify the `prompt_text`, `prompt_audio_file`, and `text` variables in `tts.py` to synthesize different text with different voices.
41
+
42
+ ### Example Code (`tts.py`)
43
+
44
+ ```python
45
+ import os
46
+ import sys
47
+ current_dir = os.path.dirname(os.path.abspath(__file__))
48
+ print('add current dir to sys.path', current_dir)
49
+ sys.path.append(current_dir)
50
+ from sparktts.models.audio_tokenizer import BiCodecTokenizer
51
+ from transformers import AutoTokenizer, AutoModelForCausalLM
52
+ import soundfile as sf
53
+ import numpy as np
54
+ import torch
55
+ from utilities import generate_embeddings
56
+
57
+ def generate_speech(model, tokenizer, text, bicodec, prompt_text=None, prompt_audio=None,
58
+ max_new_tokens=3000, do_sample=True, top_k=50, top_p=0.95,
59
+ temperature=1.0, device="cuda:0"):
60
+ """
61
+ Function to generate speech.
62
+ """
63
+ eos_token_id = model.config.vocab_size - 1
64
+
65
+ embeddings = generate_embeddings(
66
+ model=model,
67
+ tokenizer=tokenizer,
68
+ text=text,
69
+ bicodec=bicodec,
70
+ prompt_text=prompt_text,
71
+ prompt_audio=prompt_audio
72
+ )
73
+
74
+ global_tokens = embeddings['global_tokens'].unsqueeze(0)
75
+ model.eval()
76
+
77
+ with torch.no_grad():
78
+ generated_outputs = model.generate(
79
+ inputs_embeds=embeddings['input_embs'],
80
+ attention_mask=torch.ones((1, embeddings['input_embs'].shape[1]),dtype=torch.long,device=device),
81
+ max_new_tokens=max_new_tokens,
82
+ do_sample=do_sample,
83
+ top_k=top_k,
84
+ top_p=top_p,
85
+ temperature=temperature,
86
+ eos_token_id=eos_token_id,
87
+ pad_token_id=tokenizer.pad_token_id if hasattr(tokenizer, 'pad_token_id') else tokenizer.eos_token_id,
88
+ use_cache=True
89
+ )
90
+
91
+ semantic_tokens_tensor = generated_outputs[:,:-1]
92
+
93
+ with torch.no_grad():
94
+ wav = bicodec.detokenize(global_tokens, semantic_tokens_tensor)
95
+
96
+ return wav
97
+
98
+ # --- Main execution ---
99
+ device = 'cuda:0'
100
+
101
+ # Initialize tokenizers and model
102
+ audio_tokenizer = BiCodecTokenizer(model_dir=current_dir, device=device)
103
+ tokenizer = AutoTokenizer.from_pretrained(current_dir, trust_remote_code=True)
104
+ model = AutoModelForCausalLM.from_pretrained(current_dir, trust_remote_code=True)
105
+
106
+ model = model.bfloat16().to(device)
107
+ model.eval()
108
+
109
+ # Prepare prompt audio and text for voice cloning
110
+ prompt_text = "我们并不是通过物理移动手段找到星河的。"
111
+ prompt_audio_file = os.path.join(current_dir, 'kafka.wav')
112
+ prompt_audio, sampling_rate = sf.read(prompt_audio_file)
113
+
114
+ # Resample audio if necessary
115
+ target_sample_rate = audio_tokenizer.config['sample_rate']
116
+ if sampling_rate != target_sample_rate:
117
+ from librosa import resample
118
+ prompt_audio = resample(prompt_audio, orig_sr=sampling_rate, target_sr=target_sample_rate)
119
+ prompt_audio = np.array(prompt_audio, dtype=np.float32)
120
+
121
+ # Text to synthesize
122
+ text = "科学技术是第一生产力,最近 AI的迅猛发展让我们看到了迈向星辰大海的希望。"
123
+
124
+ # Generate speech
125
+ wav = generate_speech(model, tokenizer, text, audio_tokenizer, prompt_audio=prompt_audio, device=device)
126
+
127
+ # Save the output
128
+ sf.write('output.wav', wav, target_sample_rate)
129
+ print("Generated audio saved to output.wav")
130
+ ```
trained_190k_steps/Readme_zh.md ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+
5
+ # ReSpark TTS 模型
6
+
7
+ 本仓库包含 ReSpark 文本转语音 (TTS) 模型,这是一个强大而高效的模型,可以从文本生成高质量的语音。它基于 RWKV 架构,并利用 BiCodec-Tokenizer 进行音频处理。
8
+
9
+ ## 安装
10
+
11
+ 首先,请安装所需的依赖库:
12
+
13
+ ```bash
14
+ pip install transformers rwkv-fla torch torchaudio torchvision soundfile numpy librosa omegaconf soxr einx
15
+ ```
16
+
17
+ ## 使用方法
18
+
19
+ `tts.py` 脚本提供了一个完整的使用该模型进行文本转语音合成(带声音克隆功能)的示例。
20
+
21
+ ### 运行测试脚本
22
+
23
+ 要生成语音,只需运行以下脚本:
24
+
25
+ ```bash
26
+ python tts.py
27
+ ```
28
+
29
+ ### 工作原理
30
+
31
+ 该脚本执行以下步骤:
32
+ 1. 从当前目录加载预训练的 `AutoModelForCausalLM` 和 `AutoTokenizer`。
33
+ 2. 初始化用于音频编码和解码的 `BiCodecTokenizer`。
34
+ 3. 加载一个参考音频文件 (`kafka.wav`) 及其对应的文本 (`prompt_text`) 以提供声音提示(voice prompt)。
35
+ 4. 如果需要,将参考音频重采样以匹配模型期望的采样率 (24000 Hz)。
36
+ 5. 指定一个需要被合成的目标文本 (`text`)。
37
+ 6. 调用 `generate_speech` 函数,该函数会根据目标文本和参考音频中的声音生成音频。
38
+ 7. 将生成的音频保存到 `output.wav`。
39
+
40
+ 您可以修改 `tts.py` 文件中的 `prompt_text`、`prompt_audio_file` 和 `text` 变量,以使用不同的声音合成不同的文本。
41
+
42
+ ### 示例代码 (`tts.py`)
43
+
44
+ ```python
45
+ import os
46
+ import sys
47
+ current_dir = os.path.dirname(os.path.abspath(__file__))
48
+ print('add current dir to sys.path', current_dir)
49
+ sys.path.append(current_dir)
50
+ from sparktts.models.audio_tokenizer import BiCodecTokenizer
51
+ from transformers import AutoTokenizer, AutoModelForCausalLM
52
+ import soundfile as sf
53
+ import numpy as np
54
+ import torch
55
+ from utilities import generate_embeddings
56
+
57
+ def generate_speech(model, tokenizer, text, bicodec, prompt_text=None, prompt_audio=None,
58
+ max_new_tokens=3000, do_sample=True, top_k=50, top_p=0.95,
59
+ temperature=1.0, device="cuda:0"):
60
+ """
61
+ 生成语音的函数
62
+ """
63
+ eos_token_id = model.config.vocab_size - 1
64
+
65
+ embeddings = generate_embeddings(
66
+ model=model,
67
+ tokenizer=tokenizer,
68
+ text=text,
69
+ bicodec=bicodec,
70
+ prompt_text=prompt_text,
71
+ prompt_audio=prompt_audio
72
+ )
73
+
74
+ global_tokens = embeddings['global_tokens'].unsqueeze(0)
75
+ model.eval()
76
+
77
+ with torch.no_grad():
78
+ generated_outputs = model.generate(
79
+ inputs_embeds=embeddings['input_embs'],
80
+ attention_mask=torch.ones((1, embeddings['input_embs'].shape[1]),dtype=torch.long,device=device),
81
+ max_new_tokens=max_new_tokens,
82
+ do_sample=do_sample,
83
+ top_k=top_k,
84
+ top_p=top_p,
85
+ temperature=temperature,
86
+ eos_token_id=eos_token_id,
87
+ pad_token_id=tokenizer.pad_token_id if hasattr(tokenizer, 'pad_token_id') else tokenizer.eos_token_id,
88
+ use_cache=True
89
+ )
90
+
91
+ semantic_tokens_tensor = generated_outputs[:,:-1]
92
+
93
+ with torch.no_grad():
94
+ wav = bicodec.detokenize(global_tokens, semantic_tokens_tensor)
95
+
96
+ return wav
97
+
98
+ # --- 主程序 ---
99
+ device = 'cuda:0'
100
+
101
+ # 初始化分词器和模型
102
+ audio_tokenizer = BiCodecTokenizer(model_dir=current_dir, device=device)
103
+ tokenizer = AutoTokenizer.from_pretrained(current_dir, trust_remote_code=True)
104
+ model = AutoModelForCausalLM.from_pretrained(current_dir, trust_remote_code=True)
105
+
106
+ model = model.bfloat16().to(device)
107
+ model.eval()
108
+
109
+ # 准备用于声音克隆的提示音频和文本
110
+ prompt_text = "我们并不是通过物理移动手段找到星河的。"
111
+ prompt_audio_file = os.path.join(current_dir, 'kafka.wav')
112
+ prompt_audio, sampling_rate = sf.read(prompt_audio_file)
113
+
114
+ # 如果需要,重采样音频
115
+ target_sample_rate = audio_tokenizer.config['sample_rate']
116
+ if sampling_rate != target_sample_rate:
117
+ from librosa import resample
118
+ prompt_audio = resample(prompt_audio, orig_sr=sampling_rate, target_sr=target_sample_rate)
119
+ prompt_audio = np.array(prompt_audio, dtype=np.float32)
120
+
121
+ # 要合成的文本
122
+ text = "科学技术是第一生产力,最近 AI的迅猛发展让我们看到了迈向星辰大海的希望。"
123
+
124
+ # 生成语音
125
+ wav = generate_speech(model, tokenizer, text, audio_tokenizer, prompt_audio=prompt_audio, device=device)
126
+
127
+ # 保存输出
128
+ sf.write('output.wav', wav, target_sample_rate)
129
+ print("生成的音频已保存到 output.wav")
130
+ ```
trained_190k_steps/__init__.py ADDED
File without changes
trained_190k_steps/__pycache__/spark_llm.cpython-311.pyc ADDED
Binary file (10.6 kB). View file
 
trained_190k_steps/__pycache__/utilities.cpython-311.pyc ADDED
Binary file (20.5 kB). View file
 
trained_190k_steps/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "<|rwkv_tokenizer_end_of_text|>": 0
3
+ }
trained_190k_steps/config.json ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "a_low_rank_dim": 64,
3
+ "architectures": [
4
+ "RWKV7ForSpeech"
5
+ ],
6
+ "attn": null,
7
+ "attn_mode": "chunk",
8
+ "audio_global_vocab_size": 4096,
9
+ "auto_map": {
10
+ "AutoConfig": "modeling_rwkvspeech.RWKV7SpeechConfig",
11
+ "AutoModel": "modeling_rwkvspeech.RWKV7Model",
12
+ "AutoModelForCausalLM": "modeling_rwkvspeech.RWKV7ForSpeech"
13
+ },
14
+ "bos_token_id": 0,
15
+ "decay_low_rank_dim": 64,
16
+ "eos_token_id": 0,
17
+ "fuse_cross_entropy": true,
18
+ "fuse_norm": false,
19
+ "gate_low_rank_dim": 128,
20
+ "head_dim": 64,
21
+ "hidden_act": "sqrelu",
22
+ "hidden_ratio": 4.0,
23
+ "hidden_size": 1024,
24
+ "initializer_range": 0.006,
25
+ "intermediate_size": 4096,
26
+ "max_position_embeddings": 2048,
27
+ "model_type": "rwkv7",
28
+ "norm_bias": true,
29
+ "norm_eps": 1e-05,
30
+ "norm_first": true,
31
+ "num_heads": 32,
32
+ "num_hidden_layers": 24,
33
+ "text_vocab_size": 65536,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.52.4",
37
+ "use_cache": true,
38
+ "v_low_rank_dim": 32,
39
+ "value_dim": [
40
+ 1024,
41
+ 1024,
42
+ 1024,
43
+ 1024,
44
+ 1024,
45
+ 1024,
46
+ 1024,
47
+ 1024,
48
+ 1024,
49
+ 1024,
50
+ 1024,
51
+ 1024,
52
+ 1024,
53
+ 1024,
54
+ 1024,
55
+ 1024,
56
+ 1024,
57
+ 1024,
58
+ 1024,
59
+ 1024,
60
+ 1024,
61
+ 1024,
62
+ 1024,
63
+ 1024
64
+ ],
65
+ "vocab_size": 8193
66
+ }
trained_190k_steps/config.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ highpass_cutoff_freq: 40
2
+ sample_rate: 16000
3
+ segment_duration: 2.4 # (s)
4
+ max_val_duration: 12 # (s)
5
+ latent_hop_length: 320
6
+ ref_segment_duration: 6
7
+ volume_normalize: true
trained_190k_steps/configuration_rwkv7.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+
3
+ from typing import Dict, Optional
4
+
5
+ from transformers.configuration_utils import PretrainedConfig
6
+
7
+
8
+ class RWKV7Config(PretrainedConfig):
9
+
10
+ model_type = 'rwkv7'
11
+ keys_to_ignore_at_inference = ['past_key_values']
12
+
13
+ def __init__(
14
+ self,
15
+ attn_mode: str = "chunk",
16
+ hidden_size: int = 2048,
17
+ hidden_ratio: Optional[int] = 4,
18
+ intermediate_size: Optional[int] = None,
19
+ num_hidden_layers: int = 24,
20
+ head_dim: Optional[int] = 64,
21
+ num_heads: Optional[int] = None,
22
+ decay_low_rank_dim: int = 64,
23
+ gate_low_rank_dim: int = 128,
24
+ a_low_rank_dim: int = 64,
25
+ v_low_rank_dim: int = 16,
26
+ hidden_act: str = "sqrelu",
27
+ max_position_embeddings: int = 2048,
28
+ norm_first: bool = True,
29
+ norm_bias: bool = True,
30
+ norm_eps: float = 1e-5,
31
+ attn: Optional[Dict] = None,
32
+ use_cache: bool = True,
33
+ pad_token_id: int = None,
34
+ bos_token_id: int = 1,
35
+ eos_token_id: int = 2,
36
+ tie_word_embeddings: bool = False,
37
+ initializer_range: float = 0.006,
38
+ fuse_norm: bool = True,
39
+ fuse_cross_entropy: bool = True,
40
+ vocab_size: int = 32000,
41
+ **kwargs
42
+ ):
43
+ self.attn_mode = attn_mode
44
+ self.hidden_size = hidden_size
45
+ self.hidden_ratio = hidden_ratio
46
+ self.intermediate_size = intermediate_size
47
+ self.norm_first = norm_first
48
+ self.num_hidden_layers = num_hidden_layers
49
+
50
+ if head_dim is None and num_heads is not None:
51
+ head_dim = int(hidden_size // num_heads)
52
+ elif head_dim is not None and num_heads is None:
53
+ num_heads = int(hidden_size // head_dim)
54
+
55
+ self.head_dim = head_dim
56
+ self.num_heads = num_heads
57
+
58
+ self.decay_low_rank_dim = decay_low_rank_dim
59
+ self.gate_low_rank_dim = gate_low_rank_dim
60
+ self.a_low_rank_dim = a_low_rank_dim
61
+ self.v_low_rank_dim = v_low_rank_dim
62
+ self.hidden_act = hidden_act
63
+ self.max_position_embeddings = max_position_embeddings
64
+ self.norm_bias = norm_bias
65
+ self.norm_eps = norm_eps
66
+ self.attn = attn
67
+ self.use_cache = use_cache
68
+ self.initializer_range = initializer_range
69
+ self.fuse_norm = fuse_norm
70
+ self.fuse_cross_entropy = fuse_cross_entropy
71
+ self.vocab_size = vocab_size
72
+
73
+ if attn is not None:
74
+ if not isinstance(attn, Dict):
75
+ raise ValueError("attn must be a dictionary")
76
+ if 'layers' not in attn:
77
+ raise ValueError("Layer indices must be provided to initialize hybrid attention layers")
78
+ if 'num_heads' not in attn:
79
+ raise ValueError("Number of heads must be provided to initialize hybrid attention layers")
80
+ attn['num_kv_heads'] = attn.get('num_kv_heads', attn['num_heads'])
81
+ attn['qkv_bias'] = attn.get('qkv_bias', False)
82
+ attn['window_size'] = attn.get('window_size', None)
83
+ attn['rope_theta'] = attn.get('rope_theta', 10000.)
84
+
85
+ super().__init__(
86
+ pad_token_id=pad_token_id,
87
+ bos_token_id=bos_token_id,
88
+ eos_token_id=eos_token_id,
89
+ tie_word_embeddings=tie_word_embeddings,
90
+ **kwargs,
91
+ )
trained_190k_steps/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 0,
5
+ "transformers_version": "4.52.4"
6
+ }
trained_190k_steps/hf_rwkv_tokenizer.py ADDED
@@ -0,0 +1,280 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # coding=utf-8
2
+ # Copyright 2024 The HuggingFace Inc. team.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+ """Tokenization classes for RWKV."""
16
+
17
+ import os
18
+ import re
19
+ from typing import TYPE_CHECKING, List, Optional, Tuple
20
+
21
+ from transformers.tokenization_utils import AddedToken, PreTrainedTokenizer
22
+ from transformers.utils import logging
23
+
24
+
25
+ if TYPE_CHECKING:
26
+ pass
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+
31
+ VOCAB_FILES_NAMES = {
32
+ "vocab_file": "rwkv_vocab_v20230424.txt",
33
+ }
34
+
35
+ class TRIE:
36
+ __slots__ = tuple("ch,to,values,front".split(","))
37
+ to: list
38
+ values: set
39
+
40
+ def __init__(self, front=None, ch=None):
41
+ self.ch = ch
42
+ self.to = [None for ch in range(256)]
43
+ self.values = set()
44
+ self.front = front
45
+
46
+ def __repr__(self):
47
+ fr = self
48
+ ret = []
49
+ while fr != None:
50
+ if fr.ch != None:
51
+ ret.append(fr.ch)
52
+ fr = fr.front
53
+ return "<TRIE %s %s>" % (ret[::-1], self.values)
54
+
55
+ def add(self, key: bytes, idx: int = 0, val=None):
56
+ if idx == len(key):
57
+ if val is None:
58
+ val = key
59
+ self.values.add(val)
60
+ return self
61
+ ch = key[idx]
62
+ if self.to[ch] is None:
63
+ self.to[ch] = TRIE(front=self, ch=ch)
64
+ return self.to[ch].add(key, idx=idx + 1, val=val)
65
+
66
+ def find_longest(self, key: bytes, idx: int = 0):
67
+ u: TRIE = self
68
+ ch: int = key[idx]
69
+
70
+ while u.to[ch] is not None:
71
+ u = u.to[ch]
72
+ idx += 1
73
+ if u.values:
74
+ ret = idx, u, u.values
75
+ if idx == len(key):
76
+ break
77
+ ch = key[idx]
78
+ return ret
79
+
80
+
81
+ class RWKV_TOKENIZER:
82
+ def __init__(self, file_name):
83
+ self.idx2token = {}
84
+ sorted = [] # must be already sorted
85
+ with open(file_name, "r", encoding="utf-8") as f:
86
+ lines = f.readlines()
87
+ for l in lines:
88
+ idx = int(l[: l.index(" ")])
89
+ x = eval(l[l.index(" ") : l.rindex(" ")])
90
+ x = x.encode("utf-8") if isinstance(x, str) else x
91
+ assert isinstance(x, bytes)
92
+
93
+ assert len(x) == int(l[l.rindex(" ") :])
94
+ sorted += [x]
95
+ self.idx2token[idx] = x
96
+
97
+ self.token2idx = {}
98
+ for k, v in self.idx2token.items():
99
+ self.token2idx[v] = int(k)
100
+
101
+ self.root = TRIE()
102
+ for t, i in self.token2idx.items():
103
+ _ = self.root.add(t, val=(t, i))
104
+
105
+ def encodeBytes(self, src: bytes):
106
+ idx: int = 0
107
+ tokens = []
108
+ while idx < len(src):
109
+ _idx: int = idx
110
+ idx, _, values = self.root.find_longest(src, idx)
111
+ assert idx != _idx
112
+ _, token = next(iter(values))
113
+ tokens.append(token)
114
+ return tokens
115
+
116
+ def decodeBytes(self, tokens):
117
+ return b"".join(map(lambda i: self.idx2token[i], tokens))
118
+
119
+ def encode(self, src):
120
+ if isinstance(src, str):
121
+ return [self.encodeBytes(src.encode("utf-8"))]
122
+ elif isinstance(src, list):
123
+ return [self.encodeBytes(s.encode("utf-8")) for s in src]
124
+
125
+ def decode(self, tokens):
126
+ return [self.decodeBytes(batch).decode("utf-8") for batch in tokens]
127
+ # try:
128
+ # return self.decodeBytes(tokens).decode('utf-8')
129
+ # except:
130
+ # return '\ufffd' # bad utf-8
131
+
132
+ def printTokens(self, tokens):
133
+ for i in tokens:
134
+ s = self.idx2token[i]
135
+ try:
136
+ s = s.decode("utf-8")
137
+ except:
138
+ pass
139
+ print(f"{repr(s)}{i}", end=" ")
140
+ print()
141
+
142
+
143
+ class RwkvTokenizer(PreTrainedTokenizer):
144
+ vocab_files_names = VOCAB_FILES_NAMES
145
+ model_input_names = ["input_ids", "attention_mask"]
146
+
147
+ def __init__(
148
+ self, vocab_file, bos_token="<|rwkv_tokenizer_end_of_text|>", eos_token="<|rwkv_tokenizer_end_of_text|>", unk_token="<|rwkv_tokenizer_end_of_text|>", **kwargs
149
+ ):
150
+ if not os.path.isfile(vocab_file):
151
+ raise ValueError(
152
+ f"Can't find a vocabulary file at path '{vocab_file}'."
153
+ )
154
+
155
+ with open(vocab_file, "r", encoding="utf-8") as reader:
156
+ tokens = reader.readlines()
157
+
158
+ if "add_bos_token" in kwargs:
159
+ self.add_bos_token = kwargs["add_bos_token"]
160
+ else:
161
+ self.add_bos_token = False
162
+ self.trie_tokenizer = RWKV_TOKENIZER(vocab_file)
163
+ vocab = self.trie_tokenizer.token2idx
164
+ self.encoder = vocab
165
+ self.decoder = {v: k for k, v in vocab.items()}
166
+ self._added_tokens_decoder = {0: AddedToken(str(bos_token))}
167
+ super().__init__(
168
+ bos_token=bos_token, eos_token=eos_token, unk_token=unk_token, **kwargs
169
+ )
170
+
171
+ @property
172
+ def vocab_size(self):
173
+ return len(self.encoder)
174
+
175
+ def get_vocab(self):
176
+ vocab = self.encoder
177
+ vocab.update(self.added_tokens_encoder)
178
+ vocab = dict(sorted(vocab.items(), key=lambda item: item[1]))
179
+ return vocab
180
+
181
+ def _tokenize(self, text, split_special_tokens=False):
182
+ # return self.wordpiece_tokenizer.tokenize(text.encode("utf-8"))
183
+ return self.trie_tokenizer.encode(text)[0]
184
+
185
+ def _convert_token_to_id(self, token):
186
+ return token
187
+
188
+ def _convert_id_to_token(self, index):
189
+ """Converts an index (integer) in a token (byte) using the vocab."""
190
+ token = self.decoder.get(index, self.unk_token)
191
+ if isinstance(token, (bytes)):
192
+ token = token.decode("utf-8", errors="replace")
193
+ return token
194
+
195
+ def convert_tokens_to_string(self, tokens):
196
+ """Converts a sequence of tokens (bytes) in a single string. Additional tokens are encoded to bytes"""
197
+ out_string = b"".join(
198
+ [k.encode(errors="replace") if isinstance(k, str) else k for k in tokens]
199
+ ).decode("utf-8")
200
+ return out_string
201
+
202
+ def save_vocabulary(
203
+ self, save_directory: str, filename_prefix: Optional[str] = None
204
+ ) -> Tuple[str]:
205
+ index = 0
206
+ if os.path.isdir(save_directory):
207
+ vocab_file = os.path.join(
208
+ save_directory,
209
+ (filename_prefix + "-" if filename_prefix else "") + "vocab.txt",
210
+ )
211
+ else:
212
+ vocab_file = (
213
+ filename_prefix + "-" if filename_prefix else ""
214
+ ) + save_directory
215
+ with open(vocab_file, "w", encoding="utf-8") as writer:
216
+ for token, token_index in sorted(
217
+ self.encoder.items(), key=lambda kv: kv[1]
218
+ ):
219
+ if index != token_index:
220
+ logger.warning(
221
+ f"Saving vocabulary to {vocab_file}: vocabulary indices are not consecutive."
222
+ " Please check that the vocabulary is not corrupted!"
223
+ )
224
+ index = token_index
225
+ writer.write(str(token) + "\n")
226
+ index += 1
227
+ return (vocab_file,)
228
+
229
+ def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
230
+ if self.add_bos_token:
231
+ bos_token_ids = [self.bos_token_id]
232
+ else:
233
+ bos_token_ids = []
234
+
235
+ output = bos_token_ids + token_ids_0
236
+
237
+ if token_ids_1 is None:
238
+ return output
239
+
240
+ return output + bos_token_ids + token_ids_1
241
+
242
+ def get_special_tokens_mask(
243
+ self,
244
+ token_ids_0: List[int],
245
+ token_ids_1: Optional[List[int]] = None,
246
+ already_has_special_tokens: bool = False,
247
+ ) -> List[int]:
248
+ """
249
+ Retrieves sequence ids from a token list that has no special tokens added. This method is called when adding
250
+ special tokens using the tokenizer `prepare_for_model` or `encode_plus` methods.
251
+
252
+ Args:
253
+ token_ids_0 (`List[int]`):
254
+ List of IDs.
255
+ token_ids_1 (`List[int]`, *optional*):
256
+ Optional second list of IDs for sequence pairs.
257
+ already_has_special_tokens (`bool`, *optional*, defaults to `False`):
258
+ Whether or not the token list is already formatted with special tokens for the model.
259
+
260
+ Returns:
261
+ `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
262
+ """
263
+ if already_has_special_tokens:
264
+ return super().get_special_tokens_mask(
265
+ token_ids_0=token_ids_0,
266
+ token_ids_1=token_ids_1,
267
+ already_has_special_tokens=True,
268
+ )
269
+
270
+ if not self.add_bos_token:
271
+ return super().get_special_tokens_mask(
272
+ token_ids_0=token_ids_0,
273
+ token_ids_1=token_ids_1,
274
+ already_has_special_tokens=False,
275
+ )
276
+
277
+ if token_ids_1 is None:
278
+ return [1] + ([0] * len(token_ids_0))
279
+ return [1] + ([0] * len(token_ids_0)) + [1] + ([0] * len(token_ids_1))
280
+
trained_190k_steps/kafka.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7928aeaf90600d6a014a5fececdc59cdf0e2971db327a0cf56b922b7cd8f8a7
3
+ size 265524
trained_190k_steps/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77c5578b1aaab351a1c89b8695ec465456268a5586020c5046fcd8544328a002
3
+ size 809355976
trained_190k_steps/modeling_rwkvspeech.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from spark_llm import RWKV7SpeechConfig,RWKV7ForSpeech
2
+ from rwkvfla.models.rwkv7 import RWKV7Model
3
+
4
+ RWKV7ForCausalLM = RWKV7ForSpeech
5
+ RWKV7Model = RWKV7Model
6
+ RWKV7Config = RWKV7SpeechConfig
trained_190k_steps/output.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b56b3b68f11fdb8539634bb27312f1346b3876ede818d311f6c89dd8b8e94dd
3
+ size 499244
trained_190k_steps/output_0.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:559514056f3c45e9362937acedf1a6a4def27f14443842ff1193dad9c6a274a3
3
+ size 439724
trained_190k_steps/output_1.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e4afaaa5498043eff67996feb3310b0b02d1fb62cdf52c7c93fcbd6c936be9d
3
+ size 228524
trained_190k_steps/output_10.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e28726c1f0d4199c061bebb5770c14f448f177fe1d6d9341596bd112b5b0fc9f
3
+ size 133804
trained_190k_steps/output_2.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b80e10f88d2214da41180723783bed9bdfced387834377b92e6dad064174b0b1
3
+ size 150444
trained_190k_steps/output_3.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbbef7cad347491d556cf82287293c353588ec2bc0c2247110766004d6c1dd2e
3
+ size 586284
trained_190k_steps/output_4.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d00b4160fc563d9149a8bc834186542807f853d270db3b27a2d2c39a004edc4
3
+ size 209964
trained_190k_steps/output_5.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1049970bb90cf801a9e2ffac5f66715a277928b495900e4a7ce940f0c18f49c8
3
+ size 256044
trained_190k_steps/output_6.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c46bea5a3c52f6832612f92f0f89c8aa8ea9f9b15b0d4afcd3152c24037692a
3
+ size 184364
trained_190k_steps/output_7.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:628147e65cdd71c534ff65c47d4d80a21588522c78a1ddfae2ec1c1d0d83b103
3
+ size 307244
trained_190k_steps/output_8.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:040f941a4a1962d4c8b8cad6d1b0b6b0db3b36a955d2801cd1c59992fb6053c9
3
+ size 171564
trained_190k_steps/output_9.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f87822f47fe7cf8626bb0e9de61455e862063cfdbfc8443e7e2f7aee93d2b169
3
+ size 241964
trained_190k_steps/rtf_test_results/rtf_test_results_20250714_103617.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_info": {
3
+ "timestamp": "20250714_103617",
4
+ "device": "cuda:2",
5
+ "model_path": "/home/yueyulin/tmp/respark",
6
+ "batch_size": 4
7
+ },
8
+ "statistics": {
9
+ "total_tests": 20,
10
+ "successful_tests": 20,
11
+ "failed_tests": 0,
12
+ "batch_size": 4,
13
+ "total_batches": 5,
14
+ "total_processing_time": 100.95156717300415,
15
+ "total_audio_length": 123.94000000000001,
16
+ "total_rtf": 1.227717443827294,
17
+ "avg_rtf": 1.4573588824585872,
18
+ "avg_processing_time": 5.047578358650208,
19
+ "avg_audio_length": 6.197000000000001,
20
+ "min_rtf": 0.3893700157312124,
21
+ "max_rtf": 2.0969803734054207,
22
+ "std_rtf": 0.48215195815676065
23
+ },
24
+ "detailed_results": [
25
+ {
26
+ "index": 1,
27
+ "batch": 1,
28
+ "text": "一九五二年二月十日,志愿军大英雄张积慧击落美军双料王牌飞行员戴维斯,在自己飞机坠毁处距离戴维斯坠机处不足五百米的情况下,取得了世界空战史不可能复制的奇迹。伟大的张积慧。",
29
+ "processing_time": 11.5057652592659,
30
+ "audio_length": 14.06,
31
+ "rtf": 1.2219960761564388,
32
+ "output_file": "rtf_test_results/test_001.wav"
33
+ },
34
+ {
35
+ "index": 2,
36
+ "batch": 1,
37
+ "text": "在数字浪潮汹涌的今天,数智技术正以前所未有的力量重塑着社会的每一个角落。",
38
+ "processing_time": 11.5057652592659,
39
+ "audio_length": 6.16,
40
+ "rtf": 0.5353837716304171,
41
+ "output_file": "rtf_test_results/test_002.wav"
42
+ },
43
+ {
44
+ "index": 3,
45
+ "batch": 1,
46
+ "text": "为了点燃青少年对科技的热情,培养他们的创新思维与动手能力",
47
+ "processing_time": 11.5057652592659,
48
+ "audio_length": 4.48,
49
+ "rtf": 0.3893700157312124,
50
+ "output_file": "rtf_test_results/test_003.wav"
51
+ },
52
+ {
53
+ "index": 4,
54
+ "batch": 1,
55
+ "text": "杏花岭区巨轮街道社区教育学校携手中车社区教育分校,与太原市科学技术协会联手,于暑期精心策划了一场别开生面的青少年数智技术服务港探索之旅,吸引了众多社区青少年的积极参与。",
56
+ "processing_time": 11.5057652592659,
57
+ "audio_length": 13.54,
58
+ "rtf": 1.1768013421876373,
59
+ "output_file": "rtf_test_results/test_004.wav"
60
+ },
61
+ {
62
+ "index": 5,
63
+ "batch": 2,
64
+ "text": "一踏入数智技术服务港的大门,一股浓厚的科技气息便扑面而来。",
65
+ "processing_time": 4.353760182857513,
66
+ "audio_length": 4.82,
67
+ "rtf": 1.1070889983739247,
68
+ "output_file": "rtf_test_results/test_005.wav"
69
+ },
70
+ {
71
+ "index": 6,
72
+ "batch": 2,
73
+ "text": "科普课堂上,“简易红绿灯”科学实验更是将抽象的电路原理与日常生活紧密相连。",
74
+ "processing_time": 4.353760182857513,
75
+ "audio_length": 5.9,
76
+ "rtf": 1.3551504336942233,
77
+ "output_file": "rtf_test_results/test_006.wav"
78
+ },
79
+ {
80
+ "index": 7,
81
+ "batch": 2,
82
+ "text": "实验开始前,老师生动地介绍了实验物品,并引导青少年思考红绿灯的工作原理,激发了他们浓厚的探索兴趣。",
83
+ "processing_time": 4.353760182857513,
84
+ "audio_length": 9.02,
85
+ "rtf": 2.071772357952863,
86
+ "output_file": "rtf_test_results/test_007.wav"
87
+ },
88
+ {
89
+ "index": 8,
90
+ "batch": 2,
91
+ "text": "在老师的指导下,青少年们开始动手组装电路,将红绿灯的各个部件连接起来。",
92
+ "processing_time": 4.353760182857513,
93
+ "audio_length": 6.0,
94
+ "rtf": 1.3781190851127694,
95
+ "output_file": "rtf_test_results/test_008.wav"
96
+ },
97
+ {
98
+ "index": 9,
99
+ "batch": 3,
100
+ "text": "他们小心翼翼地调整电路,确保每个部件都正确连接,红灯、绿灯、黄灯依次亮起,仿佛在讲述一个关于交通规则的故事。",
101
+ "processing_time": 4.711536705493927,
102
+ "audio_length": 9.88,
103
+ "rtf": 2.0969803734054207,
104
+ "output_file": "rtf_test_results/test_009.wav"
105
+ },
106
+ {
107
+ "index": 10,
108
+ "batch": 3,
109
+ "text": "实验过程中,青少年们不仅学到了电路知识,还体验到了动手实践的乐趣。",
110
+ "processing_time": 4.711536705493927,
111
+ "audio_length": 5.8,
112
+ "rtf": 1.2310208669788905,
113
+ "output_file": "rtf_test_results/test_010.wav"
114
+ },
115
+ {
116
+ "index": 11,
117
+ "batch": 3,
118
+ "text": "他们纷纷表示,这次实验不仅让他们对科技有了更深的理解,还培养了他们的创新思维和动手能力。",
119
+ "processing_time": 4.711536705493927,
120
+ "audio_length": 6.9,
121
+ "rtf": 1.4644903417507493,
122
+ "output_file": "rtf_test_results/test_011.wav"
123
+ },
124
+ {
125
+ "index": 12,
126
+ "batch": 3,
127
+ "text": "数智技术服务港,让科技触手可及,让创新无处不在。",
128
+ "processing_time": 4.711536705493927,
129
+ "audio_length": 3.86,
130
+ "rtf": 0.8192656114721582,
131
+ "output_file": "rtf_test_results/test_012.wav"
132
+ },
133
+ {
134
+ "index": 13,
135
+ "batch": 4,
136
+ "text": "人工智能技术正在快速发展,为各行各业带来了革命性的变化。",
137
+ "processing_time": 2.7456793189048767,
138
+ "audio_length": 4.4,
139
+ "rtf": 1.602517806687984,
140
+ "output_file": "rtf_test_results/test_013.wav"
141
+ },
142
+ {
143
+ "index": 14,
144
+ "batch": 4,
145
+ "text": "深度学习模型在语音识别、图像处理、自然语言处理等领域取得了突破性进展。",
146
+ "processing_time": 2.7456793189048767,
147
+ "audio_length": 5.68,
148
+ "rtf": 2.0687048049972154,
149
+ "output_file": "rtf_test_results/test_014.wav"
150
+ },
151
+ {
152
+ "index": 15,
153
+ "batch": 4,
154
+ "text": "机器学习算法能够从大量数据中学习模式,并做出准确的预测和决策。",
155
+ "processing_time": 2.7456793189048767,
156
+ "audio_length": 5.2,
157
+ "rtf": 1.8938846806312535,
158
+ "output_file": "rtf_test_results/test_015.wav"
159
+ },
160
+ {
161
+ "index": 16,
162
+ "batch": 4,
163
+ "text": "神经网络模拟人脑的工作方式,通过多层神经元处理复杂的信息。",
164
+ "processing_time": 2.7456793189048767,
165
+ "audio_length": 4.86,
166
+ "rtf": 1.770053759205364,
167
+ "output_file": "rtf_test_results/test_016.wav"
168
+ },
169
+ {
170
+ "index": 17,
171
+ "batch": 5,
172
+ "text": "计算机视觉技术让机器能够理解和分析图像内容。",
173
+ "processing_time": 1.9211503267288208,
174
+ "audio_length": 3.4,
175
+ "rtf": 1.7697730118752573,
176
+ "output_file": "rtf_test_results/test_017.wav"
177
+ },
178
+ {
179
+ "index": 18,
180
+ "batch": 5,
181
+ "text": "自然语言处理技术使计算机能够理解和生成人类语言。",
182
+ "processing_time": 1.9211503267288208,
183
+ "audio_length": 3.9,
184
+ "rtf": 2.0300337489157365,
185
+ "output_file": "rtf_test_results/test_018.wav"
186
+ },
187
+ {
188
+ "index": 19,
189
+ "batch": 5,
190
+ "text": "语音合成技术将文本转换为自然的语音输出。",
191
+ "processing_time": 1.9211503267288208,
192
+ "audio_length": 3.06,
193
+ "rtf": 1.5927957106877317,
194
+ "output_file": "rtf_test_results/test_019.wav"
195
+ },
196
+ {
197
+ "index": 20,
198
+ "batch": 5,
199
+ "text": "大数据分析帮助企业发现隐藏的模式和趋势。",
200
+ "processing_time": 1.9211503267288208,
201
+ "audio_length": 3.02,
202
+ "rtf": 1.5719748517244934,
203
+ "output_file": "rtf_test_results/test_020.wav"
204
+ }
205
+ ]
206
+ }
trained_190k_steps/rtf_test_results/rtf_test_results_20250714_104243.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_info": {
3
+ "timestamp": "20250714_104243",
4
+ "device": "cuda:2",
5
+ "model_path": "/home/yueyulin/tmp/respark",
6
+ "batch_size": 4
7
+ },
8
+ "statistics": {
9
+ "total_tests": 20,
10
+ "successful_tests": 20,
11
+ "failed_tests": 0,
12
+ "batch_size": 4,
13
+ "total_batches": 5,
14
+ "total_processing_time": 97.51047849655151,
15
+ "total_audio_length": 127.58000000000001,
16
+ "total_rtf": 1.3083722074496016,
17
+ "avg_batch_rtf": 1.5580788079346588,
18
+ "avg_batch_processing_time": 19.502095699310303,
19
+ "avg_audio_length": 6.3790000000000004,
20
+ "min_batch_rtf": 0.9216863966136871,
21
+ "max_batch_rtf": 1.9146887765353369,
22
+ "std_batch_rtf": 0.35584074129598514
23
+ },
24
+ "detailed_results": [
25
+ {
26
+ "index": 1,
27
+ "batch": 1,
28
+ "text": "一九五二年二月十日,志愿军大英雄张积慧击落美军双料王牌飞行员戴维斯,在自己飞机坠毁处距离戴维斯坠机处不足五百米的情况下,取得了世界空战史不可能复制的奇迹。伟大的张积慧。",
29
+ "batch_processing_time": 46.76210927963257,
30
+ "audio_length": 15.72,
31
+ "batch_rtf": 0.9216863966136871,
32
+ "output_file": "rtf_test_results/test_001.wav"
33
+ },
34
+ {
35
+ "index": 2,
36
+ "batch": 1,
37
+ "text": "在数字浪潮汹涌的今天,数智技术正以前所未有的力量重塑着社会的每一个角落。",
38
+ "batch_processing_time": 46.76210927963257,
39
+ "audio_length": 6.66,
40
+ "batch_rtf": 0.9216863966136871,
41
+ "output_file": "rtf_test_results/test_002.wav"
42
+ },
43
+ {
44
+ "index": 3,
45
+ "batch": 1,
46
+ "text": "为了点燃青少年对科技的热情,培养他们的创新思维与动手能力",
47
+ "batch_processing_time": 46.76210927963257,
48
+ "audio_length": 4.82,
49
+ "batch_rtf": 0.9216863966136871,
50
+ "output_file": "rtf_test_results/test_003.wav"
51
+ },
52
+ {
53
+ "index": 4,
54
+ "batch": 1,
55
+ "text": "杏花岭区巨轮街道社区教育学校携手中车社区教育分校,与太原市科学技术协会联手,于暑期精心策划了一场别开生面的青少年数智技术服务港探索之旅,吸引了众多社区青少年的积极参与。",
56
+ "batch_processing_time": 46.76210927963257,
57
+ "audio_length": 15.9,
58
+ "batch_rtf": 0.9216863966136871,
59
+ "output_file": "rtf_test_results/test_004.wav"
60
+ },
61
+ {
62
+ "index": 5,
63
+ "batch": 2,
64
+ "text": "一踏入数智技术服务港的大门,一股浓厚的科技气息便扑面而来。",
65
+ "batch_processing_time": 15.214709043502808,
66
+ "audio_length": 4.66,
67
+ "batch_rtf": 1.6300015944498414,
68
+ "output_file": "rtf_test_results/test_005.wav"
69
+ },
70
+ {
71
+ "index": 6,
72
+ "batch": 2,
73
+ "text": "科普课堂上,“简易红绿灯”科学实验更是将抽象的电路原理与日常生活紧密相连。",
74
+ "batch_processing_time": 15.214709043502808,
75
+ "audio_length": 5.86,
76
+ "batch_rtf": 1.6300015944498414,
77
+ "output_file": "rtf_test_results/test_006.wav"
78
+ },
79
+ {
80
+ "index": 7,
81
+ "batch": 2,
82
+ "text": "实验开始前,老师生动地介绍了实验物品,并引导青少年思考红绿灯的工作原理,激发了他们浓厚的探索兴趣。",
83
+ "batch_processing_time": 15.214709043502808,
84
+ "audio_length": 8.06,
85
+ "batch_rtf": 1.6300015944498414,
86
+ "output_file": "rtf_test_results/test_007.wav"
87
+ },
88
+ {
89
+ "index": 8,
90
+ "batch": 2,
91
+ "text": "在老师的指导下,青少年们开始动手组装电路,将红绿灯的各个部件连接起来。",
92
+ "batch_processing_time": 15.214709043502808,
93
+ "audio_length": 6.22,
94
+ "batch_rtf": 1.6300015944498414,
95
+ "output_file": "rtf_test_results/test_008.wav"
96
+ },
97
+ {
98
+ "index": 9,
99
+ "batch": 3,
100
+ "text": "他们小心翼翼地调整电路,确保每个部件都正确连接,红灯、绿灯、黄灯依次亮起,仿佛在讲述一个关于交通规则的故事。",
101
+ "batch_processing_time": 17.258368730545044,
102
+ "audio_length": 8.92,
103
+ "batch_rtf": 1.4694320416921058,
104
+ "output_file": "rtf_test_results/test_009.wav"
105
+ },
106
+ {
107
+ "index": 10,
108
+ "batch": 3,
109
+ "text": "实验过程中,青少年们不仅学到了电路知识,还体验到了动手实践的乐趣。",
110
+ "batch_processing_time": 17.258368730545044,
111
+ "audio_length": 5.68,
112
+ "batch_rtf": 1.4694320416921058,
113
+ "output_file": "rtf_test_results/test_010.wav"
114
+ },
115
+ {
116
+ "index": 11,
117
+ "batch": 3,
118
+ "text": "他们纷纷表示,这次实验不仅让他们对科技有了更深的理解,还培养了他们的创新思维和动手能力。",
119
+ "batch_processing_time": 17.258368730545044,
120
+ "audio_length": 6.86,
121
+ "batch_rtf": 1.4694320416921058,
122
+ "output_file": "rtf_test_results/test_011.wav"
123
+ },
124
+ {
125
+ "index": 12,
126
+ "batch": 3,
127
+ "text": "数智技术服务港,让科技触手可及,让创新无处不在。",
128
+ "batch_processing_time": 17.258368730545044,
129
+ "audio_length": 3.9,
130
+ "batch_rtf": 1.4694320416921058,
131
+ "output_file": "rtf_test_results/test_012.wav"
132
+ },
133
+ {
134
+ "index": 13,
135
+ "batch": 4,
136
+ "text": "人工智能技术正在快速发展,为各行各业带来了革命性的变化。",
137
+ "batch_processing_time": 11.172309398651123,
138
+ "audio_length": 4.96,
139
+ "batch_rtf": 1.8545852303823243,
140
+ "output_file": "rtf_test_results/test_013.wav"
141
+ },
142
+ {
143
+ "index": 14,
144
+ "batch": 4,
145
+ "text": "深度学习模型在语音识别、图像处理、自然语言处理等领域取得了突破性进展。",
146
+ "batch_processing_time": 11.172309398651123,
147
+ "audio_length": 5.72,
148
+ "batch_rtf": 1.8545852303823243,
149
+ "output_file": "rtf_test_results/test_014.wav"
150
+ },
151
+ {
152
+ "index": 15,
153
+ "batch": 4,
154
+ "text": "机器学习算法能够从大量数据中学习模式,并做出准确的预测和决策。",
155
+ "batch_processing_time": 11.172309398651123,
156
+ "audio_length": 5.5,
157
+ "batch_rtf": 1.8545852303823243,
158
+ "output_file": "rtf_test_results/test_015.wav"
159
+ },
160
+ {
161
+ "index": 16,
162
+ "batch": 4,
163
+ "text": "神经网络模拟人脑的工作方式,通过多层神经元处理复杂的信息。",
164
+ "batch_processing_time": 11.172309398651123,
165
+ "audio_length": 4.54,
166
+ "batch_rtf": 1.8545852303823243,
167
+ "output_file": "rtf_test_results/test_016.wav"
168
+ },
169
+ {
170
+ "index": 17,
171
+ "batch": 5,
172
+ "text": "计算机视觉技术让机器能够理解和分析图像内容。",
173
+ "batch_processing_time": 7.102982044219971,
174
+ "audio_length": 3.6,
175
+ "batch_rtf": 1.9146887765353369,
176
+ "output_file": "rtf_test_results/test_017.wav"
177
+ },
178
+ {
179
+ "index": 18,
180
+ "batch": 5,
181
+ "text": "自然语言处理技术使计算机能够理解和生成人类语言。",
182
+ "batch_processing_time": 7.102982044219971,
183
+ "audio_length": 3.44,
184
+ "batch_rtf": 1.9146887765353369,
185
+ "output_file": "rtf_test_results/test_018.wav"
186
+ },
187
+ {
188
+ "index": 19,
189
+ "batch": 5,
190
+ "text": "语音合成技术将文本转换为自然的语音输出。",
191
+ "batch_processing_time": 7.102982044219971,
192
+ "audio_length": 3.42,
193
+ "batch_rtf": 1.9146887765353369,
194
+ "output_file": "rtf_test_results/test_019.wav"
195
+ },
196
+ {
197
+ "index": 20,
198
+ "batch": 5,
199
+ "text": "大数据分析帮助企业发现隐藏的模式和趋势。",
200
+ "batch_processing_time": 7.102982044219971,
201
+ "audio_length": 3.14,
202
+ "batch_rtf": 1.9146887765353369,
203
+ "output_file": "rtf_test_results/test_020.wav"
204
+ }
205
+ ]
206
+ }
trained_190k_steps/rtf_test_results/rtf_test_results_20250714_104655.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_info": {
3
+ "timestamp": "20250714_104655",
4
+ "device": "cuda:2",
5
+ "model_path": "/home/yueyulin/tmp/respark",
6
+ "batch_size": 4
7
+ },
8
+ "statistics": {
9
+ "total_tests": 20,
10
+ "successful_tests": 20,
11
+ "failed_tests": 0,
12
+ "batch_size": 4,
13
+ "total_batches": 5,
14
+ "total_processing_time": 97.07794642448425,
15
+ "total_audio_length": 126.58,
16
+ "total_rtf": 0.7669295814858924,
17
+ "avg_batch_rtf": 0.6971107903490739,
18
+ "avg_batch_processing_time": 19.41558928489685,
19
+ "avg_audio_length": 6.328999999999999,
20
+ "min_batch_rtf": 0.5344265722669661,
21
+ "max_batch_rtf": 1.088588070754268,
22
+ "std_batch_rtf": 0.20383940965077615
23
+ },
24
+ "detailed_results": [
25
+ {
26
+ "index": 1,
27
+ "batch": 1,
28
+ "text": "一九五二年二月十日,志愿军大英雄张积慧击落美军双料王牌飞行员戴维斯,在自己飞机坠毁处距离戴维斯坠机处不足五百米的情况下,取得了世界空战史不可能复制的奇迹。伟大的张积慧。",
29
+ "batch_processing_time": 45.067546129226685,
30
+ "audio_length": 14.48,
31
+ "batch_rtf": 1.088588070754268,
32
+ "output_file": "rtf_test_results/test_001.wav"
33
+ },
34
+ {
35
+ "index": 2,
36
+ "batch": 1,
37
+ "text": "在数字浪潮汹涌的今天,数智技术正以前所未有的力量重塑着社会的每一个角落。",
38
+ "batch_processing_time": 45.067546129226685,
39
+ "audio_length": 6.24,
40
+ "batch_rtf": 1.088588070754268,
41
+ "output_file": "rtf_test_results/test_002.wav"
42
+ },
43
+ {
44
+ "index": 3,
45
+ "batch": 1,
46
+ "text": "为了点燃青少年对科技的热情,培养他们的创新思维与动手能力",
47
+ "batch_processing_time": 45.067546129226685,
48
+ "audio_length": 5.6,
49
+ "batch_rtf": 1.088588070754268,
50
+ "output_file": "rtf_test_results/test_003.wav"
51
+ },
52
+ {
53
+ "index": 4,
54
+ "batch": 1,
55
+ "text": "杏花岭区巨轮街道社区教育学校携手中车社区教育分校,与太原市科学技术协会联手,于暑期精心策划了一场别开生面的青少年数智技术服务港探索之旅,吸引了众多社区青少年的积极参与。",
56
+ "batch_processing_time": 45.067546129226685,
57
+ "audio_length": 15.08,
58
+ "batch_rtf": 1.088588070754268,
59
+ "output_file": "rtf_test_results/test_004.wav"
60
+ },
61
+ {
62
+ "index": 5,
63
+ "batch": 2,
64
+ "text": "一踏入数智技术服务港的大门,一股浓厚的科技气息便扑面而来。",
65
+ "batch_processing_time": 16.357284545898438,
66
+ "audio_length": 4.8,
67
+ "batch_rtf": 0.6558654589373872,
68
+ "output_file": "rtf_test_results/test_005.wav"
69
+ },
70
+ {
71
+ "index": 6,
72
+ "batch": 2,
73
+ "text": "科普课堂上,“简易红绿灯”科学实验更是将抽象的电路原理与日常生活紧密相连。",
74
+ "batch_processing_time": 16.357284545898438,
75
+ "audio_length": 5.84,
76
+ "batch_rtf": 0.6558654589373872,
77
+ "output_file": "rtf_test_results/test_006.wav"
78
+ },
79
+ {
80
+ "index": 7,
81
+ "batch": 2,
82
+ "text": "实验开始前,老师生动地介绍了实验物品,并引导青少年思考红绿灯的工作原理,激发了他们浓厚的探索兴趣。",
83
+ "batch_processing_time": 16.357284545898438,
84
+ "audio_length": 8.6,
85
+ "batch_rtf": 0.6558654589373872,
86
+ "output_file": "rtf_test_results/test_007.wav"
87
+ },
88
+ {
89
+ "index": 8,
90
+ "batch": 2,
91
+ "text": "在老师的指导下,青少年们开始动手组装电路,将红绿灯的各个部件连接起来。",
92
+ "batch_processing_time": 16.357284545898438,
93
+ "audio_length": 5.7,
94
+ "batch_rtf": 0.6558654589373872,
95
+ "output_file": "rtf_test_results/test_008.wav"
96
+ },
97
+ {
98
+ "index": 9,
99
+ "batch": 3,
100
+ "text": "他们小心翼翼地调整电路,确保每个部件都正确连接,红灯、绿灯、黄灯依次亮起,仿佛在讲述一个关于交通规则的故事。",
101
+ "batch_processing_time": 16.95064425468445,
102
+ "audio_length": 9.0,
103
+ "batch_rtf": 0.6694567241186591,
104
+ "output_file": "rtf_test_results/test_009.wav"
105
+ },
106
+ {
107
+ "index": 10,
108
+ "batch": 3,
109
+ "text": "实验过程中,青少年们不仅学到了电路知识,还体验到了动手实践的乐趣。",
110
+ "batch_processing_time": 16.95064425468445,
111
+ "audio_length": 5.72,
112
+ "batch_rtf": 0.6694567241186591,
113
+ "output_file": "rtf_test_results/test_010.wav"
114
+ },
115
+ {
116
+ "index": 11,
117
+ "batch": 3,
118
+ "text": "他们纷纷表示,这次实验不仅让他们对科技有了更深的理解,还培养了他们的创新思维和动手能力。",
119
+ "batch_processing_time": 16.95064425468445,
120
+ "audio_length": 6.8,
121
+ "batch_rtf": 0.6694567241186591,
122
+ "output_file": "rtf_test_results/test_011.wav"
123
+ },
124
+ {
125
+ "index": 12,
126
+ "batch": 3,
127
+ "text": "数智技术服务港,让科技触手可及,让创新无处不在。",
128
+ "batch_processing_time": 16.95064425468445,
129
+ "audio_length": 3.8,
130
+ "batch_rtf": 0.6694567241186591,
131
+ "output_file": "rtf_test_results/test_012.wav"
132
+ },
133
+ {
134
+ "index": 13,
135
+ "batch": 4,
136
+ "text": "人工智能技术正在快速发展,为各行各业带来了革命性的变化。",
137
+ "batch_processing_time": 10.945056200027466,
138
+ "audio_length": 4.72,
139
+ "batch_rtf": 0.5344265722669661,
140
+ "output_file": "rtf_test_results/test_013.wav"
141
+ },
142
+ {
143
+ "index": 14,
144
+ "batch": 4,
145
+ "text": "深度学习模型在语音识别、图像处理、自然语言处理等领域取得了突破性进展。",
146
+ "batch_processing_time": 10.945056200027466,
147
+ "audio_length": 5.76,
148
+ "batch_rtf": 0.5344265722669661,
149
+ "output_file": "rtf_test_results/test_014.wav"
150
+ },
151
+ {
152
+ "index": 15,
153
+ "batch": 4,
154
+ "text": "机器学习算法能够从大量数据中学习模式,并做出准确的预测和决策。",
155
+ "batch_processing_time": 10.945056200027466,
156
+ "audio_length": 4.98,
157
+ "batch_rtf": 0.5344265722669661,
158
+ "output_file": "rtf_test_results/test_015.wav"
159
+ },
160
+ {
161
+ "index": 16,
162
+ "batch": 4,
163
+ "text": "神经网络模拟人脑的工作方式,通过多层神经元处理复杂的信息。",
164
+ "batch_processing_time": 10.945056200027466,
165
+ "audio_length": 5.02,
166
+ "batch_rtf": 0.5344265722669661,
167
+ "output_file": "rtf_test_results/test_016.wav"
168
+ },
169
+ {
170
+ "index": 17,
171
+ "batch": 5,
172
+ "text": "计算机视觉技术让机器能够理解和分析图像内容。",
173
+ "batch_processing_time": 7.757415294647217,
174
+ "audio_length": 3.82,
175
+ "batch_rtf": 0.5372171256680899,
176
+ "output_file": "rtf_test_results/test_017.wav"
177
+ },
178
+ {
179
+ "index": 18,
180
+ "batch": 5,
181
+ "text": "自然语言处理技术使计算机能够理解和生成人类语言。",
182
+ "batch_processing_time": 7.757415294647217,
183
+ "audio_length": 3.68,
184
+ "batch_rtf": 0.5372171256680899,
185
+ "output_file": "rtf_test_results/test_018.wav"
186
+ },
187
+ {
188
+ "index": 19,
189
+ "batch": 5,
190
+ "text": "语音合成技术将文本转换为自然的语音输出。",
191
+ "batch_processing_time": 7.757415294647217,
192
+ "audio_length": 3.34,
193
+ "batch_rtf": 0.5372171256680899,
194
+ "output_file": "rtf_test_results/test_019.wav"
195
+ },
196
+ {
197
+ "index": 20,
198
+ "batch": 5,
199
+ "text": "大数据分析帮助企业发现隐藏的模式和趋势。",
200
+ "batch_processing_time": 7.757415294647217,
201
+ "audio_length": 3.6,
202
+ "batch_rtf": 0.5372171256680899,
203
+ "output_file": "rtf_test_results/test_020.wav"
204
+ }
205
+ ]
206
+ }
trained_190k_steps/rtf_test_results/rtf_test_results_20250714_104853.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "test_info": {
3
+ "timestamp": "20250714_104853",
4
+ "device": "cuda:2",
5
+ "model_path": "/home/yueyulin/tmp/respark",
6
+ "batch_size": 8
7
+ },
8
+ "statistics": {
9
+ "total_tests": 20,
10
+ "successful_tests": 20,
11
+ "failed_tests": 0,
12
+ "batch_size": 8,
13
+ "total_batches": 3,
14
+ "total_processing_time": 70.2043297290802,
15
+ "total_audio_length": 124.24,
16
+ "total_rtf": 0.5650702650441098,
17
+ "avg_batch_rtf": 0.5415599169688988,
18
+ "avg_batch_processing_time": 26.568262767791747,
19
+ "avg_audio_length": 6.212,
20
+ "min_batch_rtf": 0.3969690210187983,
21
+ "max_batch_rtf": 0.6930763216404902,
22
+ "std_batch_rtf": 0.13260418306880234
23
+ },
24
+ "detailed_results": [
25
+ {
26
+ "index": 1,
27
+ "batch": 1,
28
+ "text": "一九五二年二月十日,志愿军大英雄张积慧击落美军双料王牌飞行员戴维斯,在自己飞机坠毁处距离戴维斯坠机处不足五百米的情况下,取得了世界空战史不可能复制的奇迹。伟大的张积慧。",
29
+ "batch_processing_time": 44.49549984931946,
30
+ "audio_length": 14.36,
31
+ "batch_rtf": 0.6930763216404902,
32
+ "output_file": "rtf_test_results/test_001.wav"
33
+ },
34
+ {
35
+ "index": 2,
36
+ "batch": 1,
37
+ "text": "在数字浪潮汹涌的今天,数智技术正以前所未有的力量重塑着社会的每一个角落。",
38
+ "batch_processing_time": 44.49549984931946,
39
+ "audio_length": 5.54,
40
+ "batch_rtf": 0.6930763216404902,
41
+ "output_file": "rtf_test_results/test_002.wav"
42
+ },
43
+ {
44
+ "index": 3,
45
+ "batch": 1,
46
+ "text": "为了点燃青少年对科技的热情,培养他们的创新思维与动手能力",
47
+ "batch_processing_time": 44.49549984931946,
48
+ "audio_length": 4.2,
49
+ "batch_rtf": 0.6930763216404902,
50
+ "output_file": "rtf_test_results/test_003.wav"
51
+ },
52
+ {
53
+ "index": 4,
54
+ "batch": 1,
55
+ "text": "杏花岭区巨轮街道社区教育学校携手中车社区教育分校,与太原市科学技术协会联手,于暑期精心策划了一场别开生面的青少年数智技术服务港探索之旅,吸引了众多社区青少年的积极参与。",
56
+ "batch_processing_time": 44.49549984931946,
57
+ "audio_length": 14.58,
58
+ "batch_rtf": 0.6930763216404902,
59
+ "output_file": "rtf_test_results/test_004.wav"
60
+ },
61
+ {
62
+ "index": 5,
63
+ "batch": 1,
64
+ "text": "一踏入数智技术服务港的大门,一股浓厚的科技气息便扑面而来。",
65
+ "batch_processing_time": 44.49549984931946,
66
+ "audio_length": 4.62,
67
+ "batch_rtf": 0.6930763216404902,
68
+ "output_file": "rtf_test_results/test_005.wav"
69
+ },
70
+ {
71
+ "index": 6,
72
+ "batch": 1,
73
+ "text": "科普课堂上,“简易红绿灯”科学实验更是将抽象的电路原理与日常生活紧密相连。",
74
+ "batch_processing_time": 44.49549984931946,
75
+ "audio_length": 7.0,
76
+ "batch_rtf": 0.6930763216404902,
77
+ "output_file": "rtf_test_results/test_006.wav"
78
+ },
79
+ {
80
+ "index": 7,
81
+ "batch": 1,
82
+ "text": "实验开始前,老师生动地介绍了实验物品,并引导青少年思考红绿灯的工作原理,激发了他们浓厚的探索兴趣。",
83
+ "batch_processing_time": 44.49549984931946,
84
+ "audio_length": 8.26,
85
+ "batch_rtf": 0.6930763216404902,
86
+ "output_file": "rtf_test_results/test_007.wav"
87
+ },
88
+ {
89
+ "index": 8,
90
+ "batch": 1,
91
+ "text": "在老师的指导下,青少年们开始动手组装电路,将红绿灯的各个部件连接起来。",
92
+ "batch_processing_time": 44.49549984931946,
93
+ "audio_length": 5.64,
94
+ "batch_rtf": 0.6930763216404902,
95
+ "output_file": "rtf_test_results/test_008.wav"
96
+ },
97
+ {
98
+ "index": 9,
99
+ "batch": 2,
100
+ "text": "他们小心翼翼地调整电路,确保每个部件都正确连接,红灯、绿灯、黄灯依次亮起,仿佛在讲述一个关于交通规则的故事。",
101
+ "batch_processing_time": 18.141484260559082,
102
+ "audio_length": 9.36,
103
+ "batch_rtf": 0.3969690210187983,
104
+ "output_file": "rtf_test_results/test_009.wav"
105
+ },
106
+ {
107
+ "index": 10,
108
+ "batch": 2,
109
+ "text": "实验过程中,青少年们不仅学到了电路知识,还体验到了动手实践的乐趣。",
110
+ "batch_processing_time": 18.141484260559082,
111
+ "audio_length": 5.6,
112
+ "batch_rtf": 0.3969690210187983,
113
+ "output_file": "rtf_test_results/test_010.wav"
114
+ },
115
+ {
116
+ "index": 11,
117
+ "batch": 2,
118
+ "text": "他们纷纷表示,这次实验不仅让他们对科技有了更深的理解,还培养了他们的创新思维和动手能力。",
119
+ "batch_processing_time": 18.141484260559082,
120
+ "audio_length": 6.96,
121
+ "batch_rtf": 0.3969690210187983,
122
+ "output_file": "rtf_test_results/test_011.wav"
123
+ },
124
+ {
125
+ "index": 12,
126
+ "batch": 2,
127
+ "text": "数智技术服务港,让科技触手可及,让创新无处不在。",
128
+ "batch_processing_time": 18.141484260559082,
129
+ "audio_length": 3.88,
130
+ "batch_rtf": 0.3969690210187983,
131
+ "output_file": "rtf_test_results/test_012.wav"
132
+ },
133
+ {
134
+ "index": 13,
135
+ "batch": 2,
136
+ "text": "人工智能技术正在快速发展,为各行各业带来了革命性的变化。",
137
+ "batch_processing_time": 18.141484260559082,
138
+ "audio_length": 4.86,
139
+ "batch_rtf": 0.3969690210187983,
140
+ "output_file": "rtf_test_results/test_013.wav"
141
+ },
142
+ {
143
+ "index": 14,
144
+ "batch": 2,
145
+ "text": "深度学习模型在语音识别、图像处理、自然语言处理等领域取得了突破性进展。",
146
+ "batch_processing_time": 18.141484260559082,
147
+ "audio_length": 4.92,
148
+ "batch_rtf": 0.3969690210187983,
149
+ "output_file": "rtf_test_results/test_014.wav"
150
+ },
151
+ {
152
+ "index": 15,
153
+ "batch": 2,
154
+ "text": "机器学习算法能够从大量数据中学习模式,并做出准确的预测和决策。",
155
+ "batch_processing_time": 18.141484260559082,
156
+ "audio_length": 5.7,
157
+ "batch_rtf": 0.3969690210187983,
158
+ "output_file": "rtf_test_results/test_015.wav"
159
+ },
160
+ {
161
+ "index": 16,
162
+ "batch": 2,
163
+ "text": "神经网络模拟人脑的工作方式,通过多层神经元处理复杂的信息。",
164
+ "batch_processing_time": 18.141484260559082,
165
+ "audio_length": 4.42,
166
+ "batch_rtf": 0.3969690210187983,
167
+ "output_file": "rtf_test_results/test_016.wav"
168
+ },
169
+ {
170
+ "index": 17,
171
+ "batch": 3,
172
+ "text": "计算机视觉技术让机器能够理解和分析图像内容。",
173
+ "batch_processing_time": 7.56734561920166,
174
+ "audio_length": 3.74,
175
+ "batch_rtf": 0.5277088995259177,
176
+ "output_file": "rtf_test_results/test_017.wav"
177
+ },
178
+ {
179
+ "index": 18,
180
+ "batch": 3,
181
+ "text": "自然语言处理技术使计算机能够理解和生成人类语言。",
182
+ "batch_processing_time": 7.56734561920166,
183
+ "audio_length": 3.86,
184
+ "batch_rtf": 0.5277088995259177,
185
+ "output_file": "rtf_test_results/test_018.wav"
186
+ },
187
+ {
188
+ "index": 19,
189
+ "batch": 3,
190
+ "text": "语音合成技术将文本转换为自然的语音输出。",
191
+ "batch_processing_time": 7.56734561920166,
192
+ "audio_length": 3.38,
193
+ "batch_rtf": 0.5277088995259177,
194
+ "output_file": "rtf_test_results/test_019.wav"
195
+ },
196
+ {
197
+ "index": 20,
198
+ "batch": 3,
199
+ "text": "大数据分析帮助企业发现隐藏的模式和趋势。",
200
+ "batch_processing_time": 7.56734561920166,
201
+ "audio_length": 3.36,
202
+ "batch_rtf": 0.5277088995259177,
203
+ "output_file": "rtf_test_results/test_020.wav"
204
+ }
205
+ ]
206
+ }
trained_190k_steps/rtf_test_results/test_001.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acddee462cbe93cfae2db94a0987ab88ed594188ca1bc4ac816c33e91cc9e13d
3
+ size 459564
trained_190k_steps/rtf_test_results/test_002.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6361fee9642fd977e0981276d86d5d58f87100d7309a595035a70731d1671589
3
+ size 177324
trained_190k_steps/rtf_test_results/test_003.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4feb2c3b9a8e330a0300fbaeef3f6c4a8d7adb39ac22104808a83a4f51e57f2
3
+ size 134444
trained_190k_steps/rtf_test_results/test_004.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f3689553a60a8153d5c34c14701905862c79744a10f8e0e1a12866b08121116
3
+ size 466604
trained_190k_steps/rtf_test_results/test_005.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:caa1ab23c8dc026655270e0a8132e6528794f28eab7416d55f7f915608c16e92
3
+ size 147884
trained_190k_steps/rtf_test_results/test_006.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec9ee466a4439eff14acab039c2aa5a9effa4743a0846c7bdcb79d33f5358712
3
+ size 224044
trained_190k_steps/rtf_test_results/test_007.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5ff6890b46d01626477bdc6a9908e0bd9647978d76278374391423728299d77
3
+ size 264364
trained_190k_steps/rtf_test_results/test_008.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b79aacacde6ecc15b85a8c98f9a8fcaac3ad0a57a64ec5a7e8d7e18461468852
3
+ size 180524
trained_190k_steps/rtf_test_results/test_009.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:664f58297326d99c6c4c74601674f825623bb854970ade90e1aa76ace913fa0b
3
+ size 299564
trained_190k_steps/rtf_test_results/test_010.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fe5c8d90a944d990a84dc0488cbc6f5f182f8c92c73487c6e7564956b87da72
3
+ size 179244
trained_190k_steps/rtf_test_results/test_011.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:decd4ce16f819b4f332aeb5abd7eb5920dc09ef1b7760652b59a028657487913
3
+ size 222764
trained_190k_steps/rtf_test_results/test_012.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ad1cb092212b7ef32d1c801a5432383c5e1b5b9ac56df997c8c3089884cd5c9
3
+ size 124204
trained_190k_steps/rtf_test_results/test_013.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40cf22639096c82432678578973352c31e07f662f7f4aace8bffd81ecca3559f
3
+ size 155564
trained_190k_steps/rtf_test_results/test_014.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19b3793fb37bd806ca97ef02463a6c0870cb3e5bb42a6df9815c558ed7a117bb
3
+ size 157484