Update README.md
Browse files
README.md
CHANGED
@@ -1,3 +1,60 @@
|
|
1 |
-
---
|
2 |
-
license: apache-2.0
|
3 |
-
---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: apache-2.0
|
3 |
+
---
|
4 |
+
**Model Details**
|
5 |
+
|
6 |
+
* Model type:
|
7 |
+
RWKV7-0.4B-wavlmLarge-ENASR-demo is an open-source English ASR model trained using the RWKV7 architecture and the WavLM-Large encoder.
|
8 |
+
|
9 |
+
* Model date: March 2025
|
10 |
+
|
11 |
+
* Paper or resources for more information: https://github.com/JL-er/WorldRWKV
|
12 |
+
|
13 |
+
* Where to send questions or comments about the model: https://github.com/JL-er/WorldRWKV/issues
|
14 |
+
|
15 |
+
**Training Time**
|
16 |
+
1 hour on 4 × 4090 GPUs
|
17 |
+
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
**Inference**
|
22 |
+
|
23 |
+
```python
|
24 |
+
from infer.worldmodel import Worldinfer
|
25 |
+
import librosa
|
26 |
+
import numpy as np
|
27 |
+
import soundfile as sf
|
28 |
+
|
29 |
+
# Model paths
|
30 |
+
llm_path = '/home/rwkv/model/rwkv7-0.4b-wavlmlarge-enasr-demo.pth'
|
31 |
+
encoder_path = '/home/rwkv/model/facebookhubert-large-ls960-ft'
|
32 |
+
encoder_type = 'speech'
|
33 |
+
|
34 |
+
# Initialize the model
|
35 |
+
model = Worldinfer(model_path=llm_path, encoder_type=encoder_type, encoder_path=encoder_path)
|
36 |
+
|
37 |
+
# Load the audio file
|
38 |
+
audio_path = './test_audio.wav'
|
39 |
+
audio_data, sample_rate = sf.read(audio_path)
|
40 |
+
|
41 |
+
# Ensure the audio is mono
|
42 |
+
if len(audio_data.shape) > 1:
|
43 |
+
    audio_data = audio_data[:, 0]
|
44 |
+
|
45 |
+
# Check the audio data and convert it to floating-point format
|
46 |
+
if audio_data.dtype != np.float32 and audio_data.dtype != np.float64:
|
47 |
+
    audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
|
48 |
+
|
49 |
+
# Resample to 16000 Hz
|
50 |
+
resampled_audio = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
|
51 |
+
|
52 |
+
# Build the prompt text
|
53 |
+
text = '\x16Assistant:'
|
54 |
+
|
55 |
+
# Generate the result
|
56 |
+
result, _ = model.generate(text, resampled_audio)
|
57 |
+
|
58 |
+
print(result)
|
59 |
+
|
60 |
+
```
|