SupYumm committed on
Commit
3ce02a4
·
verified ·
1 Parent(s): 760db65

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +60 -3
README.md CHANGED
@@ -1,3 +1,60 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ ## Model Details
5
+
6
+ * Model type:
7
+ RWKV7-0.4B-wavlmLarge-ENASR-demo is an open-source English ASR model trained using the RWKV7 architecture and a WavLM-Large encoder.
8
+
9
+ * Model date: March 2025
10
+
11
+ * Paper or resources for more information: https://github.com/JL-er/WorldRWKV
12
+
13
+ * Where to send questions or comments about the model: https://github.com/JL-er/WorldRWKV/issues
14
+
15
+ ## Training Time
16
+ 1 hour on 4 × 4090 GPUs
17
+
18
+
19
+
20
+
21
+ ## Inference
22
+
23
+ ```python
24
+ from infer.worldmodel import Worldinfer
25
+ import librosa
26
+ import numpy as np
27
+ import soundfile as sf
28
+
29
+ # 模型路径
30
+ llm_path = '/home/rwkv/model/rwkv7-0.4b-wavlmlarge-enasr-demo.pth'
31
+ encoder_path = '/home/rwkv/model/facebookhubert-large-ls960-ft'
32
+ encoder_type = 'speech'
33
+
34
+ # 初始化模型
35
+ model = Worldinfer(model_path=llm_path, encoder_type=encoder_type, encoder_path=encoder_path)
36
+
37
+ # 加载音频文件
38
+ audio_path = './test_audio.wav'
39
+ audio_data, sample_rate = sf.read(audio_path)
40
+
41
+ # 确保音频是单声道
42
+ if len(audio_data.shape) > 1:
43
+     audio_data = audio_data[:, 0]
44
+
45
+ # 检查并转换音频数据为浮点数格式
46
+ if audio_data.dtype != np.float32 and audio_data.dtype != np.float64:
47
+     audio_data = audio_data.astype(np.float32) / np.iinfo(audio_data.dtype).max
48
+
49
+ # 重采样到 16000 Hz
50
+ resampled_audio = librosa.resample(audio_data, orig_sr=sample_rate, target_sr=16000)
51
+
52
+ # 构造提示文本
53
+ text = '\x16Assistant:'
54
+
55
+ # 生成结果
56
+ result, _ = model.generate(text, resampled_audio)
57
+
58
+ print(result)
59
+
60
+ ```