yangxiaoyu6 commited on
Commit
bfb7c5a
·
1 Parent(s): 57173c7

add new files init from a later checkpoint

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. from_iter_468000/epoch-16-avg-6.pt +3 -0
  2. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  3. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  4. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  5. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  6. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  7. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  8. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  9. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  10. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  11. from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  12. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  13. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  14. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  15. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  16. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  17. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  18. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  19. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  20. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  21. from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  22. from_iter_468000/greedy_search/log-decode-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-27-49 +30 -0
  23. from_iter_468000/greedy_search/log-decode-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-28-20 +28 -0
  24. from_iter_468000/greedy_search/log-decode-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-26-06 +30 -0
  25. from_iter_468000/greedy_search/log-decode-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-26-34 +29 -0
  26. from_iter_468000/greedy_search/log-decode-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-24-24 +28 -0
  27. from_iter_468000/greedy_search/log-decode-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-24-47 +29 -0
  28. from_iter_468000/greedy_search/log-decode-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-22-41 +27 -0
  29. from_iter_468000/greedy_search/log-decode-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-23-01 +30 -0
  30. from_iter_468000/greedy_search/log-decode-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-21-00 +29 -0
  31. from_iter_468000/greedy_search/log-decode-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-21-14 +28 -0
  32. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  33. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  34. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  35. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  36. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  37. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  38. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  39. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  40. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  41. from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  42. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  43. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  44. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  45. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  46. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  47. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  48. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  49. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
  50. from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt +0 -0
from_iter_468000/epoch-16-avg-6.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb5d7e339dda9d20c0dade91c25733fab61308976feb1e634082acb461a4128a
3
+ size 266408001
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/errs-test-other-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/log-decode-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-27-49 ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:27:49,290 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:27:49,290 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:27:49,292 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 3, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '16', 'left_context_frames': '128', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:27:49,293 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:27:49,660 INFO [decode.py:949] Calculating the averaged model over epoch range from 10 (excluded) to 13
6
+ 2024-08-23 00:27:56,195 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:27:56,196 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:27:56,381 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:28:01,442 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:28:01,492 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([4.4349, 3.2697, 4.1474, 3.9908], device='cuda:0')
11
+ 2024-08-23 00:28:44,018 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:28:45,042 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:28:45,109 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.28% [1725 / 52576, 174 ins, 157 del, 1394 sub ]
14
+ 2024-08-23 00:28:45,239 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:28:45,241 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.28 best for test-clean
18
+
19
+ 2024-08-23 00:28:46,648 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:28:47,712 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([2.6659, 2.2333, 1.7476, 2.3375], device='cuda:0')
21
+ 2024-08-23 00:29:10,952 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([4.4806, 3.9500, 4.2244, 4.3414], device='cuda:0')
22
+ 2024-08-23 00:29:27,691 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
23
+ 2024-08-23 00:29:27,792 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
24
+ 2024-08-23 00:29:27,862 INFO [utils.py:641] [test-other-greedy_search] %WER 8.61% [4507 / 52343, 415 ins, 477 del, 3615 sub ]
25
+ 2024-08-23 00:29:27,998 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
26
+ 2024-08-23 00:29:28,000 INFO [decode.py:773]
27
+ For test-other, WER of different settings are:
28
+ greedy_search 8.61 best for test-other
29
+
30
+ 2024-08-23 00:29:28,000 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-28-20 ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:28:20,794 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:28:20,794 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:28:20,796 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 3, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '32', 'left_context_frames': '256', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:28:20,797 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:28:21,165 INFO [decode.py:949] Calculating the averaged model over epoch range from 10 (excluded) to 13
6
+ 2024-08-23 00:28:28,191 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:28:28,192 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:28:28,385 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:28:33,200 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:28:38,096 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.6084, 2.7137, 3.1132, 3.2383], device='cuda:0')
11
+ 2024-08-23 00:29:17,931 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:29:18,333 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:29:18,395 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.08% [1617 / 52576, 167 ins, 134 del, 1316 sub ]
14
+ 2024-08-23 00:29:18,521 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:29:18,523 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.08 best for test-clean
18
+
19
+ 2024-08-23 00:29:19,806 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:30:00,675 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
21
+ 2024-08-23 00:30:00,790 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
22
+ 2024-08-23 00:30:00,860 INFO [utils.py:641] [test-other-greedy_search] %WER 7.87% [4118 / 52343, 376 ins, 392 del, 3350 sub ]
23
+ 2024-08-23 00:30:00,992 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
24
+ 2024-08-23 00:30:00,994 INFO [decode.py:773]
25
+ For test-other, WER of different settings are:
26
+ greedy_search 7.87 best for test-other
27
+
28
+ 2024-08-23 00:30:00,994 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-26-06 ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:26:06,874 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:26:06,875 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:26:06,877 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 4, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '16', 'left_context_frames': '128', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:26:06,877 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:26:07,248 INFO [decode.py:949] Calculating the averaged model over epoch range from 9 (excluded) to 13
6
+ 2024-08-23 00:26:14,149 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:26:14,149 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:26:14,318 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:26:19,061 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:26:56,993 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.0461, 2.4794, 2.0142, 2.5891], device='cuda:0')
11
+ 2024-08-23 00:27:01,209 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:27:02,255 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:27:02,323 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.29% [1728 / 52576, 174 ins, 159 del, 1395 sub ]
14
+ 2024-08-23 00:27:02,451 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:27:02,453 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.29 best for test-clean
18
+
19
+ 2024-08-23 00:27:03,762 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:27:19,596 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.3698, 3.1105, 3.1383, 3.2248, 2.7247, 3.0833, 3.1983, 3.2349],
21
+ device='cuda:0')
22
+ 2024-08-23 00:27:44,721 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
23
+ 2024-08-23 00:27:44,810 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
24
+ 2024-08-23 00:27:44,877 INFO [utils.py:641] [test-other-greedy_search] %WER 8.62% [4511 / 52343, 411 ins, 484 del, 3616 sub ]
25
+ 2024-08-23 00:27:45,009 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
26
+ 2024-08-23 00:27:45,014 INFO [decode.py:773]
27
+ For test-other, WER of different settings are:
28
+ greedy_search 8.62 best for test-other
29
+
30
+ 2024-08-23 00:27:45,014 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-26-34 ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:26:34,058 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:26:34,059 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:26:34,060 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 4, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '32', 'left_context_frames': '256', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:26:34,061 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:26:34,452 INFO [decode.py:949] Calculating the averaged model over epoch range from 9 (excluded) to 13
6
+ 2024-08-23 00:26:41,575 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:26:41,575 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:26:41,740 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:26:47,278 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:27:26,454 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([4.4295, 4.2030, 3.7386, 3.4471], device='cuda:0')
11
+ 2024-08-23 00:27:32,883 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:27:33,907 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:27:33,976 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.12% [1642 / 52576, 172 ins, 134 del, 1336 sub ]
14
+ 2024-08-23 00:27:34,106 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:27:34,109 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.12 best for test-clean
18
+
19
+ 2024-08-23 00:27:35,492 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:28:14,094 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([4.1769, 3.9642, 3.4593, 3.3649], device='cuda:0')
21
+ 2024-08-23 00:28:16,400 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
22
+ 2024-08-23 00:28:16,507 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
23
+ 2024-08-23 00:28:16,576 INFO [utils.py:641] [test-other-greedy_search] %WER 7.90% [4136 / 52343, 375 ins, 398 del, 3363 sub ]
24
+ 2024-08-23 00:28:16,710 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
25
+ 2024-08-23 00:28:16,713 INFO [decode.py:773]
26
+ For test-other, WER of different settings are:
27
+ greedy_search 7.9 best for test-other
28
+
29
+ 2024-08-23 00:28:16,713 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-24-24 ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:24:24,456 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:24:24,457 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:24:24,459 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 5, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '16', 'left_context_frames': '128', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:24:24,459 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:24:24,827 INFO [decode.py:949] Calculating the averaged model over epoch range from 8 (excluded) to 13
6
+ 2024-08-23 00:24:31,735 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:24:31,736 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:24:31,914 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:24:36,741 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:25:17,890 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([4.4859, 3.9571, 4.2021, 4.3285], device='cuda:0')
11
+ 2024-08-23 00:25:18,350 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:25:19,397 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:25:19,466 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.28% [1726 / 52576, 169 ins, 160 del, 1397 sub ]
14
+ 2024-08-23 00:25:19,595 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:25:19,598 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.28 best for test-clean
18
+
19
+ 2024-08-23 00:25:20,545 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:26:02,328 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
21
+ 2024-08-23 00:26:02,412 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
22
+ 2024-08-23 00:26:02,482 INFO [utils.py:641] [test-other-greedy_search] %WER 8.62% [4510 / 52343, 420 ins, 473 del, 3617 sub ]
23
+ 2024-08-23 00:26:02,616 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
24
+ 2024-08-23 00:26:02,619 INFO [decode.py:773]
25
+ For test-other, WER of different settings are:
26
+ greedy_search 8.62 best for test-other
27
+
28
+ 2024-08-23 00:26:02,620 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-24-47 ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:24:47,740 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:24:47,741 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:24:47,743 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 5, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '32', 'left_context_frames': '256', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:24:47,743 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:24:48,120 INFO [decode.py:949] Calculating the averaged model over epoch range from 8 (excluded) to 13
6
+ 2024-08-23 00:24:55,032 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:24:55,033 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:24:55,203 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:25:00,515 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:25:04,206 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([1.9642, 3.0172, 1.9752, 1.9577], device='cuda:0')
11
+ 2024-08-23 00:25:45,723 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:25:46,090 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:25:46,158 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.14% [1649 / 52576, 172 ins, 142 del, 1335 sub ]
14
+ 2024-08-23 00:25:46,286 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:25:46,288 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.14 best for test-clean
18
+
19
+ 2024-08-23 00:25:47,499 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:26:23,437 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([2.9858, 3.2872, 3.9612, 3.7919], device='cuda:0')
21
+ 2024-08-23 00:26:29,630 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
22
+ 2024-08-23 00:26:29,740 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
23
+ 2024-08-23 00:26:29,812 INFO [utils.py:641] [test-other-greedy_search] %WER 7.90% [4137 / 52343, 374 ins, 404 del, 3359 sub ]
24
+ 2024-08-23 00:26:29,947 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
25
+ 2024-08-23 00:26:29,949 INFO [decode.py:773]
26
+ For test-other, WER of different settings are:
27
+ greedy_search 7.9 best for test-other
28
+
29
+ 2024-08-23 00:26:29,949 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-22-41 ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:22:41,267 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:22:41,267 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:22:41,269 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 6, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '16', 'left_context_frames': '128', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:22:41,270 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:22:41,635 INFO [decode.py:949] Calculating the averaged model over epoch range from 7 (excluded) to 13
6
+ 2024-08-23 00:22:48,369 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:22:48,369 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:22:48,538 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:22:52,830 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:23:35,600 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
11
+ 2024-08-23 00:23:36,692 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
12
+ 2024-08-23 00:23:36,762 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.26% [1715 / 52576, 167 ins, 155 del, 1393 sub ]
13
+ 2024-08-23 00:23:36,891 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
14
+ 2024-08-23 00:23:36,893 INFO [decode.py:773]
15
+ For test-clean, WER of different settings are:
16
+ greedy_search 3.26 best for test-clean
17
+
18
+ 2024-08-23 00:23:38,037 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
19
+ 2024-08-23 00:24:20,128 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
20
+ 2024-08-23 00:24:20,217 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
21
+ 2024-08-23 00:24:20,287 INFO [utils.py:641] [test-other-greedy_search] %WER 8.61% [4509 / 52343, 416 ins, 469 del, 3624 sub ]
22
+ 2024-08-23 00:24:20,420 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
23
+ 2024-08-23 00:24:20,423 INFO [decode.py:773]
24
+ For test-other, WER of different settings are:
25
+ greedy_search 8.61 best for test-other
26
+
27
+ 2024-08-23 00:24:20,423 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-23-01 ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:23:01,107 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:23:01,107 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:23:01,109 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 6, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '32', 'left_context_frames': '256', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:23:01,109 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:23:01,479 INFO [decode.py:949] Calculating the averaged model over epoch range from 7 (excluded) to 13
6
+ 2024-08-23 00:23:08,086 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:23:08,087 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:23:08,356 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:23:13,563 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:23:58,756 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
11
+ 2024-08-23 00:23:59,843 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
12
+ 2024-08-23 00:23:59,914 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.14% [1653 / 52576, 171 ins, 143 del, 1339 sub ]
13
+ 2024-08-23 00:24:00,045 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
14
+ 2024-08-23 00:24:00,047 INFO [decode.py:773]
15
+ For test-clean, WER of different settings are:
16
+ greedy_search 3.14 best for test-clean
17
+
18
+ 2024-08-23 00:24:01,214 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
19
+ 2024-08-23 00:24:20,390 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.8945, 3.7153, 3.3283, 3.2425], device='cuda:0')
20
+ 2024-08-23 00:24:25,058 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([4.5085, 4.0007, 4.1696, 4.3249], device='cuda:0')
21
+ 2024-08-23 00:24:35,783 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.5111, 3.3380, 2.8093, 3.1196], device='cuda:0')
22
+ 2024-08-23 00:24:43,097 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
23
+ 2024-08-23 00:24:43,206 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
24
+ 2024-08-23 00:24:43,285 INFO [utils.py:641] [test-other-greedy_search] %WER 7.89% [4129 / 52343, 376 ins, 398 del, 3355 sub ]
25
+ 2024-08-23 00:24:43,478 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
26
+ 2024-08-23 00:24:43,481 INFO [decode.py:773]
27
+ For test-other, WER of different settings are:
28
+ greedy_search 7.89 best for test-other
29
+
30
+ 2024-08-23 00:24:43,481 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-21-00 ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:21:00,949 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:21:00,949 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:21:00,951 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 7, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '16', 'left_context_frames': '128', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:21:00,951 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:21:01,324 INFO [decode.py:949] Calculating the averaged model over epoch range from 6 (excluded) to 13
6
+ 2024-08-23 00:21:07,357 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:21:07,357 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:21:07,531 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:21:12,057 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:21:45,145 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.8526, 3.6594, 3.2563, 3.0291], device='cuda:0')
11
+ 2024-08-23 00:21:54,103 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:21:55,082 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:21:55,148 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.27% [1719 / 52576, 167 ins, 149 del, 1403 sub ]
14
+ 2024-08-23 00:21:55,278 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:21:55,281 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.27 best for test-clean
18
+
19
+ 2024-08-23 00:21:56,580 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:22:33,798 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([3.1266, 2.5931, 2.0217, 2.6864], device='cuda:0')
21
+ 2024-08-23 00:22:37,009 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
22
+ 2024-08-23 00:22:37,098 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
23
+ 2024-08-23 00:22:37,168 INFO [utils.py:641] [test-other-greedy_search] %WER 8.62% [4511 / 52343, 409 ins, 482 del, 3620 sub ]
24
+ 2024-08-23 00:22:37,301 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt
25
+ 2024-08-23 00:22:37,304 INFO [decode.py:773]
26
+ For test-other, WER of different settings are:
27
+ greedy_search 8.62 best for test-other
28
+
29
+ 2024-08-23 00:22:37,304 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/log-decode-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model-2024-08-23-00-21-14 ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2024-08-23 00:21:14,879 INFO [decode.py:855] Decoding started
2
+ 2024-08-23 00:21:14,879 INFO [decode.py:861] Device: cuda:0
3
+ 2024-08-23 00:21:14,881 INFO [decode.py:879] {'best_train_loss': inf, 'best_valid_loss': inf, 'best_train_epoch': -1, 'best_valid_epoch': -1, 'batch_idx_train': 0, 'log_interval': 50, 'reset_interval': 200, 'valid_interval': 3000, 'feature_dim': 80, 'subsampling_factor': 4, 'warm_step': 2000, 'env_info': {'k2-version': '1.24.3', 'k2-build-type': 'Release', 'k2-with-cuda': True, 'k2-git-sha1': 'e400fa3b456faf8afe0ee5bfe572946b4921a3db', 'k2-git-date': 'Sat Jul 15 04:21:50 2023', 'lhotse-version': '1.16.0', 'torch-version': '2.0.1+cu117', 'torch-cuda-available': True, 'torch-cuda-version': '11.7', 'python-version': '3.9', 'icefall-git-branch': 'multi_KD_with_wenet', 'icefall-git-sha1': 'a932ad6d-clean', 'icefall-git-date': 'Wed Aug 21 18:06:09 2024', 'icefall-path': '/xy/mnt/yangxiaoyu/workspace/icefall_multi_KD', 'k2-path': '/root/anaconda3/lib/python3.9/site-packages/k2/__init__.py', 'lhotse-path': '/root/anaconda3/lib/python3.9/site-packages/lhotse/__init__.py', 'hostname': 'NGK_xiaoyu'}, 'epoch': 13, 'iter': 0, 'avg': 7, 'use_averaged_model': True, 'exp_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further'), 'use_bpe': True, 'bpe_model': 'data/lang_bpe_500/bpe.model', 'lang_dir': PosixPath('data/lang_char'), 'decoding_method': 'greedy_search', 'beam_size': 4, 'beam': 20.0, 'ngram_lm_scale': 0.01, 'max_contexts': 8, 'max_states': 64, 'context_size': 2, 'max_sym_per_frame': 1, 'num_paths': 200, 'nbest_scale': 0.5, 'use_shallow_fusion': False, 'lm_type': 'rnn', 'lm_scale': 0.3, 'tokens_ngram': 2, 'backoff_id': 500, 'context_score': 2, 'context_file': '', 'num_encoder_layers': '2,2,3,4,3,2', 'downsampling_factor': '1,2,4,8,4,2', 'feedforward_dim': '512,768,1024,1536,1024,768', 'num_heads': '4,4,4,8,4,4', 'encoder_dim': '192,256,384,512,384,256', 'query_head_dim': '32', 'value_head_dim': '12', 'pos_head_dim': '4', 'pos_dim': 48, 'encoder_unmasked_dim': '192,192,256,256,256,192', 'cnn_module_kernel': '31,31,15,15,15,31', 'decoder_dim': 512, 'joiner_dim': 512, 'causal': True, 'chunk_size': '32', 'left_context_frames': '256', 'use_transducer': True, 'use_ctc': False, 'do_sv': False, 'speaker_input_idx': -1, 'do_audio_tagging': False, 'audio_tagging_KD': False, 'use_encoder_projection': True, 'encoder_projection_dim': 2560, 'freeze_encoder': False, 'freezing_encoder_layer_index': '-1', 'freeze_encoder_steps': -1, 'sync_other_tasks': False, 'encoder_lr_scale': 1.0, 'beats_lr_scale': 1.0, 'ecapa_lr_scale': 1.0, 'beats_label': False, 'full_libri': True, 'mini_libri': False, 'manifest_dir': PosixPath('data/fbank'), 'max_duration': 500, 'bucketing_sampler': True, 'num_buckets': 30, 'concatenate_cuts': False, 'duration_factor': 1.0, 'gap': 1.0, 'on_the_fly_feats': False, 'shuffle': True, 'drop_last': True, 'return_cuts': True, 'num_workers': 2, 'enable_spec_aug': True, 'spec_aug_time_warp_factor': 80, 'enable_musan': True, 'input_strategy': 'PrecomputedFeatures', 'drop_features': False, 'return_audio': False, 'use_beats': True, 'use_ecapa': True, 'use_whisper': True, 'use_librispeech': True, 'use_wenetspeech': False, 'use_voxceleb': False, 'voxceleb_subset': 'vox1', 'use_audioset': True, 'audioset_subset': 'balanced', 'whisper_version': 'small.en', 'lm_vocab_size': 500, 'lm_epoch': 7, 'lm_avg': 1, 'lm_exp_dir': None, 'rnn_lm_embedding_dim': 2048, 'rnn_lm_hidden_dim': 2048, 'rnn_lm_num_layers': 3, 'rnn_lm_tie_weights': True, 'transformer_lm_exp_dir': None, 'transformer_lm_dim_feedforward': 2048, 'transformer_lm_encoder_dim': 768, 'transformer_lm_embedding_dim': 768, 'transformer_lm_nhead': 8, 'transformer_lm_num_layers': 16, 'transformer_lm_tie_weights': True, 'res_dir': PosixPath('multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search'), 'has_contexts': False, 'suffix': 'epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model', 'blank_id': 0, 'vocab_size': 500}
4
+ 2024-08-23 00:21:14,881 INFO [decode.py:881] About to create model
5
+ 2024-08-23 00:21:15,344 INFO [decode.py:949] Calculating the averaged model over epoch range from 6 (excluded) to 13
6
+ 2024-08-23 00:21:22,385 INFO [decode.py:1044] Number of model parameters: 69496787
7
+ 2024-08-23 00:21:22,386 INFO [asr_datamodule.py:565] About to get test-clean cuts
8
+ 2024-08-23 00:21:22,555 INFO [asr_datamodule.py:572] About to get test-other cuts
9
+ 2024-08-23 00:21:26,865 INFO [decode.py:728] batch 0/?, cuts processed until now is 36
10
+ 2024-08-23 00:22:10,016 INFO [zipformer.py:1877] name=None, attn_weights_entropy = tensor([2.8180, 1.5594, 2.7805, 2.5478], device='cuda:0')
11
+ 2024-08-23 00:22:11,794 INFO [decode.py:728] batch 50/?, cuts processed until now is 2610
12
+ 2024-08-23 00:22:12,921 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
13
+ 2024-08-23 00:22:12,999 INFO [utils.py:641] [test-clean-greedy_search] %WER 3.14% [1650 / 52576, 171 ins, 144 del, 1335 sub ]
14
+ 2024-08-23 00:22:13,128 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-clean-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
15
+ 2024-08-23 00:22:13,131 INFO [decode.py:773]
16
+ For test-clean, WER of different settings are:
17
+ greedy_search 3.14 best for test-clean
18
+
19
+ 2024-08-23 00:22:14,510 INFO [decode.py:728] batch 0/?, cuts processed until now is 43
20
+ 2024-08-23 00:22:56,304 INFO [decode.py:728] batch 50/?, cuts processed until now is 2939
21
+ 2024-08-23 00:22:56,401 INFO [decode.py:744] The transcripts are stored in multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
22
+ 2024-08-23 00:22:56,472 INFO [utils.py:641] [test-other-greedy_search] %WER 7.94% [4155 / 52343, 374 ins, 408 del, 3373 sub ]
23
+ 2024-08-23 00:22:56,604 INFO [decode.py:757] Wrote detailed error stats to multi_KD/exp_finetune_asr_libri1x6_do_AT1_unbalanced_KD_scale2.0_do_SV0_only_vox2_scale10.0_freeze_12000steps_encoder_lr_scale0.2_freeze_3layers_ecapa_lr_scale0.2_init_3_tasks_delta6_pretrain_avg_musan0_sync_task_md1000_amp_bf16_further/greedy_search/errs-test-other-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt
24
+ 2024-08-23 00:22:56,607 INFO [decode.py:773]
25
+ For test-other, WER of different settings are:
26
+ greedy_search 7.94 best for test-other
27
+
28
+ 2024-08-23 00:22:56,607 INFO [decode.py:1100] Done!
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-clean-greedy_search-epoch-13-avg-7-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-3-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-3-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-4-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-4-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-5-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-5-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-6-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-6-chunk-32-left-context-256-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff
 
from_iter_468000/greedy_search/recogs-test-other-greedy_search-epoch-13-avg-7-chunk-16-left-context-128-context-2-max-sym-per-frame-1-use-averaged-model.txt ADDED
The diff for this file is too large to render. See raw diff