whr-a committed on
Commit
6236087
·
verified ·
1 Parent(s): 66ce1aa

Upload 28 files

Browse files
Files changed (28) hide show
  1. exp/codec_train_dac_large_v1.4_raw_fs16k/942epoch_test.pth +3 -0
  2. exp/codec_train_dac_large_v1.4_raw_fs16k/config.yaml +270 -0
  3. exp/codec_train_dac_large_v1.4_raw_fs16k/images/adv_loss.png +0 -0
  4. exp/codec_train_dac_large_v1.4_raw_fs16k/images/codec_commit_loss.png +0 -0
  5. exp/codec_train_dac_large_v1.4_raw_fs16k/images/codec_loss.png +0 -0
  6. exp/codec_train_dac_large_v1.4_raw_fs16k/images/codec_quantization_loss.png +0 -0
  7. exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_backward_time.png +0 -0
  8. exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_forward_time.png +0 -0
  9. exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_loss.png +0 -0
  10. exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_optim_step_time.png +0 -0
  11. exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_train_time.png +0 -0
  12. exp/codec_train_dac_large_v1.4_raw_fs16k/images/fake_loss.png +0 -0
  13. exp/codec_train_dac_large_v1.4_raw_fs16k/images/feat_match_loss.png +0 -0
  14. exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_backward_time.png +0 -0
  15. exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_forward_time.png +0 -0
  16. exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_optim_step_time.png +0 -0
  17. exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_train_time.png +0 -0
  18. exp/codec_train_dac_large_v1.4_raw_fs16k/images/gpu_max_cached_mem_GB.png +0 -0
  19. exp/codec_train_dac_large_v1.4_raw_fs16k/images/iter_time.png +0 -0
  20. exp/codec_train_dac_large_v1.4_raw_fs16k/images/loss.png +0 -0
  21. exp/codec_train_dac_large_v1.4_raw_fs16k/images/mel_loss.png +0 -0
  22. exp/codec_train_dac_large_v1.4_raw_fs16k/images/mel_loss_real.png +0 -0
  23. exp/codec_train_dac_large_v1.4_raw_fs16k/images/optim0_lr0.png +0 -0
  24. exp/codec_train_dac_large_v1.4_raw_fs16k/images/optim1_lr0.png +0 -0
  25. exp/codec_train_dac_large_v1.4_raw_fs16k/images/real_loss.png +0 -0
  26. exp/codec_train_dac_large_v1.4_raw_fs16k/images/reconstruct_loss.png +0 -0
  27. exp/codec_train_dac_large_v1.4_raw_fs16k/images/train_time.png +0 -0
  28. meta.yaml +8 -0
exp/codec_train_dac_large_v1.4_raw_fs16k/942epoch_test.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66ebf0bb3ac4573154e41ecf4c35883d54d028d7f8efa0caa0b1a968456ff2c3
3
+ size 283100815
exp/codec_train_dac_large_v1.4_raw_fs16k/config.yaml ADDED
@@ -0,0 +1,270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_dac_large_v1.4.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: chunk
7
+ valid_iterator_type: null
8
+ output_dir: exp/codec_train_dac_large_v1.4_raw_fs16k
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 1
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: 4
16
+ dist_rank: 0
17
+ local_rank: 0
18
+ dist_master_addr: localhost
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: true
22
+ unused_parameters: true
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ gradient_as_bucket_view: true
27
+ ddp_comm_hook: null
28
+ cudnn_enabled: true
29
+ cudnn_benchmark: false
30
+ cudnn_deterministic: false
31
+ use_tf32: false
32
+ collect_stats: false
33
+ write_collected_feats: false
34
+ max_epoch: 3600
35
+ patience: null
36
+ val_scheduler_criterion:
37
+ - valid
38
+ - loss
39
+ early_stopping_criterion:
40
+ - valid
41
+ - loss
42
+ - min
43
+ best_model_criterion:
44
+ - - valid
45
+ - mel_loss
46
+ - min
47
+ - - train
48
+ - mel_loss
49
+ - min
50
+ - - train
51
+ - total_count
52
+ - max
53
+ keep_nbest_models: 5
54
+ nbest_averaging_interval: 0
55
+ grad_clip: -1
56
+ grad_clip_type: 2.0
57
+ grad_noise: false
58
+ accum_grad: 1
59
+ no_forward_run: false
60
+ resume: true
61
+ train_dtype: float32
62
+ use_amp: false
63
+ log_interval: 50
64
+ use_matplotlib: true
65
+ use_tensorboard: true
66
+ create_graph_in_tensorboard: false
67
+ use_wandb: false
68
+ wandb_project: null
69
+ wandb_id: null
70
+ wandb_entity: null
71
+ wandb_name: null
72
+ wandb_model_log_interval: -1
73
+ detect_anomaly: false
74
+ use_adapter: false
75
+ adapter: lora
76
+ save_strategy: all
77
+ adapter_conf: {}
78
+ pretrain_path: null
79
+ init_param: []
80
+ ignore_init_mismatch: false
81
+ freeze_param: []
82
+ num_iters_per_epoch: 500
83
+ batch_size: 64
84
+ valid_batch_size: null
85
+ batch_bins: 1000000
86
+ valid_batch_bins: null
87
+ category_sample_size: 10
88
+ train_shape_file:
89
+ - exp/codec_stats_raw/train/audio_shape
90
+ valid_shape_file:
91
+ - exp/codec_stats_raw/valid/audio_shape
92
+ batch_type: unsorted
93
+ valid_batch_type: null
94
+ fold_length:
95
+ - 256000
96
+ sort_in_batch: descending
97
+ shuffle_within_batch: false
98
+ sort_batch: descending
99
+ multiple_iterator: false
100
+ chunk_length: 32000
101
+ chunk_shift_ratio: 0.5
102
+ num_cache_chunks: 256
103
+ chunk_excluded_key_prefixes: []
104
+ chunk_default_fs: null
105
+ chunk_max_abs_length: null
106
+ chunk_discard_short_samples: true
107
+ train_data_path_and_name_and_type:
108
+ - - dump/raw/train/wav.scp
109
+ - audio
110
+ - sound
111
+ valid_data_path_and_name_and_type:
112
+ - - dump/raw/dev/wav.scp
113
+ - audio
114
+ - sound
115
+ multi_task_dataset: false
116
+ allow_variable_data_keys: false
117
+ max_cache_size: 0.0
118
+ max_cache_fd: 32
119
+ allow_multi_rates: false
120
+ valid_max_cache_size: null
121
+ exclude_weight_decay: false
122
+ exclude_weight_decay_conf: {}
123
+ optim: adamw
124
+ optim_conf:
125
+ lr: 0.0002
126
+ betas:
127
+ - 0.5
128
+ - 0.9
129
+ eps: 1.0e-09
130
+ weight_decay: 0.0
131
+ scheduler: exponentiallr
132
+ scheduler_conf:
133
+ gamma: 0.999875
134
+ optim2: adamw
135
+ optim2_conf:
136
+ lr: 0.0002
137
+ betas:
138
+ - 0.5
139
+ - 0.9
140
+ eps: 1.0e-09
141
+ weight_decay: 0.0
142
+ scheduler2: exponentiallr
143
+ scheduler2_conf:
144
+ gamma: 0.999875
145
+ generator_first: true
146
+ skip_discriminator_prob: 0.0
147
+ model_conf: {}
148
+ use_preprocessor: true
149
+ codec: dac
150
+ codec_conf:
151
+ sampling_rate: 16000
152
+ generator_params:
153
+ hidden_dim: 512
154
+ codebook_dim: 512
155
+ encdec_channels: 1
156
+ encdec_n_filters: 32
157
+ encdec_n_residual_layers: 3
158
+ encdec_ratios:
159
+ - 8
160
+ - 5
161
+ - 4
162
+ - 2
163
+ encdec_activation: Snake
164
+ encdec_norm: weight_norm
165
+ encdec_kernel_size: 7
166
+ encdec_residual_kernel_size: 7
167
+ encdec_last_kernel_size: 7
168
+ encdec_dilation_base: 2
169
+ encdec_causal: false
170
+ encdec_pad_mode: reflect
171
+ encdec_true_skip: false
172
+ encdec_compress: 2
173
+ encdec_lstm: 2
174
+ decoder_trim_right_ratio: 1.0
175
+ decoder_final_activation: null
176
+ decoder_final_activation_params: null
177
+ quantizer_n_q: 8
178
+ quantizer_bins: 1024
179
+ quantizer_decay: 0.99
180
+ quantizer_kmeans_init: true
181
+ quantizer_kmeans_iters: 50
182
+ quantizer_threshold_ema_dead_code: 2
183
+ quantizer_target_bandwidth:
184
+ - 0.5
185
+ - 1
186
+ - 2
187
+ - 4
188
+ quantizer_dropout: true
189
+ sample_rate: 16000
190
+ discriminator_params:
191
+ msmpmb_discriminator_params:
192
+ rates: []
193
+ sample_rate: 16000
194
+ fft_sizes:
195
+ - 2048
196
+ - 1024
197
+ - 512
198
+ periods:
199
+ - 2
200
+ - 3
201
+ - 5
202
+ - 7
203
+ - 11
204
+ period_discriminator_params:
205
+ in_channels: 1
206
+ out_channels: 1
207
+ kernel_sizes:
208
+ - 5
209
+ - 3
210
+ channels: 32
211
+ downsample_scales:
212
+ - 3
213
+ - 3
214
+ - 3
215
+ - 3
216
+ - 1
217
+ max_downsample_channels: 1024
218
+ bias: true
219
+ nonlinear_activation: LeakyReLU
220
+ nonlinear_activation_params:
221
+ negative_slope: 0.1
222
+ use_weight_norm: true
223
+ use_spectral_norm: false
224
+ band_discriminator_params:
225
+ hop_factor: 0.25
226
+ sample_rate: 24000
227
+ bands:
228
+ - - 0.0
229
+ - 0.1
230
+ - - 0.1
231
+ - 0.25
232
+ - - 0.25
233
+ - 0.5
234
+ - - 0.5
235
+ - 0.75
236
+ - - 0.75
237
+ - 1.0
238
+ channel: 32
239
+ generator_adv_loss_params:
240
+ average_by_discriminators: false
241
+ loss_type: mse
242
+ discriminator_adv_loss_params:
243
+ average_by_discriminators: false
244
+ loss_type: mse
245
+ use_feat_match_loss: true
246
+ feat_match_loss_params:
247
+ average_by_discriminators: false
248
+ average_by_layers: false
249
+ include_final_outputs: true
250
+ use_mel_loss: true
251
+ mel_loss_params:
252
+ range_start: 6
253
+ range_end: 11
254
+ window: hann
255
+ n_mels: 80
256
+ fmin: 0
257
+ fmax: null
258
+ log_base: null
259
+ fs: 16000
260
+ lambda_quantization: 0.25
261
+ lambda_commit: 1.0
262
+ lambda_reconstruct: 1.0
263
+ lambda_adv: 1.0
264
+ lambda_mel: 45.0
265
+ lambda_feat_match: 2.0
266
+ cache_generator_outputs: true
267
+ required:
268
+ - output_dir
269
+ version: '202412'
270
+ distributed: true
exp/codec_train_dac_large_v1.4_raw_fs16k/images/adv_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/codec_commit_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/codec_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/codec_quantization_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_backward_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_forward_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_optim_step_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/discriminator_train_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/fake_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/feat_match_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_backward_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_forward_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_optim_step_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/generator_train_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/gpu_max_cached_mem_GB.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/iter_time.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/mel_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/mel_loss_real.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/optim0_lr0.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/optim1_lr0.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/real_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/reconstruct_loss.png ADDED
exp/codec_train_dac_large_v1.4_raw_fs16k/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202412'
2
+ files:
3
+ model_file: exp/codec_train_dac_large_v1.4_raw_fs16k/942epoch_test.pth
4
+ python: "3.9.21 (main, Dec 11 2024, 16:24:11) \n[GCC 11.2.0]"
5
+ timestamp: 1743015824.723491
6
+ torch: 2.3.0
7
+ yaml_files:
8
+ train_config: exp/codec_train_dac_large_v1.4_raw_fs16k/config.yaml