# @package _group_

common:
  fp16: true
  log_format: json
  log_interval: 200
  tensorboard_logdir: tb
  min_loss_scale: 1e-6
  user_dir: /private/home/abaevski/fairseq-py/examples/data2vec

checkpoint:
  save_interval: 1
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true

task:
  _name: audio_pretraining
  data: /private/home/abaevski/data/audioset
  max_sample_size: 320000
  min_sample_size: 32000
  normalize: true

dataset:
  num_workers: 6
  max_tokens: 3400000
  skip_invalid_size_inputs_valid_test: true
  validate_interval: 5
  required_batch_size_multiple: 1
  disable_validation: true

distributed_training:
  distributed_world_size: 24
  ddp_backend: legacy_ddp

criterion:
  _name: model
  log_keys:
    - ema_decay
    - target_var
    - pred_var
#    - avg_self_attn
#    - weights

optimization:
  max_update: 200000
  lr: [0.0005]

optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1e-06
  weight_decay: 0.01

lr_scheduler:
  _name: cosine
  warmup_updates: 10000

model:
  _name: data2vec_audio
  extractor_mode: layer_norm
  encoder_layerdrop: 0.05
  dropout_input: 0.0
  dropout_features: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 768

  mask_prob: 0.65
  mask_length: 10

  loss_beta: 0
  loss_scale: null

  instance_norm_target_layer: true
  layer_norm_targets: true
  average_top_k_layers: 12

  self_attn_norm_type: deepnorm
  final_norm_type: deepnorm

  pos_conv_depth: 5
  conv_pos: 95

  ema_decay: 0.999
  ema_end_decay: 0.9999
  ema_anneal_end_step: 30000
  ema_transformer_only: true
  ema_layers_only: false

  require_same_masks: true
  mask_dropout: 0
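
# Usage sketch: assuming this file is saved under a Hydra config directory
# (the directory and config name below are illustrative, not taken from this
# file), pretraining can be launched with fairseq's Hydra entry point, with
# standard Hydra dotted overrides to point at local data and the user dir:
#
#   fairseq-hydra-train -m \
#     --config-dir examples/data2vec/config/audio/pretraining \
#     --config-name base_audioset \
#     task.data=/path/to/manifests \
#     common.user_dir=examples/data2vec
#
# Note: the model anneals the teacher's EMA decay from ema_decay (0.999) to
# ema_end_decay (0.9999) over the first ema_anneal_end_step (30000) updates,
# and the criterion logs it under the ema_decay key configured above.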