PyTorch
ssl-aasist
custom_code
ssl-aasist / fairseq /examples /data2vec /config /v2 /base_text_only_task.yaml
ash56's picture
Add files using upload-large-folder tool
b1b22fb verified
raw
history blame
2.13 kB
# @package _group_
# Run-wide settings: half-precision flags, log output and the directory
# holding the user-supplied data2vec code.
common:
  # Precision
  fp16: true
  fp16_no_flatten_grads: true

  # Logging
  log_format: json
  log_interval: 200
  tensorboard_logdir: tb

  # Interpolated from the PWD environment variable when the config is
  # loaded; presumably the job must be started from the fairseq repository
  # root for this path to exist — TODO confirm.
  user_dir: ${env:PWD}/examples/data2vec
# Checkpoint policy: per-epoch checkpoints are disabled; the two
# *_interval_updates keys govern the update-based checkpoints.
checkpoint:
  keep_interval_updates: 1
  no_epoch_checkpoints: true
  save_interval_updates: 50000
# Distributed setup: world size of 16 using the legacy_ddp backend.
distributed_training:
  ddp_backend: legacy_ddp
  distributed_world_size: 16
# Task definition: masked_lm with 512 tokens per sample.
task:
  _name: masked_lm

  # Absolute, site-specific path. Presumably has to be overridden
  # (task.data=...) when running on any other machine — TODO confirm.
  data: /fsx-wav2vec/abaevski/data/nlp/bookwiki_aml-full-mmap2-bin

  # Plain string "none"; a YAML null would be spelled `null` or `~`.
  sample_break_mode: none
  tokens_per_sample: 512
  include_target_tokens: true

  # Both probabilities are zero in this config.
  random_token_prob: 0
  leave_unmasked_prob: 0

  # Canonical lowercase booleans, consistent with every other flag in this
  # file.
  include_index: true
  skip_masking: true
  d2v2_multi: true
# Criterion named `model`; log_keys enumerates the six entries to be logged.
criterion:
  _name: model
  log_keys:
    - ema_decay
    - target_var
    - pred_var
    - model_norm
    - ema_norm
    - masked_pct
# Batching and validation options; validation is switched off in this config.
dataset:
  batch_size: 4
  disable_validation: true
  ignore_unused_valid_subsets: true
  skip_invalid_size_inputs_valid_test: true
# Optimization limits: max_update of 1000000 and a clip_norm of 1.
optimization:
  max_update: 1000000
  clip_norm: 1
  # Both values below are single-element lists, not bare scalars.
  lr: [0.0002]
  update_freq: [1]
# Composite optimizer with a single `default` group that pairs an adam
# optimizer with a cosine learning-rate scheduler.
optimizer:
  _name: composite
  dynamic_groups: true
  groups:
    default:
      lr_float: 0.0002
      optimizer:
        _name: adam
        adam_betas: [0.9, 0.98]
        # Written with an explicit mantissa dot: YAML 1.1 loaders such as
        # PyYAML resolve a dot-less `1e-06` as a string, not a float.
        adam_eps: 1.0e-06
        weight_decay: 0.01
      lr_scheduler:
        _name: cosine
        warmup_updates: 4000
# Top-level scheduler is set to `pass_through`; the cosine schedule in this
# file appears under the optimizer section's own lr_scheduler entry.
lr_scheduler: pass_through
# Model definition: data2vec_multi restricted to the TEXT modality.
model:
  _name: data2vec_multi

  # Loss
  loss_beta: 0
  loss_scale: 1

  # Size: depth 12, embedding dimension 768.
  depth: 12
  embed_dim: 768
  clone_batch: 8

  # EMA settings; presumably the decay moves from ema_decay to
  # ema_end_decay by ema_anneal_end_step — TODO confirm against the model.
  ema_decay: 0.9999
  ema_end_decay: 0.99999
  ema_anneal_end_step: 100000
  ema_encoder_only: true

  # Target construction: average_top_k_layers equals depth (12), and of the
  # five normalization flags only instance_norm_target_layer is enabled.
  average_top_k_layers: 12
  layer_norm_target_layer: false
  instance_norm_target_layer: true
  batch_norm_target_layer: false
  instance_norm_targets: false
  layer_norm_targets: false

  layerdrop: 0
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a dot-less `1e-5` as a string, not a float.
  norm_eps: 1.0e-5

  supported_modality: TEXT

  modalities:
    text:
      # Masking
      mask_prob: 0.48
      mask_length: 1
      mask_noise_std: 0.01
      prenet_depth: 0
      # Decoder: 5 layers, kernel 9, dimension 768, no residual.
      decoder:
        input_dropout: 0.1
        decoder_dim: 768
        decoder_groups: 1
        decoder_kernel: 9
        decoder_layers: 5
        decoder_residual: false
        projection_layers: 2
        projection_ratio: 2.0