# fairseq/examples/data2vec/config/v2/base_images_only_task.yaml
# @package _group_
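# data2vec 2.0 base config: self-supervised image-only pretraining with the
# data2vec_multi model. The @package _group_ directive tells Hydra to merge
# this file under its config group rather than at the config root.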
common:
  fp16: true
  log_format: json
  log_interval: 200
  tensorboard_logdir: tb
  min_loss_scale: 1e-6
  fp16_no_flatten_grads: true
  user_dir: ${env:PWD}/examples/data2vec
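
# Save a checkpoint every 25k updates and keep only the most recent one;
# per-epoch checkpoints are disabled via no_epoch_checkpoints.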
checkpoint:
  save_interval: 5
  save_interval_updates: 25000
  keep_interval_updates: 1
  no_epoch_checkpoints: true
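
# MAE-style image pretraining task. The ImageNet data path and local cache
# below are cluster-specific; override them for your setup (e.g.
# task.data=/path/to/imagenet).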
task:
  _name: mae_image_pretraining
  data: /datasets01/imagenet_full_size/061417/
  rebuild_batches: true
  local_cache_path: /scratch/cache_abaevski/imagenet
  key: source
  precompute_mask_config: {}
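
# batch_size is per GPU; with the 16 GPUs configured below this yields an
# effective batch of 16 x 16 = 256 images per update (before clone_batch
# duplication).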
dataset:
  num_workers: 10
  batch_size: 16
  skip_invalid_size_inputs_valid_test: true
  required_batch_size_multiple: 1
  disable_validation: true
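
# 16-GPU data-parallel training with the standard c10d DDP backend.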
distributed_training:
  distributed_world_size: 16
  ddp_backend: c10d
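
# The generic "model" criterion takes the loss straight from the model's
# forward output and aggregates the keys listed here (EMA decay,
# target/prediction variance, parameter norms, percentage of masked tokens).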
criterion:
  _name: model
  log_keys:
    - ema_decay
    - target_var
    - pred_var
    - model_norm
    - ema_norm
    - masked_pct
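
# ~375k total updates with gradient norm clipping at 4. The top-level lr is
# mirrored by lr_float in the composite optimizer group below, which is the
# value actually applied per group.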
optimization:
  max_update: 375300
  lr: [ 0.001 ]
  debug_param_names: true
  clip_norm: 4
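
# Composite optimizer with one dynamic "default" parameter group: Adam with
# betas (0.9, 0.95), weight decay 0.05, and a cosine LR schedule warmed up
# for 50,040 updates.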
optimizer:
  _name: composite
  dynamic_groups: true
  groups:
    default:
      lr_float: 1e-3
      optimizer:
        _name: adam
        adam_betas: [0.9,0.95]
        weight_decay: 0.05
      lr_scheduler:
        _name: cosine
        warmup_updates: 50040
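
# The top-level scheduler is pass_through: LR scheduling is handled entirely
# by the per-group cosine scheduler inside the composite optimizer.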
lr_scheduler: pass_through
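
# data2vec_multi: the EMA teacher decay is annealed from 0.9998 to 0.99999
# over the first 100k updates. Targets average the top 10 teacher layers
# (all of them at depth 10), instance-normalized per layer and
# layer-normalized after averaging; each image is cloned 16 times with
# different masks (multi-mask training).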
model:
  _name: data2vec_multi
  ema_decay: 0.9998
  ema_end_decay: 0.99999
  ema_anneal_end_step: 100000
  instance_norm_target_layer: true
  layer_norm_target_layer: false
  layer_norm_targets: true
  end_of_block_targets: false
  depth: 10
  average_top_k_layers: 10
  clone_batch: 16
  norm_eps: 1e-6
  min_target_var: 0
  min_pred_var: 0
  encoder_dropout: 0
  post_mlp_drop: 0
  attention_dropout: 0
  activation_dropout: 0
  supported_modality: IMAGE
  cls_loss: 0.01
  ema_encoder_only: false
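
  # Per-modality settings. Images use inverse block masking (mask_prob 0.8
  # with a 0.07 adjustment, block length 3), a 2-block prenet, and a single
  # extra CLS-style token that feeds the cls_loss above.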
  modalities:
    image:
      inverse_mask: true
      mask_prob: 0.8
      mask_prob_adjust: 0.07
      mask_length: 3
      mask_noise_std: 0.01
      prenet_depth: 2
      ema_local_encoder: true
      num_extra_tokens: 1
      init_extra_token_zero: false
      use_alibi_encoder: false
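
      # data2vec 2.0 uses a lightweight convolutional decoder: 6 grouped-conv
      # layers (16 groups), kernel size 3, width 768, predicting the teacher
      # targets at masked positions.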
      decoder:
        decoder_dim: 768
        decoder_groups: 16
        decoder_kernel: 3
        decoder_layers: 6
        input_dropout: 0