# Experiment configuration: dataset/processor settings, fairseq training
# options, model classes and loss class.
#
# NOTE(review): the previous revision of this file had every line suffixed
# with a stray " | |" and all indentation removed, which made it invalid YAML
# (and produced a duplicate top-level `dataset` key). The hierarchy below was
# reconstructed from key order and from which keys had empty values; all keys
# and values are unchanged. Please confirm the nesting against the code that
# loads this file.

# Data sources and the processor/aligner classes used to read them.
dataset:
  video_processor: VideoProcessor
  bert_name: bert-base-uncased
  meta_processor: MSRVTTMetaProcessor
  train_path: data/msrvtt/MSRVTT_train.csv
  dup: 20
  val_path: data/msrvtt/MSRVTT_JSFUSION_test.csv
  vfeat_dir: data/feat/feat_vtt_s3d
  text_processor: MSRVTTTextProcessor
  json_path: data/msrvtt/MSRVTT_data.json
  aligner: DSAligner
  num_iso_layer: 12
  max_video_len: 32
  max_len: 96

# Options grouped for the fairseq trainer.
fairseq:
  common:
    tensorboard_logdir: run
    log_interval: 1000
    fp16: true
  # Nested here (not top-level) because a top-level `dataset` already exists.
  dataset:
    num_workers: 4
    batch_size: 128
  optimization:
    lr:
    - 5.0e-05
    clip_norm: 2.0
    optimizer: adam
    # Kept as a plain scalar exactly as in the original; presumably parsed by
    # the consumer into a tuple — TODO confirm.
    adam_betas: (0.9, 0.98)
    lr_scheduler: polynomial_decay
    total_num_update: 1000000
    warmup_updates: 122
    weight_decay: 0.0
    # NOTE(review): `ddp_backend` and `max_epoch` followed the optimizer keys
    # in the flattened source, so they are kept in this group; verify that the
    # loader does not expect them in a different group.
    ddp_backend: no_c10d
    max_epoch: 5
  checkpoint:
    # Checkpoint to start from; optimizer, dataloader and meter state are
    # reset rather than restored, and new checkpoints go to `save_dir`.
    restore_file: runs/mtm/vlm/checkpoint_best.pt
    reset_optimizer: true
    reset_dataloader: true
    reset_meters: true
    save_dir: runs/mtm/vlm/vttqa

# NOTE(review): placed at top level; it could not be determined from the
# flattened source whether this belonged under `fairseq.checkpoint` — confirm.
task_type: sweep_small

# Model and multimodal encoder classes.
model:
  model_cls: MMFusionJoint
  mm_encoder_cls: MMBertForJoint
  use_seg_emb: true

# Training loss class.
loss:
  loss_cls: V2TContraLoss