---
# CrossTask configuration: dataset/processor settings, fairseq trainer
# options, model and loss selection.
#
# NOTE(review): the source had the whole document collapsed onto one line,
# which is not parseable YAML. The nesting below is reconstructed from key
# order (e.g. the second `dataset` key must live under `fairseq`, otherwise it
# would duplicate the top-level one) -- confirm against the consuming loader.

dataset:
  video_processor: CrossTaskVideoProcessor
  bert_name: bert-base-uncased
  meta_processor: CrossTaskMetaProcessor
  # train_path and train_csv_path point at the same CSV; likewise for val.
  train_path: data/crosstask/crosstask_release/videos.csv
  train_csv_path: data/crosstask/crosstask_release/videos.csv
  val_path: data/crosstask/crosstask_release/videos_val.csv
  val_csv_path: data/crosstask/crosstask_release/videos_val.csv
  primary_path: data/crosstask/crosstask_release/tasks_primary.txt
  related_path: data/crosstask/crosstask_release/tasks_related.txt
  vfeat_dir: data/feat/feat_crosstask_s3d
  annotation_path: data/crosstask/crosstask_release/annotations
  n_train: 30
  text_processor: CrossTaskTextProcessor
  aligner: CrossTaskAligner
  num_iso_layer: 12
  sliding_window: 16
  sliding_window_size: 32
  max_video_len: 32
  max_len: 96

fairseq:
  common:
    tensorboard_logdir: run
    log_interval: 1000
    fp16: true
  dataset:
    num_workers: 4
    batch_size: 1
  optimization:
    # lr is a one-element sequence, not a bare float.
    lr:
      - 5.0e-05
    clip_norm: 2.0
    optimizer: adam
    # Plain string scalar (YAML has no tuple type); left exactly as written.
    adam_betas: (0.9, 0.98)
    lr_scheduler: polynomial_decay
    total_num_update: 1000000
    warmup_updates: 122
    weight_decay: 0.0
    ddp_backend: no_c10d
    max_epoch: 5
  checkpoint:
    restore_file: runs/mtm/vlm/checkpoint11.pt
    reset_optimizer: true
    reset_dataloader: true
    reset_meters: true
    save_dir: runs/mtm/vlm/crosstask

task_type: sweep_small

model:
  model_cls: MMFusionActionLocalization
  mm_encoder_cls: MMBertForJoint
  use_seg_emb: true

loss:
  loss_cls: BCE