# Prediction config: task_type is local_predict, run on the `test` split,
# scored with RetrievalMetric via RetrievalPredictor.
#
# NOTE(review): this file arrived with all whitespace collapsed onto a single
# line, which is not parseable YAML (a plain scalar cannot contain ": ").
# The block structure below is reconstructed from key grouping; confirm the
# nesting against the loader's schema, especially the points flagged inline.

slurm_config: big
task_type: local_predict

# Data-loading settings: processor/aligner class names, input paths, and
# length limits.
dataset:
  split: test
  video_processor: VideoProcessor
  aligner: DSAligner
  bert_name: bert-base-uncased
  meta_processor: MSRVTTMetaProcessor
  test_path: data/msrvtt/MSRVTT_JSFUSION_test.csv
  vfeat_dir: data/feat/feat_vtt_s3d
  text_processor: MSRVTTTextProcessor
  num_iso_layer: 12
  max_video_len: 32
  max_len: 96

# NOTE(review): `dataset` and `common_eval` are assumed to be sub-sections of
# `fairseq`, and `model` is assumed to start a new top-level section - verify.
fairseq:
  dataset:
    batch_size: 256
    valid_subset: test
    num_workers: 2
  common_eval:
    # Checkpoint file to load.
    path: runs/mtm/vlm/vtt/checkpoint_last.pt

model:
  model_cls: MMFusionJoint
  mm_encoder_cls: MMBertForJoint
  use_seg_emb: true

eval:
  save_path: runs/mtm/vlm/vtt/eval

# NOTE(review): `metric` and `predictor` are placed at top level here; the
# collapsed source cannot distinguish this from nesting under `eval` - confirm.
metric: RetrievalMetric
predictor: RetrievalPredictor