---
# Configuration for a run on the COIN data (paths below point at data/coin).
# NOTE(review): this file had been collapsed onto a single line, which is not
# parseable YAML. The nesting below was reconstructed from key order; please
# confirm section boundaries (in particular that `task_type`, `model` and
# `loss` are top-level keys, and that `ddp_backend` / `max_epoch` belong under
# `optimization`) against the consumer's schema.

# Data pipeline: processor/aligner class names, input paths and length limits.
dataset:
  video_processor: VideoProcessor
  bert_name: bert-base-uncased
  meta_processor: COINActionSegmentationMetaProcessor
  # Train and validation point at the same annotation file.
  # presumably the split is selected by the meta processor; verify.
  train_path: data/coin/COIN.json
  val_path: data/coin/COIN.json
  vfeat_dir: data/feat/feat_coin_s3d
  text_processor: COINActionSegmentationTextProcessor
  aligner: COINActionSegmentationAligner
  num_iso_layer: 12
  sliding_window: 8
  sliding_window_size: 32
  max_video_len: 32
  max_len: 96

# Trainer settings, grouped by sub-section.
fairseq:
  common:
    tensorboard_logdir: run
    log_interval: 1000
    fp16: true
  dataset:
    num_workers: 4
    batch_size: 1
  optimization:
    lr:
      - 5.0e-05
    clip_norm: 2.0
    optimizer: adam
    # Quoted so it is unambiguously a string; the parsed value is unchanged.
    # presumably the consumer parses the tuple itself; verify.
    adam_betas: '(0.9, 0.98)'
    lr_scheduler: polynomial_decay
    total_num_update: 1000000
    warmup_updates: 122
    weight_decay: 0.0
    ddp_backend: no_c10d
    max_epoch: 8
  checkpoint:
    # Starting checkpoint; the reset_* flags below are all enabled.
    restore_file: runs/mtm/vlm/checkpoint_best.pt
    reset_optimizer: true
    reset_dataloader: true
    reset_meters: true
    save_dir: runs/mtm/vlm/coin

task_type: sweep_big

# Model and encoder class names.
model:
  model_cls: MMFusionActionSegmentation
  mm_encoder_cls: MMBertForTokenClassification
  use_seg_emb: true

# Loss class name.
loss:
  loss_cls: CrossEntropy