# Prediction/evaluation configuration (task_type: local_predict) for a
# YouCook captioning checkpoint.
#
# NOTE(review): this file had lost its indentation and carried a trailing
# " | |" on every line (table-extraction residue), which made it invalid
# YAML. The nesting below is reconstructed from the key grouping; confirm
# it against the config loader before relying on it.
slurm_config: big
task_type: local_predict

# Data loading: processors, aligner, and input paths.
dataset:
  split: test
  video_processor: YoucookVideoProcessor
  aligner: DSNLGAligner
  bert_name: bert-base-uncased
  meta_processor: YoucookNLGMetaProcessor
  # NOTE(review): split is "test" but the list file is val_list.txt;
  # presumably intentional, verify.
  test_path: data/youcook/val_list.txt
  trainval_annotation: data/youcook/youcookii_annotations_trainval.json
  vfeat_dir: data/feat/feat_youcook_s3d
  text_processor: NLGTextProcessor
  max_video_len: 32
  max_len: 96

# Options grouped under the fairseq section.
fairseq:
  dataset:
    batch_size: 256
    valid_subset: test
    num_workers: 2
  common_eval:
    # Checkpoint to load for prediction.
    path: runs/mtm/vlm/youcookcap/checkpoint_best.pt

# Model classes and decoding length.
model:
  model_cls: MMFusionNLG
  mm_encoder_cls: MMBertForNLG
  max_decode_length: 24
  use_seg_emb: true

# Output location for evaluation results.
eval:
  save_path: runs/mtm/vlm/youcookcap/eval

metric: NLGMetric
predictor: NLGPredictor

# Generation parameters.
gen_param:
  num_beams: 5