# @package _global_

# Hydra settings for launching this job as a multi-node sweep through the
# submitit launcher. Sizes below: 3 nodes x 8 GPUs per node, one task per node.
hydra:
  job:
    config:
      # Controls how ${hydra.job.override_dirname} is built from the
      # command-line overrides: "key:value" pairs joined with "/".
      override_dirname:
        kv_sep: ':'
        item_sep: '/'
        # Overrides listed here are left out of override_dirname, so changing
        # them does not change the sweep output directory.
        exclude_keys:
          - run_config
          - distributed_training.distributed_port
          - distributed_training.distributed_world_size
          - model.pretrained_model_path
          - model.target_network_path
          - next_script
          - task.cache_in_scratch
          - task.data
          - checkpoint.save_interval_updates
          - checkpoint.keep_interval_updates
          - checkpoint.save_on_overflow
          - common.log_interval
  sweep:
    # Output directory; USER and PREFIX are resolved from environment variables.
    dir: /checkpoint/${env:USER}/${env:PREFIX}/${hydra.job.config_name}_${hydra.launcher.gpus_per_node}/${hydra.job.override_dirname}
    # Empty subdir: no per-job subdirectory is appended to the sweep dir.
    subdir: ''
  launcher:
    # Launcher files are written to the same directory as the sweep output.
    submitit_folder: ${hydra.sweep.dir}
    timeout_min: 4320  # 4320 minutes = 72 hours
    cpus_per_task: 80
    gpus_per_node: 8
    tasks_per_node: 1
    mem_gb: 450
    nodes: 3
    name: ${env:PREFIX}_${hydra.job.config_name}
    # NOTE(review): partition and constraint names are cluster-specific — confirm
    # they exist on the target cluster before reuse.
    partition: devlab,learnlab,learnfair,scavenge
    constraint: volta32gb,ib4
    max_num_timeout: 30