# Algorithm config `df_video_worldmemminecraft`.
#
# NOTE(review): the nesting in this file (in particular `architecture` living
# under `diffusion`) was reconstructed from a flattened copy in which all
# indentation had been lost. Confirm it against the code that reads this
# config before relying on it.

# Looks like a Hydra defaults list: this config is composed on top of
# `df_base`.
defaults:
  - df_base

# Values pulled from the selected `dataset` config via interpolation, so they
# always follow whatever dataset is active.
n_frames: ${dataset.n_frames}
frame_skip: ${dataset.frame_skip}
metadata: ${dataset.metadata}

# Optimizer / schedule hyperparameters.
# `2.0e-3` is written with an explicit decimal point because YAML 1.1 loaders
# (e.g. plain PyYAML) read the dot-less form `2e-3` as a string, not a float.
weight_decay: 2.0e-3
warmup_steps: 10000
optimizer_beta: [0.9, 0.99]
# Presumably the size of the action-conditioning input; verify against the
# model code.
action_cond_dim: 25

diffusion:
  # Noise schedule and training objective.
  beta_schedule: sigmoid
  objective: pred_v
  use_fused_snr: true
  cum_snr_decay: 0.96
  clip_noise: 20.0

  # Sampling settings.
  sampling_timesteps: 20
  ddim_sampling_eta: 0.0
  stabilization_level: 15

  # Network architecture settings.
  architecture:
    network_size: 64
    attn_heads: 4
    attn_dim_head: 64
    dim_mults: [1, 2, 4, 8]
    # Follows the active dataset's resolution.
    resolution: ${dataset.resolution}
    attn_resolutions: [16, 32, 64, 128]
    use_init_temporal_attn: true
    use_linear_attn: true
    time_emb_type: rotary

# No metrics are listed. The explicit `null` is the same value a bare
# `metrics:` key yields, but makes the emptiness intentional.
metrics: null

_name: df_video_worldmemminecraft