# Model, diffusion and checkpoint settings.
#
# NOTE(review): this file was previously collapsed onto a single line, which
# is not parseable YAML. The block structure below is a reconstruction:
#   * `sampling_timesteps` .. `timesteps` are placed under `diffusion`,
#   * `network_size` .. `time_emb_type` are placed under
#     `diffusion.architecture`,
#   * `weight_decay` and everything after it are placed at the top level.
# Confirm this nesting against the code that consumes the config.

# Conditioning / feature switches.
n_tokens: 3
pose_cond_dim: 5
use_plucker: true
focal_length: 0.35
customized_validation: true
condition_similar_length: 8
log_video: true
relative_embedding: true
cond_only_on_qk: true
add_pose_embed: false
use_domain_adapter: false
use_reference_attention: true
add_frame_timestep_embedder: true
is_interactive: true

diffusion:
  sampling_timesteps: 20
  beta_schedule: sigmoid
  objective: pred_v
  use_fused_snr: true
  cum_snr_decay: 0.96
  clip_noise: 20.0
  ddim_sampling_eta: 0.0
  stabilization_level: 15
  schedule_fn_kwargs: {}
  use_snr: false
  use_cum_snr: false
  snr_clip: 5.0
  timesteps: 1000
  # architecture
  architecture:
    network_size: 64
    attn_heads: 4
    attn_dim_head: 64
    dim_mults: [1, 2, 4, 8]
    # Interpolation resolved by the config loader from the `dataset` node.
    resolution: ${dataset.resolution}
    attn_resolutions: [16, 32, 64, 128]
    use_init_temporal_attn: true
    use_linear_attn: true
    time_emb_type: rotary

# Written with an explicit mantissa dot so YAML 1.1 loaders (e.g. PyYAML)
# resolve it as a float instead of the string "2e-3".
weight_decay: 2.0e-3
warmup_steps: 10000
optimizer_beta: [0.9, 0.99]

action_cond_dim: 25
n_frames: 8
frame_skip: 1
frame_stack: 1
uncertainty_scale: 1
guidance_scale: 0.0
chunk_size: 1  # -1 for full trajectory diffusion, number to specify diffusion chunk size
scheduling_matrix: autoregressive
noise_level: random_all
causal: true

# NOTE(review): presumably (channels, height, width) - confirm with consumer.
x_shape: [3, 360, 640]
context_frames: 1

# Checkpoint locations.
# NOTE(review): these look like hub-style `<user>/<repo>/<file>` paths rather
# than local files - confirm how the loader resolves them.
diffusion_path: yslan/worldmem_checkpoints/diffusion_only.ckpt
vae_path: yslan/worldmem_checkpoints/vae_only.ckpt
pose_predictor_path: yslan/worldmem_checkpoints/pose_prediction_model_only.ckpt