---
# Training-run configuration (LIBERO data, FastWAM model factory).
#
# This file was restored to block-style YAML from a whitespace-collapsed dump
# in which all indentation had been lost. Keys, values and their order are
# unchanged; only the nesting was reconstructed.
# NOTE(review): nesting that could not be derived unambiguously from the
# collapsed text is flagged inline with NOTE(review) -- confirm those spots
# against the consuming code before relying on this file.

# --- Run / optimisation settings -------------------------------------------
output_dir: ./runs/train_libero_v2_512hdim_variant_decouple_2node/decouple_2node_b8_20260605_184739
batch_size: 8
num_workers: 4
prefetch_factor: 6
lr_scheduler_type: constant
learning_rate: 0.0001
num_epochs: 10
max_steps: 80000
log_every: 10
save_every: 5000
state_keep_last_n: 1
weights_keep_last_n: 1000
long_term_save_every: 10000
long_term_save_start: 0
eval_every: 0
eval_num_inference_steps: 10
gradient_accumulation_steps: 1
mixed_precision: bf16
seed: 42
max_grad_norm: 1.0
weight_decay: 0.01
resume: null
compile_mot: false
optimizer_type: adamw8bit

# --- Experiment tracking ---------------------------------------------------
wandb:
  enabled: true
  workspace: null
  project: fastwam_ltx_decouple
  # Same run identifier as the last path component of output_dir.
  name: decouple_2node_b8_20260605_184739
  group: null
  mode: online

# --- Datasets --------------------------------------------------------------
# `train` and `val` list the same dataset_dirs and the same processor
# settings. They differ in `is_training_set`, and only `train` carries
# `joint_latent_cache_dir`.
data:
  train:
    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
    dataset_dirs:
      - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
    shape_meta:
      images:
        - key: image
          raw_shape:
            - 3
            - 512
            - 512
          shape:
            - 3
            - 224
            - 224
        - key: wrist_image
          raw_shape:
            - 3
            - 512
            - 512
          shape:
            - 3
            - 224
            - 224
      action:
        - key: default
          raw_shape: 7
          shape: 7
      state:
        - key: default
          raw_shape: 8
          shape: 8
    num_frames: 33
    global_sample_stride: 1
    action_video_freq_ratio: 4
    video_size:
      - 224
      - 448
    camera_key: null
    val_set_proportion: 0
    is_training_set: true
    skip_padding_as_possible: false
    concat_multi_camera: horizontal
    processor:
      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
      # Repeats the dataset-level shape_meta verbatim.
      shape_meta:
        images:
          - key: image
            raw_shape:
              - 3
              - 512
              - 512
            shape:
              - 3
              - 224
              - 224
          - key: wrist_image
            raw_shape:
              - 3
              - 512
              - 512
            shape:
              - 3
              - 224
              - 224
        action:
          - key: default
            raw_shape: 7
            shape: 7
        state:
          - key: default
            raw_shape: 8
            shape: 8
      num_obs_steps: 33
      num_output_cameras: 2
      action_output_dim: 7
      proprio_output_dim: 8
      # One flag per action dimension (7 entries); only the last is false.
      delta_action_dim_mask:
        default:
          - true
          - true
          - true
          - true
          - true
          - true
          - false
      action_state_transforms: null
      use_stepwise_action_norm: false
      norm_default_mode: min/max
      norm_exception_mode: null
      action_state_merger:
        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
      # NOTE(review): train_transforms / val_transforms are assumed to be
      # processor arguments -- confirm.
      train_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
      val_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
    # NOTE(review): the four cache/context keys below are assumed to be
    # dataset-level (siblings of `processor`), not processor arguments --
    # confirm against the RobotVideoDataset / FastWAMProcessor signatures.
    text_embedding_cache_dir: ./data/text_embeds_cache/libero
    text_cache_slug: ltx23_gemma3_12b_v2connector
    context_len: 128
    joint_latent_cache_dir: ./data/joint_latents/libero_ratio4_nf33
  val:
    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
    dataset_dirs:
      - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
    shape_meta:
      images:
        - key: image
          raw_shape:
            - 3
            - 512
            - 512
          shape:
            - 3
            - 224
            - 224
        - key: wrist_image
          raw_shape:
            - 3
            - 512
            - 512
          shape:
            - 3
            - 224
            - 224
      action:
        - key: default
          raw_shape: 7
          shape: 7
      state:
        - key: default
          raw_shape: 8
          shape: 8
    num_frames: 33
    global_sample_stride: 1
    action_video_freq_ratio: 4
    video_size:
      - 224
      - 448
    camera_key: null
    val_set_proportion: 0
    is_training_set: false
    skip_padding_as_possible: false
    concat_multi_camera: horizontal
    processor:
      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
      # Repeats the dataset-level shape_meta verbatim.
      shape_meta:
        images:
          - key: image
            raw_shape:
              - 3
              - 512
              - 512
            shape:
              - 3
              - 224
              - 224
          - key: wrist_image
            raw_shape:
              - 3
              - 512
              - 512
            shape:
              - 3
              - 224
              - 224
        action:
          - key: default
            raw_shape: 7
            shape: 7
        state:
          - key: default
            raw_shape: 8
            shape: 8
      num_obs_steps: 33
      num_output_cameras: 2
      action_output_dim: 7
      proprio_output_dim: 8
      # One flag per action dimension (7 entries); only the last is false.
      delta_action_dim_mask:
        default:
          - true
          - true
          - true
          - true
          - true
          - true
          - false
      action_state_transforms: null
      use_stepwise_action_norm: false
      norm_default_mode: min/max
      norm_exception_mode: null
      action_state_merger:
        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
      # NOTE(review): train_transforms / val_transforms are assumed to be
      # processor arguments -- confirm.
      train_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
      val_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size:
            - 224
            - 224
    # NOTE(review): assumed dataset-level, as in `train` -- confirm.
    text_embedding_cache_dir: ./data/text_embeds_cache/libero
    text_cache_slug: ltx23_gemma3_12b_v2connector
    context_len: 128

# --- Model -----------------------------------------------------------------
model:
  _target_: fastwam.runtime.create_fastwam
  ckpt_path: checkpoints/Lightricks/LTX-2.3/ltx-2.3-22b-dev.safetensors
  gemma_path: checkpoints/google/gemma-3-12b-it-qat-q4_0-unquantized
  load_text_encoder: false
  attach_gemma_to_text_encoder: false
  # Same value as data.*.processor.proprio_output_dim (8).
  proprio_dim: 8
  mot_checkpoint_mixed_attn: false
  action_dit_pretrained_path: checkpoints/preprocessed/ltx_action_dit_backbone.pt
  skip_dit_load_from_pretrain: false
  video_dit_config:
    text_dim: 4096
    use_gradient_checkpointing: false
    # NOTE(review): `action_dim` is assumed to be the last key of
    # video_dit_config; it could instead be a direct child of `model` --
    # confirm.
    action_dim: 7
  action_dit_config:
    action_dim: 7
    hidden_dim: 512
    num_heads: 32
    attn_head_dim: 128
    num_layers: 48
    text_dim: 4096
    eps: 1.0e-06
    cross_attention_adaln: false
    use_gradient_checkpointing: false
  # NOTE(review): video_scheduler, action_scheduler and loss are assumed to
  # be children of `model` (not top-level keys) -- confirm.
  video_scheduler:
    type: ltx2
    min_shift: 0.95
    max_shift: 2.05
    min_tokens: 1024
    max_tokens: 4096
    infer_shift: 2.05
    num_train_timesteps: 1000
    train_shift: 5.0
    sigma_floor: 0.0
  action_scheduler:
    type: wan
    train_shift: 5.0
    infer_shift: 5.0
    num_train_timesteps: 1000
    sigma_floor: 0.0
  loss:
    lambda_video: 0.1
    lambda_action: 1.0
  # NOTE(review): the two keys below are assumed to be children of `model`;
  # they could instead belong to `loss` or to the top level -- confirm.
  action_only_train: false
  mot_attn_decouple_frac: 0.25

# --- Per-expert learning rates ---------------------------------------------
# NOTE(review): assumed top-level (next to `learning_rate`); they could
# instead be children of `model` -- confirm against the code that reads them.
# action_expert_lr has the same value as learning_rate (0.0001).
video_expert_lr: 1.0e-05
action_expert_lr: 0.0001