uuuhjb
/

2nodebs8_decouple

Model card · Files and versions

xet

Community

uuuhjb committed 23 days ago

Commit

acf49f8

verified ·

1 Parent(s): 8472c4d

upload config.yaml

Browse files

Files changed (1) hide show

config.yaml +300 -0

config.yaml ADDED Viewed

	@@ -0,0 +1,300 @@

+output_dir: ./runs/train_libero_v2_512hdim_variant_decouple_2node/decouple_2node_b8_20260605_184739
+batch_size: 8
+num_workers: 4
+prefetch_factor: 6
+lr_scheduler_type: constant
+learning_rate: 0.0001
+num_epochs: 10
+max_steps: 80000
+log_every: 10
+save_every: 5000
+state_keep_last_n: 1
+weights_keep_last_n: 1000
+long_term_save_every: 10000
+long_term_save_start: 0
+eval_every: 0
+eval_num_inference_steps: 10
+gradient_accumulation_steps: 1
+mixed_precision: bf16
+seed: 42
+max_grad_norm: 1.0
+weight_decay: 0.01
+resume: null
+compile_mot: false
+optimizer_type: adamw8bit
+wandb:
+  enabled: true
+  workspace: null
+  project: fastwam_ltx_decouple
+  name: decouple_2node_b8_20260605_184739
+  group: null
+  mode: online
+data:
+  train:
+    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
+    dataset_dirs:
+    - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
+    - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
+    - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
+    - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
+    shape_meta:
+      images:
+      - key: image
+        raw_shape:
+        - 3
+        - 512
+        - 512
+        shape:
+        - 3
+        - 224
+        - 224
+      - key: wrist_image
+        raw_shape:
+        - 3
+        - 512
+        - 512
+        shape:
+        - 3
+        - 224
+        - 224
+      action:
+      - key: default
+        raw_shape: 7
+        shape: 7
+      state:
+      - key: default
+        raw_shape: 8
+        shape: 8
+    num_frames: 33
+    global_sample_stride: 1
+    action_video_freq_ratio: 4
+    video_size:
+    - 224
+    - 448
+    camera_key: null
+    val_set_proportion: 0
+    is_training_set: true
+    skip_padding_as_possible: false
+    concat_multi_camera: horizontal
+    processor:
+      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
+      shape_meta:
+        images:
+        - key: image
+          raw_shape:
+          - 3
+          - 512
+          - 512
+          shape:
+          - 3
+          - 224
+          - 224
+        - key: wrist_image
+          raw_shape:
+          - 3
+          - 512
+          - 512
+          shape:
+          - 3
+          - 224
+          - 224
+        action:
+        - key: default
+          raw_shape: 7
+          shape: 7
+        state:
+        - key: default
+          raw_shape: 8
+          shape: 8
+      num_obs_steps: 33
+      num_output_cameras: 2
+      action_output_dim: 7
+      proprio_output_dim: 8
+      delta_action_dim_mask:
+        default:
+        - true
+        - true
+        - true
+        - true
+        - true
+        - true
+        - false
+      action_state_transforms: null
+      use_stepwise_action_norm: false
+      norm_default_mode: min/max
+      norm_exception_mode: null
+      action_state_merger:
+        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
+      train_transforms:
+      - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
+      - _target_: torchvision.transforms.Resize
+        size:
+        - 224
+        - 224
+      val_transforms:
+      - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
+      - _target_: torchvision.transforms.Resize
+        size:
+        - 224
+        - 224
+    text_embedding_cache_dir: ./data/text_embeds_cache/libero
+    text_cache_slug: ltx23_gemma3_12b_v2connector
+    context_len: 128
+    joint_latent_cache_dir: ./data/joint_latents/libero_ratio4_nf33
+  val:
+    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
+    dataset_dirs:
+    - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
+    - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
+    - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
+    - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
+    shape_meta:
+      images:
+      - key: image
+        raw_shape:
+        - 3
+        - 512
+        - 512
+        shape:
+        - 3
+        - 224
+        - 224
+      - key: wrist_image
+        raw_shape:
+        - 3
+        - 512
+        - 512
+        shape:
+        - 3
+        - 224
+        - 224
+      action:
+      - key: default
+        raw_shape: 7
+        shape: 7
+      state:
+      - key: default
+        raw_shape: 8
+        shape: 8
+    num_frames: 33
+    global_sample_stride: 1
+    action_video_freq_ratio: 4
+    video_size:
+    - 224
+    - 448
+    camera_key: null
+    val_set_proportion: 0
+    is_training_set: false
+    skip_padding_as_possible: false
+    concat_multi_camera: horizontal
+    processor:
+      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
+      shape_meta:
+        images:
+        - key: image
+          raw_shape:
+          - 3
+          - 512
+          - 512
+          shape:
+          - 3
+          - 224
+          - 224
+        - key: wrist_image
+          raw_shape:
+          - 3
+          - 512
+          - 512
+          shape:
+          - 3
+          - 224
+          - 224
+        action:
+        - key: default
+          raw_shape: 7
+          shape: 7
+        state:
+        - key: default
+          raw_shape: 8
+          shape: 8
+      num_obs_steps: 33
+      num_output_cameras: 2
+      action_output_dim: 7
+      proprio_output_dim: 8
+      delta_action_dim_mask:
+        default:
+        - true
+        - true
+        - true
+        - true
+        - true
+        - true
+        - false
+      action_state_transforms: null
+      use_stepwise_action_norm: false
+      norm_default_mode: min/max
+      norm_exception_mode: null
+      action_state_merger:
+        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
+      train_transforms:
+      - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
+      - _target_: torchvision.transforms.Resize
+        size:
+        - 224
+        - 224
+      val_transforms:
+      - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
+      - _target_: torchvision.transforms.Resize
+        size:
+        - 224
+        - 224
+    text_embedding_cache_dir: ./data/text_embeds_cache/libero
+    text_cache_slug: ltx23_gemma3_12b_v2connector
+    context_len: 128
+model:
+  _target_: fastwam.runtime.create_fastwam
+  ckpt_path: checkpoints/Lightricks/LTX-2.3/ltx-2.3-22b-dev.safetensors
+  gemma_path: checkpoints/google/gemma-3-12b-it-qat-q4_0-unquantized
+  load_text_encoder: false
+  attach_gemma_to_text_encoder: false
+  proprio_dim: 8
+  mot_checkpoint_mixed_attn: false
+  action_dit_pretrained_path: checkpoints/preprocessed/ltx_action_dit_backbone.pt
+  skip_dit_load_from_pretrain: false
+  video_dit_config:
+    text_dim: 4096
+    use_gradient_checkpointing: false
+    action_dim: 7
+  action_dit_config:
+    action_dim: 7
+    hidden_dim: 512
+    num_heads: 32
+    attn_head_dim: 128
+    num_layers: 48
+    text_dim: 4096
+    eps: 1.0e-06
+    cross_attention_adaln: false
+    use_gradient_checkpointing: false
+  video_scheduler:
+    type: ltx2
+    min_shift: 0.95
+    max_shift: 2.05
+    min_tokens: 1024
+    max_tokens: 4096
+    infer_shift: 2.05
+    num_train_timesteps: 1000
+    train_shift: 5.0
+    sigma_floor: 0.0
+  action_scheduler:
+    type: wan
+    train_shift: 5.0
+    infer_shift: 5.0
+    num_train_timesteps: 1000
+    sigma_floor: 0.0
+  loss:
+    lambda_video: 0.1
+    lambda_action: 1.0
+  action_only_train: false
+  mot_attn_decouple_frac: 0.25
+video_expert_lr: 1.0e-05
+action_expert_lr: 0.0001