# config.yaml for run "2nodebs8_decouple"
# Uploaded by uuuhjb ("upload config.yaml", commit acf49f8, verified); 7.34 kB.
# Top-level run settings. The code that consumes them is not part of this
# file, so units and precedence noted below are assumptions to confirm.

# Output location; its last path component is the same string as wandb.name.
output_dir: ./runs/train_libero_v2_512hdim_variant_decouple_2node/decouple_2node_b8_20260605_184739

# Data loading.
# NOTE(review): whether batch_size is per process or global is not visible
# here — confirm against the trainer.
batch_size: 8
num_workers: 4
prefetch_factor: 6

# Learning-rate schedule and run length.
# NOTE(review): both num_epochs and max_steps are set; which limit ends the
# run first depends on the trainer — confirm.
lr_scheduler_type: constant
learning_rate: 0.0001
num_epochs: 10
max_steps: 80000

# Logging and checkpoint cadence (presumably counted in steps — confirm).
log_every: 10
save_every: 5000
state_keep_last_n: 1
weights_keep_last_n: 1000
long_term_save_every: 10000
long_term_save_start: 0

# Evaluation.
# NOTE(review): eval_every: 0 presumably disables periodic evaluation — confirm.
eval_every: 0
eval_num_inference_steps: 10

# Optimisation and numerics.
gradient_accumulation_steps: 1
mixed_precision: bf16
seed: 42
max_grad_norm: 1.0
weight_decay: 0.01
# No checkpoint to resume from is configured.
resume: null
compile_mot: false
optimizer_type: adamw8bit
# Experiment-tracking settings. The child keys are indented so they belong to
# `wandb` rather than sitting at the top level of the document.
wandb:
  enabled: true
  workspace: null
  project: fastwam_ltx_decouple
  # Same string as the last path component of output_dir.
  name: decouple_2node_b8_20260605_184739
  group: null
  mode: online
# Dataset definitions for the train and val splits.
#
# NOTE(review): this nesting is reconstructed from a copy whose indentation
# had been stripped. Keys are grouped so that no mapping contains a duplicate
# key and each `_target_` owns the arguments that follow it. Two placements
# could not be derived from the text alone and should be confirmed against
# RobotVideoDataset / FastWAMProcessor:
#   * text_embedding_cache_dir, text_cache_slug, context_len and
#     joint_latent_cache_dir are placed at dataset level (not under
#     `processor`).
#   * num_frames .. concat_multi_camera are placed at dataset level (not
#     under `shape_meta`).
data:
  train:
    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
    dataset_dirs:
      - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
    shape_meta:
      images:
        - key: image
          raw_shape: [3, 512, 512]
          shape: [3, 224, 224]
        - key: wrist_image
          raw_shape: [3, 512, 512]
          shape: [3, 224, 224]
      action:
        - key: default
          raw_shape: 7
          shape: 7
      state:
        - key: default
          raw_shape: 8
          shape: 8
    num_frames: 33
    global_sample_stride: 1
    action_video_freq_ratio: 4
    # NOTE(review): 224 x 448 matches two 224 x 224 views joined
    # horizontally, assuming (height, width) order — confirm.
    video_size: [224, 448]
    camera_key: null
    val_set_proportion: 0
    is_training_set: true
    skip_padding_as_possible: false
    concat_multi_camera: horizontal
    processor:
      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
      # Same values as the dataset-level shape_meta above.
      shape_meta:
        images:
          - key: image
            raw_shape: [3, 512, 512]
            shape: [3, 224, 224]
          - key: wrist_image
            raw_shape: [3, 512, 512]
            shape: [3, 224, 224]
        action:
          - key: default
            raw_shape: 7
            shape: 7
        state:
          - key: default
            raw_shape: 8
            shape: 8
      num_obs_steps: 33
      num_output_cameras: 2
      action_output_dim: 7
      proprio_output_dim: 8
      # One flag per action dimension (7 entries); only the last is false.
      delta_action_dim_mask:
        default: [true, true, true, true, true, true, false]
      action_state_transforms: null
      use_stepwise_action_norm: false
      norm_default_mode: min/max
      norm_exception_mode: null
      action_state_merger:
        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
      train_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size: [224, 224]
      val_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size: [224, 224]
    text_embedding_cache_dir: ./data/text_embeds_cache/libero
    text_cache_slug: ltx23_gemma3_12b_v2connector
    context_len: 128
    # Present only on the train split; the directory name carries the same
    # ratio (4) and frame count (33) as the settings above.
    joint_latent_cache_dir: ./data/joint_latents/libero_ratio4_nf33
  # The val split lists the same dataset_dirs as train and differs only in
  # is_training_set and the absence of joint_latent_cache_dir.
  # NOTE(review): with val_set_proportion: 0 it is unclear what samples this
  # split yields — confirm against the dataset class.
  val:
    _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
    dataset_dirs:
      - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
      - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
    shape_meta:
      images:
        - key: image
          raw_shape: [3, 512, 512]
          shape: [3, 224, 224]
        - key: wrist_image
          raw_shape: [3, 512, 512]
          shape: [3, 224, 224]
      action:
        - key: default
          raw_shape: 7
          shape: 7
      state:
        - key: default
          raw_shape: 8
          shape: 8
    num_frames: 33
    global_sample_stride: 1
    action_video_freq_ratio: 4
    video_size: [224, 448]
    camera_key: null
    val_set_proportion: 0
    is_training_set: false
    skip_padding_as_possible: false
    concat_multi_camera: horizontal
    processor:
      _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
      shape_meta:
        images:
          - key: image
            raw_shape: [3, 512, 512]
            shape: [3, 224, 224]
          - key: wrist_image
            raw_shape: [3, 512, 512]
            shape: [3, 224, 224]
        action:
          - key: default
            raw_shape: 7
            shape: 7
        state:
          - key: default
            raw_shape: 8
            shape: 8
      num_obs_steps: 33
      num_output_cameras: 2
      action_output_dim: 7
      proprio_output_dim: 8
      delta_action_dim_mask:
        default: [true, true, true, true, true, true, false]
      action_state_transforms: null
      use_stepwise_action_norm: false
      norm_default_mode: min/max
      norm_exception_mode: null
      action_state_merger:
        _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
      train_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size: [224, 224]
      val_transforms:
        - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
        - _target_: torchvision.transforms.Resize
          size: [224, 224]
    text_embedding_cache_dir: ./data/text_embeds_cache/libero
    text_cache_slug: ltx23_gemma3_12b_v2connector
    context_len: 128
# Model factory arguments; `_target_` names the callable that receives the
# sibling keys.
#
# NOTE(review): this nesting is reconstructed from a copy whose indentation
# had been stripped. The two scheduler mappings and action_dit_config are
# delimited by their repeated keys (`type`, `action_dim`); other placements
# are flagged inline.
model:
  _target_: fastwam.runtime.create_fastwam
  ckpt_path: checkpoints/Lightricks/LTX-2.3/ltx-2.3-22b-dev.safetensors
  gemma_path: checkpoints/google/gemma-3-12b-it-qat-q4_0-unquantized
  load_text_encoder: false
  attach_gemma_to_text_encoder: false
  # Same value as data.*.processor.proprio_output_dim in this file.
  proprio_dim: 8
  mot_checkpoint_mixed_attn: false
  action_dit_pretrained_path: checkpoints/preprocessed/ltx_action_dit_backbone.pt
  skip_dit_load_from_pretrain: false
  video_dit_config:
    text_dim: 4096
    use_gradient_checkpointing: false
    # NOTE(review): in the flattened copy this key sits between
    # video_dit_config's keys and `action_dit_config:`; it could instead be a
    # direct child of `model`. Confirm against create_fastwam's signature.
    action_dim: 7
  action_dit_config:
    action_dim: 7
    hidden_dim: 512
    num_heads: 32
    attn_head_dim: 128
    num_layers: 48
    text_dim: 4096
    eps: 1.0e-06
    cross_attention_adaln: false
    use_gradient_checkpointing: false
  video_scheduler:
    type: ltx2
    min_shift: 0.95
    max_shift: 2.05
    min_tokens: 1024
    max_tokens: 4096
    infer_shift: 2.05
    num_train_timesteps: 1000
    train_shift: 5.0
    sigma_floor: 0.0
  action_scheduler:
    type: wan
    train_shift: 5.0
    infer_shift: 5.0
    num_train_timesteps: 1000
    sigma_floor: 0.0
# Weights for the video and action loss terms; the two lambda_* keys are
# indented so they belong to `loss`.
# NOTE(review): the original indentation was lost, so this mapping is kept at
# the column where it was found; it may actually be a child of `model:` —
# confirm against the code that reads it.
loss:
  lambda_video: 0.1
  lambda_action: 1.0
# NOTE(review): the original indentation of these four keys was lost, so
# their parent mapping (document root, `model`, or another section) cannot be
# determined from this copy; they are left exactly where they were found.
# action_expert_lr has the same value as the top-level learning_rate, and
# video_expert_lr is ten times smaller; how the trainer combines the three
# is not visible here.
action_only_train: false
mot_attn_decouple_frac: 0.25
video_expert_lr: 1.0e-05
action_expert_lr: 0.0001