# Hosting-page metadata captured together with this file (not config data):
# upload config.yaml
#
# acf49f8 verified 13 days ago
#
# 7.34 kB

	output_dir: ./runs/train_libero_v2_512hdim_variant_decouple_2node/decouple_2node_b8_20260605_184739
	batch_size: 8
	num_workers: 4
	prefetch_factor: 6
	lr_scheduler_type: constant
	learning_rate: 0.0001
	num_epochs: 10
	max_steps: 80000
	log_every: 10
	save_every: 5000
	state_keep_last_n: 1
	weights_keep_last_n: 1000
	long_term_save_every: 10000
	long_term_save_start: 0
	eval_every: 0
	eval_num_inference_steps: 10
	gradient_accumulation_steps: 1
	mixed_precision: bf16
	seed: 42
	max_grad_norm: 1.0
	weight_decay: 0.01
	resume: null
	compile_mot: false
	optimizer_type: adamw8bit
	wandb:
	enabled: true
	workspace: null
	project: fastwam_ltx_decouple
	name: decouple_2node_b8_20260605_184739
	group: null
	mode: online
	data:
	train:
	_target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
	dataset_dirs:
	- ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
	- ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
	- ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
	- ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
	shape_meta:
	images:
	- key: image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	- key: wrist_image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	action:
	- key: default
	raw_shape: 7
	shape: 7
	state:
	- key: default
	raw_shape: 8
	shape: 8
	num_frames: 33
	global_sample_stride: 1
	action_video_freq_ratio: 4
	video_size:
	- 224
	- 448
	camera_key: null
	val_set_proportion: 0
	is_training_set: true
	skip_padding_as_possible: false
	concat_multi_camera: horizontal
	processor:
	_target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
	shape_meta:
	images:
	- key: image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	- key: wrist_image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	action:
	- key: default
	raw_shape: 7
	shape: 7
	state:
	- key: default
	raw_shape: 8
	shape: 8
	num_obs_steps: 33
	num_output_cameras: 2
	action_output_dim: 7
	proprio_output_dim: 8
	delta_action_dim_mask:
	default:
	- true
	- true
	- true
	- true
	- true
	- true
	- false
	action_state_transforms: null
	use_stepwise_action_norm: false
	norm_default_mode: min/max
	norm_exception_mode: null
	action_state_merger:
	_target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
	train_transforms:
	- _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
	- _target_: torchvision.transforms.Resize
	size:
	- 224
	- 224
	val_transforms:
	- _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
	- _target_: torchvision.transforms.Resize
	size:
	- 224
	- 224
	text_embedding_cache_dir: ./data/text_embeds_cache/libero
	text_cache_slug: ltx23_gemma3_12b_v2connector
	context_len: 128
	joint_latent_cache_dir: ./data/joint_latents/libero_ratio4_nf33
	val:
	_target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
	dataset_dirs:
	- ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
	- ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
	- ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
	- ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
	shape_meta:
	images:
	- key: image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	- key: wrist_image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	action:
	- key: default
	raw_shape: 7
	shape: 7
	state:
	- key: default
	raw_shape: 8
	shape: 8
	num_frames: 33
	global_sample_stride: 1
	action_video_freq_ratio: 4
	video_size:
	- 224
	- 448
	camera_key: null
	val_set_proportion: 0
	is_training_set: false
	skip_padding_as_possible: false
	concat_multi_camera: horizontal
	processor:
	_target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
	shape_meta:
	images:
	- key: image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	- key: wrist_image
	raw_shape:
	- 3
	- 512
	- 512
	shape:
	- 3
	- 224
	- 224
	action:
	- key: default
	raw_shape: 7
	shape: 7
	state:
	- key: default
	raw_shape: 8
	shape: 8
	num_obs_steps: 33
	num_output_cameras: 2
	action_output_dim: 7
	proprio_output_dim: 8
	delta_action_dim_mask:
	default:
	- true
	- true
	- true
	- true
	- true
	- true
	- false
	action_state_transforms: null
	use_stepwise_action_norm: false
	norm_default_mode: min/max
	norm_exception_mode: null
	action_state_merger:
	_target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
	train_transforms:
	- _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
	- _target_: torchvision.transforms.Resize
	size:
	- 224
	- 224
	val_transforms:
	- _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
	- _target_: torchvision.transforms.Resize
	size:
	- 224
	- 224
	text_embedding_cache_dir: ./data/text_embeds_cache/libero
	text_cache_slug: ltx23_gemma3_12b_v2connector
	context_len: 128
	model:
	_target_: fastwam.runtime.create_fastwam
	ckpt_path: checkpoints/Lightricks/LTX-2.3/ltx-2.3-22b-dev.safetensors
	gemma_path: checkpoints/google/gemma-3-12b-it-qat-q4_0-unquantized
	load_text_encoder: false
	attach_gemma_to_text_encoder: false
	proprio_dim: 8
	mot_checkpoint_mixed_attn: false
	action_dit_pretrained_path: checkpoints/preprocessed/ltx_action_dit_backbone.pt
	skip_dit_load_from_pretrain: false
	video_dit_config:
	text_dim: 4096
	use_gradient_checkpointing: false
	action_dim: 7
	action_dit_config:
	action_dim: 7
	hidden_dim: 512
	num_heads: 32
	attn_head_dim: 128
	num_layers: 48
	text_dim: 4096
	eps: 1.0e-06
	cross_attention_adaln: false
	use_gradient_checkpointing: false
	video_scheduler:
	type: ltx2
	min_shift: 0.95
	max_shift: 2.05
	min_tokens: 1024
	max_tokens: 4096
	infer_shift: 2.05
	num_train_timesteps: 1000
	train_shift: 5.0
	sigma_floor: 0.0
	action_scheduler:
	type: wan
	train_shift: 5.0
	infer_shift: 5.0
	num_train_timesteps: 1000
	sigma_floor: 0.0
	loss:
	lambda_video: 0.1
	lambda_action: 1.0
	# NOTE(review): the parent of the four keys below cannot be determined from
	# this flattened copy — they may belong under `model:` or at the document
	# root. Confirm against the code that reads them, then re-indent with
	# spaces (YAML does not accept tabs as indentation).
	# video_expert_lr (1.0e-05) is one tenth of action_expert_lr (0.0001), and
	# action_expert_lr equals the top-level learning_rate (0.0001).
	action_only_train: false
	mot_attn_decouple_frac: 0.25
	video_expert_lr: 1.0e-05
	action_expert_lr: 0.0001