uuuhjb committed on
Commit
acf49f8
·
verified ·
1 Parent(s): 8472c4d

upload config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +300 -0
config.yaml ADDED
@@ -0,0 +1,300 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ output_dir: ./runs/train_libero_v2_512hdim_variant_decouple_2node/decouple_2node_b8_20260605_184739
2
+ batch_size: 8
3
+ num_workers: 4
4
+ prefetch_factor: 6
5
+ lr_scheduler_type: constant
6
+ learning_rate: 0.0001
7
+ num_epochs: 10
8
+ max_steps: 80000
9
+ log_every: 10
10
+ save_every: 5000
11
+ state_keep_last_n: 1
12
+ weights_keep_last_n: 1000
13
+ long_term_save_every: 10000
14
+ long_term_save_start: 0
15
+ eval_every: 0
16
+ eval_num_inference_steps: 10
17
+ gradient_accumulation_steps: 1
18
+ mixed_precision: bf16
19
+ seed: 42
20
+ max_grad_norm: 1.0
21
+ weight_decay: 0.01
22
+ resume: null
23
+ compile_mot: false
24
+ optimizer_type: adamw8bit
25
+ wandb:
26
+ enabled: true
27
+ workspace: null
28
+ project: fastwam_ltx_decouple
29
+ name: decouple_2node_b8_20260605_184739
30
+ group: null
31
+ mode: online
32
+ data:
33
+ train:
34
+ _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
35
+ dataset_dirs:
36
+ - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
37
+ - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
38
+ - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
39
+ - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
40
+ shape_meta:
41
+ images:
42
+ - key: image
43
+ raw_shape:
44
+ - 3
45
+ - 512
46
+ - 512
47
+ shape:
48
+ - 3
49
+ - 224
50
+ - 224
51
+ - key: wrist_image
52
+ raw_shape:
53
+ - 3
54
+ - 512
55
+ - 512
56
+ shape:
57
+ - 3
58
+ - 224
59
+ - 224
60
+ action:
61
+ - key: default
62
+ raw_shape: 7
63
+ shape: 7
64
+ state:
65
+ - key: default
66
+ raw_shape: 8
67
+ shape: 8
68
+ num_frames: 33
69
+ global_sample_stride: 1
70
+ action_video_freq_ratio: 4
71
+ video_size:
72
+ - 224
73
+ - 448
74
+ camera_key: null
75
+ val_set_proportion: 0
76
+ is_training_set: true
77
+ skip_padding_as_possible: false
78
+ concat_multi_camera: horizontal
79
+ processor:
80
+ _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
81
+ shape_meta:
82
+ images:
83
+ - key: image
84
+ raw_shape:
85
+ - 3
86
+ - 512
87
+ - 512
88
+ shape:
89
+ - 3
90
+ - 224
91
+ - 224
92
+ - key: wrist_image
93
+ raw_shape:
94
+ - 3
95
+ - 512
96
+ - 512
97
+ shape:
98
+ - 3
99
+ - 224
100
+ - 224
101
+ action:
102
+ - key: default
103
+ raw_shape: 7
104
+ shape: 7
105
+ state:
106
+ - key: default
107
+ raw_shape: 8
108
+ shape: 8
109
+ num_obs_steps: 33
110
+ num_output_cameras: 2
111
+ action_output_dim: 7
112
+ proprio_output_dim: 8
113
+ delta_action_dim_mask:
114
+ default:
115
+ - true
116
+ - true
117
+ - true
118
+ - true
119
+ - true
120
+ - true
121
+ - false
122
+ action_state_transforms: null
123
+ use_stepwise_action_norm: false
124
+ norm_default_mode: min/max
125
+ norm_exception_mode: null
126
+ action_state_merger:
127
+ _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
128
+ train_transforms:
129
+ - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
130
+ - _target_: torchvision.transforms.Resize
131
+ size:
132
+ - 224
133
+ - 224
134
+ val_transforms:
135
+ - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
136
+ - _target_: torchvision.transforms.Resize
137
+ size:
138
+ - 224
139
+ - 224
140
+ text_embedding_cache_dir: ./data/text_embeds_cache/libero
141
+ text_cache_slug: ltx23_gemma3_12b_v2connector
142
+ context_len: 128
143
+ joint_latent_cache_dir: ./data/joint_latents/libero_ratio4_nf33
144
+ val:
145
+ _target_: fastwam.datasets.lerobot.robot_video_dataset.RobotVideoDataset
146
+ dataset_dirs:
147
+ - ./data/LIBERO-fastwam/libero_spatial_no_noops_lerobot
148
+ - ./data/LIBERO-fastwam/libero_object_no_noops_lerobot
149
+ - ./data/LIBERO-fastwam/libero_goal_no_noops_lerobot
150
+ - ./data/LIBERO-fastwam/libero_10_no_noops_lerobot
151
+ shape_meta:
152
+ images:
153
+ - key: image
154
+ raw_shape:
155
+ - 3
156
+ - 512
157
+ - 512
158
+ shape:
159
+ - 3
160
+ - 224
161
+ - 224
162
+ - key: wrist_image
163
+ raw_shape:
164
+ - 3
165
+ - 512
166
+ - 512
167
+ shape:
168
+ - 3
169
+ - 224
170
+ - 224
171
+ action:
172
+ - key: default
173
+ raw_shape: 7
174
+ shape: 7
175
+ state:
176
+ - key: default
177
+ raw_shape: 8
178
+ shape: 8
179
+ num_frames: 33
180
+ global_sample_stride: 1
181
+ action_video_freq_ratio: 4
182
+ video_size:
183
+ - 224
184
+ - 448
185
+ camera_key: null
186
+ val_set_proportion: 0
187
+ is_training_set: false
188
+ skip_padding_as_possible: false
189
+ concat_multi_camera: horizontal
190
+ processor:
191
+ _target_: fastwam.datasets.lerobot.processors.fastwam_processor.FastWAMProcessor
192
+ shape_meta:
193
+ images:
194
+ - key: image
195
+ raw_shape:
196
+ - 3
197
+ - 512
198
+ - 512
199
+ shape:
200
+ - 3
201
+ - 224
202
+ - 224
203
+ - key: wrist_image
204
+ raw_shape:
205
+ - 3
206
+ - 512
207
+ - 512
208
+ shape:
209
+ - 3
210
+ - 224
211
+ - 224
212
+ action:
213
+ - key: default
214
+ raw_shape: 7
215
+ shape: 7
216
+ state:
217
+ - key: default
218
+ raw_shape: 8
219
+ shape: 8
220
+ num_obs_steps: 33
221
+ num_output_cameras: 2
222
+ action_output_dim: 7
223
+ proprio_output_dim: 8
224
+ delta_action_dim_mask:
225
+ default:
226
+ - true
227
+ - true
228
+ - true
229
+ - true
230
+ - true
231
+ - true
232
+ - false
233
+ action_state_transforms: null
234
+ use_stepwise_action_norm: false
235
+ norm_default_mode: min/max
236
+ norm_exception_mode: null
237
+ action_state_merger:
238
+ _target_: fastwam.datasets.lerobot.transforms.action_state_merger.ConcatLeftAlign
239
+ train_transforms:
240
+ - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
241
+ - _target_: torchvision.transforms.Resize
242
+ size:
243
+ - 224
244
+ - 224
245
+ val_transforms:
246
+ - _target_: fastwam.datasets.lerobot.transforms.image.ToTensor
247
+ - _target_: torchvision.transforms.Resize
248
+ size:
249
+ - 224
250
+ - 224
251
+ text_embedding_cache_dir: ./data/text_embeds_cache/libero
252
+ text_cache_slug: ltx23_gemma3_12b_v2connector
253
+ context_len: 128
254
+ model:
255
+ _target_: fastwam.runtime.create_fastwam
256
+ ckpt_path: checkpoints/Lightricks/LTX-2.3/ltx-2.3-22b-dev.safetensors
257
+ gemma_path: checkpoints/google/gemma-3-12b-it-qat-q4_0-unquantized
258
+ load_text_encoder: false
259
+ attach_gemma_to_text_encoder: false
260
+ proprio_dim: 8
261
+ mot_checkpoint_mixed_attn: false
262
+ action_dit_pretrained_path: checkpoints/preprocessed/ltx_action_dit_backbone.pt
263
+ skip_dit_load_from_pretrain: false
264
+ video_dit_config:
265
+ text_dim: 4096
266
+ use_gradient_checkpointing: false
267
+ action_dim: 7
268
+ action_dit_config:
269
+ action_dim: 7
270
+ hidden_dim: 512
271
+ num_heads: 32
272
+ attn_head_dim: 128
273
+ num_layers: 48
274
+ text_dim: 4096
275
+ eps: 1.0e-06
276
+ cross_attention_adaln: false
277
+ use_gradient_checkpointing: false
278
+ video_scheduler:
279
+ type: ltx2
280
+ min_shift: 0.95
281
+ max_shift: 2.05
282
+ min_tokens: 1024
283
+ max_tokens: 4096
284
+ infer_shift: 2.05
285
+ num_train_timesteps: 1000
286
+ train_shift: 5.0
287
+ sigma_floor: 0.0
288
+ action_scheduler:
289
+ type: wan
290
+ train_shift: 5.0
291
+ infer_shift: 5.0
292
+ num_train_timesteps: 1000
293
+ sigma_floor: 0.0
294
+ loss:
295
+ lambda_video: 0.1
296
+ lambda_action: 1.0
297
+ action_only_train: false
298
+ mot_attn_decouple_frac: 0.25
299
+ video_expert_lr: 1.0e-05
300
+ action_expert_lr: 0.0001