{ "type": "molmoact2", "n_obs_steps": 1, "input_features": { "observation.state": { "type": "STATE", "shape": [ 6 ] }, "observation.images.front": { "type": "VISUAL", "shape": [ 3, 480, 640 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 6 ] } }, "device": "cuda", "use_amp": false, "use_peft": false, "push_to_hub": true, "repo_id": "ETHrobotlearning/molmoact2-task3-TOY-clean-step2000", "private": null, "tags": null, "license": null, "pretrained_path": null, "checkpoint_path": "allenai/MolmoAct2-SO100_101", "checkpoint_revision": null, "checkpoint_force_download": false, "trust_remote_code": true, "chunk_size": 10, "n_action_steps": 10, "action_mode": "continuous", "inference_action_mode": null, "discrete_action_tokenizer": "allenai/MolmoAct2-FAST-Tokenizer", "discrete_generation_max_steps": null, "norm_tag": null, "setup_type": "single SO-100/SO-101 arm with one front RGB camera", "control_mode": "absolute joint pose", "image_keys": [ "observation.images.front" ], "normalize_language": true, "add_setup_tokens": true, "add_control_tokens": true, "normalize_gripper": false, "num_state_tokens": 256, "max_sequence_length": null, "expected_max_action_dim": 32, "num_flow_timesteps": 8, "flow_matching_cutoff": 1.0, "flow_matching_time_offset": 0.001, "flow_matching_time_scale": 0.999, "flow_matching_beta_alpha": 1.0, "flow_matching_beta_beta": 1.5, "num_inference_steps": null, "mask_action_dim_padding": true, "enable_inference_cuda_graph": true, "per_episode_seed": false, "eval_seed": null, "rtc_config": null, "enable_lora_vlm": true, "lora_rank": 64, "lora_alpha": 16, "lora_dropout": 0.05, "lora_bias": "none", "enable_lora_action_expert": false, "enable_knowledge_insulation": false, "freeze_embedding": true, "train_action_expert_only": false, "gradient_checkpointing": true, "model_dtype": "bfloat16", "softmax_auxiliary_loss": true, "softmax_auxiliary_loss_scale": 0.0001, "discrete_loss_token_weighting": "root_subsegments_root_tokens", "optimizer_lr": 1e-05, "optimizer_vit_lr": 5e-06, "optimizer_connector_lr": 5e-06, "optimizer_action_expert_lr": 5e-05, "optimizer_betas": [ 0.9, 0.95 ], "optimizer_eps": 1e-06, "optimizer_weight_decay": 0.0, "optimizer_grad_clip_norm": 1.0, "scheduler_warmup_steps": 200, "scheduler_decay_steps": null, "scheduler_decay_lr": 1e-06, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "QUANTILES", "ACTION": "QUANTILES" }, "dataset_feature_names": { "action": [ "shoulder_pan.pos", "shoulder_lift.pos", "elbow_flex.pos", "wrist_flex.pos", "wrist_roll.pos", "gripper.pos" ], "observation.state": [ "shoulder_pan.pos", "shoulder_lift.pos", "elbow_flex.pos", "wrist_flex.pos", "wrist_roll.pos", "gripper.pos" ] } }