{ "type": "wam", "n_obs_steps": 1, "input_features": { "observation.images.top": { "type": "VISUAL", "shape": [ 3, 480, 640 ] }, "observation.state": { "type": "STATE", "shape": [ 14 ] } }, "output_features": { "action": { "type": "ACTION", "shape": [ 14 ] } }, "device": "cuda", "use_amp": true, "use_peft": false, "push_to_hub": true, "repo_id": "maximellerbach/test_wam_decoder", "private": null, "tags": null, "license": null, "pretrained_path": "outputs/train/wam_aloha_decode_test/checkpoints/last/pretrained_model", "time_between_frames": 50, "lag_offset": 3, "image_size": [ 256, 256 ], "horizon": 50, "n_action_steps": 50, "normalization_mapping": { "VISUAL": "IDENTITY", "STATE": "MEAN_STD", "ACTION": "MEAN_STD" }, "hidden_dim": 512, "dim_feedforward": 2048, "pre_norm": false, "dropout": 0.1, "vision_backbone": "resnet18", "pretrained_backbone_weights": "ResNet18_Weights.IMAGENET1K_V1", "use_pretrained_backbone": true, "freeze_backbone": false, "latent_dim": 32, "n_vae_encoder_layers": 4, "action_head_num_layers": 4, "action_head_num_heads": 8, "action_head_dropout": 0.1, "predictor_num_layers": 8, "predictor_num_heads": 8, "predictor_dropout": 0.05, "predictor_max_tokens": 1024, "optimizer_lr": 5e-05, "optimizer_weight_decay": 0.01, "optimizer_grad_clip_norm": 1.0, "scheduler_decay_lr": 1e-05, "scheduler_warmup_steps": 500, "scheduler_decay_steps": 5000 }