{ "mlx_format_version": 1, "encoder": { "encoder_dim": 1280, "n_mels": 128, "num_layers": 32, "num_heads": 20, "head_dim": 64, "intermediate_size": 5120, "rope_theta": 10000.0 }, "projector": { "encoder_dim": 1280, "llm_dim": 1024, "hidden_dim": 1024, "pool_stride": 4 }, "audio_token": "