{ "format": "onnx", "architecture_type": "vit", "num_labels": 19294, "num_channels": 3, "image_size": 320, "patch_size": 16, "hidden_size": 1024, "num_hidden_layers": 18, "num_attention_heads": 16, "intermediate_size": 4096, "num_groups": 20, "tags_per_group": 10000, "training_epoch": 33, "training_step": 170799, "vocab_format_version": 1, "vocab_sha256": "b9f95e88fb7e30669077bb761e9a66642ec526c1e10d65336a2a2b628141199d", "onnx_opset_version": 21, "onnx_inputs": ["pixel_values", "padding_mask"], "onnx_outputs": ["probabilities"], "dynamic_batch": true, "preprocessing_file": "preprocessing.json", "thresholds_file": "pr_thresholds.json", "vocabulary_file": "vocabulary.json", "checkpoint_source": "experiments/run1_vit/checkpoints/last.pt (epoch 33), exported to ONNX" }