{
  "format": "onnx",
  "architecture_type": "vit",
  "num_labels": 19294,
  "num_channels": 3,
  "image_size": 320,
  "patch_size": 16,
  "hidden_size": 1024,
  "num_hidden_layers": 18,
  "num_attention_heads": 16,
  "intermediate_size": 4096,
  "num_groups": 20,
  "tags_per_group": 10000,
  "training_epoch": 33,
  "training_step": 170799,
  "vocab_format_version": 1,
  "vocab_sha256": "b9f95e88fb7e30669077bb761e9a66642ec526c1e10d65336a2a2b628141199d",
  "onnx_opset_version": 21,
  "onnx_inputs": ["pixel_values", "padding_mask"],
  "onnx_outputs": ["probabilities"],
  "dynamic_batch": true,
  "preprocessing_file": "preprocessing.json",
  "thresholds_file": "pr_thresholds.json",
  "vocabulary_file": "vocabulary.json",
  "checkpoint_source": "experiments/run1_vit/checkpoints/last.pt (epoch 33), exported to ONNX"
}