File size: 1,437 Bytes
56bb8d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
{
  "image_size": 320,
  "patch_size": 16,
  "num_channels": 3,
  "color_order": "RGB",
  "resize_mode": "letterbox",
  "pad_color_rgb": [114, 114, 114],
  "normalize_mean": [0.5, 0.5, 0.5],
  "normalize_std": [0.5, 0.5, 0.5],
  "input_dtype": "float32",
  "input_layout": "BCHW",
  "onnx_inputs": {
    "pixel_values": {
      "shape": "(batch_size, 3, 320, 320)",
      "dtype": "float32",
      "description": "Letterboxed and normalized image tensor. Preprocessing is NOT in the graph; do it externally."
    },
    "padding_mask": {
      "shape": "(batch_size, 320, 320)",
      "dtype": "bool",
      "description": "True = padded pixel, False = valid pixel. Pass an all-False mask if your image fills the frame."
    }
  },
  "onnx_outputs": {
    "probabilities": {
      "shape": "(batch_size, 19294)",
      "dtype": "float32",
      "activation": "sigmoid (already applied inside the graph)"
    }
  },
  "opset_version": 21,
  "dynamic_batch": true,
  "embedded_metadata": {
    "vocabulary": "Embedded as gzip+base64 in the ONNX metadata_props (key: vocab_b64_gzip).",
    "tags_csv": "selected_tags.csv mirrors index_to_tag for SmilingWolf-style tagger UIs."
  },
  "notes": [
    "Letterbox resize keeps aspect ratio; pad with the RGB color above to reach 320x320.",
    "Normalize per-channel: (x/255 - mean) / std after letterboxing.",
    "Recommended thresholds are in pr_thresholds.json (per-tag and global)."
  ]
}