{
"image_size": 320,
"patch_size": 16,
"num_channels": 3,
"color_order": "RGB",
"resize_mode": "letterbox",
"pad_color_rgb": [114, 114, 114],
"normalize_mean": [0.5, 0.5, 0.5],
"normalize_std": [0.5, 0.5, 0.5],
"input_dtype": "float32",
"input_layout": "BCHW",
"onnx_inputs": {
"pixel_values": {
"shape": "(batch_size, 3, 320, 320)",
"dtype": "float32",
"description": "Letterboxed and normalized image tensor. Preprocessing is NOT in the graph; do it externally."
},
"padding_mask": {
"shape": "(batch_size, 320, 320)",
"dtype": "bool",
"description": "True = padded pixel, False = valid pixel. Pass an all-False mask if your image fills the frame."
}
},
"onnx_outputs": {
"probabilities": {
"shape": "(batch_size, 19294)",
"dtype": "float32",
"activation": "sigmoid (already applied inside the graph)"
}
},
"opset_version": 21,
"dynamic_batch": true,
"embedded_metadata": {
"vocabulary": "Embedded as gzip+base64 in the ONNX metadata_props (key: vocab_b64_gzip).",
"tags_csv": "selected_tags.csv mirrors index_to_tag for SmilingWolf-style tagger UIs."
},
"notes": [
"Letterbox resize keeps aspect ratio; pad with the RGB color above to reach 320x320.",
"Normalize per-channel: (x/255 - mean) / std after letterboxing.",
"Recommended thresholds are in pr_thresholds.json (per-tag and global)."
]
}