| { |
| "image_size": 320, |
| "patch_size": 16, |
| "num_channels": 3, |
| "color_order": "RGB", |
| "resize_mode": "letterbox", |
| "pad_color_rgb": [114, 114, 114], |
| "normalize_mean": [0.5, 0.5, 0.5], |
| "normalize_std": [0.5, 0.5, 0.5], |
| "input_dtype": "float32", |
| "input_layout": "BCHW", |
| "onnx_inputs": { |
| "pixel_values": { |
| "shape": "(batch_size, 3, 320, 320)", |
| "dtype": "float32", |
| "description": "Letterboxed and normalized image tensor. Preprocessing is NOT in the graph; do it externally." |
| }, |
| "padding_mask": { |
| "shape": "(batch_size, 320, 320)", |
| "dtype": "bool", |
| "description": "True = padded pixel, False = valid pixel. Pass an all-False mask if your image fills the frame." |
| } |
| }, |
| "onnx_outputs": { |
| "probabilities": { |
| "shape": "(batch_size, 19294)", |
| "dtype": "float32", |
| "activation": "sigmoid (already applied inside the graph)" |
| } |
| }, |
| "opset_version": 21, |
| "dynamic_batch": true, |
| "embedded_metadata": { |
| "vocabulary": "Embedded as gzip+base64 in the ONNX metadata_props (key: vocab_b64_gzip).", |
| "tags_csv": "selected_tags.csv mirrors index_to_tag for SmilingWolf-style tagger UIs." |
| }, |
| "notes": [ |
| "Letterbox resize keeps aspect ratio; pad with pad_color_rgb ([114, 114, 114]) to reach 320x320.", |
| "Normalize per-channel: (x/255 - mean) / std after letterboxing.", |
| "Recommended thresholds are in pr_thresholds.json (per-tag and global)." |
| ] |
| } |
|
|