DiffusionWave
/

OppaiOracle_v1.1

Image Classification

multi-label-classification

vision-transformer

Model card Files Files and versions

OppaiOracle_v1.1 / preprocessing.json

DiffusionWave's picture

Upload 7 files

56bb8d8 verified 4 days ago

history blame contribute delete

1.44 kB

	{
	"image_size": 320,
	"patch_size": 16,
	"num_channels": 3,
	"color_order": "RGB",
	"resize_mode": "letterbox",
	"pad_color_rgb": [114, 114, 114],
	"normalize_mean": [0.5, 0.5, 0.5],
	"normalize_std": [0.5, 0.5, 0.5],
	"input_dtype": "float32",
	"input_layout": "BCHW",
	"onnx_inputs": {
	"pixel_values": {
	"shape": "(batch_size, 3, 320, 320)",
	"dtype": "float32",
	"description": "Letterboxed and normalized image tensor. Preprocessing is NOT in the graph; do it externally."
	},
	"padding_mask": {
	"shape": "(batch_size, 320, 320)",
	"dtype": "bool",
	"description": "True = padded pixel, False = valid pixel. Pass an all-False mask if your image fills the frame."
	}
	},
	"onnx_outputs": {
	"probabilities": {
	"shape": "(batch_size, 19294)",
	"dtype": "float32",
	"activation": "sigmoid (already applied inside the graph)"
	}
	},
	"opset_version": 21,
	"dynamic_batch": true,
	"embedded_metadata": {
	"vocabulary": "Embedded as gzip+base64 in the ONNX metadata_props (key: vocab_b64_gzip).",
	"tags_csv": "selected_tags.csv mirrors index_to_tag for SmilingWolf-style tagger UIs."
	},
	"notes": [
	"Letterbox resize keeps aspect ratio; pad with the RGB color above to reach 320x320.",
	"Normalize per-channel: (x/255 - mean) / std after letterboxing.",
	"Recommended thresholds are in pr_thresholds.json (per-tag and global)."
	]
	}