{
"architectures": [
"VFMMultiFrameTransformer"
],
"chosen_layers": [
4,
11,
17,
23
],
"ffn_layer": "mlp",
"geometry_aggregator": false,
"geometry_aggregator_layer": 6,
"grounding_ratio": 0.5,
"hidden_act": "gelu",
"hidden_size": 1024,
"image_aggregator": false,
"image_aggregator_layer": 6,
"image_size": 224,
"image_ssl": {
"compute_precision": {
"sharding_strategy": "SHARD_GRAD_OP"
},
"crops": {
"local_crops_number": 2
},
"dino": {
"force_weight_norm": false,
"global_ignore_diagonal": true,
"head_bottleneck_dim": 256,
"head_hidden_dim": 2048,
"head_n_prototypes": 65536,
"head_nlayers": 3,
"head_norm_last_layer": false,
"koleo_distributed_replicas": 0,
"koleo_loss_distributed": false,
"koleo_loss_weight": 0.1,
"koleo_topk": 1,
"local_loss_weight_schedule": {
"end": 0.5,
"peak": 0.5,
"start": 0.5,
"warmup_epochs": 0
},
"loss_weight": 1.0,
"reweight_dino_local_loss": false
},
"distillation": {
"checkpoint_path": "",
"enabled": false,
"full_cfg_path": ""
},
"gram": {
"ckpt": null,
"compute_stats": false,
"ema_teacher": false,
"global_teacher_resize_antialias": false,
"global_teacher_resize_method": "bicubic",
"img_level": true,
"it_first_update": 0,
"it_load_ema_teacher": -1,
"loss_weight": 1.0,
"loss_weight_schedule": null,
"max_updates": null,
"normalized": true,
"remove_neg": false,
"remove_only_teacher_neg": false,
"rep_update": true,
"tokens_used": "all",
"update_frequency": 50000,
"use_loss": true
},
"ibot": {
"force_masking_even_with_zero_weight": false,
"head_bottleneck_dim": 256,
"head_hidden_dim": 2048,
"head_n_prototypes": 65536,
"head_nlayers": 3,
"head_norm_last_layer": false,
"loss_weight": 1.0,
"mask_random_circular_shift": false,
"mask_ratio_min_max": [
0.1,
0.5
],
"mask_sample_probability": 0.5,
"separate_head": true
},
"multidistillation": {
"enabled": false
},
"train": {
"centering": "sinkhorn_knopp"
}
},
"initializer_range": 0.02,
"intermediate_size": 3072,
"layer_norm_eps": 1e-06,
"mlp_ratio": 4.0,
"mm_projector_type": "mlp2x_gelu",
"model_type": "vfm",
"num_attention_heads": 16,
"num_channels": 3,
"num_experts": 8,
"num_frames": 16,
"patch_embed_name": "dinov3_vitl16_torch",
"patch_size": 16,
"top_k": 2,
"torch_dtype": "float32",
"transformers_version": "4.52.3",
"upcycle_to_moe": false,
"video_aggregator": true,
"video_aggregator_layer": 24
}