nvidia
/

Cosmos3-Super-Image2Video

video-generation

Model card Files Files and versions

Cosmos3-Super-Image2Video / vision_encoder /config.json

mingyuliutw's picture

Super-squash branch 'main' using huggingface_hub

8889131 1 day ago

509 Bytes

	{
	"architectures": [
	"Qwen3VLVisionModel"
	],
	"deepstack_visual_indexes": [
	8,
	16,
	24
	],
	"depth": 27,
	"dtype": "bfloat16",
	"hidden_act": "gelu_pytorch_tanh",
	"hidden_size": 1152,
	"in_channels": 3,
	"initializer_range": 0.02,
	"intermediate_size": 4304,
	"model_type": "qwen3_vl",
	"num_heads": 16,
	"num_position_embeddings": 2304,
	"out_hidden_size": 5120,
	"patch_size": 16,
	"spatial_merge_size": 2,
	"temporal_patch_size": 2,
	"transformers_version": "4.57.1"
	}