{
  "_class_name": "MLLMEncoder",
  "mllm_model_path": ".",
  "dit_dim": 3072,
  "hidden_size": 2048,
  "num_image_queries": 256,
  "num_video_queries": 512,
  "num_ref_queries": 768,
  "max_object_token": 768,
  "max_frames": 16,
  "max_pixels_per_frame": 262144
}