{
  "_class_name": "MLLMEncoder",
  "mllm_model_path": ".",
  "dit_dim": 3072,
  "hidden_size": 2048,
  "num_image_queries": 256,
  "num_video_queries": 512,
  "num_ref_queries": 768,
  "max_object_token": 768,
  "max_frames": 16,
  "max_pixels_per_frame": 262144
}
{
  "_class_name": "MLLMEncoder",
  "mllm_model_path": ".",
  "dit_dim": 3072,
  "hidden_size": 2048,
  "num_image_queries": 256,
  "num_video_queries": 512,
  "num_ref_queries": 768,
  "max_object_token": 768,
  "max_frames": 16,
  "max_pixels_per_frame": 262144
}