Image-Text-to-Text
Transformers
Safetensors
minimax_m3_vl
minimax-m3
fp8
compressed-tensors
llm-compressor
vllm
rocm
conversational
custom_code
Instructions to use EmbeddedLLM/MiniMax-M3-FP8-dynamic with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use EmbeddedLLM/MiniMax-M3-FP8-dynamic with Transformers:
# Use a pipeline as a high-level helper from transformers import pipeline pipe = pipeline("image-text-to-text", model="EmbeddedLLM/MiniMax-M3-FP8-dynamic", trust_remote_code=True) messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] pipe(text=messages)# Load model directly from transformers import AutoProcessor, AutoModelForMultimodalLM processor = AutoProcessor.from_pretrained("EmbeddedLLM/MiniMax-M3-FP8-dynamic", trust_remote_code=True) model = AutoModelForMultimodalLM.from_pretrained("EmbeddedLLM/MiniMax-M3-FP8-dynamic", trust_remote_code=True) messages = [ { "role": "user", "content": [ {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"}, {"type": "text", "text": "What animal is on the candy?"} ] }, ] inputs = processor.apply_chat_template( messages, add_generation_prompt=True, tokenize=True, return_dict=True, return_tensors="pt", ).to(model.device) outputs = model.generate(**inputs, max_new_tokens=40) print(processor.decode(outputs[0][inputs["input_ids"].shape[-1]:])) - Notebooks
- Google Colab
- Kaggle
- Local Apps Settings
- vLLM
How to use EmbeddedLLM/MiniMax-M3-FP8-dynamic with vLLM:
Install from pip and serve model
# Install vLLM from pip: pip install vllm # Start the vLLM server: vllm serve "EmbeddedLLM/MiniMax-M3-FP8-dynamic" # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:8000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "EmbeddedLLM/MiniMax-M3-FP8-dynamic", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker
docker model run hf.co/EmbeddedLLM/MiniMax-M3-FP8-dynamic
- SGLang
How to use EmbeddedLLM/MiniMax-M3-FP8-dynamic with SGLang:
Install from pip and serve model
# Install SGLang from pip: pip install sglang # Start the SGLang server: python3 -m sglang.launch_server \ --model-path "EmbeddedLLM/MiniMax-M3-FP8-dynamic" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "EmbeddedLLM/MiniMax-M3-FP8-dynamic", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }'Use Docker images
docker run --gpus all \ --shm-size 32g \ -p 30000:30000 \ -v ~/.cache/huggingface:/root/.cache/huggingface \ --env "HF_TOKEN=<secret>" \ --ipc=host \ lmsysorg/sglang:latest \ python3 -m sglang.launch_server \ --model-path "EmbeddedLLM/MiniMax-M3-FP8-dynamic" \ --host 0.0.0.0 \ --port 30000 # Call the server using curl (OpenAI-compatible API): curl -X POST "http://localhost:30000/v1/chat/completions" \ -H "Content-Type: application/json" \ --data '{ "model": "EmbeddedLLM/MiniMax-M3-FP8-dynamic", "messages": [ { "role": "user", "content": [ { "type": "text", "text": "Describe this image in one sentence." }, { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } } ] } ] }' - Docker Model Runner
How to use EmbeddedLLM/MiniMax-M3-FP8-dynamic with Docker Model Runner:
docker model run hf.co/EmbeddedLLM/MiniMax-M3-FP8-dynamic
Add files using upload-large-folder tool
Browse files- LICENSE +17 -0
- README.md +61 -0
- added_tokens.json +63 -0
- configuration_minimax_m3_vl.py +111 -0
- generation_config.json +8 -0
- merges.txt +0 -0
- minimax_m3_model_free_fp8_dynamic_receipt.json +19 -0
- model-00001-of-00059.safetensors +3 -0
- model-00002-of-00059.safetensors +3 -0
- model-00004-of-00059.safetensors +3 -0
- model-00005-of-00059.safetensors +3 -0
- model-00008-of-00059.safetensors +3 -0
- model-00009-of-00059.safetensors +3 -0
- model-00010-of-00059.safetensors +3 -0
- model-00011-of-00059.safetensors +3 -0
- model-00012-of-00059.safetensors +3 -0
- model-00013-of-00059.safetensors +3 -0
- model-00014-of-00059.safetensors +3 -0
- model-00015-of-00059.safetensors +3 -0
- model-00016-of-00059.safetensors +3 -0
- model-00018-of-00059.safetensors +3 -0
- model-00019-of-00059.safetensors +3 -0
- model-00020-of-00059.safetensors +3 -0
- model-00026-of-00059.safetensors +3 -0
- model-00028-of-00059.safetensors +3 -0
- model-00029-of-00059.safetensors +3 -0
- model-00030-of-00059.safetensors +3 -0
- model-00031-of-00059.safetensors +3 -0
- model-00033-of-00059.safetensors +3 -0
- model-00035-of-00059.safetensors +3 -0
- model-00036-of-00059.safetensors +3 -0
- model-00037-of-00059.safetensors +3 -0
- model-00038-of-00059.safetensors +3 -0
- model-00041-of-00059.safetensors +3 -0
- model-00042-of-00059.safetensors +3 -0
- model-00043-of-00059.safetensors +3 -0
- model-00044-of-00059.safetensors +3 -0
- model-00045-of-00059.safetensors +3 -0
- model-00046-of-00059.safetensors +3 -0
- model-00047-of-00059.safetensors +3 -0
- model-00048-of-00059.safetensors +3 -0
- model-00049-of-00059.safetensors +3 -0
- model-00050-of-00059.safetensors +3 -0
- model-00051-of-00059.safetensors +3 -0
- model-00052-of-00059.safetensors +3 -0
- model-00053-of-00059.safetensors +3 -0
- model-00056-of-00059.safetensors +3 -0
- model-00059-of-00059.safetensors +3 -0
- preprocessor_config.json +32 -0
- special_tokens_map.json +16 -0
LICENSE
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MINIMAX COMMUNITY LICENSE
|
| 2 |
+
Copyright (c) 2026 MiniMax
|
| 3 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software for non-commercial purposes, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or provide copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
| 4 |
+
1. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
| 5 |
+
2. If the Software (or any derivative works thereof) is used for any Commercial Use for your products or services:
|
| 6 |
+
1. you shall prominently display “Built with MiniMax M3” on a related website, user interface, blogpost, about page or product documentation.
|
| 7 |
+
2. you shall obtain a separate, prior written authorization from MiniMax by contacting api@minimax.io with the subject line “M3 licensing - authorization request”, if such products and services generate more than 20 million US dollars (or equivalent in other currencies) in yearly revenue; otherwise, you only need to send a one-time notice to api@minimax.io with the subject “M3 licensing — notice”.
|
| 8 |
+
3. “Commercial Use” means any use of the Software or any derivative work thereof that is primarily intended for commercial advantage or monetary compensation, which includes, without limitation: (i) offering products or services to third parties for a fee, which utilize, incorporate, or rely on the Software or its derivatives, (ii) the commercial use of APIs provided by or for the Software or its derivatives, including to support or enable commercial products, services, or operations, whether in a cloud-based, hosted, or other similar environment, and (iii) the deployment or provision of the Software or its derivatives that have been subjected to post-training, fine-tuning, instruction-tuning, or any other form of modification, for any commercial purpose.
|
| 9 |
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 10 |
+
|
| 11 |
+
Appendix: Prohibited Uses
|
| 12 |
+
You agree you will not use, or allow others to use, the Software or any derivatives of the Software to:
|
| 13 |
+
1. Generate or disseminate content prohibited by applicable laws or regulations.
|
| 14 |
+
2. Assist with, engage in or otherwise support any military purpose.
|
| 15 |
+
3. Exploit, harm, or attempt to exploit or harm minors.
|
| 16 |
+
4. Generate or disseminate false or misleading information with the intent to cause harm.
|
| 17 |
+
5. Promote discrimination, hate speech, or harmful behavior against individuals or groups based on race or ethnic origin, religion, disability, age, nationality and national origin, veteran status, sexual orientation, gender or gender identity, caste, immigration status, or any other characteristic that is associated with systemic discrimination or marginalization.
|
README.md
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: other
|
| 3 |
+
base_model: MiniMaxAI/MiniMax-M3
|
| 4 |
+
pipeline_tag: image-text-to-text
|
| 5 |
+
library_name: transformers
|
| 6 |
+
tags:
|
| 7 |
+
- minimax-m3
|
| 8 |
+
- fp8
|
| 9 |
+
- compressed-tensors
|
| 10 |
+
- llm-compressor
|
| 11 |
+
- vllm
|
| 12 |
+
- rocm
|
| 13 |
+
- conversational
|
| 14 |
+
- image-text-to-text
|
| 15 |
+
---
|
| 16 |
+
|
| 17 |
+
# MiniMax-M3-FP8-dynamic
|
| 18 |
+
|
| 19 |
+
## Model Overview
|
| 20 |
+
|
| 21 |
+
This model is an FP8 dynamic quantized version of [MiniMaxAI/MiniMax-M3](https://huggingface.co/MiniMaxAI/MiniMax-M3).
|
| 22 |
+
|
| 23 |
+
- Base model: `MiniMaxAI/MiniMax-M3`
|
| 24 |
+
- Optimization: FP8 dynamic quantization
|
| 25 |
+
- Format: safetensors / compressed-tensors
|
| 26 |
+
- Validated runtime: vLLM OpenAI-compatible server
|
| 27 |
+
- Tested hardware: AMD MI350, tensor parallel size 8
|
| 28 |
+
|
| 29 |
+
MiniMax-M3 is a native multimodal MoE model. The original model card describes it as a ~428B parameter model with ~23B activated parameters and 1M context support.
|
| 30 |
+
|
| 31 |
+
## License
|
| 32 |
+
|
| 33 |
+
This quantized checkpoint follows the license terms of the base model, [MiniMaxAI/MiniMax-M3](https://huggingface.co/MiniMaxAI/MiniMax-M3). The Hugging Face model-card metadata uses `license: other` because the MiniMax community license is not one of the Hub's enumerated license identifiers.
|
| 34 |
+
|
| 35 |
+
## Model Optimizations
|
| 36 |
+
|
| 37 |
+
This checkpoint uses FP8 dynamic quantization to reduce memory and disk requirements while preserving model quality. Validation below compares this quantized checkpoint against the BF16 `MiniMaxAI/MiniMax-M3` baseline.
|
| 38 |
+
|
| 39 |
+
## Evaluation
|
| 40 |
+
|
| 41 |
+
The model was evaluated against BF16 `MiniMaxAI/MiniMax-M3`. Scores are averaged across seeds.
|
| 42 |
+
|
| 43 |
+
| Benchmark | MiniMaxAI/MiniMax-M3 | EmbeddedLLM/MiniMax-M3-FP8-dynamic | Recovery (%) |
|
| 44 |
+
|---|---:|---:|---:|
|
| 45 |
+
| GSM8K Platinum | 95.81 | 95.92 | 100.12 |
|
| 46 |
+
| IFEval | 80.65 | 79.42 | 98.47 |
|
| 47 |
+
| AIME 2025 | 20.83 | 19.17 | 92.00 |
|
| 48 |
+
| GPQA diamond | 77.78 | 77.95 | 100.22 |
|
| 49 |
+
| Math 500 | 81.20 | 79.93 | 98.44 |
|
| 50 |
+
| LiveCodeBench v6 (code generation) | 37.14 | 35.62 | 95.90 |
|
| 51 |
+
| MMLU Pro Chat | 79.85 | 79.62 | 99.72 |
|
| 52 |
+
|
| 53 |
+
## Evaluation Setup
|
| 54 |
+
|
| 55 |
+
- Standard seeds: `42, 1234, 4158`
|
| 56 |
+
- AIME 2025 seeds: `42, 1234, 4158, 5322, 1356, 9843, 3344, 5678`
|
| 57 |
+
- GSM8K Platinum cap: `max_gen_toks=64000`
|
| 58 |
+
- IFEval, AIME, GPQA, Math 500, MMLU Pro Chat cap: `max_gen_toks=4096`
|
| 59 |
+
- LiveCodeBench v6 cap: `max_gen_toks=2048`
|
| 60 |
+
- MiniMax thinking mode: disabled
|
| 61 |
+
- Runners: lm-eval harness and lighteval through LiteLLM endpoint mode
|
added_tokens.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"]!p~[": 200000,
|
| 3 |
+
"<fim_prefix>": 200001,
|
| 4 |
+
"<fim_middle>": 200002,
|
| 5 |
+
"<fim_suffix>": 200003,
|
| 6 |
+
"<fim_pad>": 200004,
|
| 7 |
+
"<reponame>": 200005,
|
| 8 |
+
"<filename>": 200006,
|
| 9 |
+
"<gh_stars>": 200007,
|
| 10 |
+
"<issue_start>": 200008,
|
| 11 |
+
"<issue_comment>": 200009,
|
| 12 |
+
"<issue_closed>": 200010,
|
| 13 |
+
"<jupyter_start>": 200011,
|
| 14 |
+
"<jupyter_text>": 200012,
|
| 15 |
+
"<jupyter_code>": 200013,
|
| 16 |
+
"<jupyter_output>": 200014,
|
| 17 |
+
"<empty_output>": 200015,
|
| 18 |
+
"<commit_before>": 200016,
|
| 19 |
+
"<commit_msg>": 200017,
|
| 20 |
+
"<commit_after>": 200018,
|
| 21 |
+
"]~b]": 200019,
|
| 22 |
+
"[e~[": 200020,
|
| 23 |
+
"]!d~[": 200021,
|
| 24 |
+
"<function_call>": 200022,
|
| 25 |
+
"<code_interpreter>": 200023,
|
| 26 |
+
"]<]speech[>[": 200024,
|
| 27 |
+
"]<]image[>[": 200025,
|
| 28 |
+
"]<]video[>[": 200026,
|
| 29 |
+
"]<]start of speech[>[": 200027,
|
| 30 |
+
"]<]end of speech[>[": 200028,
|
| 31 |
+
"]<]start of image[>[": 200029,
|
| 32 |
+
"]<]end of image[>[": 200030,
|
| 33 |
+
"]<]start of video[>[": 200031,
|
| 34 |
+
"]<]end of video[>[": 200032,
|
| 35 |
+
"]<]vision pad[>[": 200033,
|
| 36 |
+
"]~!b[": 200034,
|
| 37 |
+
"<jupyter_error>": 200035,
|
| 38 |
+
"<add_file>": 200036,
|
| 39 |
+
"<delete_file>": 200037,
|
| 40 |
+
"<rename_file>": 200038,
|
| 41 |
+
"<edit_file>": 200039,
|
| 42 |
+
"<commit_message>": 200040,
|
| 43 |
+
"<empty_source_file>": 200041,
|
| 44 |
+
"<repo_struct>": 200042,
|
| 45 |
+
"<code_context>": 200043,
|
| 46 |
+
"<file_content>": 200044,
|
| 47 |
+
"<source_files>": 200045,
|
| 48 |
+
"<pr_start>": 200046,
|
| 49 |
+
"<review_comment>": 200047,
|
| 50 |
+
"<filepath>": 200048,
|
| 51 |
+
"<file_sep>": 200049,
|
| 52 |
+
"<think>": 200050,
|
| 53 |
+
"</think>": 200051,
|
| 54 |
+
"<tool_call>": 200052,
|
| 55 |
+
"</tool_call>": 200053,
|
| 56 |
+
"]<]frame[>[": 200054,
|
| 57 |
+
"]<]start of frame[>[": 200055,
|
| 58 |
+
"]<]end of frame[>[": 200056,
|
| 59 |
+
"<|content_altered_placeholder|>": 200057,
|
| 60 |
+
"]<]minimax[>[": 200058,
|
| 61 |
+
"<mm:think>": 200059,
|
| 62 |
+
"</mm:think>": 200060
|
| 63 |
+
}
|
configuration_minimax_m3_vl.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""HuggingFace configs for the MiniMax VL family (M2 VL / M3 VL).
|
| 2 |
+
|
| 3 |
+
This file is bundled into every converted HF checkpoint so that loading via
|
| 4 |
+
``AutoConfig.from_pretrained(..., trust_remote_code=True)`` works without any
|
| 5 |
+
runtime dependency on sglang or other internal packages — only stock
|
| 6 |
+
``transformers`` is required.
|
| 7 |
+
|
| 8 |
+
The class definitions intentionally mirror
|
| 9 |
+
``sglang.srt.configs.minimax_vl``; if either side changes, keep them in sync.
|
| 10 |
+
|
| 11 |
+
The file is named ``configuration_minimax_m3_vl.py`` (matching the legacy
|
| 12 |
+
``model_type="minimax_m3_vl"`` and the converter's ``auto_map`` entry) so
|
| 13 |
+
that ckpts produced by this converter remain loadable by older sglang versions
|
| 14 |
+
that only know the ``MiniMaxM3VL*`` names. In this file the canonical class
|
| 15 |
+
and its thin BC alias share the name ``MiniMaxM3VLConfig``; the alias's only
|
| 16 |
+
purpose is to be referenced from ``auto_map``.
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
from typing import Optional
|
| 20 |
+
|
| 21 |
+
from transformers.configuration_utils import PretrainedConfig
|
| 22 |
+
from transformers.models.auto import CONFIG_MAPPING
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def _coerce_sub_config(
    sub_config: Optional[dict], default_model_type: str
) -> Optional[PretrainedConfig]:
    """Convert a config dict to a ``PretrainedConfig`` instance.

    Args:
        sub_config: Raw sub-config. Anything that is not a ``dict`` (``None``
            or an already-built config object) is returned unchanged.
        default_model_type: ``model_type`` assumed when ``sub_config`` has no
            ``"model_type"`` key.

    Returns:
        An instance of the class registered for the resolved ``model_type``
        in HF ``CONFIG_MAPPING``. When the type is not registered, a generic
        ``PretrainedConfig`` is built instead so all dict keys still become
        real attributes (M3's text backbone uses ``model_type="minimax_m2"``
        which is not in ``CONFIG_MAPPING``).
    """
    if not isinstance(sub_config, dict):
        return sub_config

    model_type = sub_config.get("model_type", default_model_type)

    # Resolve through ``in`` + indexing instead of ``CONFIG_MAPPING.get``.
    # NOTE(review): ``CONFIG_MAPPING`` is a lazy mapping subclass; the
    # inherited ``dict.get`` does not appear to go through its overridden
    # ``__contains__``/``__getitem__``, so ``.get`` would always yield the
    # fallback class -- confirm against the installed transformers version.
    if model_type in CONFIG_MAPPING:
        config_cls = CONFIG_MAPPING[model_type]
    else:
        config_cls = PretrainedConfig
    return config_cls(**sub_config)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class MiniMaxVLBaseConfig(PretrainedConfig):
    """Common configuration for every MiniMax VL model.

    Takes care of turning the vision/text sub-configs into config objects.
    A concrete subclass is expected to supply only its own ``model_type``.
    """

    def __init__(
        self,
        vision_config: Optional[dict] = None,
        text_config: Optional[dict] = None,
        image_token_index: int = 200025,
        video_token_index: int = 200026,
        image_seq_length: int = 576,
        process_image_mode: str = "dynamic_res",
        projector_hidden_act: str = "gelu",
        multimodal_projector_bias: bool = True,
        vision_feature_layer: int = -1,
        vision_feature_select_strategy: str = "full",
        img_token_compression_config: Optional[dict] = None,
        image_grid_pinpoints: Optional[str] = None,
        **kwargs,
    ):
        """Record the multimodal settings, then defer to ``PretrainedConfig``.

        Args:
            vision_config: Vision sub-config; a dict is coerced with
                ``"clip_vision_model"`` as the default ``model_type``.
            text_config: Text sub-config; a dict is coerced with
                ``"mixtral"`` as the default ``model_type``.
            image_token_index: Stored unchanged on the instance.
            video_token_index: Stored unchanged on the instance.
            image_seq_length: Stored unchanged on the instance.
            process_image_mode: Stored unchanged on the instance.
            projector_hidden_act: Stored unchanged on the instance.
            multimodal_projector_bias: Stored unchanged on the instance.
            vision_feature_layer: Stored unchanged on the instance.
            vision_feature_select_strategy: Stored unchanged on the instance.
            img_token_compression_config: Stored on the instance; a falsy
                value (``None`` or an empty dict) is replaced by a new ``{}``.
            image_grid_pinpoints: Stored unchanged on the instance.
            **kwargs: Forwarded to ``PretrainedConfig.__init__``.
        """
        # Sub-configs are assigned before anything else, exactly as before.
        self.vision_config = _coerce_sub_config(vision_config, "clip_vision_model")
        self.text_config = _coerce_sub_config(text_config, "mixtral")

        # Plain pass-through settings, listed in their assignment order.
        passthrough = (
            ("image_token_index", image_token_index),
            ("video_token_index", video_token_index),
            ("image_seq_length", image_seq_length),
            ("process_image_mode", process_image_mode),
            ("projector_hidden_act", projector_hidden_act),
            ("multimodal_projector_bias", multimodal_projector_bias),
            ("vision_feature_layer", vision_feature_layer),
            ("vision_feature_select_strategy", vision_feature_select_strategy),
        )
        for attr_name, attr_value in passthrough:
            setattr(self, attr_name, attr_value)

        if img_token_compression_config:
            self.img_token_compression_config = img_token_compression_config
        else:
            self.img_token_compression_config = {}
        self.image_grid_pinpoints = image_grid_pinpoints

        # The parent initializer runs last and receives only the extra kwargs.
        super().__init__(**kwargs)

    def __post_init__(self, **kwargs):
        """Run the parent ``__post_init__``, then re-coerce the sub-configs.

        Each sub-config attribute that exists on the instance is passed
        through ``_coerce_sub_config`` again, so a value that is still a
        plain dict at this point ends up as a config object.
        """
        # NOTE(review): presumably only invoked by transformers versions whose
        # ``PretrainedConfig`` defines ``__post_init__`` -- confirm.
        super().__post_init__(**kwargs)

        sub_config_defaults = (
            ("vision_config", "clip_vision_model"),
            ("text_config", "mixtral"),
        )
        for attr_name, fallback_type in sub_config_defaults:
            if hasattr(self, attr_name):
                current = getattr(self, attr_name)
                setattr(self, attr_name, _coerce_sub_config(current, fallback_type))
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
class MiniMaxM2VLConfig(MiniMaxVLBaseConfig):
    """Config for MiniMax M2 VL.

    Pairs a vision tower with the M2 (Mixtral-style MoE) text backbone. All
    behavior comes from ``MiniMaxVLBaseConfig``; only ``model_type`` is set.
    """

    model_type = "minimax_m2_vl"
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
class MiniMaxM3VLConfig(MiniMaxVLBaseConfig):
    """Config for MiniMax M3 VL.

    Pairs a vision tower with the M3 (mixed sparse/dense MoE) text backbone.
    All behavior comes from ``MiniMaxVLBaseConfig``; only ``model_type`` is
    set.
    """

    model_type = "minimax_m3_vl"
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
class MiniMaxM2MiniVLConfig(MiniMaxM2VLConfig):
    """Backward-compatibility alias of ``MiniMaxM2VLConfig``.

    Exists so checkpoints that still declare
    ``model_type="minimax_m2_mini_vl"`` keep loading.
    """

    model_type = "minimax_m2_mini_vl"
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
class MiniMaxM3VLConfig(MiniMaxM3VLConfig):
    """Backward-compatibility alias for ``model_type="minimax_m3_vl"`` ckpts.

    This subclasses the ``MiniMaxM3VLConfig`` defined earlier in this module
    and re-binds the same name, so after import the name refers to this
    subclass. ``model_type`` is unchanged.
    """

    # NOTE(review): base and alias share one name, which looks like a rename
    # artifact -- confirm the intended alias name against the upstream source.
    model_type = "minimax_m3_vl"
|
generation_config.json
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token_id": 200019,
|
| 3 |
+
"do_sample": true,
|
| 4 |
+
"eos_token_id": 200020,
|
| 5 |
+
"temperature": 1.0,
|
| 6 |
+
"top_p": 0.95,
|
| 7 |
+
"transformers_version": "4.46.1"
|
| 8 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
minimax_m3_model_free_fp8_dynamic_receipt.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"device": "cuda:0",
|
| 3 |
+
"elapsed_seconds": 1898.585,
|
| 4 |
+
"entrypoint": "llmcompressor.model_free_ptq",
|
| 5 |
+
"ignore": [
|
| 6 |
+
"re:.*lm_head$",
|
| 7 |
+
"re:.*embed_tokens$",
|
| 8 |
+
"re:.*vision_tower.*",
|
| 9 |
+
"re:.*multi_modal_projector.*",
|
| 10 |
+
"re:.*multimodal_projector.*",
|
| 11 |
+
"re:.*patch_merge_mlp.*",
|
| 12 |
+
"re:.*block_sparse_moe\\.gate$"
|
| 13 |
+
],
|
| 14 |
+
"max_workers": 1,
|
| 15 |
+
"output": "/workspace/mm3/MiniMaxAI__MiniMax-M3-FP8-Dynamic-model-free__from_051e8f96.partial.run_20260625T154743Z",
|
| 16 |
+
"pre_validation": "disabled because safetensors.safe_open rejects device='meta' in this ROCm image; per-tensor validation still runs during process_file",
|
| 17 |
+
"scheme": "FP8_DYNAMIC",
|
| 18 |
+
"source": "/workspace/mm3/MiniMaxAI__MiniMax-M3__official_051e8f96"
|
| 19 |
+
}
|
model-00001-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:28b601cea1932471db05a90178ad94d68c886efea8b5dc54c9194bd1cd1a2b41
|
| 3 |
+
size 5250352288
|
model-00002-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09797a454a0832f71b137554e87f2a8570a57afe2386eef04e988e67d076da00
|
| 3 |
+
size 5463380896
|
model-00004-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2061dcb3ce865da5b0798c6195edb50e996f1ed09d5122cc8baffecf3174975
|
| 3 |
+
size 8044892000
|
model-00005-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f15b403eda0ac42900d089a31b9585d1630c5e4071f629b403cfea329dabff8
|
| 3 |
+
size 8044892000
|
model-00008-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2eec6c7b9fa9b8ebd4d795134adc01871f33ec453cfc3c1377c2d96ff7cd746d
|
| 3 |
+
size 8044892000
|
model-00009-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d365280ecf01491cab250113c7539b63788c38103b839e179ca216605430a330
|
| 3 |
+
size 8044892000
|
model-00010-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0b509848b9f0fefa35f2dfc9e2c7e5b065f6d9f840ff3fcba961ac9b912a148
|
| 3 |
+
size 8044892800
|
model-00011-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f679f293f2fbbdb2a0887904e9245d1068012699b34de9b599239db7ca98d6c5
|
| 3 |
+
size 8044892800
|
model-00012-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4b97d6e165b4d133fe5c4b95701e3ef54082b9232f0beb08a3cf10b79c2d714
|
| 3 |
+
size 8044892800
|
model-00013-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51478d9d33079bb5351adf3cc040b1925267bd1d07fd26838dc65f1368998674
|
| 3 |
+
size 8044892800
|
model-00014-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecae238705cfbe4e7be017e7c8f8791780c2a4bd72b0a900874752fd11dc1da2
|
| 3 |
+
size 8044892800
|
model-00015-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:109c9de78dac5d00d4d0d8fe30f9121a2c0f530759d223c8d7295322d42e3d18
|
| 3 |
+
size 8044892800
|
model-00016-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:190f1b159714393b2b8ff7fc9684e1afee5b2bd55b3826a30f2636a39ef9f352
|
| 3 |
+
size 8044892800
|
model-00018-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a9587bff67f3b570b4dc6b445f6e41051ac161893c8da4effdc499c249f01e4
|
| 3 |
+
size 8044892800
|
model-00019-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ef764cd50be246eba3b32af00d5c98ece44a27237b0762f7321cd971685022f
|
| 3 |
+
size 8044892800
|
model-00020-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d638096b82ac618188edd7917d6b47f86c481ebc091eb553f0ae5eeb7c86e00
|
| 3 |
+
size 8044892800
|
model-00026-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc016ae04178f8733ea5062f565ec5d0ba81864d6b5803b1f503abefdfa4dd52
|
| 3 |
+
size 7890509072
|
model-00028-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b062e90d3a7f2935bcb507225b89afb6774c7d449bf907c7b7501b8896e8533b
|
| 3 |
+
size 7421757928
|
model-00029-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90ab2de09ac972ae8db06f53f302df35161e53b6b3724ff73aafc199048bc52
|
| 3 |
+
size 7421757928
|
model-00030-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:311aca972c5178f843c604cca8e5819bf2ffab65defd08792b2212fb7635934e
|
| 3 |
+
size 7421757928
|
model-00031-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9172a00c73eedba7ac43ae9d34b3264556862e22d86d5d30203a524de4377a1b
|
| 3 |
+
size 7421757928
|
model-00033-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a13d88640d44b2faa6592b6d4569f37db45f8eee92999cd55b4fc9dc7757083f
|
| 3 |
+
size 7421757928
|
model-00035-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0849976e8d51a7e5b8bca78c2c83866755316cdd93950315684b57759dbe42d
|
| 3 |
+
size 7421757928
|
model-00036-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6228890f7dc00e576dd1aaa4954e75a4e5d51d9f393b40244106dcaf32a32d6d
|
| 3 |
+
size 7421757928
|
model-00037-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a5fb1fdeedef2896566655f109d0b71fe3f2b883d61af455f8fc436b767599d
|
| 3 |
+
size 6798623072
|
model-00038-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:add8e08bf3eef7d37eb5ac7a487aebebe4d4048351c197610467f40f2721d335
|
| 3 |
+
size 6798623072
|
model-00041-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e6749d436bf44d73aacec623ef7b7bbacdeaa6bc8eaf4ada7161ad740c4a89d
|
| 3 |
+
size 6798623072
|
model-00042-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63afdf2fb4789b0d1371cdf2d06bcb24ab2685852fef2dc847d8e15d0707b5b6
|
| 3 |
+
size 6798623072
|
model-00043-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:00b7044bc8ebf4311c9be08dcecdf550c69071b1c17f62269d10905d2205144c
|
| 3 |
+
size 6798623072
|
model-00044-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c58d1247ba482e4edcea4bb7cfccd63be1efef419eae29d8ae6a38e55852203
|
| 3 |
+
size 6798623072
|
model-00045-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f1d0bd0604dc9330d602f8262aaa899150fe941f7b606922c4085ff664b238db
|
| 3 |
+
size 6798623072
|
model-00046-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7af020a268620bb7bbfbd2ac7d6c084efa5e7ee3b3ff1dd10ecb895f71572408
|
| 3 |
+
size 6798623072
|
model-00047-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0d94a58c4a87ec05e85852e88f7df83cc26049023f4fab305f817c8bc730480
|
| 3 |
+
size 6798623072
|
model-00048-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f897a0b74dd17d5d7a259fd3b7c7d129a6a95a6a14d545ce1d9501e151b411e
|
| 3 |
+
size 6798623072
|
model-00049-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:562ee57ac430a65ec99f0bdcef2f958c68b65838cad7b1b68cd8d8e51576714a
|
| 3 |
+
size 6798623072
|
model-00050-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96ebcac4a4b662b877fc2fa75b87c6b78cccf6cb15238b62df20c1cda93a5b28
|
| 3 |
+
size 6798623072
|
model-00051-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:507a861e648b86dbe2eb72bb2f59252a794e6ec99811106a3f7011161c9f205c
|
| 3 |
+
size 6798623072
|
model-00052-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1825204774c99914e1fc31e9462fa205e1511a67dde7e008731e01c11562995f
|
| 3 |
+
size 6798623072
|
model-00053-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9dae9087a6f9e474e04cc6ac65ec90eee00242dbae83f29618decbbff5495037
|
| 3 |
+
size 6798623072
|
model-00056-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eecf5287966cf2801cc63ec64a68b6074b534a33f9f94046eae633ec5b1efd41
|
| 3 |
+
size 6798623072
|
model-00059-of-00059.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c650aafbcbd2a8a4a1d499ff6a5291cc857eba2d29efc15fd00254e18fcb327
|
| 3 |
+
size 8687223736
|
preprocessor_config.json
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"processor_class": "MiniMaxVLProcessor",
|
| 3 |
+
"auto_map": {
|
| 4 |
+
"AutoImageProcessor": "image_processor.MiniMaxM3VLImageProcessor",
|
| 5 |
+
"AutoProcessor": "processing_minimax.MiniMaxVLProcessor",
|
| 6 |
+
"AutoVideoProcessor": "video_processor.MiniMaxM3VLVideoProcessor"
|
| 7 |
+
},
|
| 8 |
+
"process_image_mode": "dynamic_res",
|
| 9 |
+
"image_mean": [
|
| 10 |
+
0.48145466,
|
| 11 |
+
0.4578275,
|
| 12 |
+
0.40821073
|
| 13 |
+
],
|
| 14 |
+
"image_std": [
|
| 15 |
+
0.26862954,
|
| 16 |
+
0.26130258,
|
| 17 |
+
0.27577711
|
| 18 |
+
],
|
| 19 |
+
"size": [
|
| 20 |
+
672,
|
| 21 |
+
672
|
| 22 |
+
],
|
| 23 |
+
"patch_size": 14,
|
| 24 |
+
"img_token_compression_config": {
|
| 25 |
+
"image_token_compression_threshold": 1.1,
|
| 26 |
+
"image_token_compression_method": "patch_merge",
|
| 27 |
+
"max_image_resolution": 1008,
|
| 28 |
+
"spatial_merge_size": 2,
|
| 29 |
+
"temporal_patch_size": 2
|
| 30 |
+
},
|
| 31 |
+
"add_start_end_special_tokens": true
|
| 32 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": {
|
| 3 |
+
"content": "]~b]",
|
| 4 |
+
"lstrip": false,
|
| 5 |
+
"normalized": false,
|
| 6 |
+
"rstrip": false,
|
| 7 |
+
"single_word": false
|
| 8 |
+
},
|
| 9 |
+
"eos_token": {
|
| 10 |
+
"content": "[e~[",
|
| 11 |
+
"lstrip": false,
|
| 12 |
+
"normalized": false,
|
| 13 |
+
"rstrip": false,
|
| 14 |
+
"single_word": false
|
| 15 |
+
}
|
| 16 |
+
}
|