File size: 3,248 Bytes
c7ab596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db50a98
c7ab596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db50a98
c7ab596
 
db50a98
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# Patch 1: speculative.py
# Append "kimi_k25" to the quoted name sequence that currently ends in
# "glm_moe_dsa" inside vLLM's speculative config module.
# NOTE(review): presumably this is the list of model types accepted for
# MTP-style speculative decoding - confirm against the installed vLLM version.
spec_path = "/usr/local/lib/python3.12/dist-packages/vllm/config/speculative.py"
# Python source files are UTF-8 by default, so do not rely on the locale.
with open(spec_path, encoding="utf-8") as f:
    c = f.read()
old = '"deepseek_v3", "deepseek_v32", "glm_moe_dsa"'
new = '"deepseek_v3", "deepseek_v32", "glm_moe_dsa", "kimi_k25"'
if "kimi_k25" in c:
    # Any existing mention of the name is treated as evidence of a previous
    # run, which keeps the script idempotent.
    print("speculative.py already patched")
elif old in c:
    c = c.replace(old, new)
    with open(spec_path, "w", encoding="utf-8") as f:
        f.write(c)
    print("speculative.py PATCHED")
else:
    # The anchor text is absent (e.g. a different vLLM release). Report that
    # explicitly instead of claiming the patch is already in place.
    print("speculative.py NOT patched: expected model list not found")

# Patch 2: kimi_k25.py
# Insert a `"model.layers."` -> `"language_model.model.layers."` entry directly
# after the existing `"language_model.layers."` entry.
# NOTE(review): presumably this is a checkpoint weight-name prefix mapping -
# confirm against the installed kimi_k25.py.
k25_path = "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/kimi_k25.py"
with open(k25_path, encoding="utf-8") as f:
    c = f.read()
k25_anchor = '"language_model.layers.": "language_model.model.layers.",'
k25_replacement = (
    '"language_model.layers.": "language_model.model.layers.",'
    '\n            "model.layers.": "language_model.model.layers.",'
)
if '"model.layers.": "language_model.model.layers."' in c:
    # The new entry is already present; nothing to do.
    print("kimi_k25.py already patched")
elif k25_anchor in c:
    c = c.replace(k25_anchor, k25_replacement)
    with open(k25_path, "w", encoding="utf-8") as f:
        f.write(c)
    print("kimi_k25.py PATCHED")
else:
    # Without the anchor, str.replace would silently do nothing; do not rewrite
    # the file or report success in that case.
    print("kimi_k25.py NOT patched: expected mapping entry not found")

# Patch 3: deepseek_mtp.py - extract text_config
# Inject a `_get_text_config` helper right after the `maybe_prefix` import and
# route three `hf_config` lookups through it, so a config object that carries a
# nested `text_config` attribute is unwrapped before use.
mtp_path = "/usr/local/lib/python3.12/dist-packages/vllm/model_executor/models/deepseek_mtp.py"
with open(mtp_path, encoding="utf-8") as f:
    c = f.read()
import_marker = "from .utils import maybe_prefix"
if "text_config" in c:
    # Any existing mention of text_config is treated as "already handled".
    print("deepseek_mtp.py already patched")
elif import_marker not in c:
    # The helper definition hangs off this import line. Rewriting the call
    # sites without it would leave the module calling an undefined name, so
    # leave the file untouched.
    print("deepseek_mtp.py NOT patched: import anchor not found")
else:
    helper = '''from .utils import maybe_prefix

def _get_text_config(hf_config):
    """Extract text_config from VLM configs (e.g. KimiK25Config)."""
    return getattr(hf_config, 'text_config', hf_config)'''
    c = c.replace(import_marker, helper)
    # (old, new) pairs; each old text includes the following line so that only
    # the intended assignment is rewritten.
    call_site_edits = (
        (
            "config = vllm_config.model_config.hf_config\n        self.mtp_start_layer_idx",
            "config = _get_text_config(vllm_config.model_config.hf_config)\n        self.mtp_start_layer_idx",
        ),
        (
            "self.config = vllm_config.model_config.hf_config\n        self.model = DeepSeekMultiTokenPredictor",
            "self.config = _get_text_config(vllm_config.model_config.hf_config)\n        self.model = DeepSeekMultiTokenPredictor",
        ),
        (
            "config = vllm_config.speculative_config.draft_model_config.hf_config\n        self.config = config",
            "config = _get_text_config(vllm_config.speculative_config.draft_model_config.hf_config)\n        self.config = config",
        ),
    )
    for old_text, new_text in call_site_edits:
        if old_text in c:
            c = c.replace(old_text, new_text)
        else:
            # Best effort: keep going, but make the skipped call site visible.
            print(f"deepseek_mtp.py WARNING: pattern not found: {old_text.splitlines()[0]!r}")
    with open(mtp_path, "w", encoding="utf-8") as f:
        f.write(c)
    print("deepseek_mtp.py PATCHED")

# Patch 4: eagle.py - handle KimiK25Config missing image_token_index
# Replace the direct read of `target_model.config.image_token_index` with a
# getattr chain: image_token_index, then media_placeholder_token_id, then 0.
eagle_path = "/usr/local/lib/python3.12/dist-packages/vllm/v1/spec_decode/eagle.py"
with open(eagle_path, encoding="utf-8") as f:
    c = f.read()
old_eagle = "                self.model.config.image_token_index = (\n                    target_model.config.image_token_index\n                )"
new_eagle = "                self.model.config.image_token_index = getattr(\n                    target_model.config, 'image_token_index',\n                    getattr(target_model.config, 'media_placeholder_token_id', 0)\n                )"
if "media_placeholder_token_id" in c:
    # The fallback attribute name is already referenced; treat as patched.
    print("eagle.py already patched")
elif old_eagle in c:
    c = c.replace(old_eagle, new_eagle)
    with open(eagle_path, "w", encoding="utf-8") as f:
        f.write(c)
    print("eagle.py PATCHED (KimiK25 image_token_index fallback)")
else:
    # The exact assignment (including its indentation) was not found, so the
    # replace would be a no-op; report that instead of a false success.
    print("eagle.py NOT patched: expected image_token_index assignment not found")