Hashhasapi committed on
Commit
c4871aa
·
verified ·
1 Parent(s): 5376a50

Upload COLAB_MERGE_FULL_GGUF.md with huggingface_hub

Browse files
Files changed (1) hide show
  1. COLAB_MERGE_FULL_GGUF.md +125 -0
COLAB_MERGE_FULL_GGUF.md ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Create the full standalone Gemopus v1 E2B GGUF in Colab
2
+
3
+ This creates:
4
+
5
+ ```text
6
+ Gemopus-v1-e2b-Q4_K_M-merged.gguf
7
+ ```
8
+
9
+ That file is the full standalone model for LM Studio, produced by merging:
10
+
11
+ ```text
12
+ Gemma 4 E2B Q4_K_M base + Gemopus checkpoint-400 LoRA
13
+ ```
14
+
15
+ ## Colab Cell
16
+
17
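+ Run this in a single Google Colab cell. The runtime needs enough free disk space for the base GGUF, the LoRA adapter, a llama.cpp build, and the merged output file. A GPU is not required for the merge (llama.cpp is built with CUDA off), but a T4 runtime is fine.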
18
+
19
+ Before running, replace:
20
+
21
+ ```text
22
+ PASTE_YOUR_HF_TOKEN_HERE
23
+ ```
24
+
25
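+ with a Hugging Face token that has write access to the target repo (`REPO_ID`); write access is needed for the final upload step. Note that the script passes the token on the command line and prints every command before running it, so the token will appear in the cell output. Clear the output, and ideally revoke the token, before sharing the notebook.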
26
+
27
+ ```python
28
+ import os
29
+ import subprocess
30
+ from pathlib import Path
31
+
32
+ HF_TOKEN = "PASTE_YOUR_HF_TOKEN_HERE"
33
+ REPO_ID = "Hashhasapi/Gemopus-v1-e2b"
34
+ BASE_REPO = "lmstudio-community/gemma-4-E2B-it-GGUF"
35
+ BASE_FILE = "gemma-4-E2B-it-Q4_K_M.gguf"
36
+ LORA_FILE = "Gemopus-v1-e2b-lora.gguf"
37
+ MERGED_FILE = "Gemopus-v1-e2b-Q4_K_M-merged.gguf"
38
+
39
+ os.environ["HF_TOKEN"] = HF_TOKEN
40
+
41
+ def run(cmd, cwd=None):
42
+ print("\n$", " ".join(cmd) if isinstance(cmd, list) else cmd)
43
+ subprocess.run(cmd, cwd=cwd, shell=isinstance(cmd, str), check=True)
44
+
45
+ run("apt-get update")
46
+ run("apt-get install -y --no-install-recommends git cmake build-essential ca-certificates python3-pip")
47
+ run("python3 -m pip install -U huggingface_hub")
48
+
49
+ Path("/content/base").mkdir(exist_ok=True)
50
+ Path("/content/lora").mkdir(exist_ok=True)
51
+
52
+ run([
53
+ "hf", "download", BASE_REPO, BASE_FILE,
54
+ "--local-dir", "/content/base",
55
+ "--token", HF_TOKEN,
56
+ ])
57
+
58
+ run([
59
+ "hf", "download", REPO_ID, LORA_FILE,
60
+ "--local-dir", "/content/lora",
61
+ "--token", HF_TOKEN,
62
+ ])
63
+
64
+ if not Path("/content/llama.cpp").exists():
65
+ run(["git", "clone", "--depth", "1", "https://github.com/ggml-org/llama.cpp", "/content/llama.cpp"])
66
+
67
+ run([
68
+ "cmake",
69
+ "-S", "/content/llama.cpp",
70
+ "-B", "/content/llama.cpp/build",
71
+ "-DGGML_CUDA=OFF",
72
+ "-DLLAMA_CURL=OFF",
73
+ "-DLLAMA_BUILD_TESTS=OFF",
74
+ "-DLLAMA_BUILD_EXAMPLES=OFF",
75
+ "-DLLAMA_BUILD_TOOLS=ON",
76
+ ])
77
+
78
+ run([
79
+ "cmake",
80
+ "--build", "/content/llama.cpp/build",
81
+ "--config", "Release",
82
+ "-j", str(os.cpu_count() or 2),
83
+ "--target", "llama-export-lora",
84
+ ])
85
+
86
+ export_bin = Path("/content/llama.cpp/build/bin/llama-export-lora")
87
+ if not export_bin.exists():
88
+ matches = list(Path("/content/llama.cpp/build").rglob("llama-export-lora*"))
89
+ if not matches:
90
+ raise FileNotFoundError("Could not find llama-export-lora after build.")
91
+ export_bin = matches[0]
92
+
93
+ run([
94
+ str(export_bin),
95
+ "-m", f"/content/base/{BASE_FILE}",
96
+ "--lora", f"/content/lora/{LORA_FILE}",
97
+ "-o", f"/content/{MERGED_FILE}",
98
+ "-t", str(os.cpu_count() or 2),
99
+ ])
100
+
101
+ merged = Path(f"/content/{MERGED_FILE}")
102
+ print("Merged file size:", merged.stat().st_size / (1024**3), "GiB")
103
+
104
+ run([
105
+ "hf", "upload", REPO_ID,
106
+ str(merged),
107
+ MERGED_FILE,
108
+ "--repo-type", "model",
109
+ "--token", HF_TOKEN,
110
+ "--commit-message", "Add merged standalone Gemopus v1 E2B GGUF",
111
+ ])
112
+
113
+ print("Done:")
114
+ print(f"https://huggingface.co/{REPO_ID}/blob/main/{MERGED_FILE}")
115
+ ```
116
+
117
+ ## After Upload
118
+
119
+ In LM Studio, download/load:
120
+
121
+ ```text
122
+ Gemopus-v1-e2b-Q4_K_M-merged.gguf
123
+ ```
124
+
125
+ That file should load directly, with no separate LoRA adapter needed.