Upload COLAB_MERGE_FULL_GGUF.md with huggingface_hub
Browse files- COLAB_MERGE_FULL_GGUF.md +125 -0
COLAB_MERGE_FULL_GGUF.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Create the full standalone Gemopus v1 E2B GGUF in Colab
|
| 2 |
+
|
| 3 |
+
This creates:
|
| 4 |
+
|
| 5 |
+
```text
|
| 6 |
+
Gemopus-v1-e2b-Q4_K_M-merged.gguf
|
| 7 |
+
```
|
| 8 |
+
|
| 9 |
+
That file is the full standalone LM Studio model:
|
| 10 |
+
|
| 11 |
+
```text
|
| 12 |
+
Gemma 4 E2B Q4_K_M base + Gemopus checkpoint-400 LoRA
|
| 13 |
+
```
|
| 14 |
+
|
| 15 |
+
## Colab Cell
|
| 16 |
+
|
| 17 |
+
Run this in Google Colab. Use a runtime with enough free disk space for the base model, the llama.cpp build, and the merged output file. A GPU is not required for the merge, but a T4 runtime is fine.
|
| 18 |
+
|
| 19 |
+
Before running, replace:
|
| 20 |
+
|
| 21 |
+
```text
|
| 22 |
+
PASTE_YOUR_HF_TOKEN_HERE
|
| 23 |
+
```
|
| 24 |
+
|
| 25 |
+
with a Hugging Face write token.
|
| 26 |
+
|
| 27 |
+
```python
|
| 28 |
+
import os
|
| 29 |
+
import subprocess
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
|
| 32 |
+
# Hugging Face token; passed as --token to every `hf` call and exported as an env var.
HF_TOKEN = "PASTE_YOUR_HF_TOKEN_HERE"
|
| 33 |
+
# Repo the LoRA adapter is downloaded from and the merged GGUF is uploaded to.
REPO_ID = "Hashhasapi/Gemopus-v1-e2b"
|
| 34 |
+
# Repo that hosts the quantized base model.
BASE_REPO = "lmstudio-community/gemma-4-E2B-it-GGUF"
|
| 35 |
+
# File name of the base GGUF inside BASE_REPO.
BASE_FILE = "gemma-4-E2B-it-Q4_K_M.gguf"
|
| 36 |
+
# File name of the LoRA adapter GGUF inside REPO_ID.
LORA_FILE = "Gemopus-v1-e2b-lora.gguf"
|
| 37 |
+
# File name of the merged output, both under /content and in the upload target.
MERGED_FILE = "Gemopus-v1-e2b-Q4_K_M-merged.gguf"
|
| 38 |
+
|
| 39 |
+
# Make the token visible to child processes through the environment as well.
os.environ["HF_TOKEN"] = HF_TOKEN
|
| 40 |
+
|
| 41 |
+
def run(cmd, cwd=None):
    """Echo a command, execute it, and raise if it fails.

    Args:
        cmd: An argv list (executed directly) or a single string
            (executed through the shell).
        cwd: Optional working directory for the child process.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status.
    """
    if isinstance(cmd, list):
        # The echoed command ends up in the notebook output, so the value
        # of any --token argument must be masked before printing.
        shown = []
        hide_next = False
        for arg in cmd:
            text = str(arg)
            if hide_next:
                text = "***"
                hide_next = False
            elif text == "--token":
                hide_next = True
            elif text.startswith("--token="):
                text = "--token=***"
            shown.append(text)
        echoed = " ".join(shown)
    else:
        echoed = cmd
    print("\n$", echoed)
    # The real, unmasked command is what actually gets executed.
    subprocess.run(cmd, cwd=cwd, shell=isinstance(cmd, str), check=True)
|
| 44 |
+
|
| 45 |
+
# Install the system build toolchain, then the huggingface_hub package,
# in that order; each command aborts the cell if it fails.
for setup_cmd in (
    "apt-get update",
    "apt-get install -y --no-install-recommends git cmake build-essential ca-certificates python3-pip",
    "python3 -m pip install -U huggingface_hub",
):
    run(setup_cmd)
|
| 48 |
+
|
| 49 |
+
# (source repo, file name, local directory) for each input of the merge:
# the quantized base model first, then the LoRA adapter.
download_plan = (
    (BASE_REPO, BASE_FILE, "/content/base"),
    (REPO_ID, LORA_FILE, "/content/lora"),
)

# Create both target directories before any download starts.
for _repo, _filename, target_dir in download_plan:
    Path(target_dir).mkdir(exist_ok=True)

for repo, filename, target_dir in download_plan:
    run([
        "hf", "download", repo, filename,
        "--local-dir", target_dir,
        "--token", HF_TOKEN,
    ])
|
| 63 |
+
|
| 64 |
+
llama_src = Path("/content/llama.cpp")
llama_build = llama_src / "build"

# Reuse an existing checkout so re-running the cell does not clone again.
if not llama_src.exists():
    run(["git", "clone", "--depth", "1", "https://github.com/ggml-org/llama.cpp", str(llama_src)])

# Configure with CUDA, curl, tests and examples switched off; tools stay on.
configure_flags = [
    "-DGGML_CUDA=OFF",
    "-DLLAMA_CURL=OFF",
    "-DLLAMA_BUILD_TESTS=OFF",
    "-DLLAMA_BUILD_EXAMPLES=OFF",
    "-DLLAMA_BUILD_TOOLS=ON",
]
run(["cmake", "-S", str(llama_src), "-B", str(llama_build), *configure_flags])

# Build only the llama-export-lora target, using every available core
# (falling back to 2 when the core count is unknown).
parallel_jobs = str(os.cpu_count() or 2)
run([
    "cmake",
    "--build", str(llama_build),
    "--config", "Release",
    "-j", parallel_jobs,
    "--target", "llama-export-lora",
])
|
| 85 |
+
|
| 86 |
+
# Locate the merge tool produced by the build: try the expected output
# path first, then fall back to searching the build tree.
export_bin = Path("/content/llama.cpp/build/bin/llama-export-lora")
if not export_bin.is_file():
    # The build tree can also hold same-prefixed entries that are not the
    # binary (for example CMake's per-target "llama-export-lora.dir"
    # directory), so accept only executable regular files. Sorting makes
    # the choice deterministic when several candidates exist.
    matches = sorted(
        candidate
        for candidate in Path("/content/llama.cpp/build").rglob("llama-export-lora*")
        if candidate.is_file() and os.access(candidate, os.X_OK)
    )
    if not matches:
        raise FileNotFoundError("Could not find llama-export-lora after build.")
    export_bin = matches[0]
|
| 92 |
+
|
| 93 |
+
# Apply the LoRA adapter to the base model and write a single GGUF file.
merged = Path(f"/content/{MERGED_FILE}")
merge_cmd = [str(export_bin)]
merge_cmd += ["-m", f"/content/base/{BASE_FILE}"]
merge_cmd += ["--lora", f"/content/lora/{LORA_FILE}"]
merge_cmd += ["-o", str(merged)]
merge_cmd += ["-t", str(os.cpu_count() or 2)]
run(merge_cmd)

# Report the output size in GiB as a quick sanity check.
merged_gib = merged.stat().st_size / (1024**3)
print("Merged file size:", merged_gib, "GiB")
|
| 103 |
+
|
| 104 |
+
# Push the merged file to the model repo under the same file name.
upload_cmd = ["hf", "upload", REPO_ID, str(merged), MERGED_FILE]
upload_cmd += ["--repo-type", "model"]
upload_cmd += ["--token", HF_TOKEN]
upload_cmd += ["--commit-message", "Add merged standalone Gemopus v1 E2B GGUF"]
run(upload_cmd)

# Print where the uploaded file can be found.
result_url = f"https://huggingface.co/{REPO_ID}/blob/main/{MERGED_FILE}"
print("Done:", result_url, sep="\n")
|
| 115 |
+
```
|
| 116 |
+
|
| 117 |
+
## After Upload
|
| 118 |
+
|
| 119 |
+
In LM Studio, download/load:
|
| 120 |
+
|
| 121 |
+
```text
|
| 122 |
+
Gemopus-v1-e2b-Q4_K_M-merged.gguf
|
| 123 |
+
```
|
| 124 |
+
|
| 125 |
+
That one should load directly without a LoRA adapter.
|