Upload config.json with huggingface_hub
Browse files- config.json +90 -0
config.json
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_README": "POD variant of config.json for Jv2 stages 00-03 on a single Pro 6000 (96GB). Models are NOT pulled from HF (hf_repo is null for both): A is scp'd from aibox and B is folded on the pod; all paths are pod-local (/workspace). it_base + instruct_fisher + instruct_layer_importance are null here — they belong to stage 04 (merge), which runs on AIBOX, not the pod. Finalize/verify bin paths + fisher params ON the pod (jaxx: expect quirks). Keep the local config.json as the AIBOX/stage-04 source of truth.",
|
| 3 |
+
"_MODELS_ARE_REAL_J_INPUTS": "SET 2026-05-29 07:18Z (off the old Gv3/Equinox smoke placeholders). J-line = Iv-winner ⊕ toasty's glimmer-rp v0.1. Equinox is NOT a J input — it is already INSIDE the Iv-winner (Iv3 = Gv3-rebuilt + Equinox). REAL Jv2 style ratio is between A = the LOCKED Iv-winner (Iv3_rebuilt, scp'd to /workspace/jv2) and B = glimmer-on-Iv-winner (= Jv1; fold glimmer-rp v0.1 LoRA onto A on the pod). The separate Gv3/Equinox smoke (#74) is DROPPED from this run — first real run IS the smoke (README). #74 stays a later optional follow-up.",
|
| 4 |
+
"run_label": "style_mask_j1_pod",
|
| 5 |
+
"goal": "reduce slop + respect character details (personality, physical traits, voice)",
|
| 6 |
+
"seed": 1729,
|
| 7 |
+
|
| 8 |
+
"single_gpu": true,
|
| 9 |
+
"cuda_devices": "0",
|
| 10 |
+
"_gpu_note": "ONE 96GB card. run_all.sh serves with CUDA_VISIBLE_DEVICES=0,1 --tensor-split 1,1 (2-GPU aibox). On the pod: CUDA_VISIBLE_DEVICES=0 and DROP --tensor-split. See POD_RUNBOOK.md.",
|
| 11 |
+
|
| 12 |
+
"models": {
|
| 13 |
+
"A": {
|
| 14 |
+
"name": "Iv_winner",
|
| 15 |
+
"hf_repo": null,
|
| 16 |
+
"_source": "Iv3 (Gv3-rebuilt + Equinox, thinkfisher 0.3/0.8) — scp'd from aibox, NOT pulled from HF.",
|
| 17 |
+
"safetensors_dir": "/workspace/jv2/g4_31b_recipe_Iv3_rebuilt",
|
| 18 |
+
"gguf": "/workspace/style_mask_j1/gguf/Iv_winner.Q4_K_M.gguf"
|
| 19 |
+
},
|
| 20 |
+
"B": {
|
| 21 |
+
"name": "glimmer_on_Iv",
|
| 22 |
+
"hf_repo": null,
|
| 23 |
+
"_source": "= Jv1. Fold glimmer-rp v0.1 LoRA (/workspace/jv2/gemma4-31b-glimmer-rp-adapter) onto A (the Iv-winner) on the pod via fold_jv1.py -> this dir.",
|
| 24 |
+
"safetensors_dir": "/workspace/jv2/Jv1",
|
| 25 |
+
"gguf": "/workspace/style_mask_j1/gguf/glimmer_on_Iv.Q4_K_M.gguf"
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
"it_base": null,
|
| 29 |
+
"instruct_fisher": null,
|
| 30 |
+
"instruct_layer_importance": null,
|
| 31 |
+
"_stage04_inputs_note": "it_base=google/gemma-4-31B-it, instruct_fisher=g4_31b_it_fisher/g4-31b-it/fisher.pt (55GB local), instruct_layer_importance=...layer_importance.json — ALL aibox-local, used only in 04_merge_style.py. Never copied to the pod.",
|
| 32 |
+
|
| 33 |
+
"eso5_prompts": "/workspace/style_mask_j1/prompts_used.jsonl",
|
| 34 |
+
"n_prompts": 40,
|
| 35 |
+
|
| 36 |
+
"generate": {
|
| 37 |
+
"port": 8091,
|
| 38 |
+
"ctx_size": 32768,
|
| 39 |
+
"parallel": 8,
|
| 40 |
+
"max_tokens": 1024,
|
| 41 |
+
"temperature": 0.0,
|
| 42 |
+
"workers": 8,
|
| 43 |
+
"use_bf16_transformers": false
|
| 44 |
+
},
|
| 45 |
+
|
| 46 |
+
"fisher": {
|
| 47 |
+
"backend": "pod_local",
|
| 48 |
+
"_backend_note": "Run 02_extract_style_fisher.py directly on the pod GPU. On 96GB a single card likely fits without --cpu-offload; start with --gpu-memory-gib 90 and add --cpu-offload only if OOM. Verify on pod.",
|
| 49 |
+
"max_length": 1024,
|
| 50 |
+
"layers_per_batch": 8,
|
| 51 |
+
"fisher_dtype": "bf16",
|
| 52 |
+
"gpu_memory_gib": "90",
|
| 53 |
+
"cpu_offload": false,
|
| 54 |
+
"enable_thinking": false,
|
| 55 |
+
"_enable_thinking_note": "Eso-5/style is thinking-OFF; extract style-Fisher thinking-off to match how the merge is evaluated."
|
| 56 |
+
},
|
| 57 |
+
|
| 58 |
+
"ratio": {
|
| 59 |
+
"epsilon": 1e-8,
|
| 60 |
+
"combined_floor_quantile": 0.5
|
| 61 |
+
},
|
| 62 |
+
|
| 63 |
+
"merge": {
|
| 64 |
+
"mode": "v2_two_model_style",
|
| 65 |
+
"density": null,
|
| 66 |
+
"scale": null,
|
| 67 |
+
"_note": "STAGE 04 — AIBOX ONLY. Not run on the pod. Fill density/scale from the #65 sweep winner before stage 04 runs on aibox."
|
| 68 |
+
},
|
| 69 |
+
|
| 70 |
+
"upload": {
|
| 71 |
+
"ratio_repo": "Esobold/g4_31b_style_ratio_j1",
|
| 72 |
+
"ratio_repo_private": true,
|
| 73 |
+
"files": ["ratio/style_ratio.pt", "ratio/ratio_layer_summary.json"],
|
| 74 |
+
"_note": "After stage 03, upload style_ratio.pt (~35GB) to the pre-created private Esobold repo, then pull HF->aibox for stage 04."
|
| 75 |
+
},
|
| 76 |
+
|
| 77 |
+
"disk": {
|
| 78 |
+
"min_free_gb_before_download": 400,
|
| 79 |
+
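"_footprint": "A (Iv_winner) + B (glimmer_on_Iv = Jv1) safetensors, ~62-63GB each (~125; sizes carried over from the earlier Gv3 ~63GB / Equinox ~62GB placeholder estimate — re-check on pod) + per-model transient bf16 GGUF ~62GB during convert (deleted after) + 4 style-Fishers ~35GB each (~140) + ratio ~35GB. df preflight before download AND before stage 02.",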
|
| 80 |
+
"_twisted_2026_05_29": "Sanity-check disk so it doesn't run out partway through."
|
| 81 |
+
},
|
| 82 |
+
|
| 83 |
+
"bin": {
|
| 84 |
+
"_note": "SET ON POD — point at the pod's llama.cpp build + venv python. Placeholders below assume a /workspace build.",
|
| 85 |
+
"python": "python",
|
| 86 |
+
"llama_server": "/workspace/llama.cpp/build/bin/llama-server",
|
| 87 |
+
"llama_quantize": "/workspace/llama.cpp/build/bin/llama-quantize",
|
| 88 |
+
"llama_convert": "/workspace/llama.cpp/convert_hf_to_gguf.py"
|
| 89 |
+
}
|
| 90 |
+
}
|