| { |
| "_README": "POD variant of config.json for Jv2 stages 00-03 on a single Pro 6000 (96GB). Models are NOT pulled from HF (hf_repo is null for both): A is scp'd from aibox and B is folded on the pod; paths are pod-local (/workspace). it_base + instruct_fisher + instruct_layer_importance are NULL here — they belong to stage 04 (merge), which runs on AIBOX, not the pod. Finalize/verify bin paths + fisher params ON the pod (jaxx: expect quirks). Keep the local config.json as the AIBOX/stage-04 source of truth.", |
| "_MODELS_ARE_REAL_J_INPUTS": "SET 2026-05-29 07:18Z (off the old Gv3/Equinox smoke placeholders). J-line = Iv-winner + toasty's glimmer-rp v0.1. Equinox is NOT a J input — it is already INSIDE the Iv-winner (Iv3 = Gv3-rebuilt + Equinox). REAL Jv2 style ratio is between A = the LOCKED Iv-winner (Iv3_rebuilt, scp'd to /workspace/jv2) and B = glimmer-on-Iv-winner (= Jv1; fold glimmer-rp v0.1 LoRA onto A on the pod). The separate Gv3/Equinox smoke (#74) is DROPPED from this run — first real run IS the smoke (README). #74 stays a later optional follow-up.", |
| "run_label": "style_mask_j1_pod", |
| "goal": "reduce slop + respect character details (personality, physical traits, voice)", |
| "seed": 1729, |
|
|
| "single_gpu": true, |
| "cuda_devices": "0", |
| "_gpu_note": "ONE 96GB card. run_all.sh serves with CUDA_VISIBLE_DEVICES=0,1 --tensor-split 1,1 (2-GPU aibox). On the pod: CUDA_VISIBLE_DEVICES=0 and DROP --tensor-split. See POD_RUNBOOK.md.", |
|
|
| "models": { |
| "A": { |
| "name": "Iv_winner", |
| "hf_repo": null, |
| "_source": "Iv3 (Gv3-rebuilt + Equinox, thinkfisher 0.3/0.8) — scp'd from aibox, NOT pulled from HF.", |
| "safetensors_dir": "/workspace/jv2/g4_31b_recipe_Iv3_rebuilt", |
| "gguf": "/workspace/style_mask_j1/gguf/Iv_winner.Q4_K_M.gguf" |
| }, |
| "B": { |
| "name": "glimmer_on_Iv", |
| "hf_repo": null, |
| "_source": "= Jv1. Fold glimmer-rp v0.1 LoRA (/workspace/jv2/gemma4-31b-glimmer-rp-adapter) onto A (the Iv-winner) on the pod via fold_jv1.py -> this dir.", |
| "safetensors_dir": "/workspace/jv2/Jv1", |
| "gguf": "/workspace/style_mask_j1/gguf/glimmer_on_Iv.Q4_K_M.gguf" |
| } |
| }, |
| "it_base": null, |
| "instruct_fisher": null, |
| "instruct_layer_importance": null, |
| "_stage04_inputs_note": "it_base=google/gemma-4-31B-it, instruct_fisher=g4_31b_it_fisher/g4-31b-it/fisher.pt (55GB local), instruct_layer_importance=...layer_importance.json — ALL aibox-local, used only in 04_merge_style.py. Never copied to the pod.", |
|
|
| "eso5_prompts": "/workspace/style_mask_j1/prompts_used.jsonl", |
| "n_prompts": 40, |
|
|
| "generate": { |
| "port": 8091, |
| "ctx_size": 32768, |
| "parallel": 8, |
| "max_tokens": 1024, |
| "temperature": 0.0, |
| "workers": 8, |
| "use_bf16_transformers": false |
| }, |
|
|
| "fisher": { |
| "backend": "pod_local", |
| "_backend_note": "Run 02_extract_style_fisher.py directly on the pod GPU. On 96GB a single card likely fits without --cpu-offload; start with --gpu-memory-gib 90 and add --cpu-offload only if OOM. Verify on pod.", |
| "max_length": 1024, |
| "layers_per_batch": 8, |
| "fisher_dtype": "bf16", |
| "gpu_memory_gib": "90", |
| "cpu_offload": false, |
| "enable_thinking": false, |
| "_enable_thinking_note": "Eso-5/style is thinking-OFF; extract style-Fisher thinking-off to match how the merge is evaluated." |
| }, |
|
|
| "ratio": { |
| "epsilon": 1e-8, |
| "combined_floor_quantile": 0.5 |
| }, |
|
|
| "merge": { |
| "mode": "v2_two_model_style", |
| "density": null, |
| "scale": null, |
| "_note": "STAGE 04 — AIBOX ONLY. Not run on the pod. Fill density/scale from the #65 sweep winner before stage 04 runs on aibox." |
| }, |
|
|
| "upload": { |
| "ratio_repo": "Esobold/g4_31b_style_ratio_j1", |
| "ratio_repo_private": true, |
| "files": ["ratio/style_ratio.pt", "ratio/ratio_layer_summary.json"], |
| "_note": "After stage 03, upload style_ratio.pt (~35GB) to the pre-created private Esobold repo, then pull HF->aibox for stage 04." |
| }, |
|
|
| "disk": { |
| "min_free_gb_before_download": 400, |
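| "_footprint": "Gv3 ~63GB + Equinox ~62GB safetensors (~125) + per-model transient bf16 GGUF ~62GB during convert (deleted after) + 4 style-Fishers ~35GB each (~140) + ratio ~35GB. NOTE: the model names/sizes above date from the old Gv3/Equinox smoke placeholders; the inputs are now A = Iv_winner and B = Jv1, so re-check the footprint for those. df preflight before download AND before stage 02.", |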
| "_twisted_2026_05_29": "Sanity-check disk so it doesn't run out partway through." |
| }, |
|
|
| "bin": { |
| "_note": "SET ON POD — point at the pod's llama.cpp build + venv python. Placeholders below assume a /workspace build.", |
| "python": "python", |
| "llama_server": "/workspace/llama.cpp/build/bin/llama-server", |
| "llama_quantize": "/workspace/llama.cpp/build/bin/llama-quantize", |
| "llama_convert": "/workspace/llama.cpp/convert_hf_to_gguf.py" |
| } |
| } |
|
|