#!/bin/sh
# Launch llama-server with settings taken from the environment.
#
# Every variable below may be overridden by the caller. When a variable is
# unset or empty, the default listed here is used:
#   MODEL_SPEC      value for -hf          (default: unsloth/gemma-4-E2B-it-GGUF:Q4_0)
#   HOST            value for --host       (default: 0.0.0.0)
#   PORT            value for --port       (default: 7860)
#   CTX_SIZE        value for --ctx-size   (default: 4096)
#   THREADS         value for --threads    (default: 2)
#   PARALLEL        value for --parallel   (default: 1)
#   REASONING_MODE  value for --reasoning  (default: off)
#
# Values are forwarded to llama-server unchanged; this script does not
# validate them.

# Abort on any failing command (-e) and on use of an unset variable (-u).
set -eu

MODEL_SPEC="${MODEL_SPEC:-unsloth/gemma-4-E2B-it-GGUF:Q4_0}"
HOST="${HOST:-0.0.0.0}"
PORT="${PORT:-7860}"
CTX_SIZE="${CTX_SIZE:-4096}"
THREADS="${THREADS:-2}"
PARALLEL="${PARALLEL:-1}"
REASONING_MODE="${REASONING_MODE:-off}"

# exec replaces this shell with the server process instead of running it as
# a child, so nothing after this command is ever executed.
exec llama-server \
  -hf "$MODEL_SPEC" \
  --host "$HOST" \
  --port "$PORT" \
  --ctx-size "$CTX_SIZE" \
  --threads "$THREADS" \
  --parallel "$PARALLEL" \
  --reasoning "$REASONING_MODE"