# Launcher for /app/llama-server.
#
# Each setting is read from the environment variable of the same name; when
# that variable is unset or empty, the default written below is used instead.
#
#   MODEL_SPEC      value passed to -hf
#   HOST            value passed to --host
#   PORT            value passed to --port
#   CTX_SIZE        value passed to --ctx-size
#   THREADS         value passed to --threads
#   PARALLEL        value passed to --parallel
#   REASONING_MODE  value passed to --reasoning
#   CACHE_TYPE_K    value passed to --cache-type-k
#   CACHE_TYPE_V    value passed to --cache-type-v

# -e: stop on the first failing command; -u: treat unset variables as errors.
set -eu

MODEL_SPEC="${MODEL_SPEC:-unsloth/gemma-4-E2B-it-GGUF:Q4_0}"
HOST="${HOST:-0.0.0.0}"
PORT="${PORT:-7860}"
CTX_SIZE="${CTX_SIZE:-131072}"
THREADS="${THREADS:-2}"
PARALLEL="${PARALLEL:-1}"
REASONING_MODE="${REASONING_MODE:-off}"
CACHE_TYPE_K="${CACHE_TYPE_K:-q4_0}"
CACHE_TYPE_V="${CACHE_TYPE_V:-q4_0}"

# exec replaces this shell with the server process, so no wrapper shell is
# left running in between. Every expansion is quoted so a value containing
# whitespace is still passed as a single argument.
exec /app/llama-server \
  -hf "$MODEL_SPEC" \
  --host "$HOST" \
  --port "$PORT" \
  --ctx-size "$CTX_SIZE" \
  --threads "$THREADS" \
  --parallel "$PARALLEL" \
  --cache-type-k "$CACHE_TYPE_K" \
  --cache-type-v "$CACHE_TYPE_V" \
  --reasoning "$REASONING_MODE"