#!/bin/sh
#
# Launch /app/llama-server, taking its settings from environment variables.
#
# Each variable below may be overridden by the caller; when it is unset or
# empty the default shown here is used. Every value is forwarded to the
# server as the command-line flag named next to it:
#
#   MODEL_SPEC      -> -hf
#   HOST            -> --host
#   PORT            -> --port
#   CTX_SIZE        -> --ctx-size
#   THREADS         -> --threads
#   PARALLEL        -> --parallel
#   CACHE_TYPE_K    -> --cache-type-k
#   CACHE_TYPE_V    -> --cache-type-v
#   REASONING_MODE  -> --reasoning
#
# Arguments given to this script itself are not forwarded to the server.

# Abort on any failing command and on any reference to an unset variable.
set -eu

# Fill in defaults; ':=' assigns only when the variable is unset or empty.
: "${MODEL_SPEC:=unsloth/gemma-4-E2B-it-GGUF:Q4_0}"
: "${HOST:=0.0.0.0}"
: "${PORT:=7860}"
: "${CTX_SIZE:=131072}"
: "${THREADS:=2}"
: "${PARALLEL:=1}"
: "${REASONING_MODE:=off}"
: "${CACHE_TYPE_K:=q4_0}"
: "${CACHE_TYPE_V:=q4_0}"

# Assemble the server's argument vector in the positional parameters
# (POSIX sh has no arrays); this replaces any arguments the script received.
set -- \
  -hf "$MODEL_SPEC" \
  --host "$HOST" \
  --port "$PORT" \
  --ctx-size "$CTX_SIZE" \
  --threads "$THREADS" \
  --parallel "$PARALLEL" \
  --cache-type-k "$CACHE_TYPE_K" \
  --cache-type-v "$CACHE_TYPE_V" \
  --reasoning "$REASONING_MODE"

# Replace this shell with the server process instead of running it as a child.
exec /app/llama-server "$@"