Add runtime entrypoint for Gemma 4 API
Browse files- entrypoint.sh +19 -0
entrypoint.sh
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/sh
# Runtime entrypoint: replaces this shell process with llama-server, configured
# from environment variables. A variable that is unset or empty falls back to
# the default listed here.
#
#   MODEL_SPEC      passed to -hf         (default: unsloth/gemma-4-E2B-it-GGUF:Q4_0)
#   HOST            passed to --host      (default: 0.0.0.0)
#   PORT            passed to --port      (default: 7860)
#   CTX_SIZE        passed to --ctx-size  (default: 4096)
#   THREADS         passed to --threads   (default: 2)
#   PARALLEL        passed to --parallel  (default: 1)
#   REASONING_MODE  passed to --reasoning (default: off)
#
# Positional arguments given to this script are not forwarded to llama-server.

# Abort on any failing command and on any reference to an unset variable.
set -e
set -u

# Fill in defaults; ':=' assigns only when the variable is unset or empty.
: "${MODEL_SPEC:=unsloth/gemma-4-E2B-it-GGUF:Q4_0}"
: "${HOST:=0.0.0.0}"
: "${PORT:=7860}"
: "${CTX_SIZE:=4096}"
: "${THREADS:=2}"
: "${PARALLEL:=1}"
: "${REASONING_MODE:=off}"

# Build the server's argument vector in the positional parameters, discarding
# whatever arguments the script itself was started with.
set -- \
  -hf "$MODEL_SPEC" \
  --host "$HOST" \
  --port "$PORT" \
  --ctx-size "$CTX_SIZE" \
  --threads "$THREADS" \
  --parallel "$PARALLEL" \
  --reasoning "$REASONING_MODE"

# exec so that llama-server takes over this process instead of running as a
# child of the shell.
exec llama-server "$@"
|