Spaces:
Running
Running
File size: 2,387 Bytes
#!/bin/bash
# =============================================================================
# setup_vllm.sh — One-command vLLM setup on AMD MI300X for CodeSentry
# =============================================================================
set -euo pipefail

# -----------------------------------------------------------------------------
# Configuration.
# Each value may be overridden from the environment; the defaults reproduce the
# previously hard-coded setup, so running the script with no variables set
# behaves as before.
#   VLLM_MODEL          model id passed to `vllm serve`
#   VLLM_PORT           port the vLLM server listens on
#   VLLM_GPU_MEM_UTIL   value for --gpu-memory-utilization (fraction)
#   VLLM_MAX_MODEL_LEN  value for --max-model-len (tokens)
#   API_PORT            port for the CodeSentry uvicorn app
#   MAX_WAIT            seconds to wait for vLLM's /health endpoint
# -----------------------------------------------------------------------------
readonly VLLM_MODEL="${VLLM_MODEL:-Qwen/Qwen2.5-Coder-32B-Instruct}"
readonly VLLM_PORT="${VLLM_PORT:-8080}"
readonly VLLM_GPU_MEM_UTIL="${VLLM_GPU_MEM_UTIL:-0.85}"
readonly VLLM_MAX_MODEL_LEN="${VLLM_MAX_MODEL_LEN:-32768}"
readonly API_PORT="${API_PORT:-8000}"
readonly MAX_WAIT="${MAX_WAIT:-300}" # 5 minutes by default
readonly POLL_INTERVAL=5

# MAX_WAIT is used in shell arithmetic below; reject anything but an integer.
if [[ ! "$MAX_WAIT" =~ ^[0-9]+$ ]]; then
  echo "ERROR: MAX_WAIT must be a non-negative integer (got '${MAX_WAIT}')" >&2
  exit 1
fi

# The readiness probe relies on curl. Without it every probe would fail and the
# script would report a misleading timeout, so check before doing any work.
if ! command -v curl > /dev/null 2>&1; then
  echo "ERROR: curl is required for the vLLM health check" >&2
  exit 1
fi

# PID of the background vLLM server; empty until the server has been started.
VLLM_PID=""

# Stop the background vLLM server if it is still running.
# Installed as an EXIT trap so the server is not left behind (holding the GPU)
# when the script ends for any reason: a failed step, a readiness timeout, or
# uvicorn exiting.
cleanup() {
  if [[ -n "$VLLM_PID" ]] && kill -0 "$VLLM_PID" 2> /dev/null; then
    echo " Stopping vLLM (PID ${VLLM_PID})..."
    # Best effort: the process may exit between the check and the kill.
    kill "$VLLM_PID" 2> /dev/null || true
    wait "$VLLM_PID" 2> /dev/null || true
  fi
}
trap cleanup EXIT

echo "============================================================"
echo " CodeSentry — vLLM + Qwen2.5-Coder-32B Setup (AMD MI300X)"
echo "============================================================"

# -- 1. Install vLLM with ROCm backend ----------------------------------------
# NOTE(review): the extra index below is the PyTorch ROCm wheel index. Confirm
# that this command really yields a ROCm-enabled vLLM build on the target host;
# the vLLM installation guide should be checked for the supported ROCm route.
echo "[1/4] Installing vLLM with ROCm 6.2 support..."
pip install vllm --extra-index-url https://download.pytorch.org/whl/rocm6.2

# -- 2. Install project dependencies ------------------------------------------
echo "[2/4] Installing CodeSentry requirements..."
pip install -r requirements.txt

# -- 3. Start vLLM server -----------------------------------------------------
# Display the memory fraction as a whole percentage (0.85 -> 85).
gpu_pct=$(awk -v u="$VLLM_GPU_MEM_UTIL" 'BEGIN { printf "%.0f", u * 100 }')

echo "[3/4] Starting vLLM server with ${VLLM_MODEL##*/}..."
echo " Model: ${VLLM_MODEL}"
echo " Port: ${VLLM_PORT}"
echo " GPU utilisation: ${gpu_pct}%"
echo " Max context: ${VLLM_MAX_MODEL_LEN} tokens"

vllm serve "$VLLM_MODEL" \
  --port "$VLLM_PORT" \
  --tensor-parallel-size 1 \
  --gpu-memory-utilization "$VLLM_GPU_MEM_UTIL" \
  --max-model-len "$VLLM_MAX_MODEL_LEN" \
  --enable-chunked-prefill \
  --trust-remote-code \
  &
VLLM_PID=$!
echo " vLLM PID: $VLLM_PID"

# -- 4. Wait for vLLM to be ready ---------------------------------------------
echo "[4/4] Waiting for vLLM to be ready..."
elapsed=0
# --max-time keeps a single hung probe from stalling the loop indefinitely.
until curl -sf --max-time "$POLL_INTERVAL" \
  "http://localhost:${VLLM_PORT}/health" > /dev/null 2>&1; do
  # set -e does not cover background jobs, so detect a crashed server here
  # instead of waiting out the whole timeout.
  if ! kill -0 "$VLLM_PID" 2> /dev/null; then
    echo "ERROR: vLLM exited before becoming ready" >&2
    exit 1
  fi
  if ((elapsed >= MAX_WAIT)); then
    echo "ERROR: vLLM did not become ready within ${MAX_WAIT}s" >&2
    exit 1 # the EXIT trap stops the vLLM process
  fi
  echo " Waiting... (${elapsed}s elapsed)"
  sleep "$POLL_INTERVAL"
  elapsed=$((elapsed + POLL_INTERVAL))
done

echo ""
echo "============================================================"
echo " vLLM is READY at http://localhost:${VLLM_PORT}"
echo " Starting CodeSentry API at http://localhost:${API_PORT} ..."
echo "============================================================"
echo ""

# Start CodeSentry in the foreground. Deliberately not `exec`: the shell must
# stay alive so the EXIT trap can stop vLLM once uvicorn terminates.
uvicorn main:app --host 0.0.0.0 --port "$API_PORT" --reload
|