File size: 2,387 Bytes
7b4f5dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/bin/bash
# =============================================================================
# setup_vllm.sh — One-command vLLM setup on AMD MI300X for CodeSentry
# =============================================================================
# Strict mode: abort on any failing command, unset variable, or failed
# pipeline stage.
set -euo pipefail

echo "============================================================"
echo "  CodeSentry — vLLM + Qwen2.5-Coder-32B Setup (AMD MI300X)"
echo "============================================================"

# Pre-flight: step 2 reads requirements.txt from the current directory.
# Check for it now so a wrong working directory is reported before the
# (long) vLLM install instead of after it.
if [ ! -f requirements.txt ]; then
  echo "ERROR: requirements.txt not found in ${PWD}" >&2
  exit 1
fi

# ── 1. Install vLLM with ROCm backend ─────────────────────────
# NOTE(review): this installs the vllm package from PyPI and only adds the
# PyTorch ROCm 6.2 wheel index as an extra source. Confirm that the resulting
# vLLM build actually targets ROCm on the MI300X; vLLM's ROCm install docs
# describe Docker/source-build paths.
echo "[1/4] Installing vLLM with ROCm 6.2 support..."
pip install vllm --extra-index-url https://download.pytorch.org/whl/rocm6.2

# ── 2. Install project dependencies ───────────────────────────
echo "[2/4] Installing CodeSentry requirements..."
pip install -r requirements.txt

# ── 3. Start vLLM server ──────────────────────────────────────
echo "[3/4] Starting vLLM server with Qwen2.5-Coder-32B-Instruct..."
echo "  Model: Qwen/Qwen2.5-Coder-32B-Instruct"
echo "  Port: 8080"
echo "  GPU utilisation: 85%"
echo "  Max context: 32768 tokens"

vllm serve Qwen/Qwen2.5-Coder-32B-Instruct \
  --port 8080 \
  --tensor-parallel-size 1 \
  --gpu-memory-utilization 0.85 \
  --max-model-len 32768 \
  --enable-chunked-prefill \
  --trust-remote-code \
  &

VLLM_PID=$!
echo "  vLLM PID: $VLLM_PID"

# ── 4. Wait for vLLM to be ready ──────────────────────────────
echo "[4/4] Waiting for vLLM to be ready..."
MAX_WAIT=300  # 5 minutes max
ELAPSED=0
until curl -sf http://localhost:8080/health > /dev/null 2>&1; do
  if [ "$ELAPSED" -ge "$MAX_WAIT" ]; then
    echo "ERROR: vLLM did not become ready within ${MAX_WAIT}s"
    kill "$VLLM_PID" 2>/dev/null || true
    exit 1
  fi
  echo "  Waiting... (${ELAPSED}s elapsed)"
  sleep 5
  ELAPSED=$((ELAPSED + 5))
done

# Readiness banner: one output line per argument, framed by blank lines.
printf '%s\n' \
  "" \
  "============================================================" \
  "  vLLM is READY at http://localhost:8080" \
  "  Starting CodeSentry API at http://localhost:8000 ..." \
  "============================================================" \
  ""

# Run the CodeSentry API (main:app) in the foreground on 0.0.0.0:8000 with
# uvicorn's --reload flag; the script blocks here until uvicorn exits.
codesentry_args=(main:app --host 0.0.0.0 --port 8000 --reload)
uvicorn "${codesentry_args[@]}"