File size: 4,788 Bytes
fcb2b04
 
 
 
6379283
fcb2b04
 
 
 
 
6379283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcb2b04
6379283
 
fcb2b04
6379283
fcb2b04
 
 
 
 
6379283
fcb2b04
 
6379283
 
 
 
 
 
 
 
 
 
 
 
 
 
fcb2b04
6379283
fcb2b04
 
 
 
6379283
fcb2b04
6379283
 
 
fcb2b04
 
6379283
 
fcb2b04
 
6379283
 
 
 
 
fcb2b04
 
6379283
 
 
 
fcb2b04
6379283
 
 
 
 
fcb2b04
6379283
fcb2b04
6379283
 
fcb2b04
6379283
fcb2b04
6379283
 
 
 
 
 
fcb2b04
6379283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fcb2b04
6379283
 
 
fcb2b04
6379283
fcb2b04
6379283
 
 
 
 
 
 
 
fcb2b04
 
 
 
6379283
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/bin/bash
# Deploy Stack 2.9 to RunPod
# Requires: runpodctl installed and configured

set -euo pipefail

echo "πŸš€ Deploying Stack 2.9 to RunPod"
echo "================================"
echo ""

# ANSI color escapes for the status messages below.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Every setting is overridable from the environment; the value after ':-'
# is the default used when the variable is unset or empty.
IMAGE="${RUNPOD_IMAGE:-docker.io/library/pytorch:2.1.0-cuda11.8-cudnn8-runtime}"
TEMPLATE_NAME="${RUNPOD_TEMPLATE_NAME:-stack-2.9-template}"
CONTAINER_NAME="${RUNPOD_CONTAINER_NAME:-stack-2.9-server}"
GPU_TYPE="${RUNPOD_GPU_TYPE:-NVIDIA RTX A6000}"
DISK_SIZE="${RUNPOD_DISK_SIZE:-50}"
MODEL_PATH="${MODEL_PATH:-/workspace/models/stack-2.9-awq}"
VLLM_PORT="${VLLM_PORT:-8000}"

# Fail fast if the RunPod CLI is not on PATH — nothing below works without it.
if ! command -v runpodctl >/dev/null 2>&1; then
    echo -e "${RED}❌ runpodctl not found. Install from: https://github.com/runpod/runpodctl${NC}"
    exit 1
fi

# Show the effective configuration before touching anything remote.
echo "πŸ“‹ Configuration:"
echo "  GPU: $GPU_TYPE"
echo "  Disk: ${DISK_SIZE}GB"
echo "  Image: $IMAGE"
echo "  Model path: $MODEL_PATH"
echo ""

# Step 1: ensure the pod template exists. Creation is one-time; on
# subsequent runs the existing template is reused.
echo "πŸ“¦ Creating/verifying RunPod template..."
if runpodctl get template "$TEMPLATE_NAME" &>/dev/null; then
    echo -e "${YELLOW}⚠️  Template already exists, using existing${NC}"
else
    runpodctl create template \
      --name "$TEMPLATE_NAME" \
      --image "$IMAGE" \
      --docker-run-args "--gpus all -e MODEL_PATH=$MODEL_PATH -e VLLM_PORT=$VLLM_PORT -p $VLLM_PORT:8000" \
      --volume "/workspace/models:$MODEL_PATH:ro" \
      --volume "/workspace/output:/workspace/output" \
      --container-disk-size "${DISK_SIZE}GB"
    echo -e "${GREEN}βœ… Template created${NC}"
fi

# Step 2: Deploy pod
echo "☁️  Deploying pod..."
# Create the pod from the template made in Step 1; --query id makes
# runpodctl print only the new pod's id, which is captured here and used
# by every later command (cp, ssh, get pod, logs).
# NOTE(review): flag names/behavior assume a runpodctl version supporting
# --query on 'create pod' — confirm against the installed CLI.
POD_ID=$(runpodctl create pod \
  --name "$CONTAINER_NAME" \
  --gpu-type "$GPU_TYPE" \
  --disk-size "${DISK_SIZE}GB" \
  --template "$TEMPLATE_NAME" \
  --env "MODEL_PATH=$MODEL_PATH" \
  --env "VLLM_PORT=$VLLM_PORT" \
  --port "$VLLM_PORT" \
  --query id)

echo -e "${GREEN}βœ… Pod created: $POD_ID${NC}"
echo "  Waiting for startup (this may take 2-3 minutes for first-time model load)..."
# Fixed wait before copying files: first boot pulls the image and can be slow.
sleep 60

# Step 3: Copy deployment files
echo "πŸ“€ Copying code to pod..."
# Create the deployment package with an unpredictable name (mktemp) instead
# of a timestamp; the name still matches the stack-2.9-*.tar.gz glob the
# remote setup step extracts.
TEMP_PACKAGE=$(mktemp /tmp/stack-2.9-deployment-XXXXXX.tar.gz)
# Remove the local tarball on any exit path (success, failure, signal) so
# repeated runs do not accumulate packages under /tmp.
trap 'rm -f -- "$TEMP_PACKAGE"' EXIT
# tar's stderr is suppressed intentionally; failure is still caught via the
# exit status below.
tar czf "$TEMP_PACKAGE" \
  stack-2.9-deploy/ \
  requirements.txt \
  2>/dev/null || {
    echo -e "${RED}❌ Failed to create deployment package${NC}"
    exit 1
}

# Copy the package into the pod's /workspace for extraction in Step 3b.
if ! runpodctl cp "$TEMP_PACKAGE" "$POD_ID:/workspace/" ; then
    echo -e "${RED}❌ Failed to copy package to pod${NC}"
    exit 1
fi

# Extract and setup on the pod.
# Quoting note: the remote script is wrapped in "'...'" (outer double
# quotes), so $VARS expand LOCALLY before transmission; \$ and \" survive
# to the pod, so \$(...) and \$! execute remotely.
echo "πŸ”§ Setting up on pod..."
runpodctl ssh "$POD_ID" bash -c "'
set -euo pipefail
cd /workspace
tar xzf stack-2.9-*.tar.gz

# Install system dependencies
apt-get update && apt-get install -y --no-install-recommends \
    python3-pip \
    python3-venv \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install requirements
python3 -m pip install --upgrade pip setuptools wheel
python3 -m pip install -r requirements.txt

# Check if model exists. The command substitution is escaped (\$) so that
# ls runs ON THE POD; unescaped it would run on the deploy machine and
# always report the model as missing.
if [ ! -d \"$MODEL_PATH\" ] || [ -z \"\$(ls -A \"$MODEL_PATH\" 2>/dev/null)\" ]; then
    echo \"⚠️  Model not found at $MODEL_PATH\"
    echo \"   You have two options:\"
    echo \"   1. Upload your model to: $MODEL_PATH\"
    echo \"   2. Set MODEL_PATH to a HuggingFace model name and it will be downloaded\"
    echo \"   Example: export MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct\"
    echo \"   Note: Downloading large models may take hours and exceed pod disk space.\"
    echo \"   Recommendation: Upload AWQ-quantized model to S3 and download it.\"
fi

echo \"Starting vLLM server...\"
cd /workspace/stack-2.9-deploy
nohup python vllm_server.py > vllm.log 2>&1 &
echo \$! > /tmp/vllm.pid
'" || {
    echo -e "${RED}❌ Failed to setup pod${NC}"
    exit 1
}

# Step 4: Wait and check status
echo "⏳ Waiting for vLLM server to start..."
sleep 30

# Get pod status
echo ""
echo "πŸ“Š Pod status:"
runpodctl get pod "$POD_ID"

# Get public URL ("pending" if RunPod has not assigned one yet).
PUBLIC_URL=$(runpodctl get pod "$POD_ID" --query "url" --output text 2>/dev/null || echo "pending")

echo ""
echo -e "${GREEN}βœ… Deployment initiated!${NC}"
echo "  Pod ID: $POD_ID"
# Use the configured port rather than hard-coding 8000: the template maps
# $VLLM_PORT externally, so overriding VLLM_PORT changes these URLs.
echo "  vLLM API: http://$PUBLIC_URL:$VLLM_PORT"
echo "  Health: http://$PUBLIC_URL:$VLLM_PORT/health"
echo ""
echo "πŸ“‹ To monitor:"
echo "  runpodctl logs $POD_ID            # View logs"
echo "  runpodctl ssh $POD_ID            # SSH into pod"
echo "  runpodctl stop pod $POD_ID       # Stop (saves disk)"
echo "  runpodctl delete pod $POD_ID     # Delete (you lose data)"
echo ""
echo -e "${YELLOW}⚠️  First server startup may take 5-15 minutes as the model loads${NC}"
echo -e "${YELLOW}⚠️  Monitor logs: runpodctl logs $POD_ID${NC}"