#!/bin/bash
# Stack 2.9 -> RunPod deployment script.
# Prerequisite: the runpodctl CLI must be installed and configured.

# Strict mode: stop on a failing command, on use of an unset variable,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# Startup banner (three lines, the last one blank).
printf '%s\n' \
  "π Deploying Stack 2.9 to RunPod" \
  "================================" \
  ""
# Color codes. These are literal backslash sequences, so they only turn into
# terminal escapes when printed through `echo -e` or printf's %b.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

#######################################
# Abort the script unless a setting is a non-empty string of decimal digits.
# Globals:   RED, NC (read)
# Arguments: $1 - setting name, used only in the error message
#            $2 - value to validate
# Outputs:   an error message on stderr when the value is rejected
# Returns:   0 when valid; exits with status 1 otherwise
#######################################
require_uint() {
  local name=$1
  local value=$2
  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    printf '%b%s%b\n' "$RED" \
      "Invalid ${name}: '${value}' (expected a non-negative integer)" "$NC" >&2
    exit 1
  fi
}

# Configuration (can be overridden by environment variables)
IMAGE="${RUNPOD_IMAGE:-docker.io/library/pytorch:2.1.0-cuda11.8-cudnn8-runtime}"
TEMPLATE_NAME="${RUNPOD_TEMPLATE_NAME:-stack-2.9-template}"
CONTAINER_NAME="${RUNPOD_CONTAINER_NAME:-stack-2.9-server}"
GPU_TYPE="${RUNPOD_GPU_TYPE:-NVIDIA RTX A6000}"
DISK_SIZE="${RUNPOD_DISK_SIZE:-50}"
MODEL_PATH="${MODEL_PATH:-/workspace/models/stack-2.9-awq}"
VLLM_PORT="${VLLM_PORT:-8000}"

# DISK_SIZE gets a "GB" suffix and VLLM_PORT is spliced into the docker run
# arguments and the pod environment later in this script, so reject anything
# that is not a plain number before any remote resource is created.
require_uint RUNPOD_DISK_SIZE "$DISK_SIZE"
require_uint VLLM_PORT "$VLLM_PORT"
# Prerequisite check: stop right away when the runpodctl CLI is not on PATH.
if ! command -v runpodctl >/dev/null 2>&1; then
  echo -e "${RED}β runpodctl not found. Install from: https://github.com/runpod/runpodctl${NC}"
  exit 1
fi

# Print the effective settings (followed by a blank line) before doing any work.
printf '%s\n' \
  "π Configuration:" \
  " GPU: $GPU_TYPE" \
  " Disk: ${DISK_SIZE}GB" \
  " Image: $IMAGE" \
  " Model path: $MODEL_PATH" \
  ""
# Step 1: Make sure the RunPod template exists. It is created on the first run
# and reused afterwards.
echo "π¦ Creating/verifying RunPod template..."
if runpodctl get template "$TEMPLATE_NAME" &>/dev/null; then
  # The lookup succeeded, so there is nothing to create.
  echo -e "${YELLOW}β οΈ Template already exists, using existing${NC}"
else
  # Collect the arguments in an array so each value stays a single word.
  template_args=(
    --name "$TEMPLATE_NAME"
    --image "$IMAGE"
    --docker-run-args "--gpus all -e MODEL_PATH=$MODEL_PATH -e VLLM_PORT=$VLLM_PORT -p $VLLM_PORT:8000"
    --volume "/workspace/models:$MODEL_PATH:ro"
    --volume "/workspace/output:/workspace/output"
    --container-disk-size "${DISK_SIZE}GB"
  )
  runpodctl create template "${template_args[@]}"
  echo -e "${GREEN}β
Template created${NC}"
fi
# Step 2: Deploy pod
# Creates the pod from the template and captures the identifier printed by
# `runpodctl create pod ... --query id` for use by the later steps.
echo "βοΈ Deploying pod..."
POD_ID=$(runpodctl create pod \
  --name "$CONTAINER_NAME" \
  --gpu-type "$GPU_TYPE" \
  --disk-size "${DISK_SIZE}GB" \
  --template "$TEMPLATE_NAME" \
  --env "MODEL_PATH=$MODEL_PATH" \
  --env "VLLM_PORT=$VLLM_PORT" \
  --port "$VLLM_PORT" \
  --query id)

# `set -e` only catches a non-zero exit status. A successful call that prints
# nothing would otherwise make the copy/ssh steps target ":/workspace/".
if [[ -z "${POD_ID//[[:space:]]/}" ]]; then
  echo -e "${RED}Pod creation returned no pod ID; aborting${NC}" >&2
  exit 1
fi

echo -e "${GREEN}β
Pod created: $POD_ID${NC}"
echo " Waiting for startup (this may take 2-3 minutes for first-time model load)..."
# Fixed grace period before the first copy/ssh attempt.
sleep 60
# Step 3: Copy deployment files
echo "π€ Copying code to pod..."

# Create deployment package.
# The archive lives in a private mktemp directory instead of a predictable
# /tmp path, and the EXIT trap removes it on every exit path so repeated runs
# do not leave archives behind.
PACKAGE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/stack-2.9-deploy.XXXXXX") || {
  echo -e "${RED}β Failed to create deployment package${NC}"
  exit 1
}
trap 'rm -rf -- "$PACKAGE_DIR"' EXIT

# The file name keeps the stack-2.9-*.tar.gz shape that the extraction step
# on the pod looks for.
TEMP_PACKAGE="$PACKAGE_DIR/stack-2.9-deployment-$(date +%s).tar.gz"

# tar's stderr is left visible on purpose: it names the missing or unreadable
# input when packaging fails.
if ! tar czf "$TEMP_PACKAGE" stack-2.9-deploy/ requirements.txt; then
  echo -e "${RED}β Failed to create deployment package${NC}"
  exit 1
fi

# Copy to pod
if ! runpodctl cp "$TEMP_PACKAGE" "$POD_ID:/workspace/"; then
  echo -e "${RED}β Failed to copy package to pod${NC}"
  exit 1
fi
# Extract and setup
echo "π§ Setting up on pod..."

# Name of the uploaded archive as it appears in /workspace on the pod.
# Extracting by exact name avoids the glob matching archives left over from
# earlier deployments (tar would treat the extra matches as member names).
PACKAGE_NAME="${TEMP_PACKAGE##*/}"

# Quoting rules for the remote script below:
#   * The text is a local double-quoted string wrapped in single quotes for
#     the remote shell, so it must not contain any single quote characters.
#   * Unescaped $VAR is expanded locally, which injects the local configuration.
#   * \$ and \" are escaped so they reach the pod and are evaluated there.
#   * A trailing backslash joins lines locally before the text is sent.
# NOTE(review): this assumes `runpodctl ssh <pod> <command...>` runs the
# command on the pod - confirm against the installed runpodctl version.
runpodctl ssh "$POD_ID" bash -c "'
set -euo pipefail
cd /workspace
tar xzf \"$PACKAGE_NAME\"

# Install system dependencies
apt-get update && apt-get install -y --no-install-recommends \
  python3-pip \
  python3-venv \
  curl \
  && rm -rf /var/lib/apt/lists/*

# Upgrade pip and install requirements
python3 -m pip install --upgrade pip setuptools wheel
python3 -m pip install -r requirements.txt

# Check if model exists (the directory listing is evaluated on the pod)
if [ ! -d \"$MODEL_PATH\" ] || [ -z \"\$(ls -A \"$MODEL_PATH\" 2>/dev/null)\" ]; then
  echo \"β οΈ Model not found at $MODEL_PATH\"
  echo \" You have two options:\"
  echo \" 1. Upload your model to: $MODEL_PATH\"
  echo \" 2. Set MODEL_PATH to a HuggingFace model name and it will be downloaded\"
  echo \" Example: export MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct\"
  echo \" Note: Downloading large models may take hours and exceed pod disk space.\"
  echo \" Recommendation: Upload AWQ-quantized model to S3 and download it.\"
fi

echo \"Starting vLLM server...\"
cd /workspace/stack-2.9-deploy
# Run detached from this session and record the PID for later inspection.
nohup python vllm_server.py > vllm.log 2>&1 &
echo \$! > /tmp/vllm.pid
'" || {
  echo -e "${RED}β Failed to setup pod${NC}"
  exit 1
}
# Step 4: Wait and check status
echo "β³ Waiting for vLLM server to start..."
# Fixed pause before querying the pod; the server may still be loading.
sleep 30

# Get pod status
echo ""
echo "π Pod status:"
runpodctl get pod "$POD_ID"

# Get public URL. A failed query and a successful query that prints nothing
# both fall back to the "pending" placeholder, so the summary never shows an
# empty host.
PUBLIC_URL=$(runpodctl get pod "$POD_ID" --query "url" --output text 2>/dev/null || echo "pending")
PUBLIC_URL="${PUBLIC_URL:-pending}"

echo ""
echo -e "${GREEN}β
Deployment initiated!${NC}"
echo " Pod ID: $POD_ID"
# Print the configured port rather than a fixed 8000 so the URLs stay correct
# when VLLM_PORT is overridden.
# NOTE(review): assumes the externally reachable port equals VLLM_PORT, as
# passed via --port at pod creation - confirm.
echo " vLLM API: http://$PUBLIC_URL:${VLLM_PORT}"
echo " Health: http://$PUBLIC_URL:${VLLM_PORT}/health"
echo ""
echo "π To monitor:"
echo " runpodctl logs $POD_ID # View logs"
echo " runpodctl ssh $POD_ID # SSH into pod"
echo " runpodctl stop pod $POD_ID # Stop (saves disk)"
echo " runpodctl delete pod $POD_ID # Delete (you lose data)"
echo ""
echo -e "${YELLOW}β οΈ First server startup may take 5-15 minutes as the model loads${NC}"
echo -e "${YELLOW}β οΈ Monitor logs: runpodctl logs $POD_ID${NC}"