Stack-2-9-finetuned / stack /deploy /runpod_deploy.sh
walidsobhie-code
refactor: Squeeze folders further - cleaner structure
65888d5
#!/bin/bash
# Deploys Stack 2.9 to RunPod.
# Prerequisite: the runpodctl CLI is installed and configured.
#
# Every setting below can be overridden through the environment variable
# named inside its ${...:-default} expansion.
set -euo pipefail

# Banner
printf '%s\n' \
  "πŸš€ Deploying Stack 2.9 to RunPod" \
  "================================" \
  ""

# ANSI colour escapes; they are interpreted by `echo -e` at the call sites.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m'

# Deployment settings (environment overrides win over the defaults)
IMAGE="${RUNPOD_IMAGE:-docker.io/library/pytorch:2.1.0-cuda11.8-cudnn8-runtime}"
TEMPLATE_NAME="${RUNPOD_TEMPLATE_NAME:-stack-2.9-template}"
CONTAINER_NAME="${RUNPOD_CONTAINER_NAME:-stack-2.9-server}"
GPU_TYPE="${RUNPOD_GPU_TYPE:-NVIDIA RTX A6000}"
DISK_SIZE="${RUNPOD_DISK_SIZE:-50}"
MODEL_PATH="${MODEL_PATH:-/workspace/models/stack-2.9-awq}"
VLLM_PORT="${VLLM_PORT:-8000}"

# Refuse to continue when the RunPod CLI is not on PATH.
if ! command -v runpodctl >/dev/null 2>&1; then
  echo -e "${RED}❌ runpodctl not found. Install from: https://github.com/runpod/runpodctl${NC}"
  exit 1
fi

# Echo the effective configuration so the operator can verify it.
printf '%s\n' \
  "πŸ“‹ Configuration:" \
  " GPU: $GPU_TYPE" \
  " Disk: ${DISK_SIZE}GB" \
  " Image: $IMAGE" \
  " Model path: $MODEL_PATH" \
  ""
# Step 1: make sure the RunPod template exists (created once, reused later)
echo "πŸ“¦ Creating/verifying RunPod template..."
if runpodctl get template "$TEMPLATE_NAME" &>/dev/null; then
  # Lookup succeeded: reuse the template as-is.
  echo -e "${YELLOW}⚠️ Template already exists, using existing${NC}"
else
  # Lookup failed (its output is discarded), so create the template.
  # NOTE(review): the port mapping targets container port 8000 while the
  # container also receives VLLM_PORT; confirm which port the server binds
  # to when VLLM_PORT is overridden.
  template_args=(
    --name "$TEMPLATE_NAME"
    --image "$IMAGE"
    --docker-run-args "--gpus all -e MODEL_PATH=$MODEL_PATH -e VLLM_PORT=$VLLM_PORT -p $VLLM_PORT:8000"
    --volume "/workspace/models:$MODEL_PATH:ro"
    --volume "/workspace/output:/workspace/output"
    --container-disk-size "${DISK_SIZE}GB"
  )
  runpodctl create template "${template_args[@]}"
  echo -e "${GREEN}βœ… Template created${NC}"
fi
# Step 2: Deploy pod
# Creates the pod from the template and captures the id that runpodctl
# prints for `--query id`; every later step addresses the pod by this id.
echo "☁️ Deploying pod..."
POD_ID=$(runpodctl create pod \
  --name "$CONTAINER_NAME" \
  --gpu-type "$GPU_TYPE" \
  --disk-size "${DISK_SIZE}GB" \
  --template "$TEMPLATE_NAME" \
  --env "MODEL_PATH=$MODEL_PATH" \
  --env "VLLM_PORT=$VLLM_PORT" \
  --port "$VLLM_PORT" \
  --query id) || {
  # Report the failure explicitly instead of relying on a silent set -e exit.
  echo -e "${RED}❌ Failed to create pod${NC}" >&2
  exit 1
}
# A successful exit with empty output would otherwise make the following
# steps operate on ":/workspace/" and an empty pod id.
if [[ -z "$POD_ID" ]]; then
  echo -e "${RED}❌ Pod creation returned no pod id${NC}" >&2
  exit 1
fi
echo -e "${GREEN}βœ… Pod created: $POD_ID${NC}"
echo " Waiting for startup (this may take 2-3 minutes for first-time model load)..."
# Fixed grace period before the pod is contacted; no readiness probe is used.
sleep 60
# Step 3: Copy deployment files
# Archives stack-2.9-deploy/ and requirements.txt (relative to the current
# directory) and uploads the archive to /workspace/ on the pod.
echo "πŸ“€ Copying code to pod..."
# Create deployment package inside a private mktemp directory rather than
# under a predictable /tmp name, and remove it on every exit path.
PACKAGE_DIR=$(mktemp -d "${TMPDIR:-/tmp}/stack-2.9-deploy.XXXXXX") || {
  echo -e "${RED}❌ Failed to create deployment package${NC}"
  exit 1
}
trap 'rm -rf -- "${PACKAGE_DIR:?}"' EXIT
# The file name keeps the stack-2.9-*.tar.gz shape expected on the pod.
TEMP_PACKAGE="$PACKAGE_DIR/stack-2.9-deployment-$(date +%s).tar.gz"
# tar's own diagnostics are left visible so a missing input is identifiable.
if ! tar czf "$TEMP_PACKAGE" stack-2.9-deploy/ requirements.txt; then
  echo -e "${RED}❌ Failed to create deployment package${NC}"
  exit 1
fi
# Copy to pod
if ! runpodctl cp "$TEMP_PACKAGE" "$POD_ID:/workspace/"; then
  echo -e "${RED}❌ Failed to copy package to pod${NC}"
  exit 1
fi
# Extract and setup
# Runs a setup script on the pod: unpack the uploaded archive, install system
# and Python dependencies, warn when the model directory is missing or empty,
# then start vllm_server.py in the background and record its PID.
#
# Quoting rules for the script text below (it sits inside local double quotes
# and is handed to the remote shell wrapped in single quotes):
#   - $MODEL_PATH and ${TEMP_PACKAGE##*/} are expanded locally, on purpose.
#   - \$(...) and \$! are escaped so they are evaluated on the pod.
#   - The text must not contain a single quote character.
# Only the archive uploaded by this run is extracted, so stale
# stack-2.9-*.tar.gz files left by earlier runs cannot break tar.
# NOTE(review): assumes `runpodctl cp` keeps the archive's base name under
# /workspace/ — the original glob relied on the same thing.
echo "πŸ”§ Setting up on pod..."
runpodctl ssh "$POD_ID" bash -c "'
set -euo pipefail
cd /workspace
tar xzf \"${TEMP_PACKAGE##*/}\"
# Install system dependencies
apt-get update && apt-get install -y --no-install-recommends \
  python3-pip \
  python3-venv \
  curl \
  && rm -rf /var/lib/apt/lists/*
# Upgrade pip and install requirements
python3 -m pip install --upgrade pip setuptools wheel
python3 -m pip install -r requirements.txt
# Check if model exists
if [ ! -d \"$MODEL_PATH\" ] || [ -z \"\$(ls -A \"$MODEL_PATH\" 2>/dev/null)\" ]; then
  echo \"⚠️ Model not found at $MODEL_PATH\"
  echo \" You have two options:\"
  echo \" 1. Upload your model to: $MODEL_PATH\"
  echo \" 2. Set MODEL_PATH to a HuggingFace model name and it will be downloaded\"
  echo \" Example: export MODEL_PATH=meta-llama/Llama-3.1-8B-Instruct\"
  echo \" Note: Downloading large models may take hours and exceed pod disk space.\"
  echo \" Recommendation: Upload AWQ-quantized model to S3 and download it.\"
fi
echo \"Starting vLLM server...\"
cd /workspace/stack-2.9-deploy
# Same interpreter that received the requirements above
nohup python3 vllm_server.py > vllm.log 2>&1 &
echo \$! > /tmp/vllm.pid
'" || {
  echo -e "${RED}❌ Failed to setup pod${NC}"
  exit 1
}
# Step 4: Wait and check status
# Gives the server a short head start, shows the pod status, then prints the
# endpoints and follow-up commands for the operator.
echo "⏳ Waiting for vLLM server to start..."
sleep 30
# Get pod status
echo ""
echo "πŸ“Š Pod status:"
# Informational only: a failed status query must not abort the script after
# the deployment itself has succeeded.
runpodctl get pod "$POD_ID" \
  || echo -e "${YELLOW}⚠️ Could not retrieve pod status${NC}" >&2
# Get public URL; fall back to "pending" when the query fails or prints nothing
PUBLIC_URL=$(runpodctl get pod "$POD_ID" --query "url" --output text 2>/dev/null || echo "pending")
PUBLIC_URL="${PUBLIC_URL:-pending}"
echo ""
echo -e "${GREEN}βœ… Deployment initiated!${NC}"
echo " Pod ID: $POD_ID"
# Use the configured port (default 8000) instead of a hard-coded value.
echo " vLLM API: http://$PUBLIC_URL:$VLLM_PORT"
echo " Health: http://$PUBLIC_URL:$VLLM_PORT/health"
echo ""
# NOTE(review): confirm these subcommands against the installed runpodctl
# version before relying on the hints below.
echo "πŸ“‹ To monitor:"
echo " runpodctl logs $POD_ID # View logs"
echo " runpodctl ssh $POD_ID # SSH into pod"
echo " runpodctl stop pod $POD_ID # Stop (saves disk)"
echo " runpodctl delete pod $POD_ID # Delete (you lose data)"
echo ""
echo -e "${YELLOW}⚠️ First server startup may take 5-15 minutes as the model loads${NC}"
echo -e "${YELLOW}⚠️ Monitor logs: runpodctl logs $POD_ID${NC}"