#!/bin/bash
# NOTE(review): three lines of GitHub web-UI paste residue (author handle,
# commit message, commit hash) preceded the shebang and would have been
# executed as shell commands. Preserved here as comments:
#   walidsobhie-code
#   refactor: Squeeze folders further - cleaner structure
#   65888d5
#
# Stack 2.9 Turnkey Deployment Script
# One-command deployment for Local, RunPod, and VastAI platforms
#
# Usage:
# ./deploy.sh [platform] [options]
#
# Platforms:
# local - Deploy locally with docker-compose
# runpod - Deploy to RunPod (requires runpodctl)
# vastai - Deploy to Vast.ai (requires vastai)
# kubernetes - Deploy to Kubernetes cluster
#
# Examples:
# ./deploy.sh local --model TheBloke/Llama-2-7B-Chat-AWQ
# ./deploy.sh runpod --gpu A100-40GB
# ./deploy.sh kubernetes --namespace inference
#
set -euo pipefail

# ANSI color constants for terminal output. All are readonly: nothing in the
# script reassigns them.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
# Renamed from lowercase 'cyan' to match the UPPER_SNAKE convention of the
# other color constants (currently unused by any function).
readonly CYAN='\033[0;36m'
readonly NC='\033[0m' # No Color

# Deployment defaults. PLATFORM is the first positional argument; the
# remaining options are parsed by parse_args() into the globals below.
PLATFORM="${1:-local}"
# Resolve the script's own directory; assignment split from `readonly` so a
# failed command substitution is not masked (ShellCheck SC2155).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly COMPOSE_FILE="${SCRIPT_DIR}/docker-compose.yaml"
readonly IMAGE_NAME="stack-2.9-server"
readonly VERSION="2.9.0"
BUILD_ARGS="" # extra --build-arg flags accumulated by build_image()
MODEL_ID=""   # --model: Hugging Face model ID
HF_TOKEN=""   # --token: Hugging Face access token for gated models

# Print banner
echo -e "${BLUE}"
echo "╔════════════════════════════════════════════════════╗"
echo "║ Stack 2.9 Deployment Tool v${VERSION} ║"
echo "║ Turnkey LLM Inference Server Deployment ║"
echo "╚════════════════════════════════════════════════════╝"
echo -e "${NC}"
# Parse arguments
#######################################
# Parse command-line options that follow the platform argument.
# Globals written: MODEL_ID, HF_TOKEN, GPU_TYPE, K8S_NAMESPACE
# Arguments: "$@" exactly as passed to the script (platform first)
# Outputs: error messages to stdout on bad options
# Returns: 0, or exits 1 on bad/incomplete options, exits 0 on --help
#######################################
parse_args() {
  # Drop the leading platform argument if present. The original used a bare
  # `shift`, which returns non-zero on an empty argument list and therefore
  # aborted the whole script under `set -e` when run with no arguments.
  if [[ $# -gt 0 ]]; then
    shift
  fi
  while [[ $# -gt 0 ]]; do
    case $1 in
      --model | --token | --gpu | --namespace)
        # All of these take a value; fail with a clear message instead of
        # tripping `set -u` on a missing "$2".
        if [[ $# -lt 2 ]]; then
          echo -e "${RED}Option $1 requires a value${NC}"
          exit 1
        fi
        case $1 in
          --model) MODEL_ID="$2" ;;
          --token) HF_TOKEN="$2" ;;
          --gpu) GPU_TYPE="$2" ;;
          --namespace) K8S_NAMESPACE="$2" ;;
        esac
        shift 2
        ;;
      --help)
        show_help
        exit 0
        ;;
      *)
        echo -e "${RED}Unknown option: $1${NC}"
        show_help
        exit 1
        ;;
    esac
  done
}
#######################################
# Print usage, platform list, options, and examples for this script.
# Outputs: help text to stdout
#######################################
show_help() {
  # Single here-doc instead of an echo chain; $0 still expands.
  cat << EOF
Usage: $0 [platform] [options]

Platforms:
 local Deploy with docker-compose (default)
 runpod Deploy to RunPod.io
 vastai Deploy to Vast.ai
 kubernetes Deploy to Kubernetes

Options:
 --model ID Hugging Face model ID (default: TheBloke/Llama-2-7B-Chat-AWQ)
 --token TOKEN Hugging Face token for gated models
 --gpu TYPE GPU type for cloud deployments
 --namespace NS Kubernetes namespace (default: default)
 --help Show this help message

Examples:
 $0 local --model mistralai/Mistral-7B-Instruct-v0.2
 $0 runpod --gpu A100-40GB
 $0 kubernetes --namespace inference
EOF
}
# Platform detection
#######################################
# Probe the host for an NVIDIA GPU and for Docker tooling.
# Globals written: HAS_GPU, HAS_DOCKER (string "true"/"false")
# Outputs: status lines to stdout
#######################################
detect_platform() {
  HAS_GPU=false
  HAS_DOCKER=false

  if command -v nvidia-smi > /dev/null 2>&1; then
    HAS_GPU=true
    echo -e "${GREEN}✓ NVIDIA GPU detected${NC}"
  else
    echo -e "${YELLOW}⚠ No NVIDIA GPU detected - CPU-only mode${NC}"
  fi

  if command -v docker > /dev/null 2>&1 && command -v docker-compose > /dev/null 2>&1; then
    HAS_DOCKER=true
    echo -e "${GREEN}✓ Docker & Docker Compose available${NC}"
  else
    echo -e "${RED}✗ Docker not available${NC}"
  fi
}
# Build Docker image
#######################################
# Build the Docker image, tagging both the version and "latest".
# Globals read: MODEL_ID, IMAGE_NAME, VERSION, SCRIPT_DIR
# Globals written: BUILD_ARGS (kept updated for compatibility)
# Returns: exits non-zero (via set -e) if the build fails
#######################################
build_image() {
  echo -e "\n${BLUE}Building Docker image...${NC}"
  # Collect optional build arguments in an array so a MODEL_ID containing
  # unusual characters survives word-splitting.
  local -a extra_args=()
  if [ -n "$MODEL_ID" ]; then
    BUILD_ARGS="$BUILD_ARGS --build-arg MODEL_ID=$MODEL_ID"
    extra_args+=(--build-arg "MODEL_ID=$MODEL_ID")
  fi
  # BUG FIX: the original assembled BUILD_ARGS but never passed it to
  # `docker build`, so --model had no effect on the built image. The
  # ${arr[@]+...} form keeps bash < 4.4 happy with `set -u` on empty arrays.
  docker build \
    --build-arg PYTHON_VERSION=3.10 \
    --build-arg VLLM_VERSION=0.6.3 \
    --build-arg CUDA_VERSION=12.1.0 \
    ${extra_args[@]+"${extra_args[@]}"} \
    -t "${IMAGE_NAME}:${VERSION}" \
    -t "${IMAGE_NAME}:latest" \
    "$SCRIPT_DIR"
  echo -e "${GREEN}✓ Docker image built successfully${NC}"
}
# Deploy locally with docker-compose
#######################################
# Deploy the stack locally via Docker Compose.
# Globals read: HAS_DOCKER (set by detect_platform), SCRIPT_DIR,
#   COMPOSE_FILE, MODEL_ID, HF_TOKEN; environment variables
#   TENSOR_PARALLEL_SIZE, GPU_MEMORY_UTILIZATION, MAX_MODEL_LEN,
#   MAX_NUM_SEQS, MAX_NUM_BATCHED_TOKENS override the defaults below.
# Side effects: builds the image, overwrites ${SCRIPT_DIR}/.env,
#   starts the compose stack detached.
#######################################
deploy_local() {
  echo -e "\n${BLUE}Deploying locally with Docker Compose...${NC}"
  if [ "$HAS_DOCKER" = false ]; then
    echo -e "${RED}Error: Docker is required for local deployment${NC}"
    exit 1
  fi
  # Build image
  build_image
  # Create .env file consumed by docker-compose. The here-doc is unquoted,
  # so each ${VAR:-default} expands now: caller-exported env vars win over
  # the defaults written on the right of ':-'.
  ENV_FILE="${SCRIPT_DIR}/.env"
  cat > "$ENV_FILE" << EOF
# Stack 2.9 Local Deployment Configuration
MODEL_ID=${MODEL_ID:-TheBloke/Llama-2-7B-Chat-AWQ}
HUGGING_FACE_TOKEN=${HF_TOKEN}
QUANTIZATION=awq
TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.9}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
MAX_NUM_SEQS=${MAX_NUM_SEQS:-64}
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-4096}
HOST=0.0.0.0
PORT=8000
EOF
  echo -e "${YELLOW}Configuration saved to $ENV_FILE${NC}"
  # Start services detached; health_check (called by main) polls afterwards.
  echo -e "\n${BLUE}Starting Stack 2.9 service...${NC}"
  docker-compose -f "$COMPOSE_FILE" up -d
  echo -e "\n${GREEN}✓ Stack 2.9 is starting...${NC}"
  echo -e " API Endpoint: ${BLUE}http://localhost:8000${NC}"
  echo -e " Health Check: ${BLUE}http://localhost:8000/health${NC}"
  echo -e " API Docs: ${BLUE}http://localhost:8000/docs${NC}"
  echo -e "\n${YELLOW}View logs: docker-compose -f '$COMPOSE_FILE' logs -f${NC}"
  echo -e "${YELLOW}Stop service: docker-compose -f '$COMPOSE_FILE' down${NC}"
}
# Deploy to RunPod
#######################################
# Build and push the image, then register a template on RunPod.
# Prompts interactively for a Docker registry; requires `runpodctl`.
# Globals read: SCRIPT_DIR, IMAGE_NAME, VERSION
# Side effects: pushes images to the registry, rewrites
#   runpod-template.json in place, creates a RunPod template.
#######################################
deploy_runpod() {
  echo -e "\n${BLUE}Deploying to RunPod...${NC}"
  if ! command -v runpodctl &> /dev/null; then
    echo -e "${RED}Error: runpodctl not found${NC}"
    echo "Install with: npm install -g runpodctl"
    exit 1
  fi
  # Build and push image
  echo -e "${YELLOW}Building and pushing Docker image to registry...${NC}"
  # User needs to set their registry
  # NOTE(review): `read` without -r interprets backslashes; registry names
  # shouldn't contain any, but `read -r` would be the safer idiom.
  read -p "Enter your Docker registry (e.g., docker.io/username): " REGISTRY
  if [ -z "$REGISTRY" ]; then
    echo -e "${RED}Registry is required for RunPod deployment${NC}"
    exit 1
  fi
  build_image
  docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
  docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:latest"
  docker push "${REGISTRY}/${IMAGE_NAME}:latest"
  docker push "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
  # Update runpod-template.json with registry. -i.bak keeps both GNU and
  # BSD sed happy; the backup is removed immediately after.
  sed -i.bak "s|your-registry/stack-2.9:latest|${REGISTRY}/${IMAGE_NAME}:latest|g" \
    "${SCRIPT_DIR}/runpod-template.json"
  rm -f "${SCRIPT_DIR}/runpod-template.json.bak"
  # Create template on RunPod
  echo -e "${YELLOW}Creating RunPod template...${NC}"
  runpodctl create template \
    --template-file "${SCRIPT_DIR}/runpod-template.json" \
    --name "stack-2.9-${VERSION}"
  echo -e "\n${GREEN}✓ RunPod template created!${NC}"
  echo -e "${YELLOW}Next steps:${NC}"
  echo " 1. Go to RunPod.io and deploy using template 'stack-2.9-${VERSION}'"
  echo " 2. Set your desired GPU type"
  echo " 3. Configure MODEL_ID and HUGGING_FACE_TOKEN environment variables"
  echo " 4. The server will start automatically on port 8000"
}
# Deploy to VastAI
#######################################
# Build and push the image, then prepare a Vast.ai template.
# Prompts interactively for a Docker registry; requires the `vastai` CLI.
# Globals read: SCRIPT_DIR, IMAGE_NAME, VERSION
# Side effects: pushes images to the registry, rewrites
#   vastai-template.json in place. Does not launch an instance itself.
#######################################
deploy_vastai() {
  echo -e "\n${BLUE}Deploying to Vast.ai...${NC}"
  if ! command -v vastai &> /dev/null; then
    echo -e "${RED}Error: vastai CLI not found${NC}"
    echo "Install from: https://vast.ai/docs/cli"
    exit 1
  fi
  # Build and push image
  # NOTE(review): `read -r` would be the safer idiom here.
  read -p "Enter your Docker registry (e.g., docker.io/username): " REGISTRY
  if [ -z "$REGISTRY" ]; then
    echo -e "${RED}Registry is required for VastAI deployment${NC}"
    exit 1
  fi
  build_image
  docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
  docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:latest"
  docker push "${REGISTRY}/${IMAGE_NAME}:latest"
  docker push "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
  # Update vastai-template.json in place (-i.bak for GNU/BSD sed compat;
  # the backup is removed right away).
  sed -i.bak "s|your-registry/stack-2.9:latest|${REGISTRY}/${IMAGE_NAME}:latest|g" \
    "${SCRIPT_DIR}/vastai-template.json"
  rm -f "${SCRIPT_DIR}/vastai-template.json.bak"
  echo -e "${GREEN}✓ VastAI template ready!${NC}"
  echo -e "${YELLOW}Deploy with:${NC}"
  echo " vastai create instance --template-file ${SCRIPT_DIR}/vastai-template.json"
  echo ""
  echo "Or manually:"
  echo " 1. Select GPU: RTX 4090 24GB or higher"
  echo " 2. Set max bid to ~\$0.50/hour"
  echo " 3. Upload template and launch"
}
# Deploy to Kubernetes
#######################################
# Deploy manifests from ${SCRIPT_DIR}/kubernetes/ to a cluster.
# Globals read: K8S_NAMESPACE (--namespace), SCRIPT_DIR, IMAGE_NAME, VERSION
# Side effects: optionally builds/pushes the image, creates the namespace,
#   applies deployment/pvc/service/hpa manifests, waits for availability.
# NOTE(review): --help advertises namespace default "default", but the code
#   defaults to "stack-2.9" — confirm which is intended.
#######################################
deploy_kubernetes() {
  echo -e "\n${BLUE}Deploying to Kubernetes...${NC}"
  K8S_NAMESPACE="${K8S_NAMESPACE:-stack-2.9}"
  # Check kubectl
  if ! command -v kubectl &> /dev/null; then
    echo -e "${RED}Error: kubectl not found${NC}"
    exit 1
  fi
  # Build and push image (requires image builder)
  read -p "Enter your Docker registry: " REGISTRY
  if [ -z "$REGISTRY" ]; then
    echo -e "${YELLOW}Using local image - ensure your K8s cluster can access it${NC}"
  else
    build_image
    docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
    docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:latest"
    docker push "${REGISTRY}/${IMAGE_NAME}:latest"
    IMAGE="${REGISTRY}/${IMAGE_NAME}:latest"
  fi
  # Fall back to the locally-built tag when no registry was given.
  IMAGE="${IMAGE:-${IMAGE_NAME}:latest}"
  # Apply manifests
  echo -e "${YELLOW}Applying Kubernetes manifests...${NC}"
  # Create namespace if needed (dry-run | apply makes this idempotent)
  kubectl create namespace "$K8S_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -
  # Update image in deployment manifest on the fly, then apply from stdin
  sed "s|your-registry/stack-2.9:latest|${IMAGE}|g" \
    "${SCRIPT_DIR}/kubernetes/deployment.yaml" | \
    kubectl apply -n "$K8S_NAMESPACE" -f -
  # Apply PVC
  kubectl apply -n "$K8S_NAMESPACE" -f "${SCRIPT_DIR}/kubernetes/pvc.yaml"
  # Apply service
  kubectl apply -n "$K8S_NAMESPACE" -f "${SCRIPT_DIR}/kubernetes/service.yaml"
  # Apply HPA
  kubectl apply -n "$K8S_NAMESPACE" -f "${SCRIPT_DIR}/kubernetes/hpa.yaml"
  echo -e "\n${GREEN}✓ Kubernetes deployment complete!${NC}"
  echo -e " Namespace: ${K8S_NAMESPACE}"
  echo -e " Checking deployment status..."
  # Blocks up to 5 minutes; under `set -e` a wait timeout aborts the script.
  kubectl wait --for=condition=available --timeout=300s deployment/stack-2.9 -n "$K8S_NAMESPACE"
  # Get service URL
  SERVICE_TYPE=$(kubectl get svc stack-2.9 -n "$K8S_NAMESPACE" -o jsonpath='{.spec.type}')
  if [ "$SERVICE_TYPE" = "LoadBalancer" ]; then
    EXTERNAL_IP=$(kubectl get svc stack-2.9 -n "$K8S_NAMESPACE" -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
    echo -e " API Endpoint: ${BLUE}http://${EXTERNAL_IP}:8000${NC}"
  else
    # Non-LoadBalancer service: report access via the first node's
    # InternalIP and the service's first nodePort.
    NODE_PORT=$(kubectl get svc stack-2.9 -n "$K8S_NAMESPACE" -o jsonpath='{.spec.ports[0].nodePort}')
    NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}')
    echo -e " API Endpoint: ${BLUE}http://${NODE_IP}:${NODE_PORT}${NC}"
  fi
}
# Health check after deployment
#######################################
# Poll a health endpoint until the server responds or we time out.
# Arguments: $1 (optional) - health URL; defaults to
#   http://localhost:${PORT:-8000}/health (backward compatible).
# Globals read: PORT (optional), COMPOSE_FILE (for the failure hint)
# Returns: 0 once healthy, 1 after max_attempts * 2 seconds
#######################################
health_check() {
  local url="${1:-http://localhost:${PORT:-8000}/health}"
  local max_attempts=30
  local attempt=1
  echo -e "\n${YELLOW}Waiting for server to become healthy...${NC}"
  while [ "$attempt" -le "$max_attempts" ]; do
    if curl -sf "$url" > /dev/null 2>&1; then
      echo -e "${GREEN}✓ Server is healthy!${NC}"
      echo -e "Health check response:"
      # Pretty-print when python3 is available; fall back to raw output.
      curl -s "$url" | python3 -m json.tool 2>/dev/null || curl -s "$url"
      return 0
    fi
    echo -n "."
    sleep 2
    # Plain assignment instead of `((attempt++))`: the latter returns the
    # *pre*-increment value and would trip `set -e` for a counter at 0.
    attempt=$((attempt + 1))
  done
  echo -e "\n${RED}✗ Health check failed after $((max_attempts * 2)) seconds${NC}"
  echo "Check logs with: docker-compose -f '$COMPOSE_FILE' logs"
  return 1
}
# Main execution
#######################################
# Dispatch to the platform-specific deployment routine.
# Globals read: PLATFORM (first positional argument, default "local")
# Returns: exits 1 on an unknown platform
#######################################
main() {
  case "$PLATFORM" in
    "" | local)
      detect_platform
      deploy_local
      health_check
      ;;
    runpod) detect_platform; deploy_runpod ;;
    vastai) detect_platform; deploy_vastai ;;
    kubernetes | k8s) deploy_kubernetes ;;
    *)
      echo -e "${RED}Unknown platform: $PLATFORM${NC}"
      show_help
      exit 1
      ;;
  esac
}

# Entry point: parse the option flags, then dispatch on PLATFORM.
parse_args "$@"
main