#!/bin/bash
#
# Stack 2.9 deployment tool: builds the LLM inference-server image and
# deploys it locally (docker-compose), to RunPod, Vast.ai, or Kubernetes.
#
# Usage: deploy.sh [platform] [options]   (run with --help for details)

set -euo pipefail

# ANSI color codes for terminal output. Marked readonly: never reassigned.
# (Renamed lowercase `cyan` to CYAN for naming consistency; currently unused.)
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m'

# Deployment configuration. PLATFORM comes from the first CLI argument
# (default: local); the remaining mutable globals are filled by parse_args().
PLATFORM="${1:-local}"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
COMPOSE_FILE="${SCRIPT_DIR}/docker-compose.yaml"
readonly IMAGE_NAME="stack-2.9-server"
readonly VERSION="2.9.0"
BUILD_ARGS=""
MODEL_ID=""
HF_TOKEN=""
GPU_TYPE=""        # optional, set via --gpu; initialized for `set -u` safety
K8S_NAMESPACE=""   # optional, set via --namespace; initialized for `set -u` safety

# Startup banner.
echo -e "${BLUE}"
echo "╔════════════════════════════════════════════════════╗"
echo "║        Stack 2.9 Deployment Tool v${VERSION}               ║"
echo "║     Turnkey LLM Inference Server Deployment        ║"
echo "╚════════════════════════════════════════════════════╝"
echo -e "${NC}"
|
|
| |
#######################################
# Parse CLI options that follow the platform argument.
# Globals:   MODEL_ID, HF_TOKEN, GPU_TYPE, K8S_NAMESPACE (written)
# Arguments: the script's full "$@"; $1 is the platform and is discarded
# Returns:   0 on success; exits 1 on unknown option or missing value,
#            exits 0 after --help
#######################################
parse_args() {
    # Drop the platform argument only if one exists. A bare `shift` with
    # $# == 0 returns non-zero and killed the whole script under `set -e`
    # whenever the tool was invoked with no arguments at all (the
    # documented default `local` case).
    if [[ $# -gt 0 ]]; then
        shift
    fi

    while [[ $# -gt 0 ]]; do
        case $1 in
            --model)
                [[ $# -ge 2 ]] || { echo -e "${RED}--model requires a value${NC}"; exit 1; }
                MODEL_ID="$2"
                shift 2
                ;;
            --token)
                [[ $# -ge 2 ]] || { echo -e "${RED}--token requires a value${NC}"; exit 1; }
                HF_TOKEN="$2"
                shift 2
                ;;
            --gpu)
                [[ $# -ge 2 ]] || { echo -e "${RED}--gpu requires a value${NC}"; exit 1; }
                GPU_TYPE="$2"
                shift 2
                ;;
            --namespace)
                [[ $# -ge 2 ]] || { echo -e "${RED}--namespace requires a value${NC}"; exit 1; }
                K8S_NAMESPACE="$2"
                shift 2
                ;;
            --help)
                show_help
                exit 0
                ;;
            *)
                echo -e "${RED}Unknown option: $1${NC}"
                show_help
                exit 1
                ;;
        esac
    done
}
|
|
# Print the full usage text (platforms, options, examples) to stdout.
show_help() {
    cat << EOF
Usage: $0 [platform] [options]

Platforms:
  local         Deploy with docker-compose (default)
  runpod        Deploy to RunPod.io
  vastai        Deploy to Vast.ai
  kubernetes    Deploy to Kubernetes

Options:
  --model ID        Hugging Face model ID (default: TheBloke/Llama-2-7B-Chat-AWQ)
  --token TOKEN     Hugging Face token for gated models
  --gpu TYPE        GPU type for cloud deployments
  --namespace NS    Kubernetes namespace (default: default)
  --help            Show this help message

Examples:
  $0 local --model mistralai/Mistral-7B-Instruct-v0.2
  $0 runpod --gpu A100-40GB
  $0 kubernetes --namespace inference
EOF
}
|
|
| |
# Probe the host for an NVIDIA GPU and a usable Docker + Compose toolchain.
# Globals written: HAS_GPU, HAS_DOCKER (the strings "true"/"false").
detect_platform() {
    # Assume nothing is available, then upgrade on successful probes.
    HAS_GPU=false
    HAS_DOCKER=false

    if command -v nvidia-smi > /dev/null 2>&1; then
        HAS_GPU=true
        echo -e "${GREEN}✓ NVIDIA GPU detected${NC}"
    else
        echo -e "${YELLOW}⚠ No NVIDIA GPU detected - CPU-only mode${NC}"
    fi

    if command -v docker > /dev/null 2>&1 && command -v docker-compose > /dev/null 2>&1; then
        HAS_DOCKER=true
        echo -e "${GREEN}✓ Docker & Docker Compose available${NC}"
    else
        echo -e "${RED}✗ Docker not available${NC}"
    fi
}
|
|
| |
#######################################
# Build the Stack 2.9 Docker image, tagged with both the version and latest.
# Globals:   MODEL_ID, IMAGE_NAME, VERSION, SCRIPT_DIR, BLUE/GREEN/NC (read)
# Outputs:   build progress to stdout
# Returns:   non-zero (aborting under `set -e`) if docker build fails
#######################################
build_image() {
    echo -e "\n${BLUE}Building Docker image...${NC}"

    # Collect build arguments in an array so values containing spaces
    # survive word-splitting intact.
    local -a build_args=(
        --build-arg PYTHON_VERSION=3.10
        --build-arg VLLM_VERSION=0.6.3
        --build-arg CUDA_VERSION=12.1.0
    )

    # BUG FIX: previously the model flag was appended to the BUILD_ARGS
    # string, which `docker build` never received - the MODEL_ID build-arg
    # was silently dropped.
    if [ -n "$MODEL_ID" ]; then
        build_args+=(--build-arg "MODEL_ID=${MODEL_ID}")
    fi

    docker build \
        "${build_args[@]}" \
        -t "${IMAGE_NAME}:${VERSION}" \
        -t "${IMAGE_NAME}:latest" \
        "$SCRIPT_DIR"

    echo -e "${GREEN}✓ Docker image built successfully${NC}"
}
|
|
| |
#######################################
# Deploy locally: build the image, write .env, start via docker-compose.
# Globals:   HAS_DOCKER, SCRIPT_DIR, COMPOSE_FILE, MODEL_ID, HF_TOKEN (read)
#            ENV_FILE (written)
# Returns:   exits 1 when Docker or the compose file is unavailable
#######################################
deploy_local() {
    echo -e "\n${BLUE}Deploying locally with Docker Compose...${NC}"

    if [ "$HAS_DOCKER" = false ]; then
        echo -e "${RED}Error: Docker is required for local deployment${NC}"
        exit 1
    fi

    # Fail early with a clear message instead of an opaque compose error.
    if [ ! -f "$COMPOSE_FILE" ]; then
        echo -e "${RED}Error: compose file not found: $COMPOSE_FILE${NC}"
        exit 1
    fi

    build_image

    # Write runtime configuration. Tunables already set in the caller's
    # environment take precedence over the defaults below.
    ENV_FILE="${SCRIPT_DIR}/.env"
    cat > "$ENV_FILE" << EOF
# Stack 2.9 Local Deployment Configuration
MODEL_ID=${MODEL_ID:-TheBloke/Llama-2-7B-Chat-AWQ}
HUGGING_FACE_TOKEN=${HF_TOKEN}
QUANTIZATION=awq
TENSOR_PARALLEL_SIZE=${TENSOR_PARALLEL_SIZE:-1}
GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION:-0.9}
MAX_MODEL_LEN=${MAX_MODEL_LEN:-4096}
MAX_NUM_SEQS=${MAX_NUM_SEQS:-64}
MAX_NUM_BATCHED_TOKENS=${MAX_NUM_BATCHED_TOKENS:-4096}
HOST=0.0.0.0
PORT=8000
EOF

    # SECURITY: the env file can contain the HF token in plaintext -
    # restrict it to the owner.
    chmod 600 "$ENV_FILE"

    echo -e "${YELLOW}Configuration saved to $ENV_FILE${NC}"

    echo -e "\n${BLUE}Starting Stack 2.9 service...${NC}"
    docker-compose -f "$COMPOSE_FILE" up -d

    echo -e "\n${GREEN}✓ Stack 2.9 is starting...${NC}"
    echo -e "  API Endpoint:  ${BLUE}http://localhost:8000${NC}"
    echo -e "  Health Check:  ${BLUE}http://localhost:8000/health${NC}"
    echo -e "  API Docs:      ${BLUE}http://localhost:8000/docs${NC}"
    echo -e "\n${YELLOW}View logs:    docker-compose -f '$COMPOSE_FILE' logs -f${NC}"
    echo -e "${YELLOW}Stop service: docker-compose -f '$COMPOSE_FILE' down${NC}"
}
|
|
| |
#######################################
# Build + push the image to a user-supplied registry and register a RunPod
# template that points at it. Requires the runpodctl CLI.
# Globals:   SCRIPT_DIR, IMAGE_NAME, VERSION, color vars (read)
# Outputs:   progress and manual follow-up steps to stdout
# Returns:   exits 1 if runpodctl is missing or no registry is entered
#######################################
deploy_runpod() {
    echo -e "\n${BLUE}Deploying to RunPod...${NC}"

    if ! command -v runpodctl &> /dev/null; then
        echo -e "${RED}Error: runpodctl not found${NC}"
        # NOTE(review): runpodctl appears to be a standalone binary from
        # RunPod's GitHub releases; confirm whether the npm instruction
        # below is actually valid.
        echo "Install with: npm install -g runpodctl"
        exit 1
    fi

    echo -e "${YELLOW}Building and pushing Docker image to registry...${NC}"

    # The image must live in a registry RunPod's workers can pull from.
    read -p "Enter your Docker registry (e.g., docker.io/username): " REGISTRY
    if [ -z "$REGISTRY" ]; then
        echo -e "${RED}Registry is required for RunPod deployment${NC}"
        exit 1
    fi

    # Build once, then publish both the pinned version tag and latest.
    build_image
    docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
    docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:latest"
    docker push "${REGISTRY}/${IMAGE_NAME}:latest"
    docker push "${REGISTRY}/${IMAGE_NAME}:${VERSION}"

    # Substitute the pushed image reference into the template in place.
    # The -i.bak suffix keeps the edit portable to BSD/macOS sed; the
    # backup copy is immediately discarded.
    sed -i.bak "s|your-registry/stack-2.9:latest|${REGISTRY}/${IMAGE_NAME}:latest|g" \
        "${SCRIPT_DIR}/runpod-template.json"
    rm -f "${SCRIPT_DIR}/runpod-template.json.bak"

    # Register the template with RunPod under a version-stamped name.
    echo -e "${YELLOW}Creating RunPod template...${NC}"
    runpodctl create template \
        --template-file "${SCRIPT_DIR}/runpod-template.json" \
        --name "stack-2.9-${VERSION}"

    echo -e "\n${GREEN}✓ RunPod template created!${NC}"
    echo -e "${YELLOW}Next steps:${NC}"
    echo " 1. Go to RunPod.io and deploy using template 'stack-2.9-${VERSION}'"
    echo " 2. Set your desired GPU type"
    echo " 3. Configure MODEL_ID and HUGGING_FACE_TOKEN environment variables"
    echo " 4. The server will start automatically on port 8000"
}
|
|
| |
#######################################
# Build + push the image to a user-supplied registry and prepare a Vast.ai
# instance template pointing at it. Requires the vastai CLI.
# Globals:   SCRIPT_DIR, IMAGE_NAME, VERSION, color vars (read)
# Outputs:   progress and launch instructions to stdout
# Returns:   exits 1 if the vastai CLI is missing or no registry is entered
#######################################
deploy_vastai() {
    echo -e "\n${BLUE}Deploying to Vast.ai...${NC}"

    if ! command -v vastai &> /dev/null; then
        echo -e "${RED}Error: vastai CLI not found${NC}"
        echo "Install from: https://vast.ai/docs/cli"
        exit 1
    fi

    # The image must live in a registry Vast.ai hosts can pull from.
    read -p "Enter your Docker registry (e.g., docker.io/username): " REGISTRY
    if [ -z "$REGISTRY" ]; then
        echo -e "${RED}Registry is required for VastAI deployment${NC}"
        exit 1
    fi

    # Build once, then publish both the pinned version tag and latest.
    build_image
    docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
    docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:latest"
    docker push "${REGISTRY}/${IMAGE_NAME}:latest"
    docker push "${REGISTRY}/${IMAGE_NAME}:${VERSION}"

    # Substitute the pushed image reference into the template in place.
    # The -i.bak suffix keeps the edit portable to BSD/macOS sed; the
    # backup copy is immediately discarded.
    sed -i.bak "s|your-registry/stack-2.9:latest|${REGISTRY}/${IMAGE_NAME}:latest|g" \
        "${SCRIPT_DIR}/vastai-template.json"
    rm -f "${SCRIPT_DIR}/vastai-template.json.bak"

    echo -e "${GREEN}✓ VastAI template ready!${NC}"
    echo -e "${YELLOW}Deploy with:${NC}"
    echo " vastai create instance --template-file ${SCRIPT_DIR}/vastai-template.json"
    echo ""
    echo "Or manually:"
    echo " 1. Select GPU: RTX 4090 24GB or higher"
    echo " 2. Set max bid to ~\$0.50/hour"
    echo " 3. Upload template and launch"
}
|
|
| |
#######################################
# Deploy Stack 2.9 to a Kubernetes cluster: namespace, PVC, deployment,
# service, HPA, then wait for availability and print the endpoint.
# Globals:   K8S_NAMESPACE (read/defaulted), SCRIPT_DIR, IMAGE_NAME,
#            VERSION, color vars (read)
# Returns:   exits 1 if kubectl is missing; non-zero if the rollout
#            never becomes available within the wait timeout
#######################################
deploy_kubernetes() {
    echo -e "\n${BLUE}Deploying to Kubernetes...${NC}"

    K8S_NAMESPACE="${K8S_NAMESPACE:-stack-2.9}"

    if ! command -v kubectl &> /dev/null; then
        echo -e "${RED}Error: kubectl not found${NC}"
        exit 1
    fi

    # Without a registry the cluster must be able to see the local image
    # (e.g. kind/minikube image loading).
    read -p "Enter your Docker registry: " REGISTRY
    if [ -z "$REGISTRY" ]; then
        echo -e "${YELLOW}Using local image - ensure your K8s cluster can access it${NC}"
    else
        build_image
        docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
        docker tag "${IMAGE_NAME}:latest" "${REGISTRY}/${IMAGE_NAME}:latest"
        docker push "${REGISTRY}/${IMAGE_NAME}:latest"
        # FIX: also push the version tag (it was tagged but never pushed,
        # unlike the RunPod/VastAI flows).
        docker push "${REGISTRY}/${IMAGE_NAME}:${VERSION}"
        IMAGE="${REGISTRY}/${IMAGE_NAME}:latest"
    fi

    IMAGE="${IMAGE:-${IMAGE_NAME}:latest}"

    echo -e "${YELLOW}Applying Kubernetes manifests...${NC}"

    # Idempotent namespace creation (apply of a dry-run render).
    kubectl create namespace "$K8S_NAMESPACE" --dry-run=client -o yaml | kubectl apply -f -

    # FIX: create storage before the deployment that mounts it, so pods
    # don't start Pending on a missing PVC.
    kubectl apply -n "$K8S_NAMESPACE" -f "${SCRIPT_DIR}/kubernetes/pvc.yaml"

    # Render the deployment with the real image reference, then apply.
    sed "s|your-registry/stack-2.9:latest|${IMAGE}|g" \
        "${SCRIPT_DIR}/kubernetes/deployment.yaml" | \
        kubectl apply -n "$K8S_NAMESPACE" -f -

    kubectl apply -n "$K8S_NAMESPACE" -f "${SCRIPT_DIR}/kubernetes/service.yaml"
    kubectl apply -n "$K8S_NAMESPACE" -f "${SCRIPT_DIR}/kubernetes/hpa.yaml"

    echo -e "\n${GREEN}✓ Kubernetes deployment complete!${NC}"
    echo -e "  Namespace: ${K8S_NAMESPACE}"
    echo -e "  Checking deployment status..."

    kubectl wait --for=condition=available --timeout=300s deployment/stack-2.9 -n "$K8S_NAMESPACE"

    # Report how to reach the API depending on the service type.
    SERVICE_TYPE=$(kubectl get svc stack-2.9 -n "$K8S_NAMESPACE" -o jsonpath='{.spec.type}')
    if [ "$SERVICE_TYPE" = "LoadBalancer" ]; then
        EXTERNAL_IP=$(kubectl get svc stack-2.9 -n "$K8S_NAMESPACE" -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
        echo -e "  API Endpoint: ${BLUE}http://${EXTERNAL_IP}:8000${NC}"
    else
        NODE_PORT=$(kubectl get svc stack-2.9 -n "$K8S_NAMESPACE" -o jsonpath='{.spec.ports[0].nodePort}')
        NODE_IP=$(kubectl get nodes -o jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}')
        echo -e "  API Endpoint: ${BLUE}http://${NODE_IP}:${NODE_PORT}${NC}"
    fi
}
|
|
| |
#######################################
# Poll the local /health endpoint until it responds or the budget expires.
# Globals:   PORT (read, default 8000)
#            HEALTH_MAX_ATTEMPTS (read, default 30) - number of probes
#            HEALTH_CHECK_INTERVAL (read, default 2) - seconds between probes
#            COMPOSE_FILE, color vars (read)
# Returns:   0 when healthy; 1 after the full budget (default 30 x 2s = 60s,
#            unchanged from the previous hard-coded behavior)
#######################################
health_check() {
    local url="http://localhost:${PORT:-8000}/health"
    # Generalized: both knobs are overridable from the environment; the
    # defaults reproduce the original fixed 60-second budget.
    local max_attempts="${HEALTH_MAX_ATTEMPTS:-30}"
    local interval="${HEALTH_CHECK_INTERVAL:-2}"
    local attempt

    echo -e "\n${YELLOW}Waiting for server to become healthy...${NC}"

    for (( attempt = 1; attempt <= max_attempts; attempt++ )); do
        if curl -sf "$url" &> /dev/null; then
            echo -e "${GREEN}✓ Server is healthy!${NC}"
            echo -e "Health check response:"
            # Pretty-print when python3 is available; fall back to raw body.
            curl -s "$url" | python3 -m json.tool 2>/dev/null || curl -s "$url"
            return 0
        fi
        echo -n "."
        sleep "$interval"
    done

    echo -e "\n${RED}✗ Health check failed after $((max_attempts * interval)) seconds${NC}"
    echo "Check logs with: docker-compose -f '$COMPOSE_FILE' logs"
    return 1
}
|
|
| |
# Dispatch to the platform-specific deployment routine based on PLATFORM.
# Unknown platforms print the help text and exit non-zero.
main() {
    local platform="$PLATFORM"

    if [[ -z "$platform" || "$platform" == "local" ]]; then
        detect_platform
        deploy_local
        health_check
    elif [[ "$platform" == "runpod" ]]; then
        detect_platform
        deploy_runpod
    elif [[ "$platform" == "vastai" ]]; then
        detect_platform
        deploy_vastai
    elif [[ "$platform" == "kubernetes" || "$platform" == "k8s" ]]; then
        deploy_kubernetes
    else
        echo -e "${RED}Unknown platform: $platform${NC}"
        show_help
        exit 1
    fi
}
|
|
| |
| parse_args "$@" |
| main |
|
|