#!/bin/bash
#
# Stack 2.9 Local Deployment Script
# Usage: ./local_deploy.sh [options]
#
# Provenance (moved below the shebang — non-comment text before `#!` makes
# the kernel fall back to a default interpreter on direct execution):
#   stack/deploy/local_deploy.sh — "refactor: Squeeze folders further" (65888d5)

# Fail fast: exit on command errors (-e), on unset variables (-u),
# and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Colors for terminal output (NC = no color / reset).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Default configuration. show_usage documents these as environment-variable
# overrides, so honor a pre-set value and only fall back to the default —
# previously these assignments unconditionally clobbered the environment.
COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.yml}"
MODEL_PATH="${MODEL_PATH:-./models}"
MODEL_NAME="${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct}" # Will be replaced with Stack 2.9
MODEL_FORMAT="${MODEL_FORMAT:-hf}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
LOG_LEVEL="${LOG_LEVEL:-INFO}"
# Print an informational message with a blue [INFO] tag to stdout.
# %b interprets the embedded ANSI escapes exactly like `echo -e` did.
print_status() {
  printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
# Print a success message with a green [SUCCESS] tag to stdout.
# %b interprets the embedded ANSI escapes exactly like `echo -e` did.
print_success() {
  printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
# Print a warning message with a yellow [WARNING] tag.
# Routed to stderr so diagnostics don't pollute captured/piped stdout.
print_warning() {
  echo -e "${YELLOW}[WARNING]${NC} $1" >&2
}
# Print an error message with a red [ERROR] tag.
# Routed to stderr so errors reach the terminal even when stdout is redirected.
print_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Function to check prerequisites
# Verify that docker and docker compose are available and warn when NVIDIA
# runtime support is not detected. Sets the global COMPOSE_CMD for callers.
check_prerequisites() {
  print_status "Checking prerequisites..."

  # The docker binary must be on PATH.
  command -v docker >/dev/null 2>&1 || {
    print_error "Docker is not installed or not in PATH"
    exit 1
  }

  # Prefer the compose v2 plugin; fall back to the standalone v1 binary.
  if docker compose version >/dev/null 2>&1; then
    COMPOSE_CMD="docker compose"
  elif command -v docker-compose >/dev/null 2>&1; then
    COMPOSE_CMD="docker-compose"
  else
    print_error "Docker Compose is not installed or not in PATH"
    exit 1
  fi

  # GPU support is optional — only warn when it looks absent.
  docker info 2>/dev/null | grep -q "nvidia" || {
    print_warning "NVIDIA Docker support not detected. GPU acceleration may not work."
    print_warning "Ensure nvidia-docker2 is installed and configured."
  }

  print_success "Prerequisites check passed"
}
# Function to setup environment
# Create the working directories and render the .env file consumed by
# docker compose from the current configuration variables.
setup_environment() {
  print_status "Setting up environment..."

  # Working directories for model weights and service logs.
  mkdir -p models logs
  chmod 755 models logs

  # Write one KEY=value line per setting into .env.
  {
    printf 'MODEL_PATH=%s\n' "${MODEL_PATH}"
    printf 'MODEL_NAME=%s\n' "${MODEL_NAME}"
    printf 'MODEL_FORMAT=%s\n' "${MODEL_FORMAT}"
    printf 'GPU_MEMORY_UTILIZATION=%s\n' "${GPU_MEMORY_UTILIZATION}"
    printf 'LOG_LEVEL=%s\n' "${LOG_LEVEL}"
  } > .env

  print_success "Environment setup complete"
}
# Function to download model
# Fetch MODEL_NAME into models/<basename> unless it is already present.
# Fixes:
#  - huggingface-cli previously downloaded into the shared models/ root
#    (--local-dir models), so the models/<basename> existence check never
#    matched and the model was re-fetched on every run.
#  - FORCE_DOWNLOAD (the --force-download flag) was silently ignored here;
#    now it bypasses the existence check as documented.
download_model() {
  local target_dir="models/${MODEL_NAME##*/}"

  print_status "Downloading model (this may take a while)..."
  if [[ "${FORCE_DOWNLOAD:-false}" == "true" ]] || [ ! -d "${target_dir}" ]; then
    print_status "Downloading ${MODEL_NAME}..."
    # Prefer the HuggingFace CLI; fall back to a git-lfs clone.
    if command -v huggingface-cli &> /dev/null; then
      # Download into the per-model directory so the existence check
      # above can find it on the next run.
      huggingface-cli download "${MODEL_NAME}" --local-dir "${target_dir}"
    elif command -v git &> /dev/null; then
      git lfs install
      git clone "https://huggingface.co/${MODEL_NAME}" "${target_dir}"
    else
      print_error "Neither huggingface-cli nor git is available for model download"
      exit 1
    fi
    print_success "Model downloaded successfully"
  else
    print_warning "Model already exists, skipping download"
  fi
}
# Function to start services
# Bring the compose stack up and wait for containers to report a running
# state. Replaces the fixed `sleep 30` with a poll loop (12 x 5s = 60s max)
# and accepts both compose v1 ("Up") and v2 ("running") status wording.
start_services() {
  print_status "Starting services..."
  ${COMPOSE_CMD} -f "${COMPOSE_FILE}" up -d

  print_status "Waiting for services to be ready..."
  local attempt
  for (( attempt = 1; attempt <= 12; attempt++ )); do
    if ${COMPOSE_CMD} -f "${COMPOSE_FILE}" ps | grep -Eq "Up|running"; then
      print_success "Services started successfully"
      return 0
    fi
    sleep 5
  done

  print_error "Failed to start services"
  ${COMPOSE_CMD} -f "${COMPOSE_FILE}" logs
  exit 1
}
# Function to check status
# Show compose service state and probe the vLLM HTTP health endpoint.
# Previously this depended solely on python3, so a missing interpreter was
# indistinguishable from an unhealthy server; prefer curl, then wget, and
# keep python3 as the last resort.
check_status() {
  print_status "Checking service status..."
  ${COMPOSE_CMD} -f "${COMPOSE_FILE}" ps

  print_status "Health check..."
  local url="http://localhost:8000/health"
  local healthy=false
  if command -v curl &> /dev/null; then
    curl -fsS --max-time 5 "${url}" > /dev/null 2>&1 && healthy=true
  elif command -v wget &> /dev/null; then
    wget -q -T 5 -O /dev/null "${url}" && healthy=true
  else
    python3 -c "import urllib.request; urllib.request.urlopen('${url}').read()" &> /dev/null && healthy=true
  fi

  if [[ "${healthy}" == "true" ]]; then
    print_success "vLLM server is healthy"
  else
    print_warning "vLLM server health check failed"
  fi
}
# Function to show usage
# Print the CLI help text for this script ($0 expands in the heredoc).
show_usage() {
  cat << EOF
Usage: $0 [OPTIONS]

Options:
 -h, --help Show this help message
 --no-model Skip model download
 --force-download Force download even if model exists
 --clean Clean up before deployment

Environment variables:
 MODEL_PATH Path to model directory
 MODEL_NAME HuggingFace model name
 MODEL_FORMAT Model format (hf, safetensors, etc.)
 GPU_MEMORY_UTILIZATION GPU memory utilization (0.0-1.0)
 LOG_LEVEL Log level (DEBUG, INFO, WARNING, ERROR)
EOF
}
# Parse command line arguments into feature flags.
NO_MODEL=false
FORCE_DOWNLOAD=false
CLEAN=false

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      show_usage
      exit 0
      ;;
    --no-model)       NO_MODEL=true ;;
    --force-download) FORCE_DOWNLOAD=true ;;
    --clean)          CLEAN=true ;;
    *)
      print_error "Unknown option: $1"
      show_usage
      exit 1
      ;;
  esac
  # Exiting arms never reach this point, so one shared shift suffices.
  shift
done
# Clean up if requested. This runs BEFORE main/check_prerequisites, so
# COMPOSE_CMD was previously unset here and `--clean` always failed with an
# empty command; resolve the compose command locally before using it.
if [[ "${CLEAN}" == "true" ]]; then
  print_status "Cleaning up existing deployment..."
  if [[ -z "${COMPOSE_CMD:-}" ]]; then
    if docker compose version &> /dev/null; then
      COMPOSE_CMD="docker compose"
    else
      COMPOSE_CMD="docker-compose"
    fi
  fi
  # COMPOSE_CMD is intentionally unquoted: it may be two words ("docker compose").
  ${COMPOSE_CMD} -f "${COMPOSE_FILE}" down -v
  rm -rf models logs
fi
# Main deployment process
# Runs the full pipeline end to end:
#   1. check_prerequisites — verifies docker/compose, sets COMPOSE_CMD
#   2. setup_environment   — creates models/ and logs/, writes .env
#   3. download_model      — unless --no-model was given
#   4. start_services      — compose up + readiness wait
#   5. check_status        — compose ps + HTTP health probe
main() {
  print_status "Starting Stack 2.9 local deployment..."
  echo "==================================="

  # Check prerequisites
  check_prerequisites

  # Setup environment
  setup_environment

  # Download model if not skipped
  if [[ "${NO_MODEL}" == "false" ]]; then
    # NOTE(review): this existence check duplicates the one inside
    # download_model; keep the two in sync when changing either.
    if [[ "${FORCE_DOWNLOAD}" == "true" ]] || [ ! -d "models/${MODEL_NAME##*/}" ]; then
      download_model
    else
      print_warning "Model exists and --force-download not specified, skipping download"
    fi
  else
    print_warning "Model download skipped as requested"
  fi

  # Start services
  start_services

  # Check status
  check_status

  # Final summary for the operator: endpoints exposed by the compose stack.
  print_success "Stack 2.9 deployment completed successfully!"
  echo ""
  echo "Service URLs:"
  echo " vLLM API: http://localhost:8000"
  echo " Prometheus: http://localhost:9090"
  echo " Grafana: http://localhost:3000"
  echo " Traefik Dashboard: http://localhost:8080"
  echo ""
  echo "Health check: http://localhost:8000/health"
  echo ""
  echo "To stop services: ${COMPOSE_CMD} -f ${COMPOSE_FILE} down"
  echo "To view logs: ${COMPOSE_CMD} -f ${COMPOSE_FILE} logs -f"
}

# Run main function (args were already consumed by the parse loop above,
# so "$@" is empty here; forwarded for conventional completeness).
main "$@"