#!/bin/bash
#
# Deploy the local vLLM stack with Docker Compose: check prerequisites,
# prepare the environment, optionally download the model, start the
# services and report their status.

# Strict mode: abort on errors, unset variables, and pipeline failures.
set -euo pipefail

# Terminal colors for status output (NC = no color / reset).
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m'

# Configuration. Each value may be overridden from the environment,
# as advertised by show_usage (previously the assignments clobbered
# any caller-provided values unconditionally).
COMPOSE_FILE="${COMPOSE_FILE:-docker-compose.yml}"
MODEL_PATH="${MODEL_PATH:-./models}"
MODEL_NAME="${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct}"
MODEL_FORMAT="${MODEL_FORMAT:-hf}"
GPU_MEMORY_UTILIZATION="${GPU_MEMORY_UTILIZATION:-0.9}"
LOG_LEVEL="${LOG_LEVEL:-INFO}"
|
|
| |
# Emit an informational message with a blue [INFO] tag on stdout.
print_status() {
    printf '%b\n' "${BLUE}[INFO]${NC} $1"
}
|
|
# Emit a success message with a green [SUCCESS] tag on stdout.
print_success() {
    printf '%b\n' "${GREEN}[SUCCESS]${NC} $1"
}
|
|
# Emit a warning message with a yellow [WARNING] tag.
# Warnings go to stderr so they are not mixed into captured stdout.
print_warning() {
    echo -e "${YELLOW}[WARNING]${NC} $1" >&2
}
|
|
# Emit an error message with a red [ERROR] tag.
# Errors go to stderr so callers capturing stdout still see them.
print_error() {
    echo -e "${RED}[ERROR]${NC} $1" >&2
}
|
|
| |
# Verify that docker and a compose implementation are available, and warn
# when NVIDIA container runtime support is not detected.
# Sets the global COMPOSE_CMD to either "docker compose" (v2 plugin) or
# "docker-compose" (standalone v1 binary).
check_prerequisites() {
    print_status "Checking prerequisites..."

    # Guard clause: docker itself must be present.
    command -v docker &> /dev/null || {
        print_error "Docker is not installed or not in PATH"
        exit 1
    }

    # Prefer the v2 plugin, fall back to the standalone v1 binary.
    if docker compose version &> /dev/null; then
        COMPOSE_CMD="docker compose"
    elif command -v docker-compose &> /dev/null; then
        COMPOSE_CMD="docker-compose"
    else
        print_error "Docker Compose is not installed or not in PATH"
        exit 1
    fi

    # Best-effort GPU runtime detection; absence is only a warning.
    docker info 2>/dev/null | grep -q "nvidia" || {
        print_warning "NVIDIA Docker support not detected. GPU acceleration may not work."
        print_warning "Ensure nvidia-docker2 is installed and configured."
    }

    print_success "Prerequisites check passed"
}
|
|
| |
# Create the working directories and write the .env file consumed by
# docker compose. Any existing .env is overwritten.
setup_environment() {
    print_status "Setting up environment..."

    # Directories for model weights and service logs.
    mkdir -p models logs
    chmod 755 models logs

    # Persist the active configuration for docker compose.
    {
        echo "MODEL_PATH=${MODEL_PATH}"
        echo "MODEL_NAME=${MODEL_NAME}"
        echo "MODEL_FORMAT=${MODEL_FORMAT}"
        echo "GPU_MEMORY_UTILIZATION=${GPU_MEMORY_UTILIZATION}"
        echo "LOG_LEVEL=${LOG_LEVEL}"
    } > .env

    print_success "Environment setup complete"
}
|
|
| |
# Download the model weights into models/<model basename> unless that
# directory already exists. Prefers huggingface-cli, falls back to
# git + git-lfs; aborts if neither tool is available.
download_model() {
    print_status "Downloading model (this may take a while)..."

    local model_dir="models/${MODEL_NAME##*/}"

    if [ ! -d "${model_dir}" ]; then
        print_status "Downloading ${MODEL_NAME}..."

        if command -v huggingface-cli &> /dev/null; then
            # Download into the per-model directory so the existence check
            # above works on subsequent runs (previously --local-dir pointed
            # at models/, so the skip check never matched and every run
            # re-downloaded the weights).
            huggingface-cli download "${MODEL_NAME}" --local-dir "${model_dir}"
        elif command -v git &> /dev/null; then
            git lfs install
            git clone "https://huggingface.co/${MODEL_NAME}" "${model_dir}"
        else
            print_error "Neither huggingface-cli nor git is available for model download"
            exit 1
        fi

        print_success "Model downloaded successfully"
    else
        print_warning "Model already exists, skipping download"
    fi
}
|
|
| |
# Start the compose stack detached, wait a fixed grace period, then verify
# that at least one container is running; dumps logs and aborts on failure.
start_services() {
    print_status "Starting services..."

    # COMPOSE_CMD may be two words ("docker compose"), so it is
    # intentionally left unquoted to allow word splitting.
    ${COMPOSE_CMD} -f "${COMPOSE_FILE}" up -d

    print_status "Waiting for services to be ready..."
    sleep 30

    # compose v1 reports container state as "Up"; the v2 plugin reports
    # "running" — match both so a healthy v2 stack is not flagged as failed.
    if ${COMPOSE_CMD} -f "${COMPOSE_FILE}" ps | grep -qE "Up|running"; then
        print_success "Services started successfully"
    else
        print_error "Failed to start services"
        ${COMPOSE_CMD} -f "${COMPOSE_FILE}" logs
        exit 1
    fi
}
|
|
| |
# Show the compose service table and probe the vLLM health endpoint.
# Previously the probe hard-required python3; now it uses whichever of
# curl / wget / python3 is available. A failed probe is only a warning.
check_status() {
    print_status "Checking service status..."

    ${COMPOSE_CMD} -f "${COMPOSE_FILE}" ps

    print_status "Health check..."
    local url="http://localhost:8000/health"
    local healthy=false

    if command -v curl &> /dev/null; then
        if curl -fsS --max-time 10 "${url}" &> /dev/null; then healthy=true; fi
    elif command -v wget &> /dev/null; then
        if wget -q -T 10 -O /dev/null "${url}" &> /dev/null; then healthy=true; fi
    elif command -v python3 &> /dev/null; then
        if python3 -c "import urllib.request; urllib.request.urlopen('${url}').read()" &> /dev/null; then
            healthy=true
        fi
    else
        print_warning "No HTTP client (curl/wget/python3) found for health check"
    fi

    if [[ "${healthy}" == "true" ]]; then
        print_success "vLLM server is healthy"
    else
        print_warning "vLLM server health check failed"
    fi
}
|
|
| |
# Print the CLI help text to stdout.
show_usage() {
    cat << EOF
Usage: $0 [OPTIONS]

Options:
  -h, --help        Show this help message
  --no-model        Skip model download
  --force-download  Force download even if model exists
  --clean           Clean up before deployment

Environment variables:
  MODEL_PATH              Path to model directory
  MODEL_NAME              HuggingFace model name
  MODEL_FORMAT            Model format (hf, safetensors, etc.)
  GPU_MEMORY_UTILIZATION  GPU memory utilization (0.0-1.0)
  LOG_LEVEL               Log level (DEBUG, INFO, WARNING, ERROR)
EOF
}
|
|
| |
# CLI flag defaults.
NO_MODEL=false
FORCE_DOWNLOAD=false
CLEAN=false

# Parse command-line options; unknown options print usage and abort.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            show_usage
            exit 0
            ;;
        --no-model)
            NO_MODEL=true
            ;;
        --force-download)
            FORCE_DOWNLOAD=true
            ;;
        --clean)
            CLEAN=true
            ;;
        *)
            print_error "Unknown option: $1"
            show_usage
            exit 1
            ;;
    esac
    shift
done
|
|
| |
| if [[ "${CLEAN}" == "true" ]]; then |
| print_status "Cleaning up existing deployment..." |
| ${COMPOSE_CMD} -f ${COMPOSE_FILE} down -v |
| rm -rf models logs |
| fi |
|
|
| |
# Orchestrate the full deployment: prerequisites, environment setup,
# optional model download, service startup, and a final status report
# with the service URLs.
main() {
    print_status "Starting Stack 2.9 local deployment..."
    echo "==================================="

    # Verifies docker/compose availability and sets the global COMPOSE_CMD.
    check_prerequisites

    # Creates models/ and logs/ and writes the .env file.
    setup_environment

    # Download the model unless --no-model was given; --force-download
    # re-downloads even when the model directory already exists.
    if [[ "${NO_MODEL}" == "false" ]]; then
        if [[ "${FORCE_DOWNLOAD}" == "true" ]] || [ ! -d "models/${MODEL_NAME##*/}" ]; then
            download_model
        else
            print_warning "Model exists and --force-download not specified, skipping download"
        fi
    else
        print_warning "Model download skipped as requested"
    fi

    # Bring up the compose stack and wait for it to settle.
    start_services

    # Show container states and probe the vLLM health endpoint.
    check_status

    print_success "Stack 2.9 deployment completed successfully!"
    echo ""
    echo "Service URLs:"
    echo " vLLM API: http://localhost:8000"
    echo " Prometheus: http://localhost:9090"
    echo " Grafana: http://localhost:3000"
    echo " Traefik Dashboard: http://localhost:8080"
    echo ""
    echo "Health check: http://localhost:8000/health"
    echo ""
    echo "To stop services: ${COMPOSE_CMD} -f ${COMPOSE_FILE} down"
    echo "To view logs: ${COMPOSE_CMD} -f ${COMPOSE_FILE} logs -f"
}
|
|
| |
| main "$@" |