# File size: 2,747 Bytes
# Compose file format version. Current Docker Compose releases ignore this key
# (and warn that it is obsolete); it is retained for older docker-compose
# binaries that still read it.
version: '3.8'

services:
# --- service: vllm -----------------------------------------------------------
  # Main vLLM service with GPU support. The image is built from ./Dockerfile
  # and container port 8000 is published on host port 8000.
  vllm:
    build:
      context: .
      dockerfile: Dockerfile
    ports:
      - "8000:8000"
    environment:
      # MODEL_PATH is the container side of the ./models bind mount below.
      - MODEL_PATH=/models
      - MODEL_NAME=meta-llama/Llama-3.1-8B-Instruct
      - MODEL_FORMAT=hf
      # Uses the `redis` service name as hostname on the Compose network.
      - REDIS_URL=redis://redis:6379
      - GPU_MEMORY_UTILIZATION=0.9
      - LOG_LEVEL=INFO
    volumes:
      # Model directory is mounted read-only; the log directory is writable.
      - ./models:/models:ro
      - ./logs:/app/logs
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every GPU exposed by the nvidia driver on this host.
            - driver: nvidia
              count: all
              capabilities: [gpu]
    # Start order only: redis is started before vllm, readiness is not awaited.
    depends_on:
      - redis
    restart: unless-stopped
    healthcheck:
      # NOTE(review): assumes `curl` exists in the image built from Dockerfile
      # and that the server answers on /health — confirm both.
      test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
      interval: 30s
      timeout: 10s
      retries: 3
      # Probe failures during the first 120s do not count towards `retries`.
      start_period: 120s
# --- service: redis ----------------------------------------------------------
  # Optional Redis for caching.
  redis:
    image: redis:7-alpine
    ports:
      # NOTE(review): this publishes Redis on every host interface and no
      # password is configured here. Services in this file reach it over the
      # Compose network as redis:6379, so the host mapping is only needed for
      # clients outside Compose — consider "127.0.0.1:6379:6379" or removing it.
      - "6379:6379"
    volumes:
      # Named volume keeps /data across container re-creation.
      - redis_data:/data
    restart: unless-stopped
# --- service: prometheus -----------------------------------------------------
  # Prometheus metrics collection.
  prometheus:
    # NOTE(review): `latest` is a moving tag; pin a tested version so that
    # re-pulling the image cannot change behaviour unexpectedly.
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Prometheus only reads its configuration file, so mount it read-only
      # (matches the other read-only mounts used in this file).
      - ./prometheus.yml:/etc/prometheus/prometheus.yml:ro
      # Named volume holding the TSDB (see --storage.tsdb.path).
      - prometheus_data:/prometheus
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # Keep samples for 200 hours.
      - '--storage.tsdb.retention.time=200h'
      # Enables the HTTP reload/quit endpoints; anyone who can reach port 9090
      # can call them.
      - '--web.enable-lifecycle'
    restart: unless-stopped
# --- service: traefik --------------------------------------------------------
  # Traefik for HTTPS and reverse proxy.
  traefik:
    image: traefik:v3.0
    command:
      - '--api.dashboard=true'
      # Discover containers through the Docker socket mounted below, but only
      # route to those that opt in (exposedbydefault=false).
      # NOTE(review): no service visible in this file sets Traefik labels, so
      # nothing is routed through the proxy yet — confirm this is intended.
      - '--providers.docker=true'
      - '--providers.docker.exposedbydefault=false'
      - '--entrypoints.web.address=:80'
      - '--entrypoints.websecure.address=:443'
      # Let's Encrypt certificates via the TLS-ALPN challenge; the account
      # data is stored on the traefik_data volume.
      - '--certificatesresolvers.myresolver.acme.tlschallenge=true'
      # NOTE(review): placeholder address — replace before requesting
      # certificates.
      - '--certificatesresolvers.myresolver.acme.email=your-email@example.com'
      - '--certificatesresolvers.myresolver.acme.storage=/letsencrypt/acme.json'
    ports:
      - "80:80"
      - "443:443"
      # Traefik dashboard.
      # NOTE(review): no entrypoint is defined on :8080 and `--api.insecure`
      # is not set, so this mapping presumably serves nothing; either route
      # the dashboard through an authenticated router or drop the mapping.
      - "8080:8080"
    volumes:
      # Read-only Docker socket for the docker provider.
      - /var/run/docker.sock:/var/run/docker.sock:ro
      - traefik_data:/letsencrypt
    restart: unless-stopped
# --- service: grafana --------------------------------------------------------
  # Optional: Grafana for visualization.
  grafana:
    # NOTE(review): `latest` is a moving tag; pin a tested version.
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Admin password is taken from GRAFANA_ADMIN_PASSWORD (shell environment
      # or .env file). The previous hard-coded value remains the fallback so
      # existing deployments keep working; set the variable to override it.
      - 'GF_SECURITY_ADMIN_PASSWORD=${GRAFANA_ADMIN_PASSWORD:-admin123}'
    volumes:
      # Named volume for Grafana's data directory.
      - grafana_data:/var/lib/grafana
      # Provisioning files are read from ./grafana/provisioning on the host.
      - ./grafana/provisioning:/etc/grafana/provisioning
    # Start order only: prometheus is started before grafana.
    depends_on:
      - prometheus
    restart: unless-stopped
# Named volumes referenced by the services above. No options are set, so
# Compose creates each one with its default settings.
volumes:
  redis_data: {}
  prometheus_data: {}
  traefik_data: {}
  grafana_data: {}
# The project's default network, explicitly set to the bridge driver.
networks:
  default:
    driver: bridge