| set -e | |
| # Stack 2.9 vLLM Startup Script | |
| # Handles proper startup, logging, and signal handling | |
| echo "π Starting Stack 2.9 vLLM Server" | |
| echo "================================" | |
| # Configuration | |
| LOG_DIR="/app/logs" | |
| PID_FILE="/app/vllm.pid" | |
| LOG_FILE="${LOG_DIR}/vllm.log" | |
| # Create log directory if it doesn't exist | |
| mkdir -p "${LOG_DIR}" | |
| # Function to cleanup on exit | |
| cleanup() { | |
| echo "π Shutting down vLLM server..." | |
| if [ -f "${PID_FILE}" ]; then | |
| kill "$(cat ${PID_FILE})" 2>/dev/null || true | |
| rm "${PID_FILE}" | |
| fi | |
| exit 0 | |
| } | |
| # Trap signals | |
| trap cleanup SIGINT SIGTERM EXIT | |
| # Check if model directory exists | |
| if [ ! -d "/models" ] || [ -z "$(ls -A /models 2>/dev/null)" ]; then | |
| echo "β οΈ Warning: No model found in /models" | |
| echo " Expected model files in /models directory" | |
| echo " Mount a volume with your model or download via HF" | |
| fi | |
| # Check for required environment variables | |
| echo "π Environment Configuration:" | |
| echo " MODEL_PATH: ${MODEL_PATH:-/models}" | |
| echo " MODEL_NAME: ${MODEL_NAME:-meta-llama/Llama-3.1-8B-Instruct}" | |
| echo " GPU_MEMORY_UTILIZATION: ${GPU_MEMORY_UTILIZATION:-0.9}" | |
| echo " MAX_MODEL_LEN: ${MAX_MODEL_LEN:-131072}" | |
| echo "" | |
| # Start the server | |
| echo "Starting vLLM server..." | |
| python vllm_server.py 2>&1 | tee -a "${LOG_FILE}" & | |
| echo $! > "${PID_FILE}" | |
| echo "β vLLM server started with PID $(cat ${PID_FILE})" | |
| echo " Logs: ${LOG_FILE}" | |
| echo " Health: http://localhost:8000/health" | |
| echo "" | |
| echo "Press Ctrl+C to stop" | |
| # Wait for process | |
| wait "${PID_FILE}" 2>/dev/null || true | |