# Training image: CUDA 12.1 + Python 3.11 + PyTorch/Unsloth stack.
# Builds the local project into /app and, by default, runs
# scripts/train_grpo.py with --push-to-hub.
#
# Runtime secrets (not baked into the image):
#   HF_TOKEN, WANDB_API_KEY - should be set as Space Secrets.

# Use CUDA 12.1 base image
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04

# Avoid interactive prompts from apt during the build
ENV DEBIAN_FRONTEND=noninteractive

# Install Python 3.11 and other essentials; drop the apt lists in the same
# layer so they do not bloat the image.
RUN apt-get update && apt-get install -y \
    python3.11 \
    python3-pip \
    python3.11-dev \
    git \
    && rm -rf /var/lib/apt/lists/*

# Set python3.11 as default `python`.
# -f keeps the build from failing if /usr/bin/python already exists.
RUN ln -sf /usr/bin/python3.11 /usr/bin/python

WORKDIR /app

# NOTE: every pip call below goes through `python -m pip`. The `pip` executable
# shipped by python3-pip is bound to the distro's default python3, not to
# python3.11, so a bare `pip install` would put packages where the `python`
# used by CMD cannot import them.

# Upgrade pip
RUN python -m pip install --no-cache-dir -U pip setuptools wheel

# Install PyTorch with CUDA 12.1 support
RUN python -m pip install --no-cache-dir \
    torch==2.4.0 \
    triton \
    xformers \
    --index-url https://download.pytorch.org/whl/cu121

# Install Unsloth and let it resolve its own compatible TRL/PEFT stack.
RUN python -m pip install --no-cache-dir \
    "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git" \
    datasets \
    wandb \
    matplotlib \
    fastapi \
    uvicorn \
    pydantic

# Copy the project files
COPY . .

# Install the local package in editable mode
RUN python -m pip install --no-cache-dir -e .

# Make scripts executable
RUN chmod +x scripts/*.py

# Set environment variables
ENV MODEL_NAME="meta-llama/Llama-3.2-3B-Instruct"
ENV OUTPUT_DIR="outputs/commitguard-llama-3b-grpo"
ENV WANDB_PROJECT="commitguard"

# Default command: Run training and push to Hub
# Note: HF_TOKEN and WANDB_API_KEY should be set as Space Secrets
CMD ["python", "scripts/train_grpo.py", "--samples", "200", "--max-steps", "300", "--push-to-hub"]