Spaces:
Running
Running
Fix all: writable /tmp cache, no login(), proper permissions
Browse files
- Dockerfile +4 -3
- train_on_hf.py +4 -6
Dockerfile
CHANGED
|
@@ -2,9 +2,8 @@ FROM python:3.10-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
ENV HF_HOME=/
|
| 6 |
-
ENV TRANSFORMERS_CACHE=/
|
| 7 |
-
RUN mkdir -p /app/.cache/huggingface
|
| 8 |
|
| 9 |
RUN pip install --no-cache-dir \
|
| 10 |
torch \
|
|
@@ -17,4 +16,6 @@ RUN pip install --no-cache-dir \
|
|
| 17 |
|
| 18 |
COPY train_on_hf.py .
|
| 19 |
|
|
|
|
|
|
|
| 20 |
CMD ["sh", "-c", "python train_on_hf.py --hf-token $HF_TOKEN"]
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
+
ENV HF_HOME=/tmp/hf_cache
|
| 6 |
+
ENV TRANSFORMERS_CACHE=/tmp/hf_cache
|
|
|
|
| 7 |
|
| 8 |
RUN pip install --no-cache-dir \
|
| 9 |
torch \
|
|
|
|
| 16 |
|
| 17 |
COPY train_on_hf.py .
|
| 18 |
|
| 19 |
+
RUN chmod -R 777 /app
|
| 20 |
+
|
| 21 |
CMD ["sh", "-c", "python train_on_hf.py --hf-token $HF_TOKEN"]
|
train_on_hf.py
CHANGED
|
@@ -13,15 +13,15 @@ Usage (on HF with GPU):
|
|
| 13 |
|
| 14 |
import argparse
|
| 15 |
import json
|
| 16 |
-
import os
|
| 17 |
-
import sys
|
| 18 |
from pathlib import Path
|
| 19 |
|
| 20 |
|
| 21 |
def setup(hf_token: str):
|
| 22 |
"""Login and download data from HF."""
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
| 25 |
|
| 26 |
# Download training data
|
| 27 |
data_dir = Path("data")
|
|
@@ -326,8 +326,6 @@ def merge_and_push(hf_token: str):
|
|
| 326 |
"""Merge LoRA, push merged model to HF Hub."""
|
| 327 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 328 |
from peft import PeftModel
|
| 329 |
-
from huggingface_hub import login
|
| 330 |
-
login(token=hf_token)
|
| 331 |
|
| 332 |
config = json.load(open("data/preprocessing_config.json"))
|
| 333 |
MODEL_NAME = config["model"]["name"]
|
|
|
|
| 13 |
|
| 14 |
import argparse
|
| 15 |
import json
|
|
|
|
|
|
|
| 16 |
from pathlib import Path
|
| 17 |
|
| 18 |
|
| 19 |
def setup(hf_token: str):
|
| 20 |
"""Login and download data from HF."""
|
| 21 |
+
import os
|
| 22 |
+
os.environ["HF_TOKEN"] = hf_token
|
| 23 |
+
os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
|
| 24 |
+
from huggingface_hub import hf_hub_download, snapshot_download
|
| 25 |
|
| 26 |
# Download training data
|
| 27 |
data_dir = Path("data")
|
|
|
|
| 326 |
"""Merge LoRA, push merged model to HF Hub."""
|
| 327 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 328 |
from peft import PeftModel
|
|
|
|
|
|
|
| 329 |
|
| 330 |
config = json.load(open("data/preprocessing_config.json"))
|
| 331 |
MODEL_NAME = config["model"]["name"]
|