Rayugacodes committed on
Commit
8b8863d
·
verified ·
1 Parent(s): 74820e1

Fix all: writable /tmp cache, no login(), proper permissions

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -3
  2. train_on_hf.py +4 -6
Dockerfile CHANGED
@@ -2,9 +2,8 @@ FROM python:3.10-slim
2
 
3
  WORKDIR /app
4
 
5
- ENV HF_HOME=/app/.cache/huggingface
6
- ENV TRANSFORMERS_CACHE=/app/.cache/huggingface
7
- RUN mkdir -p /app/.cache/huggingface
8
 
9
  RUN pip install --no-cache-dir \
10
  torch \
@@ -17,4 +16,6 @@ RUN pip install --no-cache-dir \
17
 
18
  COPY train_on_hf.py .
19
 
 
 
20
  CMD ["sh", "-c", "python train_on_hf.py --hf-token $HF_TOKEN"]
 
2
 
3
  WORKDIR /app
4
 
5
+ ENV HF_HOME=/tmp/hf_cache
6
+ ENV TRANSFORMERS_CACHE=/tmp/hf_cache
 
7
 
8
  RUN pip install --no-cache-dir \
9
  torch \
 
16
 
17
  COPY train_on_hf.py .
18
 
19
+ RUN chmod -R 777 /app
20
+
21
  CMD ["sh", "-c", "python train_on_hf.py --hf-token $HF_TOKEN"]
train_on_hf.py CHANGED
@@ -13,15 +13,15 @@ Usage (on HF with GPU):
13
 
14
  import argparse
15
  import json
16
- import os
17
- import sys
18
  from pathlib import Path
19
 
20
 
21
  def setup(hf_token: str):
22
  """Login and download data from HF."""
23
- from huggingface_hub import login, hf_hub_download, snapshot_download
24
- login(token=hf_token)
 
 
25
 
26
  # Download training data
27
  data_dir = Path("data")
@@ -326,8 +326,6 @@ def merge_and_push(hf_token: str):
326
  """Merge LoRA, push merged model to HF Hub."""
327
  from transformers import AutoModelForCausalLM, AutoTokenizer
328
  from peft import PeftModel
329
- from huggingface_hub import login
330
- login(token=hf_token)
331
 
332
  config = json.load(open("data/preprocessing_config.json"))
333
  MODEL_NAME = config["model"]["name"]
 
13
 
14
  import argparse
15
  import json
 
 
16
  from pathlib import Path
17
 
18
 
19
  def setup(hf_token: str):
20
  """Login and download data from HF."""
21
+ import os
22
+ os.environ["HF_TOKEN"] = hf_token
23
+ os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token
24
+ from huggingface_hub import hf_hub_download, snapshot_download
25
 
26
  # Download training data
27
  data_dir = Path("data")
 
326
  """Merge LoRA, push merged model to HF Hub."""
327
  from transformers import AutoModelForCausalLM, AutoTokenizer
328
  from peft import PeftModel
 
 
329
 
330
  config = json.load(open("data/preprocessing_config.json"))
331
  MODEL_NAME = config["model"]["name"]