Spaces:

bayan10
/

bayan-api

Running

App Files Files Community

bayan-api / Dockerfile

youssefreda9

Fix HF_HOME caching in Dockerfile and update UI

0b76ff9 about 10 hours ago

Raw

History Blame Contribute Delete

3.72 kB

	# Base image: the slim variant of the official Python 3.12 image.
	FROM python:3.12-slim

	# Relative paths in the COPY / RUN / CMD instructions below resolve against /app.
	WORKDIR /app

	# System packages.
	# build-essential provides the compiler toolchain (presumably needed by Python
	# packages that build native extensions during pip install — confirm).
	# The apt package lists are deleted in the same layer so they never persist in the image.
	RUN apt-get update \
	    && apt-get install --yes --no-install-recommends build-essential \
	    && rm -rf /var/lib/apt/lists/*

	# Python dependencies.
	# CPU-only PyTorch is installed first, from the PyTorch CPU wheel index
	# (saves ~1.5GB vs full torch with CUDA).
	COPY requirements.txt .
	# The CPU wheel index is kept visible as an extra index while requirements.txt is
	# resolved: if a version pin in that file forces pip to replace torch, pip can select
	# the matching "+cpu" build again instead of silently pulling the CUDA wheel from PyPI.
	RUN pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu && \
	    pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt

	# Model pre-fetch.
	# All models are downloaded while the image is built, when the network is available.
	# The running container has NO outbound DNS, so everything must already be cached.

	# HF_HOME is placed at a global path (outside any home directory) so that non-root
	# users, such as the one used by HF Spaces, can access the cached models.
	ENV HF_HOME=/opt/huggingface
	RUN mkdir -p "${HF_HOME}" && chmod 0777 "${HF_HOME}"
	# 1. Summarization model (MBart, loaded as float16)
	# Tokenizer, config and weights are each loaded once purely for the download
	# side effect; the loaded objects are discarded when the command exits.
	RUN python -c "\
	import torch; \
	from transformers import AutoConfig, AutoTokenizer, MBartForConditionalGeneration; \
	model_id = 'bayan10/summarization-model'; \
	print('Downloading summarization tokenizer...'); \
	AutoTokenizer.from_pretrained(model_id); \
	print('Downloading summarization config...'); \
	AutoConfig.from_pretrained(model_id); \
	print('Downloading summarization model (float16)...'); \
	MBartForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16); \
	print('Summarization model cached!'); \
	"

	# 2. Spelling model (AraSpell: AraBERT encoder-decoder plus the last_model.pt checkpoint)
	# The same AraBERT repository is used for the tokenizer, for both halves of the
	# encoder-decoder, and for the masked-LM head, so its id is named once.
	RUN python -c "\
	from transformers import AutoModelForMaskedLM, AutoTokenizer, EncoderDecoderModel; \
	from huggingface_hub import hf_hub_download; \
	arabert = 'aubmindlab/bert-base-arabertv02'; \
	print('Downloading AraSpell checkpoint...'); \
	hf_hub_download(repo_id='bayan10/AraSpell-Model', filename='last_model.pt'); \
	print('Downloading AraBERT tokenizer...'); \
	AutoTokenizer.from_pretrained(arabert); \
	print('Downloading AraBERT encoder-decoder...'); \
	EncoderDecoderModel.from_encoder_decoder_pretrained(arabert, arabert); \
	print('Downloading AraBERT MLM (for ContextualCorrector)...'); \
	AutoModelForMaskedLM.from_pretrained(arabert); \
	print('Spelling model + MLM cached!'); \
	"

	# 3. Grammar: camel-tools data for the MLE disambiguator ("light" data package)
	# CAMELTOOLS_DATA is placed at a global path so that non-root users, such as the
	# one used by HF Spaces, can access the installed data.
	ENV CAMELTOOLS_DATA=/opt/camel_tools
	RUN mkdir -p "${CAMELTOOLS_DATA}" \
	    && chmod 0777 "${CAMELTOOLS_DATA}" \
	    && camel_data -i light

	# 4. Punctuation model (PuncAra-v1, loaded as an EncoderDecoderModel)
	# Tokenizer and model are loaded once only so that their files get downloaded.
	RUN python -c "\
	from transformers import AutoTokenizer, EncoderDecoderModel; \
	model_id = 'bayan10/PuncAra-v1'; \
	print('Downloading PuncAra-v1 tokenizer...'); \
	AutoTokenizer.from_pretrained(model_id); \
	print('Downloading PuncAra-v1 model...'); \
	EncoderDecoderModel.from_pretrained(model_id); \
	print('PuncAra-v1 cached!'); \
	"

	# 5. Dialect-to-MSA model (mT5, loaded as float16)
	# Tokenizer and model are loaded once only so that their files get downloaded.
	RUN python -c "\
	import torch; \
	from transformers import AutoModelForSeq2SeqLM, AutoTokenizer; \
	model_id = 'bayan10/dialect-to-msa-model'; \
	print('Downloading dialect tokenizer...'); \
	AutoTokenizer.from_pretrained(model_id); \
	print('Downloading dialect model (float16)...'); \
	AutoModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.float16); \
	print('Dialect model cached!'); \
	"

	# Application code and data.
	# Copied after the dependency and model layers so that source edits do not
	# invalidate those expensive cached layers.
	COPY src/ ./src/
	COPY quran.py quran_master.db ./
	# NOTE(review): this glob copies every .env* file found in the build context into an
	# image layer. Confirm that none of them contains credentials; values needed at
	# runtime are safer supplied as environment variables / platform secrets.
	COPY .env* ./

	# Runtime environment.
	# PORT / DEBUG: presumably read by the application in src/ — confirm against the app code.
	# PYTHONUNBUFFERED=1: Python writes stdout/stderr unbuffered, so log lines show up immediately.
	# HF_HUB_OFFLINE=1: the running container has no outbound DNS and every model was
	#   downloaded during the build, so the Hugging Face libraries are told to resolve
	#   files from the local cache only instead of attempting network calls first.
	#   It is set here, after all download steps, so the build itself stays online.
	#   Override with `-e HF_HUB_OFFLINE=0` when running somewhere with network access.
	ENV PORT=7860 \
	    DEBUG=False \
	    PYTHONUNBUFFERED=1 \
	    HF_HUB_OFFLINE=1

	# Port the server binds to (EXPOSE is documentation only; it does not publish the port).
	EXPOSE 7860

	# Launch the app under gunicorn, using a single worker to minimize RAM.
	# The 300s worker timeout covers a full pipeline run
	# (spelling ~50s + grammar ~8s + punctuation ~30s) plus cold start.
	CMD ["gunicorn", \
	     "--chdir", "src", \
	     "app:app", \
	     "--bind", "0.0.0.0:7860", \
	     "--timeout", "300", \
	     "--workers", "1"]