Spaces:
Sleeping
Sleeping
faizr206
committed on
Commit
·
5fe40ea
1
Parent(s):
8b8fced
email to dataset
Browse files
- Dockerfile +1 -1
- app/main.py +45 -9
Dockerfile
CHANGED
|
@@ -6,7 +6,7 @@ COPY app /app
|
|
| 6 |
# App deps only (no LaTeX)
|
| 7 |
RUN python -m pip install --upgrade pip && \
|
| 8 |
python -m pip install --no-cache-dir \
|
| 9 |
-
fastapi "uvicorn[standard]" pydantic python-dotenv google-genai
|
| 10 |
|
| 11 |
ENV PORT=7860
|
| 12 |
EXPOSE 7860
|
|
|
|
| 6 |
# App deps only (no LaTeX)
|
| 7 |
RUN python -m pip install --upgrade pip && \
|
| 8 |
python -m pip install --no-cache-dir \
|
| 9 |
+
fastapi "uvicorn[standard]" pydantic python-dotenv google-genai huggingface-hub
|
| 10 |
|
| 11 |
ENV PORT=7860
|
| 12 |
EXPOSE 7860
|
app/main.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
-
import os, re, uuid, subprocess, sys, time, traceback, threading
|
|
|
|
| 2 |
from collections import deque
|
| 3 |
from pathlib import Path
|
| 4 |
from typing import Optional, Tuple
|
|
@@ -7,6 +8,8 @@ from fastapi import FastAPI, HTTPException, Response
|
|
| 7 |
from fastapi.middleware.cors import CORSMiddleware
|
| 8 |
from pydantic import BaseModel, validator
|
| 9 |
|
|
|
|
|
|
|
| 10 |
# Optional .env for local testing
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
load_dotenv()
|
|
@@ -34,8 +37,23 @@ app.add_middleware(
|
|
| 34 |
|
| 35 |
|
| 36 |
RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
|
| 40 |
# ---------------- simple 10 RPM rate limiter ----------------
|
| 41 |
class RateLimiter:
|
|
@@ -394,7 +412,7 @@ class EmailIn(BaseModel):
|
|
| 394 |
|
| 395 |
@validator("email")
|
| 396 |
def validate_email(cls, value: str) -> str:
|
| 397 |
-
cleaned = value.strip()
|
| 398 |
if not cleaned:
|
| 399 |
raise ValueError("Email cannot be empty")
|
| 400 |
if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", cleaned):
|
|
@@ -429,11 +447,29 @@ def generate_and_render(inp: PromptIn):
|
|
| 429 |
|
| 430 |
@app.post("/store-email")
|
| 431 |
def store_email(email: EmailIn):
|
| 432 |
-
"""
|
|
|
|
|
|
|
|
|
|
| 433 |
sanitized_email = email.sanitized
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
try:
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
raise HTTPException(500, "Failed to save email address")
|
| 439 |
-
return {"stored": True}
|
|
|
|
| 1 |
+
import json, os, re, uuid, subprocess, sys, time, traceback, threading
|
| 2 |
+
from io import BytesIO
|
| 3 |
from collections import deque
|
| 4 |
from pathlib import Path
|
| 5 |
from typing import Optional, Tuple
|
|
|
|
| 8 |
from fastapi.middleware.cors import CORSMiddleware
|
| 9 |
from pydantic import BaseModel, validator
|
| 10 |
|
| 11 |
+
from huggingface_hub import HfApi, create_repo, CommitOperationAdd
|
| 12 |
+
|
| 13 |
# Optional .env for local testing
|
| 14 |
from dotenv import load_dotenv
|
| 15 |
load_dotenv()
|
|
|
|
| 37 |
|
| 38 |
|
| 39 |
RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
|
| 40 |
+
|
| 41 |
+
HF_DATASET_ID = os.getenv("HF_DATASET_ID", "MathFrames/email-log")
|
| 42 |
+
HF_TOKEN = os.getenv("HF_TOKEN", "")
|
| 43 |
+
|
| 44 |
+
hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
|
| 45 |
+
if hf_api:
|
| 46 |
+
try:
|
| 47 |
+
create_repo(
|
| 48 |
+
HF_DATASET_ID,
|
| 49 |
+
repo_type="dataset",
|
| 50 |
+
private=True,
|
| 51 |
+
exist_ok=True,
|
| 52 |
+
token=HF_TOKEN,
|
| 53 |
+
)
|
| 54 |
+
except Exception:
|
| 55 |
+
# Ignore startup race/permission errors; individual writes will surface issues.
|
| 56 |
+
pass
|
| 57 |
|
| 58 |
# ---------------- simple 10 RPM rate limiter ----------------
|
| 59 |
class RateLimiter:
|
|
|
|
| 412 |
|
| 413 |
@validator("email")
|
| 414 |
def validate_email(cls, value: str) -> str:
|
| 415 |
+
cleaned = value.strip().lower()
|
| 416 |
if not cleaned:
|
| 417 |
raise ValueError("Email cannot be empty")
|
| 418 |
if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", cleaned):
|
|
|
|
| 447 |
|
| 448 |
@app.post("/store-email")
|
| 449 |
def store_email(email: EmailIn):
|
| 450 |
+
"""Store the provided email address in the configured Hugging Face dataset."""
|
| 451 |
+
if not hf_api or not HF_TOKEN:
|
| 452 |
+
raise HTTPException(500, "Email logging is not configured")
|
| 453 |
+
|
| 454 |
sanitized_email = email.sanitized
|
| 455 |
+
timestamp = int(time.time())
|
| 456 |
+
key = f"emails/{int(time.time() * 1000)}-{uuid.uuid4().hex}.json"
|
| 457 |
+
payload = {"email": sanitized_email, "ts": timestamp}
|
| 458 |
+
|
| 459 |
try:
|
| 460 |
+
hf_api.create_commit(
|
| 461 |
+
repo_id=HF_DATASET_ID,
|
| 462 |
+
repo_type="dataset",
|
| 463 |
+
operations=[
|
| 464 |
+
CommitOperationAdd(
|
| 465 |
+
path_in_repo=key,
|
| 466 |
+
path_or_fileobj=BytesIO(json.dumps(payload).encode("utf-8")),
|
| 467 |
+
)
|
| 468 |
+
],
|
| 469 |
+
commit_message=f"Log email: {sanitized_email}",
|
| 470 |
+
token=HF_TOKEN,
|
| 471 |
+
)
|
| 472 |
+
except Exception as exc:
|
| 473 |
+
print("Failed to log email to Hugging Face:", exc, file=sys.stderr)
|
| 474 |
raise HTTPException(500, "Failed to save email address")
|
| 475 |
+
return {"stored": True, "path": key}
|