faizr206 committed on
Commit
5fe40ea
·
1 Parent(s): 8b8fced

email to dataset

Browse files
Files changed (2) hide show
  1. Dockerfile +1 -1
  2. app/main.py +45 -9
Dockerfile CHANGED
@@ -6,7 +6,7 @@ COPY app /app
6
  # App deps only (no LaTeX)
7
  RUN python -m pip install --upgrade pip && \
8
  python -m pip install --no-cache-dir \
9
- fastapi "uvicorn[standard]" pydantic python-dotenv google-genai
10
 
11
  ENV PORT=7860
12
  EXPOSE 7860
 
6
  # App deps only (no LaTeX)
7
  RUN python -m pip install --upgrade pip && \
8
  python -m pip install --no-cache-dir \
9
+ fastapi "uvicorn[standard]" pydantic python-dotenv google-genai huggingface-hub
10
 
11
  ENV PORT=7860
12
  EXPOSE 7860
app/main.py CHANGED
@@ -1,4 +1,5 @@
1
- import os, re, uuid, subprocess, sys, time, traceback, threading
 
2
  from collections import deque
3
  from pathlib import Path
4
  from typing import Optional, Tuple
@@ -7,6 +8,8 @@ from fastapi import FastAPI, HTTPException, Response
7
  from fastapi.middleware.cors import CORSMiddleware
8
  from pydantic import BaseModel, validator
9
 
 
 
10
  # Optional .env for local testing
11
  from dotenv import load_dotenv
12
  load_dotenv()
@@ -34,8 +37,23 @@ app.add_middleware(
34
 
35
 
36
  RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
37
- DATA_DIR = Path("data"); DATA_DIR.mkdir(parents=True, exist_ok=True)
38
- EMAILS_FILE = DATA_DIR / "emails.txt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  # ---------------- simple 10 RPM rate limiter ----------------
41
  class RateLimiter:
@@ -394,7 +412,7 @@ class EmailIn(BaseModel):
394
 
395
  @validator("email")
396
  def validate_email(cls, value: str) -> str:
397
- cleaned = value.strip()
398
  if not cleaned:
399
  raise ValueError("Email cannot be empty")
400
  if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", cleaned):
@@ -429,11 +447,29 @@ def generate_and_render(inp: PromptIn):
429
 
430
  @app.post("/store-email")
431
  def store_email(email: EmailIn):
432
- """Append the provided email address to a server-side text file."""
 
 
 
433
  sanitized_email = email.sanitized
 
 
 
 
434
  try:
435
- with EMAILS_FILE.open("a", encoding="utf-8") as fh:
436
- fh.write(f"{sanitized_email}\n")
437
- except Exception:
 
 
 
 
 
 
 
 
 
 
 
438
  raise HTTPException(500, "Failed to save email address")
439
- return {"stored": True}
 
1
+ import json, os, re, uuid, subprocess, sys, time, traceback, threading
2
+ from io import BytesIO
3
  from collections import deque
4
  from pathlib import Path
5
  from typing import Optional, Tuple
 
8
  from fastapi.middleware.cors import CORSMiddleware
9
  from pydantic import BaseModel, validator
10
 
11
+ from huggingface_hub import HfApi, create_repo, CommitOperationAdd
12
+
13
  # Optional .env for local testing
14
  from dotenv import load_dotenv
15
  load_dotenv()
 
37
 
38
 
39
  RUNS = Path("runs"); RUNS.mkdir(parents=True, exist_ok=True)
40
+
41
+ HF_DATASET_ID = os.getenv("HF_DATASET_ID", "MathFrames/email-log")
42
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
43
+
44
+ hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None
45
+ if hf_api:
46
+ try:
47
+ create_repo(
48
+ HF_DATASET_ID,
49
+ repo_type="dataset",
50
+ private=True,
51
+ exist_ok=True,
52
+ token=HF_TOKEN,
53
+ )
54
+ except Exception:
55
+ # Ignore startup race/permission errors; individual writes will surface issues.
56
+ pass
57
 
58
  # ---------------- simple 10 RPM rate limiter ----------------
59
  class RateLimiter:
 
412
 
413
  @validator("email")
414
  def validate_email(cls, value: str) -> str:
415
+ cleaned = value.strip().lower()
416
  if not cleaned:
417
  raise ValueError("Email cannot be empty")
418
  if not re.match(r"^[^@\s]+@[^@\s]+\.[^@\s]+$", cleaned):
 
447
 
448
  @app.post("/store-email")
449
  def store_email(email: EmailIn):
450
+ """Store the provided email address in the configured Hugging Face dataset."""
451
+ if not hf_api or not HF_TOKEN:
452
+ raise HTTPException(500, "Email logging is not configured")
453
+
454
  sanitized_email = email.sanitized
455
+ timestamp = int(time.time())
456
+ key = f"emails/{int(time.time() * 1000)}-{uuid.uuid4().hex}.json"
457
+ payload = {"email": sanitized_email, "ts": timestamp}
458
+
459
  try:
460
+ hf_api.create_commit(
461
+ repo_id=HF_DATASET_ID,
462
+ repo_type="dataset",
463
+ operations=[
464
+ CommitOperationAdd(
465
+ path_in_repo=key,
466
+ path_or_fileobj=BytesIO(json.dumps(payload).encode("utf-8")),
467
+ )
468
+ ],
469
+ commit_message=f"Log email: {sanitized_email}",
470
+ token=HF_TOKEN,
471
+ )
472
+ except Exception as exc:
473
+ print("Failed to log email to Hugging Face:", exc, file=sys.stderr)
474
  raise HTTPException(500, "Failed to save email address")
475
+ return {"stored": True, "path": key}