Spaces:
Running
Running
fix: Simplify Dockerfile and add startup logging for HF Space debugging
Browse files
- Dockerfile +12 -15
- Dockerfile.minimal +13 -0
- server/api.py +62 -7
Dockerfile
CHANGED
|
@@ -2,32 +2,29 @@ FROM python:3.12-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
# Install system
|
| 6 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 7 |
gcc g++ curl && \
|
| 8 |
rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
-
#
|
| 11 |
COPY requirements.txt .
|
| 12 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
|
| 14 |
-
# Download spaCy model
|
| 15 |
-
RUN python -m spacy download en_core_web_sm
|
|
|
|
|
|
|
| 16 |
|
| 17 |
-
# Copy
|
| 18 |
COPY . .
|
| 19 |
|
| 20 |
-
#
|
| 21 |
-
RUN chmod +x start.sh
|
| 22 |
-
|
| 23 |
-
# Create output dir
|
| 24 |
-
ENV OUTPUT_DIR=/tmp/geo-output
|
| 25 |
RUN mkdir -p /tmp/geo-output
|
|
|
|
| 26 |
|
| 27 |
-
#
|
| 28 |
-
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
|
| 29 |
-
CMD curl -f http://localhost:7860/health || exit 1
|
| 30 |
-
|
| 31 |
EXPOSE 7860
|
| 32 |
|
| 33 |
-
|
|
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
+
# Install system dependencies
|
| 6 |
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 7 |
gcc g++ curl && \
|
| 8 |
rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
+
# Copy and install Python dependencies
|
| 11 |
COPY requirements.txt .
|
| 12 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 13 |
|
| 14 |
+
# Download spaCy model (with retry)
|
| 15 |
+
RUN python -m spacy download en_core_web_sm || \
|
| 16 |
+
python -m spacy download en_core_web_sm || \
|
| 17 |
+
echo "spaCy model download failed, will retry at startup"
|
| 18 |
|
| 19 |
+
# Copy application code
|
| 20 |
COPY . .
|
| 21 |
|
| 22 |
+
# Create output directory
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
RUN mkdir -p /tmp/geo-output
|
| 24 |
+
ENV OUTPUT_DIR=/tmp/geo-output
|
| 25 |
|
| 26 |
+
# Expose port
|
|
|
|
|
|
|
|
|
|
| 27 |
EXPOSE 7860
|
| 28 |
|
| 29 |
+
# Start application
|
| 30 |
+
CMD ["python", "-m", "uvicorn", "server.api:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "info"]
|
Dockerfile.minimal
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM python:3.12-slim
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# Minimal dependencies
|
| 6 |
+
RUN pip install --no-cache-dir fastapi uvicorn
|
| 7 |
+
|
| 8 |
+
# Create minimal app
|
| 9 |
+
RUN echo 'from fastapi import FastAPI\napp = FastAPI()\n@app.get("/")\ndef read_root():\n return {"status": "ok", "message": "GEO Platform Running"}\n@app.get("/health")\ndef health():\n return {"status": "healthy"}' > main.py
|
| 10 |
+
|
| 11 |
+
EXPOSE 7860
|
| 12 |
+
|
| 13 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
|
server/api.py
CHANGED
|
@@ -12,26 +12,75 @@ from dotenv import load_dotenv
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
# lazily import heavy pipeline to avoid startup failures when optional deps
|
| 16 |
# (like spaCy) are not installed. import inside handlers that need it.
|
| 17 |
run_pipeline = None
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
from server import
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
from fastapi import WebSocket
|
| 25 |
import asyncio
|
| 26 |
|
| 27 |
OUTPUT_DIR = Path(os.environ.get('OUTPUT_DIR', str(Path(__file__).resolve().parent.parent / 'output')))
|
| 28 |
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
| 29 |
|
|
|
|
|
|
|
| 30 |
app = FastAPI(title='GEO Platform API')
|
| 31 |
|
|
|
|
|
|
|
| 32 |
# Serve frontend static files
|
| 33 |
frontend_dir = Path(__file__).resolve().parent.parent / 'frontend'
|
| 34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
class CrawlRequest(BaseModel):
|
| 37 |
url: str
|
|
@@ -342,6 +391,12 @@ async def api_results(ts: str | None = None):
|
|
| 342 |
out['schema'] = schema_path.read_text(encoding='utf-8')
|
| 343 |
return out
|
| 344 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
@app.get('/health')
|
| 346 |
async def health_check():
|
| 347 |
return {'status': 'healthy', 'service': 'GEO Platform'}
|
|
|
|
| 12 |
|
| 13 |
load_dotenv()
|
| 14 |
|
| 15 |
+
print(" GEO Platform API starting...")
|
| 16 |
+
print(f" Working directory: {os.getcwd()}")
|
| 17 |
+
print(f" Python version: {os.sys.version}")
|
| 18 |
+
|
| 19 |
# lazily import heavy pipeline to avoid startup failures when optional deps
|
| 20 |
# (like spaCy) are not installed. import inside handlers that need it.
|
| 21 |
run_pipeline = None
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
from server import ai_visibility
|
| 25 |
+
print(" ai_visibility loaded")
|
| 26 |
+
except Exception as e:
|
| 27 |
+
print(f" ai_visibility failed: {e}")
|
| 28 |
+
ai_visibility = None
|
| 29 |
+
|
| 30 |
+
try:
|
| 31 |
+
from server import ai_analysis
|
| 32 |
+
print(" ai_analysis loaded")
|
| 33 |
+
except Exception as e:
|
| 34 |
+
print(f" ai_analysis failed: {e}")
|
| 35 |
+
ai_analysis = None
|
| 36 |
+
|
| 37 |
+
try:
|
| 38 |
+
from server import job_queue
|
| 39 |
+
print(" job_queue loaded")
|
| 40 |
+
except Exception as e:
|
| 41 |
+
print(f" job_queue failed: {e}")
|
| 42 |
+
job_queue = None
|
| 43 |
+
|
| 44 |
+
try:
|
| 45 |
+
from server import keyword_engine
|
| 46 |
+
print(" keyword_engine loaded")
|
| 47 |
+
except Exception as e:
|
| 48 |
+
print(f" keyword_engine failed: {e}")
|
| 49 |
+
keyword_engine = None
|
| 50 |
+
|
| 51 |
+
try:
|
| 52 |
+
from server import users as user_mgmt
|
| 53 |
+
print(" users loaded")
|
| 54 |
+
except Exception as e:
|
| 55 |
+
print(f" users failed: {e}")
|
| 56 |
+
user_mgmt = None
|
| 57 |
+
|
| 58 |
+
try:
|
| 59 |
+
from server import search_intel
|
| 60 |
+
print(" search_intel loaded")
|
| 61 |
+
except Exception as e:
|
| 62 |
+
print(f" search_intel failed: {e}")
|
| 63 |
+
search_intel = None
|
| 64 |
+
|
| 65 |
from fastapi import WebSocket
|
| 66 |
import asyncio
|
| 67 |
|
| 68 |
OUTPUT_DIR = Path(os.environ.get('OUTPUT_DIR', str(Path(__file__).resolve().parent.parent / 'output')))
|
| 69 |
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
| 70 |
|
| 71 |
+
print(f" Output directory: {OUTPUT_DIR}")
|
| 72 |
+
|
| 73 |
app = FastAPI(title='GEO Platform API')
|
| 74 |
|
| 75 |
+
print(" FastAPI app created")
|
| 76 |
+
|
| 77 |
# Serve frontend static files
|
| 78 |
frontend_dir = Path(__file__).resolve().parent.parent / 'frontend'
|
| 79 |
+
if frontend_dir.exists():
|
| 80 |
+
app.mount('/static', StaticFiles(directory=str(frontend_dir)), name='static')
|
| 81 |
+
print(f" Frontend mounted: {frontend_dir}")
|
| 82 |
+
else:
|
| 83 |
+
print(f" Frontend directory not found: {frontend_dir}")
|
| 84 |
|
| 85 |
class CrawlRequest(BaseModel):
|
| 86 |
url: str
|
|
|
|
| 391 |
out['schema'] = schema_path.read_text(encoding='utf-8')
|
| 392 |
return out
|
| 393 |
|
| 394 |
+
@app.on_event("startup")
|
| 395 |
+
async def startup_event():
|
| 396 |
+
print(" GEO Platform API is ready!")
|
| 397 |
+
print(f" Access at: http://0.0.0.0:7860")
|
| 398 |
+
print(f" Health check: http://0.0.0.0:7860/health")
|
| 399 |
+
|
| 400 |
@app.get('/health')
|
| 401 |
async def health_check():
|
| 402 |
return {'status': 'healthy', 'service': 'GEO Platform'}
|