"""
Text Feature Extraction – Hugging Face Inference Endpoint Handler
Extracts all 10 text features from a conversation transcript:
t0_explicit_free, t1_explicit_busy, t2_avg_resp_len, t3_short_ratio,
t4_cognitive_load, t5_time_pressure, t6_deflection, t7_sentiment,
t8_coherence, t9_latency
Derived from: src/text_features.py
"""
# ──────────────────────────────────────────────────────────────────────── #
# Imports from standardized modules
# ──────────────────────────────────────────────────────────────────────── #
try:
    from text_features import TextFeatureExtractor
except ImportError:
    import sys
    sys.path.append('.')
    from text_features import TextFeatureExtractor
# Initialize global extractor
print("[INFO] Initializing Global TextFeatureExtractor...")
# Preload models to avoid first-request latency in the Space runtime.
extractor = TextFeatureExtractor(use_intent_model=True, preload=True)
# ──────────────────────────────────────────────────────────────────────── #
# FastAPI handler for deployment
# ──────────────────────────────────────────────────────────────────────── #
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from pydantic import BaseModel
from typing import Optional, List, Dict
import traceback
import numpy as np
import os
# ──────────────────────────────────────────────────────────────────────── #
# Constants & Defaults
# ──────────────────────────────────────────────────────────────────────── #
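# Fallback feature vector: the global exception handler below returns this
# dict (with HTTP 200) so callers always receive a complete set of keys even
# when extraction fails.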
DEFAULT_TEXT_FEATURES = {
    "t0_explicit_free": 0.0, "t1_explicit_busy": 0.0,
    "t2_avg_resp_len": 0.0, "t3_short_ratio": 0.0,
    "t4_cognitive_load": 0.0, "t5_time_pressure": 0.0,
    "t6_deflection": 0.0, "t7_sentiment": 0.0,
    "t8_coherence": 0.5, "t9_latency": 0.0,
}
app = FastAPI(title="Text Feature Extraction API", version="1.0.0")
def _cors_origins_from_env() -> list[str]:
    raw = (os.getenv("ALLOWED_ORIGINS") or "").strip()
    if not raw:
        return ["*"]
    return [o.strip() for o in raw.split(",") if o.strip()]
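# Example value (hypothetical origins), comma-separated:
#   ALLOWED_ORIGINS="https://my-frontend.example.com,http://localhost:3000"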
_cors_origins = _cors_origins_from_env()
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    # Browsers reject Access-Control-Allow-Origin="*" combined with credentials=true.
    allow_credentials=("*" not in _cors_origins),
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    print(f"[GLOBAL ERROR] {request.url}: {exc}")
    traceback.print_exc()
    # Return the defaults with HTTP 200 so downstream callers still get a usable vector.
    return JSONResponse(
        status_code=200,
        content={**DEFAULT_TEXT_FEATURES, "_error": str(exc), "_handler": "global"},
    )
class TextRequest(BaseModel):
    transcript: str = ""
    # Optional list of extra utterances, if available
    utterances: List[str] = []
    question: str = ""
    events: Optional[List[Dict]] = None
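# Example request body (illustrative values):
#   {
#       "transcript": "Sure, I can talk now. What do you need?",
#       "utterances": ["Sure, I can talk now.", "What do you need?"],
#       "question": "Is now a good time?",
#       "events": []
#   }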
@app.get("/")
async def root():
return {
"service": "Text Feature Extraction API",
"version": "1.0.0",
"endpoints": ["/health", "/extract-text-features"],
}
@app.get("/health")
async def health():
return {
"status": "healthy",
"intent_model_loaded": extractor.use_intent_model,
"models_preloaded": True,
}
@app.post("/extract-text-features")
async def extract_text_features(data: TextRequest):
"""Extract all 9 text features from transcript."""
# Prepare inputs for TextFeatureExtractor.extract_all
# It expects: transcript_list, full_transcript, question, events
transcript_list = data.utterances
if not transcript_list and data.transcript:
transcript_list = [data.transcript]
features = extractor.extract_all(
transcript_list=transcript_list,
full_transcript=data.transcript,
question=data.question,
events=data.events,
)
# Sanitize inputs to ensure floats
sanitized = {}
for k, v in features.items():
if isinstance(v, float):
sanitized[k] = 0.0 if np.isnan(v) or np.isinf(v) else v
else:
sanitized[k] = v
return sanitized
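# Example call (assuming the default Space port 7860):
#   curl -X POST http://localhost:7860/extract-text-features \
#        -H "Content-Type: application/json" \
#        -d '{"transcript": "I am a bit swamped right now.", "question": "Can you talk?"}'
# The response is a JSON object with the 10 feature keys listed in the module docstring.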
if __name__ == "__main__":
    import uvicorn
    port = int(os.environ.get("PORT", 7860))
    uvicorn.run(app, host="0.0.0.0", port=port)