# Provenance (paste residue converted to comments so the file parses):
#   author: walidsobhie-code
#   feat: Add free deployment support for Stack 2.9
#   commit: 239da7a
"""
HuggingFace Spaces Deployment for Stack 2.9
Free inference API on HuggingFace Spaces.
https://huggingface.co/docs/hub/spaces-sdks-docker
"""
# =============================================================================
# app.py - Stack 2.9 Inference API
# Deploy this to HuggingFace Spaces for free inference
# =============================================================================
import os
import json
from typing import Optional, List, Dict
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import requests
# FastAPI application instance; every route below attaches to it.
app = FastAPI(title="Stack 2.9 API")
# Model configuration — all values are read from the environment once, at import time.
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-Coder-7B-Instruct")  # model id reported by /health and /v1/models
API_URL = os.environ.get("API_URL", "") # Your model API URL; empty string means "not configured" (endpoints return 503)
HF_TOKEN = os.environ.get("HF_TOKEN", "") # HuggingFace token forwarded as a Bearer credential to API_URL
# ============================================================================
# Request/Response Models
# ============================================================================
class ChatMessage(BaseModel):
    """A single chat turn in OpenAI message format."""

    # Speaker role — presumably "system" / "user" / "assistant"; not validated here (TODO confirm).
    role: str
    # Message text for this turn.
    content: str
class ChatRequest(BaseModel):
    """Request body for /v1/chat/completions (OpenAI-compatible subset)."""

    # Conversation history, oldest first.
    messages: List[ChatMessage]
    # Generation limits / sampling knobs with sensible defaults.
    max_tokens: int = 1024
    temperature: float = 0.7
    top_p: float = 0.9
class ChatResponse(BaseModel):
    """Response body declared for /v1/chat/completions."""

    # Generated assistant text.
    content: str
    # Identifier of the model that produced the output.
    model: str
    # Optional token-usage accounting; shape depends on the upstream API (not fixed here).
    usage: Optional[Dict] = None
class CompletionRequest(BaseModel):
    """Request body for /v1/completions (plain text completion)."""

    # Raw prompt text to complete.
    prompt: str
    # Generation limits with defaults.
    max_tokens: int = 512
    temperature: float = 0.7
# ============================================================================
# Health Check
# ============================================================================
@app.get("/health")
async def health():
    """Liveness probe: report service status and the configured model id."""
    return dict(status="healthy", model=MODEL_NAME)
@app.get("/")
async def root():
    """Service index: name, version, configured model, and available endpoints."""
    available_endpoints = {
        "chat": "/v1/chat/completions",
        "complete": "/v1/completions",
        "health": "/health",
    }
    return {
        "name": "Stack 2.9",
        "version": "1.0.0",
        "model": MODEL_NAME,
        "endpoints": available_endpoints,
    }
# ============================================================================
# OpenAI-Compatible API
# ============================================================================
@app.post("/v1/chat/completions", response_model=ChatResponse)
async def chat_completions(request: ChatRequest):
    """OpenAI-compatible chat endpoint.

    Proxies the request to the external model API configured via the
    API_URL environment variable.

    Raises:
        HTTPException(503): no upstream API configured.
        HTTPException(502): the upstream call failed or returned an error status.
    """
    # Guard clause: without an upstream there is nothing to proxy to.
    if not API_URL:
        raise HTTPException(
            status_code=503,
            detail="No model API configured. Set API_URL environment variable."
        )
    # Only attach Authorization when a token is actually set; an empty
    # "Bearer " credential can be rejected by some gateways.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    try:
        response = requests.post(
            f"{API_URL}/v1/chat/completions",
            headers=headers,
            json={
                "messages": [m.dict() for m in request.messages],
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                # Fix: top_p was accepted in ChatRequest but silently dropped.
                "top_p": request.top_p,
            },
            timeout=60,
        )
        # Turn upstream 4xx/5xx into an exception instead of forwarding
        # an error payload as if it were a completion.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Surface connection/timeout/status failures as 502 rather than an
        # unhandled 500 with a stack trace.
        raise HTTPException(
            status_code=502,
            detail=f"Upstream model API error: {exc}",
        ) from exc
    return response.json()
@app.post("/v1/completions")
async def completions(request: CompletionRequest):
    """OpenAI-compatible text completion endpoint.

    Proxies the prompt to the external model API configured via API_URL.

    Raises:
        HTTPException(503): no upstream API configured.
        HTTPException(502): the upstream call failed or returned an error status.
    """
    # Guard clause: without an upstream there is nothing to proxy to.
    if not API_URL:
        raise HTTPException(
            status_code=503,
            detail="No model API configured"
        )
    # Only attach Authorization when a token is actually set.
    headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}
    try:
        response = requests.post(
            f"{API_URL}/v1/completions",
            headers=headers,
            json={
                "prompt": request.prompt,
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
            },
            timeout=60,
        )
        # Reject upstream error statuses instead of relaying their bodies.
        response.raise_for_status()
    except requests.RequestException as exc:
        # Surface connection/timeout/status failures as 502 rather than an
        # unhandled 500 with a stack trace.
        raise HTTPException(
            status_code=502,
            detail=f"Upstream model API error: {exc}",
        ) from exc
    return response.json()
# ============================================================================
# Model Info
# ============================================================================
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing — exposes the single configured model."""
    model_entry = {
        "id": MODEL_NAME,
        "object": "model",
        "created": 1700000000,
        "owned_by": "stack-2.9",
    }
    return {"object": "list", "data": [model_entry]}
# ============================================================================
# Run Server
# ============================================================================
if __name__ == "__main__":
    # Local/dev entry point; HuggingFace Spaces exposes port 7860 by default.
    import uvicorn
    # PORT may be overridden by the hosting environment.
    port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=port)