| """ |
| HuggingFace Spaces Deployment for Stack 2.9 |
| |
| Free inference API on HuggingFace Spaces. |
| https://huggingface.co/docs/hub/spaces-sdks-docker |
| """ |
|
|
| |
| |
| |
| |
|
|
| import os |
| import json |
| from typing import Optional, List, Dict |
| from fastapi import FastAPI, HTTPException |
| from pydantic import BaseModel |
| import requests |
|
|
app = FastAPI(title="Stack 2.9 API")


# Configuration from environment (HF Spaces variables/secrets):
#   MODEL_NAME — model identifier reported by /health, / and /v1/models.
#   API_URL   — base URL of an upstream OpenAI-compatible inference API;
#               when empty, the chat/completion endpoints respond 503.
#   HF_TOKEN  — bearer token forwarded to the upstream API.
MODEL_NAME = os.environ.get("MODEL_NAME", "Qwen/Qwen2.5-Coder-7B-Instruct")
API_URL = os.environ.get("API_URL", "")
HF_TOKEN = os.environ.get("HF_TOKEN", "")
|
|
| |
| |
| |
|
|
class ChatMessage(BaseModel):
    """One turn of an OpenAI-style conversation."""

    role: str  # e.g. "system" / "user" / "assistant" — not validated here
    content: str  # message text
|
|
class ChatRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""

    messages: List[ChatMessage]  # full conversation history, oldest first
    max_tokens: int = 1024  # generation cap forwarded upstream
    temperature: float = 0.7  # sampling temperature forwarded upstream
    top_p: float = 0.9  # nucleus-sampling parameter
|
|
class ChatResponse(BaseModel):
    """Response body for POST /v1/chat/completions."""

    content: str  # assistant reply text
    model: str  # model that produced the reply
    usage: Optional[Dict] = None  # token-usage stats when the backend reports them
|
|
class CompletionRequest(BaseModel):
    """Request body for POST /v1/completions (plain text completion)."""

    prompt: str  # raw prompt text
    max_tokens: int = 512  # generation cap forwarded upstream
    temperature: float = 0.7  # sampling temperature forwarded upstream
|
|
| |
| |
| |
|
|
@app.get("/health")
async def health():
    """Liveness probe: report service status and the configured model name."""
    payload = dict(status="healthy", model=MODEL_NAME)
    return payload
|
|
@app.get("/")
async def root():
    """Service metadata: name, version, configured model, and route map."""
    endpoints = {
        "chat": "/v1/chat/completions",
        "complete": "/v1/completions",
        "health": "/health",
    }
    info = {"name": "Stack 2.9", "version": "1.0.0", "model": MODEL_NAME}
    info["endpoints"] = endpoints
    return info
|
|
| |
| |
| |
|
|
@app.post("/v1/chat/completions", response_model=ChatResponse)
async def chat_completions(request: ChatRequest):
    """OpenAI-compatible chat endpoint.

    Proxies the request to the upstream API configured via API_URL and maps
    the OpenAI-style payload into a ChatResponse.

    Raises:
        HTTPException 503: API_URL is not configured.
        HTTPException 502: upstream call failed or returned an unexpected shape.
    """
    # Guard clause: without an upstream URL there is nothing to call.
    if not API_URL:
        raise HTTPException(
            status_code=503,
            detail="No model API configured. Set API_URL environment variable."
        )

    try:
        response = requests.post(
            f"{API_URL}/v1/chat/completions",
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            json={
                "messages": [m.dict() for m in request.messages],
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
                # Bug fix: top_p was accepted in ChatRequest but never forwarded.
                "top_p": request.top_p,
            },
            timeout=60,
        )
        # Bug fix: surface upstream HTTP errors instead of parsing error bodies.
        response.raise_for_status()
        payload = response.json()
    except requests.RequestException as exc:
        # Network failure, timeout, or non-2xx status — report as a bad gateway
        # rather than letting the exception bubble up as a 500.
        raise HTTPException(
            status_code=502, detail=f"Upstream model API error: {exc}"
        ) from exc

    # Bug fix: the raw OpenAI-style payload ("choices"/"usage") does not match
    # response_model=ChatResponse ("content"/"model"), so returning it directly
    # would fail FastAPI's response validation. Extract the fields explicitly.
    try:
        content = payload["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise HTTPException(
            status_code=502, detail="Unexpected upstream response format"
        ) from exc

    return ChatResponse(
        content=content,
        model=payload.get("model", MODEL_NAME),
        usage=payload.get("usage"),
    )
|
|
@app.post("/v1/completions")
async def completions(request: CompletionRequest):
    """OpenAI-compatible completion endpoint.

    Proxies the prompt to the upstream API configured via API_URL and returns
    the upstream JSON payload unchanged (no response_model, so passthrough is
    safe here).

    Raises:
        HTTPException 503: API_URL is not configured.
        HTTPException 502: upstream call failed.
    """
    # Guard clause: without an upstream URL there is nothing to call.
    if not API_URL:
        raise HTTPException(
            status_code=503,
            detail="No model API configured"
        )

    try:
        response = requests.post(
            f"{API_URL}/v1/completions",
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            json={
                "prompt": request.prompt,
                "max_tokens": request.max_tokens,
                "temperature": request.temperature,
            },
            timeout=60,
        )
        # Bug fix: fail loudly on upstream errors instead of forwarding an
        # error body (or crashing on non-JSON) as if it were a completion.
        response.raise_for_status()
        return response.json()
    except requests.RequestException as exc:
        raise HTTPException(
            status_code=502, detail=f"Upstream model API error: {exc}"
        ) from exc
|
|
| |
| |
| |
|
|
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing (exposes the single configured model)."""
    model_entry = {
        "id": MODEL_NAME,
        "object": "model",
        "created": 1700000000,
        "owned_by": "stack-2.9",
    }
    return {"object": "list", "data": [model_entry]}
|
|
| |
| |
| |
|
|
if __name__ == "__main__":
    import uvicorn
    # HF Spaces' Docker SDK routes traffic to port 7860 by default;
    # PORT overrides it for local runs or other hosts.
    port = int(os.environ.get("PORT", "7860"))
    uvicorn.run(app, host="0.0.0.0", port=port)