from fastapi import FastAPI, HTTPException, Header, Depends
from pydantic import BaseModel
import requests
import os
import json
from typing import Optional, Dict


app = FastAPI(title="CygnisAI Studio API")

# Hugging Face inference token; required for every upstream model call.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Client-facing API key.
# SECURITY NOTE(review): the fallback value is a hard-coded demo credential
# committed to source — anyone who reads this file can authenticate. Set
# CYGNIS_API_KEY in the environment for any non-demo deployment.
CYGNIS_API_KEY = os.environ.get("CYGNIS_API_KEY", "cgn_live_stable_demo_api_key_012345")

# Maps the model names exposed to clients (keys) to the Hugging Face model
# ids actually invoked (values). Unknown names fall back to "default".
MODELS = {
    "google/gemma-3-27b-it": "google/gemma-2-9b-it",
    "openai/gpt-oss-120b": "meta-llama/Meta-Llama-3.1-70B-Instruct",
    "Qwen/Qwen3-VL-8B-Thinking": "Qwen/Qwen2.5-7B-Instruct",
    "XiaomiMiMo/MiMo-V2-Flash": "microsoft/Phi-3.5-mini-instruct",
    "deepseek-ai/DeepSeek-V3.2": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "meta-llama/Llama-4-Scout-17B-16E-Instruct": "meta-llama/Llama-3.2-3B-Instruct",
    "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "default": "Qwen/Qwen2.5-7B-Instruct",
}

# Model tried as a last resort when the routed model fails (see ask_model).
SAFETY_NET_MODEL = "microsoft/Phi-3.5-mini-instruct"

# Base URL of the Hugging Face inference router.
HF_ROUTER_BASE = "https://router.huggingface.co/hf-inference/models"
| |
|
class ChatRequest(BaseModel):
    """Payload for POST /api/ask: a question plus optional generation settings."""

    question: str
    model: Optional[str] = "default"        # key into MODELS; unknown -> default
    system_prompt: Optional[str] = None     # optional system message prepended to the chat
    temperature: Optional[float] = 0.7      # sampling temperature forwarded upstream
    max_tokens: Optional[int] = 1024        # generation length cap forwarded upstream
| |
|
class ChatResponse(BaseModel):
    """Response body for POST /api/ask."""

    answer: str          # generated text (or a human-readable error message)
    model_used: str      # HF model id actually used, or an error-handler tag
    sources: list = []   # reserved for citations; currently always empty
| |
|
async def verify_api_key(authorization: str = Header(None)):
    """FastAPI dependency validating the ``Authorization: Bearer <key>`` header.

    Raises:
        HTTPException(401): when the header is missing, malformed, uses a
            non-Bearer scheme, or carries a key other than CYGNIS_API_KEY.

    Returns:
        True on successful authentication.

    BUG FIX: the previous version only printed warnings for a missing header
    or a wrong key and swallowed malformed headers (``except ValueError:
    pass``), so every request was effectively authorized. All failure paths
    now reject the request.
    """
    if not authorization:
        raise HTTPException(status_code=401, detail="Missing Authorization header")
    try:
        scheme, token = authorization.split()
    except ValueError:
        # e.g. "Bearer" alone, or extra whitespace-separated parts
        raise HTTPException(status_code=401, detail="Invalid Authorization header format")
    if scheme.lower() != 'bearer':
        raise HTTPException(status_code=401, detail="Invalid authentication scheme")
    if token != CYGNIS_API_KEY:
        # Deliberately do not log the presented secret.
        print("⚠️ Invalid API Key")
        raise HTTPException(status_code=401, detail="Invalid API key")
    return True
| |
|
@app.get("/")
def read_root():
    """Health-check endpoint: reports service status and whether HF_TOKEN is configured."""
    payload = {
        "status": "online",
        "service": "CygnisAI Studio API",
        "hf_token_set": bool(HF_TOKEN),
    }
    return payload
| |
|
def call_hf_api(model_id, messages, req):
    """Call the Hugging Face router for *model_id* with Chat/Standard handling.

    Tries the OpenAI-compatible chat-completions endpoint first; if the router
    answers 404/405 (model not served through the chat route), retries against
    the standard text-generation endpoint with a manually flattened prompt.

    Args:
        model_id: Hugging Face model id to call.
        messages: list of {"role": ..., "content": ...} chat messages.
        req: ChatRequest carrying max_tokens and temperature.

    Returns:
        The final requests.Response; the caller inspects status_code and
        parses the JSON body.
    """
    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json"
    }

    hf_chat_url = f"{HF_ROUTER_BASE}/{model_id}/v1/chat/completions"
    payload_chat = {
        "model": model_id,
        "messages": messages,
        "max_tokens": req.max_tokens,
        "temperature": req.temperature,
        "stream": False
    }

    print(f"🚀 Calling HF Chat API: {hf_chat_url}")
    # BUG FIX: requests.post without a timeout can block a worker forever if
    # the upstream hangs; bound both calls.
    response = requests.post(hf_chat_url, headers=headers, json=payload_chat, timeout=120)

    if response.status_code in (404, 405):
        print(f"🔄 Fallback to standard inference API (Status {response.status_code})")
        api_url = f"{HF_ROUTER_BASE}/{model_id}"

        # Flatten the chat history into a tagged prompt string for the
        # text-generation endpoint; roles outside the known three are skipped,
        # matching the original behavior.
        parts = []
        for msg in messages:
            role = msg['role']
            content = msg['content']
            if role in ('system', 'user', 'assistant'):
                parts.append(f"<|{role}|>\n{content}\n")
        parts.append("<|assistant|>\n")
        prompt_str = "".join(parts)

        payload_standard = {
            "inputs": prompt_str,
            "parameters": {
                "max_new_tokens": req.max_tokens,
                "temperature": req.temperature,
                "return_full_text": False
            }
        }
        print(f"🚀 Calling HF Standard API: {api_url}")
        response = requests.post(api_url, headers=headers, json=payload_standard, timeout=120)

    return response
| |
|
def _extract_answer(data):
    """Pull generated text from a chat-completions or text-generation payload.

    Handles three shapes: an OpenAI-style dict with "choices", a list of
    {"generated_text": ...} items, or a bare {"generated_text": ...} dict.
    Returns a parse-error message for anything else.
    """
    if isinstance(data, dict) and data.get("choices"):
        return data["choices"][0]["message"]["content"]
    if isinstance(data, list) and data and "generated_text" in data[0]:
        return data[0]["generated_text"]
    if isinstance(data, dict) and "generated_text" in data:
        return data["generated_text"]
    print(f"⚠️ Unknown response format: {data}")
    return "Error: Could not parse model response."


@app.post("/api/ask", response_model=ChatResponse)
async def ask_model(req: ChatRequest, authorized: bool = Depends(verify_api_key)):
    """Route a chat question to the configured model with layered fallbacks.

    Flow:
      1. Call the model mapped from req.model (or the "default" mapping).
      2. On a non-200, retry once with SAFETY_NET_MODEL.
      3. If that also fails, return a canned apology so the client always
         receives a well-formed ChatResponse instead of a 5xx.

    Any unexpected exception is caught at this boundary and converted into a
    generic error answer (no stack trace leaks to the client).
    """
    print(f"📩 Received request: {req.question[:50]}...")

    if not HF_TOKEN:
        print("❌ CRITICAL: HF_TOKEN is missing!")
        return {
            "answer": "Configuration Error: HF_TOKEN is missing on the server.",
            "model_used": "error-handler",
            "sources": []
        }

    model_id = MODELS.get(req.model, MODELS["default"])
    print(f"🤖 Routing request to: {model_id}")

    # Build the chat history: optional system prompt, then the user question.
    messages = []
    if req.system_prompt:
        messages.append({"role": "system", "content": req.system_prompt})
    messages.append({"role": "user", "content": req.question})

    try:
        response = call_hf_api(model_id, messages, req)

        # First fallback: the safety-net model.
        if response.status_code != 200:
            print(f"⚠️ Primary model failed ({response.status_code}). Switching to SAFETY NET: {SAFETY_NET_MODEL}")
            model_id = SAFETY_NET_MODEL
            response = call_hf_api(SAFETY_NET_MODEL, messages, req)

        # Second fallback: canned reply, keeping the response schema valid.
        if response.status_code != 200:
            print(f"❌ ALL MODELS FAILED. Returning mock response. Last error: {response.text}")
            return {
                "answer": "Je suis désolé, mes serveurs de réflexion sont actuellement surchargés ou inaccessibles. Je ne peux pas traiter votre demande pour le moment. Veuillez réessayer dans quelques minutes.",
                "model_used": "fallback-mock",
                "sources": []
            }

        answer = _extract_answer(response.json())

        return {
            "answer": answer,
            "model_used": model_id,
            "sources": []
        }

    except Exception as e:
        # Top-level boundary: log and degrade gracefully.
        print(f"❌ Internal Exception: {str(e)}")
        return {
            "answer": "Une erreur interne inattendue s'est produite. Mes excuses.",
            "model_used": "exception-handler",
            "sources": []
        }
| |
|
if __name__ == "__main__":
    # Launch a local server when the module is executed directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
| |
|