Spaces:
Running
Running
| """ | |
| LLM module. HuggingFace Inference API as primary, OpenRouter as fallback. | |
| """ | |
| import os | |
| import logging | |
| from dotenv import load_dotenv | |
| from tenacity import retry, stop_after_attempt, wait_exponential | |
| load_dotenv() | |
| logger = logging.getLogger(__name__) | |
# ── OpenRouter (fallback) ─────────────────────────────────
_openrouter_client = None
# Free-tier OpenRouter models, tried in priority order by _call_openrouter.
_openrouter_models = [
    "arcee-ai/trinity-large-preview:free",
    "nvidia/nemotron-3-super-120b-a12b:free",
    "nvidia/nemotron-3-nano-30b-a3b:free"
]
# ── HuggingFace Inference API (primary) ───────────────────
_hf_client = None
def _init_openrouter():
    """Initialize the module-level OpenRouter client from OPENROUTER_API_KEY.

    Returns:
        bool: True if the client was created; False when the key is missing
        or initialization raised (logged, never propagated).
    """
    global _openrouter_client
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        logger.warning("OPENROUTER_API_KEY not set — OpenRouter disabled")
        return False
    try:
        # Imported lazily so the module still loads when openai is absent.
        from openai import OpenAI
        _openrouter_client = OpenAI(
            base_url="https://openrouter.ai/api/v1",
            api_key=api_key,
        )
    except Exception as e:
        # Best-effort init at import time: report failure instead of crashing.
        logger.error("OpenRouter init failed: %s", e)
        return False
    logger.info("OpenRouter ready (fallback)")
    return True
def _init_hf():
    """Initialize the module-level HF Inference API client from HF_TOKEN.

    Returns:
        bool: True if the client was created; False when the token is missing
        or initialization raised (logged, never propagated).
    """
    global _hf_client
    token = os.getenv("HF_TOKEN")
    if not token:
        logger.warning("HF_TOKEN not set — HF Inference API disabled")
        return False
    try:
        # Imported lazily so the module still loads when huggingface_hub is absent.
        from huggingface_hub import InferenceClient
        _hf_client = InferenceClient(
            model="meta-llama/Llama-3.3-70B-Instruct",
            token=token
        )
    except Exception as e:
        # Best-effort init at import time: report failure instead of crashing.
        logger.error("HF Inference API init failed: %s", e)
        return False
    logger.info("HF Inference API ready (primary)")
    return True
# Probe both providers once at import time; the readiness flags let
# _call_with_fallback skip providers whose credentials/clients are missing.
_openrouter_ready = _init_openrouter()
_hf_ready = _init_hf()
def _call_openrouter(messages: list) -> str:
    """Call OpenRouter, trying each configured model in priority order.

    Args:
        messages: Chat messages in OpenAI chat-completions format.

    Returns:
        The content of the first successful completion.

    Raises:
        RuntimeError: If every configured model fails. (Subclass of
        Exception, so existing broad handlers in callers still catch it.)
    """
    for model in _openrouter_models:
        try:
            response = _openrouter_client.chat.completions.create(
                model=model,
                messages=messages,
                max_tokens=1500,
                temperature=0.3,
            )
        except Exception as e:
            # Free-tier models are flaky/rate-limited; fall through to the next.
            logger.warning("OpenRouter model %s failed: %s", model, e)
            continue
        logger.info("OpenRouter success with %s", model)
        return response.choices[0].message.content
    raise RuntimeError("All OpenRouter models failed")
def _call_hf(messages: list) -> str:
    """Run a chat completion through the HuggingFace Inference API client."""
    completion = _hf_client.chat_completion(
        messages=messages,
        temperature=0.3,
        max_tokens=1500,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
def _call_with_fallback(messages: list) -> str:
    """Route a chat request to HF Inference first, then OpenRouter.

    Args:
        messages: Chat messages in OpenAI chat-completions format.

    Returns:
        The completion text from the first provider that succeeds.

    Raises:
        RuntimeError: If no provider is available or every provider fails.
        (Subclass of Exception, so existing broad handlers still catch it.)
    """
    if _hf_ready and _hf_client:
        try:
            return _call_hf(messages)
        except Exception as e:
            logger.warning("HF Inference failed: %s, trying OpenRouter", e)
    if _openrouter_ready and _openrouter_client:
        try:
            return _call_openrouter(messages)
        except Exception as e:
            logger.error("OpenRouter also failed: %s", e)
    raise RuntimeError("All LLM providers failed")
def call_llm_raw(messages: list) -> str:
    """Send an already-constructed message list straight to the LLM.

    Entry point used by the V2 agent for Pass 1 and Pass 3.
    """
    return _call_with_fallback(messages)
def call_llm(query: str, context: str) -> str:
    """Answer a legal question from retrieved context via the LLM.

    Entry point used by the V1 agent: wraps the query and source excerpts
    in the NyayaSetu system/user prompt pair before dispatching.
    """
    system_message = {
        "role": "system",
        "content": "You are NyayaSetu, an Indian legal research assistant. Answer only from provided excerpts. Cite judgment IDs. End with: NOTE: This is not legal advice."
    }
    user_message = {
        "role": "user",
        "content": f"QUESTION: {query}\n\nSOURCES:\n{context}\n\nAnswer based on sources. Cite judgment IDs."
    }
    return _call_with_fallback([system_message, user_message])