# NLP-RAG/models/mistral_7b.py
# Author: Qar-Raz
# hf-space: deploy branch without frontend/data/results (commit c7256ee)
from huggingface_hub import InferenceClient
import os
class Mistral_7b:
    """Thin wrapper around the Hugging Face Inference API for a Mistral chat model.

    The target model defaults to ``mistralai/Mistral-7B-Instruct-v0.2`` and can
    be overridden through the ``MISTRAL_MODEL_ID`` environment variable.
    Provider-suffixed ids (e.g. ``:featherless-ai``) are not valid HF repo ids,
    so the default stays a plain repo id.
    """

    def __init__(self, token):
        """Build the inference client.

        Args:
            token: Hugging Face API token used to authenticate requests.
        """
        # Env override kept for experimentation; see class docstring for why
        # the default is a bare repo id without a provider suffix.
        self.model_id = os.getenv("MISTRAL_MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.2")
        self.client = InferenceClient(api_key=token)

    def generate_stream(self, prompt, max_tokens=1500, temperature=0.1):
        """Yield response text fragments for *prompt* as they arrive.

        Best-effort by design: any exception raised while streaming is turned
        into a single yielded error string instead of propagating, so the
        consumer's loop never breaks.
        """
        messages = [{"role": "user", "content": prompt}]
        try:
            chunk_iter = self.client.chat.completions.create(
                model=self.model_id,
                messages=messages,
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
            )
            for piece in chunk_iter:
                # Guard clauses: skip keep-alive chunks without choices and
                # deltas with empty/None content.
                if not piece.choices:
                    continue
                text = piece.choices[0].delta.content
                if text:
                    yield text
        except Exception as err:  # deliberate: surface errors inline to the stream consumer
            yield f" Mistral Featherless Error: {err}"

    def generate(self, prompt, max_tokens=500, temperature=0.1):
        """Return the full completion for *prompt* as a single string.

        Non-streaming facade over :meth:`generate_stream`; note its own
        ``max_tokens`` default (500) differs from the streaming default (1500).
        """
        fragments = self.generate_stream(prompt, max_tokens=max_tokens, temperature=temperature)
        return "".join(fragments)