from huggingface_hub import InferenceClient
import os


class Mistral_7b:
    def __init__(self, token):
        self.client = InferenceClient(api_key=token)
        # Provider-suffixed ids (e.g. :featherless-ai) are not valid HF repo ids.
        # Keep a sane default and allow override via env for experimentation.
        self.model_id = os.getenv("MISTRAL_MODEL_ID", "mistralai/Mistral-7B-Instruct-v0.2")

    def generate_stream(self, prompt, max_tokens=1500, temperature=0.1):
        # Stream the chat completion, yielding text deltas as they arrive.
        try:
            stream = self.client.chat.completions.create(
                model=self.model_id,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens,
                temperature=temperature,
                stream=True,
            )
            for chunk in stream:
                if chunk.choices and chunk.choices[0].delta.content:
                    yield chunk.choices[0].delta.content
        except Exception as e:
            yield f" Mistral Featherless Error: {e}"

    def generate(self, prompt, max_tokens=500, temperature=0.1):
        # Non-streaming wrapper: join the streamed chunks into a single string.
        return "".join(self.generate_stream(prompt, max_tokens=max_tokens, temperature=temperature))