Spaces:
Sleeping
Sleeping
| # Vision tool using Groq's Meta-Llama Scout model | |
| from smolagents import tool | |
| from groq import Groq | |
| import os | |
def _llama_analyze(
    image_b64: str,
    prompt: str,
    *,
    model: str = "meta-llama/llama-4-scout-17b-16e-instruct",
) -> str:
    """Send an image plus a text prompt to a Groq-hosted vision model.

    Args:
        image_b64: Base64-encoded image data. It is embedded in a
            ``data:image/png;base64,...`` URL, so it is expected to be
            PNG-encoded.
        prompt: Instruction or question about the image.
        model: Groq model identifier. The request contains an ``image_url``
            content part, so the model must accept image input; the default
            is the Llama 4 Scout vision model this tool was written for.

    Returns:
        The text of the first completion choice, or an empty string when the
        response carries no text content.
    """
    # The API key is read from the environment on every call.
    client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    image_url = f"data:image/png;base64,{image_b64}"
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        }
    ]
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        stream=False,
        max_completion_tokens=512,
    )
    # `content` may be None; normalise so the declared `str` return holds.
    return response.choices[0].message.content or ""
| def image_reasoning_tool(image_file: str, prompt: str | None = None) -> dict: | |
| """Perform OCR and optional vision analysis on an image. | |
| This single entry point unifies OCR extraction and Llama vision reasoning so | |
| the planner only sees one image tool. | |
| Args: | |
| image_file: Path to the image file to analyze. | |
| prompt: Optional instruction for the vision model. If omitted, only OCR | |
| is performed. | |
| Returns: | |
| Dictionary with OCR text, base64 image data and optional vision model | |
| response. | |
| """ | |
| try: | |
| from PIL import Image | |
| from smolagents.utils import encode_image_base64 | |
| import pytesseract | |
| image = Image.open(image_file) | |
| b64 = encode_image_base64(image) | |
| ocr_text = pytesseract.image_to_string(image) | |
| vision_text = "" | |
| if prompt: | |
| try: | |
| vision_text = _llama_analyze(b64, prompt) | |
| except Exception as e: # vision errors shouldn't break OCR result | |
| vision_text = f"Error processing image with vision model: {e}" | |
| return {"ocr_text": ocr_text, "vision_text": vision_text, "base64_image": b64} | |
| except Exception as e: | |
| return { | |
| "ocr_text": "", | |
| "vision_text": "", | |
| "base64_image": "", | |
| "error": str(e), | |
| } | |