| from mcp.server.fastmcp import FastMCP |
| import os |
| from typing import Optional, List, Any, Dict |
| from huggingface_hub import InferenceClient |
|
|
| |
# MCP server instance; every @mcp.tool() function below is registered on it.
mcp = FastMCP("Hugging Face tools")


# Hugging Face API token, read from the environment at import time.
# A missing token is non-fatal: anonymous access still works for public
# models, but gated or rate-limited endpoints may reject requests.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    print("Warning: HF_TOKEN environment variable not set. Some authenticated requests may fail.")


# Single shared inference client used by all tools (token may be None).
client = InferenceClient(token=HF_TOKEN)
|
|
| @mcp.tool() |
def list_available_tasks() -> str:
    """Return a single comma-separated line naming every AI task this server supports."""
    supported = (
        "Audio-Text-to-Text", "Image-Text-to-Text", "Image-Text-to-Image",
        "Image-Text-to-Video", "Visual Question Answering", "Document Question Answering",
        "Video-Text-to-Text", "Visual Document Retrieval", "Depth Estimation",
        "Image Classification", "Object Detection", "Image Segmentation",
        "Text-to-Image", "Image-to-Text", "Image-to-Image", "Image-to-Video",
        "Unconditional Image Generation", "Video Classification", "Text-to-Video",
        "Zero-Shot Image Classification", "Mask Generation", "Zero-Shot Object Detection",
        "Text-to-3D", "Image-to-3D", "Image Feature Extraction", "Keypoint Detection",
        "Video-to-Video", "Text Classification", "Token Classification",
        "Table Question Answering", "Question Answering", "Zero-Shot Classification",
        "Translation", "Summarization", "Feature Extraction", "Text Generation",
        "Fill-Mask", "Sentence Similarity", "Text Ranking", "Text-to-Speech",
        "Text-to-Audio", "Automatic Speech Recognition", "Audio-to-Audio",
        "Audio Classification", "Voice Activity Detection", "Tabular Classification",
        "Tabular Regression", "Time Series Forecasting", "Reinforcement Learning",
        "Robotics", "Graph Machine Learning",
    )
    return "Supported Tasks: " + ", ".join(supported)
|
|
@mcp.tool()
def visual_question_answering(image: str, question: str, model: Optional[str] = None) -> str:
    """
    Answer a natural-language question about an image.
    Args:
        image: URL or Base64 string of the image.
        question: The question to answer.
        model: Optional model ID (e.g., 'dandelin/vilt-b32-finetuned-vqa').
    """
    try:
        answer = client.visual_question_answering(image, question, model=model)
        return str(answer)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def text_to_image(prompt: str, model: Optional[str] = None) -> str:
    """
    Generate an image from a text prompt.
    Returns: Base64 encoded image string.
    """
    try:
        import io

        import utils  # project-local helper module (PIL re-export + encoder)

        generated = client.text_to_image(prompt, model=model)
        if not isinstance(generated, utils.Image.Image):
            # The client handed back raw bytes; decode them into a PIL image.
            generated = utils.Image.open(io.BytesIO(generated))
        return utils.encode_image(generated)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def image_classification(image: str, model: Optional[str] = None) -> str:
    """
    Assign class labels to an image.
    Args:
        image: URL or Base64 string.
    """
    try:
        labels = client.image_classification(image, model=model)
        return str(labels)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def object_detection(image: str, model: Optional[str] = None) -> str:
    """
    Locate and label objects within an image.
    Args:
        image: URL or Base64 string.
    """
    try:
        detections = client.object_detection(image, model=model)
        return str(detections)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def image_to_text(image: str, model: Optional[str] = None) -> str:
    """
    Produce a caption or textual description of an image.
    Args:
        image: URL or Base64 string.
    """
    try:
        caption = client.image_to_text(image, model=model)
        return str(caption)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def text_generation(prompt: str, model: Optional[str] = None, max_new_tokens: int = 500) -> str:
    """
    Continue/complete a text prompt with a language model.
    Args:
        prompt: Input text.
        model: Model ID.
        max_new_tokens: Maximum tokens to generate.
    """
    try:
        completion = client.text_generation(prompt, model=model, max_new_tokens=max_new_tokens)
        return completion
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def summarization(text: str, model: Optional[str] = None) -> str:
    """
    Produce a condensed summary of the input text.
    """
    try:
        output = client.summarization(text, model=model)
        # Some API versions return a list of dicts keyed by 'summary_text';
        # unwrap that shape, otherwise stringify whatever came back.
        if isinstance(output, list) and output:
            return output[0].get('summary_text', str(output))
        return str(output)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def translation(text: str, model: Optional[str] = None) -> str:
    """
    Translate text; the chosen model usually fixes the source/target languages.
    """
    try:
        output = client.translation(text, model=model)
        # Legacy list-of-dicts responses carry the text under 'translation_text'.
        if isinstance(output, list) and output:
            return output[0].get('translation_text', str(output))
        return str(output)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def text_classification(text: str, model: Optional[str] = None) -> str:
    """
    Classify a piece of text (e.g. sentiment analysis).
    """
    try:
        scores = client.text_classification(text, model=model)
        return str(scores)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def automatic_speech_recognition(audio: str, model: Optional[str] = None) -> str:
    """
    Transcribe spoken audio to text.
    Args:
        audio: URL or Base64 string of the audio file.
    """
    try:
        import base64

        if audio.startswith(("http://", "https://")):
            # URLs are passed through untouched for the client to fetch.
            transcript = client.automatic_speech_recognition(audio, model=model)
        else:
            # Anything else is treated as Base64-encoded raw audio bytes.
            transcript = client.automatic_speech_recognition(base64.b64decode(audio), model=model)
        if isinstance(transcript, dict):
            return transcript.get('text', str(transcript))
        return str(transcript)
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def text_to_speech(text: str, model: Optional[str] = None) -> str:
    """
    Synthesize speech audio from text.
    Returns: Base64 encoded audio.
    """
    try:
        import base64

        raw_audio = client.text_to_speech(text, model=model)
        return base64.b64encode(raw_audio).decode('utf-8')
    except Exception as e:
        return f"Error: {e}"
|
|
@mcp.tool()
def generic_hf_inference(task: str, inputs: Dict[str, Any], model: Optional[str] = None) -> str:
    """
    Run any Hugging Face inference task that doesn't have a specific tool.
    Args:
        task: The task name (e.g., 'text-generation', 'translation').
        inputs: Dictionary of inputs required for the task.
        model: Model ID to use.
    Returns:
        The raw API response stringified, or an "Error: ..." message on failure.
    """
    try:
        # NOTE(review): InferenceClient.post() is deprecated and removed in
        # recent huggingface_hub releases — confirm the pinned version, or
        # migrate to the task-specific client methods.
        result = client.post(json=inputs, model=model, task=task)
        return str(result)
    except Exception as e:
        return f"Error: {e}"
|
|
|
|