"""
Translation Agent

Translates non-English comments to English using an LLM.
"""

import json
import logging
from typing import Any, Dict

from langchain.schema import HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI

from agents.base_agent import BaseAgent

# Module-level logger named after this module.
logger = logging.getLogger(__name__)


class TranslationAgent(BaseAgent):
    """
    Agent that translates text from a source language to English.

    Comments already flagged as English are passed through untouched; all
    others are sent to an LLM for context-aware translation.
    """

    # System prompt sent with every translation request. It asks the model for
    # a JSON object with translated_text / translation_confidence / notes,
    # which translate_text() reads back.
    _SYSTEM_PROMPT = (
        "You are a professional translator specializing in social media "
        "content related to music and education.\n"
        "Translate the given text from the source language to English. "
        "The text is a comment on a musical content.\n"
        "Preserve the tone, intent, and any emojis or special characters.\n"
        "For informal social media language, maintain the casual tone in "
        "translation.\n"
        "\n"
        "Return your response in JSON format with the following fields:\n"
        "- translated_text: The English translation\n"
        "- translation_confidence: Your confidence level (high, medium, low)\n"
        "- notes: Any important notes about the translation (optional)\n"
    )

    def __init__(self, config: Dict[str, Any], api_key: str):
        """
        Initialize the Translation Agent.

        Args:
            config: Configuration dictionary, forwarded to BaseAgent.
            api_key: OpenAI API key used to build the chat model client.
        """
        super().__init__("TranslationAgent", config)
        self.api_key = api_key
        # NOTE(review): self.model and self.temperature are not assigned in
        # this class; presumably BaseAgent.__init__ derives them from config
        # -- confirm against the base class.
        self.llm = ChatOpenAI(
            model=self.model,
            temperature=self.temperature,
            api_key=self.api_key,
        )

    def validate_input(self, input_data: Dict[str, Any]) -> bool:
        """
        Validate that input contains the required fields.

        Args:
            input_data: Input dictionary

        Returns:
            True if both "comment_text" and "is_english" are present,
            False otherwise (including when input_data is None or another
            object that does not support membership tests).
        """
        required_fields = ("comment_text", "is_english")
        try:
            return all(field in input_data for field in required_fields)
        except TypeError:
            # e.g. None or an int: not a container, therefore not valid input.
            return False

    @staticmethod
    def _failed_translation(text: str, notes: str, error: Exception) -> Dict[str, Any]:
        """Build the failure result, echoing the original text untranslated."""
        return {
            "success": False,
            "translated_text": text,
            "translation_confidence": "low",
            "translation_notes": notes,
            "error": str(error),
        }

    def translate_text(self, text: str, source_language: str) -> Dict[str, Any]:
        """
        Translate text from the source language to English using the LLM.

        Args:
            text: Text to translate
            source_language: Source language name

        Returns:
            Dictionary with "success", "translated_text",
            "translation_confidence" and "translation_notes". On failure
            "success" is False, "translated_text" is the original text and
            an "error" message is included. This method does not raise.
        """
        user_prompt = (
            f"Translate this {source_language} comment to English:\n"
            "\n"
            f'"{text}"\n'
            "\n"
            "Return JSON only."
        )

        try:
            messages = [
                SystemMessage(content=self._SYSTEM_PROMPT),
                HumanMessage(content=user_prompt),
            ]
            response = self.llm.invoke(messages)
            result = self._parse_llm_json_response(response.content)

            # Use `or` rather than .get(key, default): the model may emit an
            # explicit null/empty value (typical for the optional "notes"),
            # which .get() would pass through as None.
            return {
                "success": True,
                "translated_text": result.get("translated_text") or text,
                "translation_confidence": result.get("translation_confidence") or "medium",
                "translation_notes": result.get("notes") or "",
            }

        except json.JSONDecodeError as e:
            self.log_processing(f"JSON decode error: {e}", "warning")
            return self._failed_translation(text, "JSON parsing failed", e)

        except Exception as e:
            # Boundary handler: any client/network failure degrades to an
            # untranslated result instead of aborting the caller.
            self.log_processing(f"Translation failed: {e}", "error")
            return self._failed_translation(text, "Translation error", e)

    def process(self, input_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a comment and translate it if needed.

        Args:
            input_data: Dictionary containing comment data with language
                info. Requires "comment_text" and "is_english"; "language"
                is optional and defaults to "Unknown".

        Returns:
            Dictionary with translation results. Every key of input_data
            that the result does not already define is copied into it.
        """
        try:
            if not self.validate_input(input_data):
                return {
                    "success": False,
                    "error": "Invalid input: missing required fields",
                    "translated_text": input_data.get("comment_text", ""),
                    "translation_performed": False,
                }

            comment_text = input_data["comment_text"]
            source_language = input_data.get("language", "Unknown")

            if input_data["is_english"]:
                result = {
                    "success": True,
                    "translated_text": comment_text,
                    "translation_performed": False,
                    "translation_confidence": "N/A",
                    "translation_notes": "Original text is English",
                }
                self.log_processing("Text is already English, skipping translation", "debug")
            else:
                self.log_processing(
                    f"Translating from {source_language} to English",
                    "debug",
                )
                translation_result = self.translate_text(comment_text, source_language)

                # translation_performed records that a translation was
                # attempted; "success" tells whether it actually worked.
                result = {
                    "success": translation_result.get("success", True),
                    "translated_text": translation_result.get("translated_text", comment_text),
                    "translation_performed": True,
                    "translation_confidence": translation_result.get("translation_confidence", "medium"),
                    "translation_notes": translation_result.get("translation_notes", ""),
                }
                if "error" in translation_result:
                    result["translation_error"] = translation_result["error"]

            # Carry the remaining input fields through without overwriting
            # anything computed above.
            for key, value in input_data.items():
                result.setdefault(key, value)

            return result

        except Exception as e:
            return self.handle_error(e, "translation")

    def _parse_llm_json_response(self, response_content: str) -> Dict[str, Any]:
        """
        Parse an LLM response that may wrap its JSON in markdown or prose.

        Handles a bare JSON object, an object inside a code fence (with or
        without a "json" tag, in any letter case), and an object surrounded
        by extra text before or after it.

        Args:
            response_content: Raw response content from LLM

        Returns:
            Parsed JSON dictionary

        Raises:
            json.JSONDecodeError: If no JSON object can be parsed from the
                response, or the parsed value is not a JSON object.
        """
        content = response_content.strip()

        if content.startswith("```"):
            content = content[3:]
            # Drop an optional language tag directly after the opening fence.
            if content[:4].lower() == "json":
                content = content[4:]
            if content.endswith("```"):
                content = content[:-3]
            content = content.strip()

        try:
            parsed = json.loads(content)
        except json.JSONDecodeError:
            # Models often add prose around the payload ("Here is the
            # JSON: ..."); retry on the outermost {...} span before failing.
            start = content.find("{")
            end = content.rfind("}")
            if start == -1 or end < start:
                raise
            parsed = json.loads(content[start:end + 1])

        if not isinstance(parsed, dict):
            # Callers use .get() on the result, so anything but an object is
            # reported as a parse failure.
            raise json.JSONDecodeError("LLM response is not a JSON object", content, 0)

        return parsed