| import json |
| import os |
| import re |
|
|
| import pypdf |
| from duckduckgo_search import DDGS |
|
|
|
|
def get_clean_text(content):
    """Extract plain text from message content of any supported shape.

    Args:
        content: ``None``, a string, a dict (its ``"text"`` entry is used),
            a list of such values, or any other object.

    Returns:
        str: ``""`` for ``None``; the string itself for ``str``; the
        normalised ``"text"`` entry for a dict (``""`` when the key is
        missing or ``None``); the concatenation of the normalised items for
        a list; ``str(content)`` for anything else.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        # Normalise the value as well: a ``None`` or non-string "text" entry
        # would otherwise leak out and break ``"".join`` in the list branch.
        return get_clean_text(content.get("text", ""))
    if isinstance(content, list):
        # Recurse per item so that None, numbers and nested lists are handled
        # instead of raising AttributeError on a missing ``.get``.
        return "".join(get_clean_text(item) for item in content)
    return str(content)
|
|
|
|
def extract_text_from_file(file_path):
    """Read the text content of the file at ``file_path``.

    Args:
        file_path: Path to the file. A falsy value or a path that does not
            exist yields an empty string.

    Returns:
        str: For a ``.pdf`` extension (case-insensitive), the text of every
        page as extracted by ``pypdf``, each page followed by a newline.
        For any other extension, the file decoded as UTF-8 with undecodable
        bytes dropped. If reading fails, a bracketed error message is
        returned instead of raising.
    """
    if not (file_path and os.path.exists(file_path)):
        return ""

    _, suffix = os.path.splitext(file_path)
    is_pdf = suffix.lower() == ".pdf"

    try:
        if not is_pdf:
            with open(file_path, "r", encoding="utf-8", errors="ignore") as handle:
                return handle.read()

        with open(file_path, "rb") as handle:
            pages = pypdf.PdfReader(handle).pages
            # extract_text() may yield nothing for a page; treat that as "".
            return "".join((page.extract_text() or "") + "\n" for page in pages)
    except Exception as e:
        # Best effort: report the failure as text rather than propagating it.
        return f"[Ошибка чтения файла: {e}]"
|
|
|
|
def web_search(query: str, max_results: int = 3) -> list:
    """Run a DuckDuckGo text search and return the hits as a list of dicts.

    Args:
        query: Search string passed to ``DDGS.text``.
        max_results: Upper bound on results, forwarded as ``max_results``.

    Returns:
        list: One dict per hit with the keys ``"title"``, ``"url"`` and
        ``"snippet"``, filled from the hit's ``title``, ``href`` and ``body``
        entries (``""`` when an entry is missing). An empty list when there
        are no hits or when any exception occurs; the exception is printed.
    """
    try:
        with DDGS() as client:
            hits = list(client.text(query, max_results=max_results))
            return [
                {
                    "title": hit.get("title", ""),
                    "url": hit.get("href", ""),
                    "snippet": hit.get("body", ""),
                }
                for hit in hits
            ]
    except Exception as e:
        print(f"Ошибка DDGS: {e}")
        return []
|
|
|
|
def extract_search_query(text: str):
    """Extract a web-search query from raw router/model output.

    The output is probed in order; the first stage that yields a result wins:

    1. ``<search>...</search>`` tag (case-insensitive, may span lines).
    2. The first ``{...}`` span parsed as JSON: if its ``"search"`` value is
       ``True`` / ``"true"`` the ``"query"`` value is returned; if it is an
       explicit refusal (``False``, ``None``, ``"no"``, ``"false"``,
       ``"none"``, ``"no_search"``) the result is ``None``.
    3. A loose ``query|text|запрос": "..."`` key/value match on the raw text,
       tried only when the text mentions "search".

    Args:
        text: Raw model output; may contain a ``<think>...</think>`` section.

    Returns:
        The extracted query, or ``None`` when no search is requested or no
        query can be found. The JSON stage returns the ``"query"`` value as
        stored in the JSON object.

    Side effects:
        Prints the raw input to stdout for debugging.
    """
    if not text:
        return None

    # Debug trace of exactly what the router produced.
    print(f"\n--- [DEBUG] RAW ROUTER OUTPUT ---\n{text}\n---------------------------\n")

    # Values that mean "do not search", shared by the JSON and fallback stages.
    negative_markers = ("no", "false", "none", "no_search")

    # Drop the reasoning section; if nothing is left, fall back to the raw text.
    clean = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
    if not clean:
        clean = text

    # Stage 1: XML-style tag. DOTALL lets the tag body contain newlines.
    xml_match = re.search(
        r"<search>(.*?)</search>", clean, re.IGNORECASE | re.DOTALL
    )
    if xml_match:
        query = xml_match.group(1).strip()
        if query:
            return query
        # An empty tag carries no query; keep looking in the later stages.

    # Stage 2: first (non-greedy) brace span interpreted as JSON.
    json_match = re.search(r"\{.*?\}", clean, re.DOTALL)
    if json_match:
        try:
            data = json.loads(json_match.group(0))
        except ValueError:
            # Not valid JSON (json.JSONDecodeError is a ValueError subclass).
            data = None
        if isinstance(data, dict) and "search" in data:
            flag = data["search"]
            if flag is True or str(flag).lower() == "true":
                return data.get("query")
            if str(flag).lower() in negative_markers:
                # Explicit refusal: do not let the loose fallback below pick
                # up the "query" field of the same object.
                return None

    # Stage 3: loose key/value match on the raw text (logs, broken JSON).
    if "search" in text.lower():
        match = re.search(
            r"(?:запрос|query|text)['\"]\s*:\s*['\"]([^'\"]+)['\"]", text, re.IGNORECASE
        )
        if match:
            val = match.group(1)
            # Strip a leading UI-style label such as "🔍 Поиск: " / "Search: ".
            val = re.sub(
                r"^(?:🔍\s*)?(?:Поиск(?:\s*по\s*запросу)?|Search):\s*",
                "",
                val,
                flags=re.IGNORECASE,
            ).strip()
            if val and val.lower() not in negative_markers:
                return val

    return None
|
|