import os

from colorama import Fore, Style  # type: ignore[import]
from langchain_core.tools import tool
from tavily import TavilyClient  # type: ignore[import]


@tool
def web_search(query: str, depth: str = "advanced", max_results: int = 5) -> str:
    """
    Search the web using Tavily and extract full Markdown content from top results.

    Useful for in-depth analysis, table data, or detailed technical documentation.

    Args:
        query: The search query string.
        depth: Search depth, either "basic" or "advanced".
        max_results: Number of results to return (recommended 3-5 to save tokens).

    Returns:
        A string of result blocks (title, URL, snippet, extracted Markdown
        content) separated by blank lines, or "No results found." when the
        search yields nothing.

    Raises:
        KeyError: If the TAVILY_API_KEY environment variable is not set.
    """
    print(f"{Fore.GREEN}[Search & Extract] {query}{Style.RESET_ALL}")
    client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

    # 1. Search and get URLs
    search_response = client.search(
        query=query, search_depth=depth, max_results=max_results
    )
    results = search_response.get("results", [])
    if not results:
        return "No results found."

    # Skip malformed entries with no URL rather than raising KeyError.
    urls = [r["url"] for r in results if r.get("url")]

    # 2. Extract full content via Extract API. The API accepts at most 20 URLs
    # per call, so enforce that cap instead of just noting it in a comment.
    try:
        extraction = client.extract(
            urls=urls[:20],
            extract_depth="advanced",  # For tables and structured data
            format="markdown",  # Most readable format for LLMs
        )
        # raw_content may be missing or None for pages Tavily failed to parse;
        # drop those so the per-URL failure message below is used instead of
        # rendering the literal string "None".
        extracted_results = {
            item["url"]: item["raw_content"]
            for item in extraction.get("results", [])
            if item.get("url") and item.get("raw_content")
        }
    except Exception as e:
        # Best-effort: a failed extraction degrades to search snippets only.
        print(f"{Fore.RED}Extraction failed: {e}{Style.RESET_ALL}")
        extracted_results = {}

    # 3. Format output: one Markdown block per search result.
    final_output = []
    for r in results:
        url = r.get("url", "")
        title = r.get("title", "")
        snippet = r.get("content", "")  # Original search snippet
        full_content = extracted_results.get(url, "Full content extraction failed.")

        content_block = (
            f"### Title: {title}\n"
            f"**URL:** {url}\n"
            f"**Snippet:** {snippet}\n\n"
            f"**Full Markdown Content:**\n\n{full_content}\n"
            f"{'=' * 50}"
        )
        final_output.append(content_block)

    return "\n\n".join(final_output)


if __name__ == "__main__":
    from dotenv import load_dotenv

    load_dotenv()

    # Test run
    test_query = "What is LangGraph?"
    result = web_search.invoke({"query": test_query})
    print(result)