# AlexTrinityBlock's picture
# refactor(agent): split websearch agent into separate modules
# a394be7
import os
from colorama import Fore, Style # type: ignore[import]
from langchain_core.tools import tool
from tavily import TavilyClient # type: ignore[import]
@tool
def web_search(query: str, depth: str = "advanced", max_results: int = 5) -> str:
    """
    Search the web using Tavily and extract full Markdown content from top results.
    Useful for in-depth analysis, table data, or detailed technical documentation.

    Args:
        query: The search query string.
        depth: Search depth, either "basic" or "advanced".
        max_results: Number of results to return (recommended 3-5 to save tokens).

    Returns:
        One Markdown block per search result (title, URL, snippet and the
        extracted page content), joined by blank lines, or "No results found."
        when the search yields nothing usable.
    """
    # NOTE(review): with @tool this docstring presumably doubles as the tool
    # description shown to the model -- keep it accurate when editing.

    # Per-call URL limit of the Extract API, as noted in the original code.
    extract_batch_size = 20
    missing_content = "Full content extraction failed."

    print(f"{Fore.GREEN}[Search & Extract] {query}{Style.RESET_ALL}")
    client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

    # 1. Search and get URLs
    search_response = client.search(
        query=query, search_depth=depth, max_results=max_results
    )
    # A hit without a URL can be neither extracted nor cited, so drop it
    # instead of raising KeyError further down.
    results = [r for r in search_response.get("results") or [] if r.get("url")]
    if not results:
        return "No results found."

    # dict.fromkeys de-duplicates while keeping the search ranking order.
    urls = list(dict.fromkeys(r["url"] for r in results))

    # 2. Extract full content via Extract API, one batch at a time
    extracted_results = {}
    for start in range(0, len(urls), extract_batch_size):
        batch = urls[start : start + extract_batch_size]
        # Extraction is best-effort: a failed batch is reported and skipped so
        # the search snippets (and any other batches) are still returned.
        # Parsing stays inside the try to keep that guarantee for malformed
        # responses as well.
        try:
            extraction = client.extract(
                urls=batch,
                extract_depth="advanced",  # For tables and structured data
                format="markdown",  # Most readable format for LLMs
            )
            for item in extraction.get("results") or []:
                page_url = item.get("url")
                raw_content = item.get("raw_content")
                # Skip empty/None content so the fallback text is shown
                # instead of a literal "None".
                if page_url and raw_content:
                    extracted_results[page_url] = raw_content
        except Exception as e:
            print(f"{Fore.RED}Extraction failed: {e}{Style.RESET_ALL}")

    # 3. Format output
    final_output = []
    for r in results:
        url = r["url"]
        title = r.get("title") or "Untitled"
        snippet = r.get("content") or ""  # Original search snippet
        full_content = extracted_results.get(url, missing_content)
        final_output.append(
            f"### Title: {title}\n"
            f"**URL:** {url}\n"
            f"**Snippet:** {snippet}\n\n"
            f"**Full Markdown Content:**\n\n{full_content}\n"
            f"{'=' * 50}"
        )
    return "\n\n".join(final_output)
if __name__ == "__main__":
    from dotenv import load_dotenv

    # Load variables from a .env file so TAVILY_API_KEY can be supplied
    # without exporting it in the shell.
    load_dotenv()

    # Manual smoke test: run a single query through the tool and print
    # whatever it returns.
    print(web_search.invoke({"query": "What is LangGraph?"}))