File size: 2,369 Bytes
a877f54 a394be7 a877f54 40deb66 a877f54 40deb66 a877f54 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 | import os
from colorama import Fore, Style # type: ignore[import]
from langchain_core.tools import tool
from tavily import TavilyClient # type: ignore[import]
# Contract notes live in comments rather than the docstring: LangChain's @tool
# uses the docstring as the tool description shown to the model, so its text is
# left unchanged.
#   - Returns one Markdown block per search hit (title, URL, snippet, extracted
#     page content), joined by blank lines, or "No results found." when the
#     search returns nothing.
#   - Raises KeyError when TAVILY_API_KEY is not set; errors raised by the
#     search call itself propagate to the caller.
#   - Extraction is best-effort: on any failure the error is printed and the
#     affected hits fall back to "Full content extraction failed.".
@tool
def web_search(query: str, depth: str = "advanced", max_results: int = 5) -> str:
    """
    Search the web using Tavily and extract full Markdown content from top results.
    Useful for in-depth analysis, table data, or detailed technical documentation.

    Args:
        query: The search query string.
        depth: Search depth, either "basic" or "advanced".
        max_results: Number of results to return (recommended 3-5 to save tokens).
    """
    print(f"{Fore.GREEN}[Search & Extract] {query}{Style.RESET_ALL}")
    client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

    # 1. Search and get URLs
    search_response = client.search(
        query=query, search_depth=depth, max_results=max_results
    )
    results = search_response.get("results", [])
    if not results:
        return "No results found."

    # Only hits that actually carry a URL can be sent to the Extract API.
    urls = [hit["url"] for hit in results if hit.get("url")]

    # 2. Extract full content via Extract API (max 20 URLs per call)
    extracted_results = {}
    if urls:
        # The whole step stays inside the try on purpose: extraction is an
        # optional enrichment and must never cost us the search results.
        try:
            extraction = client.extract(
                urls=urls,
                extract_depth="advanced",  # For tables and structured data
                format="markdown",  # Most readable format for LLMs
            )
            # Keep only entries with usable content, so a missing or empty
            # `raw_content` falls back to the failure message below instead of
            # rendering as "None" or discarding every other page.
            extracted_results = {
                item["url"]: item["raw_content"]
                for item in extraction.get("results", [])
                if item.get("url") and item.get("raw_content")
            }
        except Exception as e:
            print(f"{Fore.RED}Extraction failed: {e}{Style.RESET_ALL}")
            extracted_results = {}

    # 3. Format output
    final_output = []
    for hit in results:
        # Tolerate sparse search hits: one missing field should not raise and
        # throw away the entire response.
        url = hit.get("url") or ""
        title = hit.get("title") or "Untitled"
        snippet = hit.get("content") or ""  # Original search snippet
        full_content = extracted_results.get(url, "Full content extraction failed.")
        final_output.append(
            f"### Title: {title}\n"
            f"**URL:** {url}\n"
            f"**Snippet:** {snippet}\n\n"
            f"**Full Markdown Content:**\n\n{full_content}\n"
            f"{'=' * 50}"
        )
    return "\n\n".join(final_output)
if __name__ == "__main__":
    # Manual smoke test: load environment variables from a .env file, run one
    # sample query through the tool, and print whatever it returns.
    from dotenv import load_dotenv

    load_dotenv()

    print(web_search.invoke({"query": "What is LangGraph?"}))
|