Final_Assignment_Template

Sleeping

App Files Files Community

Final_Assignment_Template / agent /tools /search.py

AlexTrinityBlock

refactor(agent): split websearch agent into separate modules

a394be7 29 days ago

raw

history blame contribute delete

2.37 kB

	import os
	from colorama import Fore, Style # type: ignore[import]
	from langchain_core.tools import tool
	from tavily import TavilyClient # type: ignore[import]


	@tool
	def web_search(query: str, depth: str = "advanced", max_results: int = 5) -> str:
	    """
	    Search the web using Tavily and extract full Markdown content from top results.
	    Useful for in-depth analysis, table data, or detailed technical documentation.

	    Args:
	        query: The search query string.
	        depth: Search depth, either "basic" or "advanced".
	        max_results: Number of results to return (recommended 3-5 to save tokens).

	    Returns:
	        One Markdown block per search result (title, URL, snippet and the
	        extracted page content), or "No results found." when the search
	        returns nothing usable.
	    """
	    # NOTE: the docstring above doubles as the tool description handed to the
	    # model by @tool, so implementation notes are kept in comments instead.
	    extraction_failed = "Full content extraction failed."

	    print(f"{Fore.GREEN}[Search & Extract] {query}{Style.RESET_ALL}")
	    # A missing TAVILY_API_KEY raises KeyError here, before any network call.
	    client = TavilyClient(api_key=os.environ["TAVILY_API_KEY"])

	    # 1. Search and get URLs
	    search_response = client.search(
	        query=query, search_depth=depth, max_results=max_results
	    )

	    # Drop hits without a URL: they can be neither extracted nor cited, and
	    # indexing them blindly would crash the whole tool call.
	    results = [r for r in (search_response.get("results") or []) if r.get("url")]
	    if not results:
	        return "No results found."

	    # dict.fromkeys de-duplicates while keeping the search ranking order, so
	    # the same page is never requested twice from the Extract API.
	    urls = list(dict.fromkeys(r["url"] for r in results))

	    # 2. Extract full content via Extract API (max 20 URLs per call)
	    extracted_results = {}
	    try:
	        extraction = client.extract(
	            urls=urls,
	            extract_depth="advanced",  # For tables and structured data
	            format="markdown",  # Most readable format for LLMs
	        )

	        for item in extraction.get("results") or []:
	            page_url = item.get("url")
	            page_content = item.get("raw_content")
	            # Skip incomplete items instead of letting one bad entry raise
	            # and throw away every successful extraction.
	            if page_url and page_content:
	                extracted_results[page_url] = page_content
	    except Exception as e:
	        # Deliberate best-effort: extraction is an enrichment step, so any
	        # failure degrades to snippets only rather than failing the search.
	        print(f"{Fore.RED}Extraction failed: {e}{Style.RESET_ALL}")

	    # 3. Format output
	    final_output = []
	    for r in results:
	        url = r["url"]
	        title = r.get("title") or "(no title)"
	        snippet = r.get("content") or ""  # Original search snippet
	        full_content = extracted_results.get(url) or extraction_failed

	        content_block = (
	            f"### Title: {title}\n"
	            f"URL: {url}\n"
	            f"Snippet: {snippet}\n\n"
	            f"Full Markdown Content:\n\n{full_content}\n"
	            f"{'=' * 50}"
	        )
	        final_output.append(content_block)

	    return "\n\n".join(final_output)


	if __name__ == "__main__":
	    # Manual smoke test: load environment variables from a local .env file,
	    # run a single query through the tool and print whatever it returns.
	    from dotenv import load_dotenv

	    load_dotenv()

	    print(web_search.invoke({"query": "What is LangGraph?"}))