| | from dotenv import load_dotenv |
| | from typing import TypedDict, List, Dict, Any, Optional, Annotated |
| |
|
| | from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFaceEmbeddings |
| | from langchain_google_genai import ChatGoogleGenerativeAI |
| | from langchain_groq import ChatGroq |
| |
|
| | from langgraph.graph import StateGraph, MessagesState, START, END |
| | from langgraph.graph.message import add_messages |
| | from langchain_core.messages import SystemMessage, HumanMessage, AnyMessage, AIMessage |
| | from langchain_core.messages.ai import subtract_usage |
| |
|
| | from langchain.tools import Tool |
| | from langchain_core.tools import tool |
| | from langchain_community.tools.tavily_search import TavilySearchResults |
| | from langchain_community.document_loaders import WikipediaLoader |
| | from langchain_community.document_loaders import ArxivLoader |
| | from langchain_community.retrievers import BM25Retriever |
| |
|
| | from langgraph.prebuilt import ToolNode, tools_condition |
| |
|
| | from prompts import system_prompt |
| |
|
| |
|
| | |
# Load variables from a local .env file into the process environment
# (presumably the API keys for the LLM / search providers used below — confirm).
load_dotenv()
| |
|
| |
|
| | |
def get_arxiv_url(content: str) -> str:
    """Extract an arXiv ID from text content and format it as an abstract URL.

    The text is scanned line by line for the first line that (after stripping
    surrounding whitespace) starts with ``arXiv:`` and carries an identifier.
    A trailing version suffix (``v`` followed by digits) is removed so the URL
    points at the version-independent abstract page.

    Args:
        content: Raw document text, e.g. the first page of a paper.

    Returns:
        ``https://arxiv.org/abs/<id>`` for the first identifier found, or the
        string ``"unknown"`` when no usable ``arXiv:`` stamp is present.
    """
    prefix = "arXiv:"
    for line in content.split("\n"):
        stripped = line.strip()
        if not stripped.startswith(prefix):
            continue

        # Accept both "arXiv:2101.00001v1 ..." and "arXiv: 2101.00001v1 ...".
        tokens = stripped[len(prefix):].split()
        if not tokens:
            # A bare "arXiv:" carries no identifier; keep looking.
            continue
        arxiv_id = tokens[0]

        # Drop only a *trailing* version marker. Splitting on the first "v"
        # would truncate old-style IDs whose archive name contains a "v"
        # (e.g. "solv-int/9901001v1").
        head, sep, tail = arxiv_id.rpartition("v")
        if sep and head and tail.isdigit():
            arxiv_id = head

        return f"https://arxiv.org/abs/{arxiv_id}"
    return "unknown"
| |
|
| |
|
| | |
# Wikipedia lookup tool.
# NOTE(review): the docstring is kept verbatim on purpose — @tool presumably
# uses it as the tool description shown to the model; confirm before editing.
@tool
def search_wiki(query: str) -> Dict[str, str]:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query."""
    print(f" executing search_wiki with query: {query}")
    try:
        pages = WikipediaLoader(query=query, load_max_docs=2).load()
        print(f"Found {len(pages)} documents for query '{query}'")

        # Render every page as one <Document> chunk carrying its source/page
        # metadata, then glue the chunks together with a visible separator.
        chunks = []
        for page in pages:
            source = page.metadata.get("source", "unknown")
            page_no = page.metadata.get("page", "")
            chunks.append(
                f'<Document source="{source}" page="{page_no}"/>\n{page.page_content}\n</Document>'
            )
        rendered = "\n\n---\n\n".join(chunks)

        if not rendered:
            print("Empty search results")
        return {"wiki_results": rendered}
    except Exception as e:
        # Best effort: report the failure in the result payload rather than
        # raising out of the tool.
        print(f"Error in search_wiki: {e}")
        return {"wiki_results": f"Search error: {str(e)}"}
| |
|
| |
|
| | |
# Tavily web-search tool.
# NOTE(review): the docstring is kept verbatim on purpose — @tool presumably
# uses it as the tool description shown to the model; confirm before editing.
#
# Returns a dict with the single key "web_results". Its value is either the
# formatted documents (possibly an empty string) or a "Search error: ..."
# message; the function does not raise.
@tool
def search_web(query: str) -> Dict[str, str]:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query."""
    print(f" executing search_web with query: {query}")

    try:
        search_docs = TavilySearchResults(max_results=3).run(query)
        print(f"DEBUG: search_docs type: {type(search_docs)}")
        print(f"DEBUG: search_docs content: {search_docs}")

        # The Tavily tool can hand back a plain string (the repr of the
        # underlying API error) instead of a list of result dicts. Iterating
        # that string would fail on `.get` and mask the real cause, so the
        # message is surfaced directly.
        if not isinstance(search_docs, list):
            print(f"Error in search_web: {search_docs}")
            return {"web_results": f"Search error: {search_docs}"}

        print(f"Found {len(search_docs)} documents for query '{query}'")

        # One <Document> chunk per hit, joined with a visible separator.
        formatted_search_docs = "\n\n---\n\n".join(
            f'<Document source="{doc.get("url", "unknown")}" page="{doc.get("page", "N/A")}"/>\n{doc.get("content", "")}\n</Document>'
            for doc in search_docs
        )
        if not formatted_search_docs:
            print("Empty search results")
        return {"web_results": formatted_search_docs}
    except Exception as e:
        # Best effort: report the failure in the result payload rather than
        # raising out of the tool.
        print(f"Error in search_web: {e}")
        return {"web_results": f"Search error: {str(e)}"}
| |
|
| |
|
| | |
# arXiv lookup tool.
# NOTE(review): the docstring is kept verbatim on purpose — @tool presumably
# uses it as the tool description shown to the model; confirm before editing.
@tool
def search_arxiv(query: str) -> Dict[str, str]:
    """Search Arxiv for a query and return maximum 3 result.

    Args:
        query: The search query."""
    print(f" executing search_arxiv with query: {query}")
    try:
        papers = ArxivLoader(query=query, load_max_docs=3).load()
        print(f"DEBUG: search_docs type: {type(papers)}")
        print(f"DEBUG: search_docs content: {papers}")
        print(f"Found {len(papers)} documents for query '{query}'")

        # One <Document> chunk per paper: the source URL is derived from the
        # paper text and only the first 1000 characters of the body are kept.
        chunks = []
        for paper in papers:
            source_url = get_arxiv_url(paper.page_content)
            page_no = paper.metadata.get("page", "N/A")
            excerpt = paper.page_content[:1000]
            chunks.append(
                f'<Document source="{source_url}" page="{page_no}"/>\n{excerpt}\n</Document>'
            )
        rendered = "\n\n---\n\n".join(chunks)

        if not rendered:
            print("Empty search results")
        return {"arxiv_results": rendered}
    except Exception as e:
        # Best effort: report the failure in the result payload rather than
        # raising out of the tool.
        print(f"Error in search_arxiv: {e}")
        return {"arxiv_results": f"Search error: {str(e)}"}
| |
|
| |
|
| | |
| | |
| |
|
| |
|
| | |
# System message wrapping the prompt text imported from the local `prompts` module.
sys_msg = SystemMessage(content=system_prompt)

# Search tools made available to the agent; build_graph() binds this list to
# the LLM and hands the same list to the ToolNode.
tools = [search_web, search_wiki, search_arxiv]
| |
|
| |
|
| | |
def build_graph():
    """Build and compile the tool-calling agent graph.

    The graph has two nodes:

    * ``assistant`` - invokes the Gemini chat model (with the module-level
      ``tools`` bound) on the conversation so far.
    * ``tools`` - a ``ToolNode`` that executes whatever tool calls the
      assistant requested.

    Flow: ``START -> assistant``; ``tools_condition`` then routes either to
    ``tools`` (which loops back to ``assistant``) or to ``END``.

    Returns:
        The compiled graph, invoked with ``{"messages": [...]}``.
    """
    llm = ChatGoogleGenerativeAI(
        model="gemini-2.5-flash-preview-04-17",
        temperature=0,
    )
    print(f"DEBUG: llm object = {llm}")

    llm_with_tools = llm.bind_tools(tools)
    print(f"DEBUG: llm_with_tools object = {llm_with_tools}")

    class AgentState(TypedDict):
        # Conversation history; the add_messages annotation is the reducer
        # applied when a node returns new messages.
        messages: Annotated[list[AnyMessage], add_messages]

    def assistant(state: AgentState):
        """Run one LLM step over the current conversation."""
        history = state["messages"]

        # The module-level system prompt was previously never sent to the
        # model. Prepend it for the call only (it is not written back into the
        # state), unless the caller already supplied a system message.
        if history and isinstance(history[0], SystemMessage):
            prompt = history
        else:
            prompt = [sys_msg, *history]

        result = llm_with_tools.invoke(prompt)
        print(f"DEBUG: LLM result = {result}")

        # Fill in zeroed usage metadata when the model returned none
        # (presumably so later consumers never see None - TODO confirm).
        if isinstance(result, AIMessage) and result.usage_metadata is None:
            result.usage_metadata = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}

        return {"messages": [result]}

    builder = StateGraph(AgentState)

    # Nodes
    builder.add_node("assistant", assistant)
    builder.add_node("tools", ToolNode(tools))

    # Edges
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges(
        "assistant",
        tools_condition,
        {
            # tools_condition's outcome is mapped onto node names here.
            "tools": "tools",
            END: END,
        },
    )
    builder.add_edge("tools", "assistant")

    return builder.compile()
| |
|
| |
|
if __name__ == "__main__":
    # Smoke run: push one hard-coded question through the agent and print
    # every message of the resulting conversation.
    question = "latest research on quantum computing"
    graph = build_graph()
    print(f"Running graph with question: {question}")
    final_state = graph.invoke({"messages": [HumanMessage(content=question)]})
    print("Graph execution finished. Messages:")
    for message in final_state["messages"]:
        message.pretty_print()
| |
|