Commit ·
a877f54
1
Parent(s): ca1474d
feat(agent): add web search agent with LangGraph and Tavily integration
Browse files- Created agent module with websearch_agent and run() entrypoint
- Implemented Tavily search tool (default max_results=5)
- Updated Gradio app to use the new agent instead of fixed answers
- Added dependencies: langchain, langchain-google-genai, langgraph, tavily-python
- Added .env.example for API key configuration (GOOGLE_API_KEY, TAVILY_API_KEY)
- Updated README with setup and usage documentation
- Removed old template files (main.py, pyproject.toml)
- .env.example +2 -0
- .gitignore +1 -0
- README.md +56 -1
- agent/__init__.py +3 -0
- agent/__pycache__/__init__.cpython-314.pyc +0 -0
- agent/__pycache__/agent.cpython-314.pyc +0 -0
- agent/agent.py +40 -0
- agent/tools/__init__.py +3 -0
- agent/tools/__pycache__/__init__.cpython-314.pyc +0 -0
- agent/tools/__pycache__/search.cpython-314.pyc +0 -0
- agent/tools/search.py +74 -0
- app.py +6 -5
- main.py +0 -6
- pyproject.toml +0 -7
- requirements.txt +9 -2
- uv.lock +0 -0
.env.example
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
GOOGLE_API_KEY=your-google-api-key-here
|
| 2 |
+
TAVILY_API_KEY=your-tavily-api-key-here
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
.env
|
README.md
CHANGED
|
@@ -15,9 +15,64 @@ hf_oauth_expiration_minutes: 480
|
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 16 |
|
| 17 |
|
| 18 |
-
##
|
|
|
|
|
|
|
| 19 |
|
| 20 |
```bash
|
| 21 |
uv pip install -r requirements.txt
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
uv run app.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
```
|
|
|
|
| 15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 16 |
|
| 17 |
|
| 18 |
+
## Setup
|
| 19 |
+
|
| 20 |
+
1. Install dependencies:
|
| 21 |
|
| 22 |
```bash
|
| 23 |
uv pip install -r requirements.txt
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
2. Copy `.env.example` to `.env` and fill in your API keys:
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
cp .env.example .env
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
```
|
| 33 |
+
GOOGLE_API_KEY=your-google-api-key-here
|
| 34 |
+
TAVILY_API_KEY=your-tavily-api-key-here
|
| 35 |
+
```
|
| 36 |
+
|
| 37 |
+
## Usage
|
| 38 |
+
|
| 39 |
+
### Run Gradio app locally
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
uv run app.py
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
### Run the agent directly
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
uv run python -m agent.agent
|
| 49 |
+
```
|
| 50 |
+
|
| 51 |
+
### Test the search tool only
|
| 52 |
+
|
| 53 |
+
```bash
|
| 54 |
+
uv run python -m agent.tools.search
|
| 55 |
+
```
|
| 56 |
+
|
| 57 |
+
### Use in code
|
| 58 |
+
|
| 59 |
+
```python
|
| 60 |
+
from agent import run
|
| 61 |
+
|
| 62 |
+
answer = run("What is LangGraph?")
|
| 63 |
+
print(answer)
|
| 64 |
+
```
|
| 65 |
+
|
| 66 |
+
## Project Structure
|
| 67 |
+
|
| 68 |
+
```
|
| 69 |
+
agent/
|
| 70 |
+
├── __init__.py # Exports run()
|
| 71 |
+
├── agent.py # websearch_agent + run entrypoint
|
| 72 |
+
└── tools/
|
| 73 |
+
├── __init__.py # Exports web_search
|
| 74 |
+
└── search.py       # Tavily search tool (default max_results=5)
|
| 75 |
+
app.py # Gradio UI for HuggingFace Spaces
|
| 76 |
+
.env.example # API keys template
|
| 77 |
+
requirements.txt # Python dependencies
|
| 78 |
```
|
agent/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agent.agent import run
|
| 2 |
+
|
| 3 |
+
__all__ = ["run"]
|
agent/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (218 Bytes). View file
|
|
|
agent/__pycache__/agent.cpython-314.pyc
ADDED
|
Binary file (1.69 kB). View file
|
|
|
agent/agent.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from dotenv import load_dotenv
|
| 3 |
+
from langchain.agents import create_agent
|
| 4 |
+
from agent.tools.search import web_search
|
| 5 |
+
|
| 6 |
+
load_dotenv()
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def websearch_agent(query: str) -> str:
    """Answer *query* with a Gemini agent that can call the Tavily web_search tool.

    Args:
        query: Natural-language question from the user.

    Returns:
        The text of the agent's final message.
    """
    base_agent = create_agent(
        model="google_genai:gemini-3-flash-preview",
        tools=[web_search],
    )
    result = base_agent.invoke(
        {
            "messages": [
                {
                    "role": "system",
                    "content": "Try to search web sites and get the answer.",
                },
                {"role": "user", "content": query},
            ]
        }
    )
    content = result["messages"][-1].content
    # LangChain message content is either a plain string or a list of content
    # blocks (dicts with a "text" key). The original code assumed only the
    # list form (content[0]["text"]) and crashed on string content; handle both.
    if isinstance(content, str):
        return content
    return "".join(
        block.get("text", "") if isinstance(block, dict) else str(block)
        for block in content
    )
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def run(query: str) -> str:
    """Public entry point: route *query* through the web-search agent.

    Currently delegates to a single agent; kept as a separate function so
    multi-agent orchestration can be added without changing callers.
    """
    # Original code assigned result = "" and immediately overwrote it — the
    # dead assignment is removed.
    return websearch_agent(query)
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
if __name__ == "__main__":
    # Minimal interactive CLI for manual testing: one query in, one answer out.
    user_query = input("Query:\n")
    print(run(user_query))
|
agent/tools/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from agent.tools.search import web_search
|
| 2 |
+
|
| 3 |
+
__all__ = ["web_search"]
|
agent/tools/__pycache__/__init__.cpython-314.pyc
ADDED
|
Binary file (238 Bytes). View file
|
|
|
agent/tools/__pycache__/search.cpython-314.pyc
ADDED
|
Binary file (3.01 kB). View file
|
|
|
agent/tools/search.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from langchain_core.tools import tool
|
| 3 |
+
from tavily import TavilyClient # type: ignore[import]
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
@tool
def web_search(query: str, depth: str = "advanced", max_results: int = 5) -> str:
    """
    Search the web using Tavily and extract full Markdown content from top results.
    Useful for in-depth analysis, table data, or detailed technical documentation.

    Args:
        query: The search query string.
        depth: Search depth, either "basic" or "advanced".
        max_results: Number of results to return (recommended 3-5 to save tokens).

    Returns:
        A Markdown report (title, URL, snippet, extracted page content per
        result), or "No results found." when the search comes back empty.

    Raises:
        KeyError: If TAVILY_API_KEY is not configured in the environment.
    """
    print(f"[Search & Extract] {query}")
    try:
        api_key = os.environ["TAVILY_API_KEY"]
    except KeyError:
        # Re-raise with an actionable message instead of a bare key name.
        raise KeyError(
            "TAVILY_API_KEY is not set; copy .env.example to .env and add it."
        ) from None
    client = TavilyClient(api_key=api_key)

    # 1. Search and get URLs.
    search_response = client.search(
        query=query, search_depth=depth, max_results=max_results
    )
    results = search_response.get("results", [])
    if not results:
        return "No results found."

    urls = [r["url"] for r in results]

    # 2. Extract full content via Extract API (max 20 URLs per call).
    try:
        extraction = client.extract(
            urls=urls,
            extract_depth="advanced",  # For tables and structured data
            format="markdown",  # Most readable format for LLMs
        )
        extracted_results = {
            item["url"]: item["raw_content"] for item in extraction.get("results", [])
        }
    except Exception as e:
        # Extraction is best-effort; fall back to search snippets only.
        print(f"Extraction failed: {e}")
        extracted_results = {}

    # 3. Format output — one Markdown section per result, '=' rule as separator.
    final_output = []
    for r in results:
        url = r["url"]
        title = r["title"]
        snippet = r["content"]  # Original search snippet
        full_content = extracted_results.get(url, "Full content extraction failed.")

        content_block = (
            f"### Title: {title}\n"
            f"**URL:** {url}\n"
            f"**Snippet:** {snippet}\n\n"
            f"**Full Markdown Content:**\n\n{full_content}\n"
            f"{'=' * 50}"
        )
        final_output.append(content_block)

    return "\n\n".join(final_output)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
if __name__ == "__main__":
    from dotenv import load_dotenv

    load_dotenv()

    # Smoke-test the tool directly from the command line.
    print(web_search.invoke({"query": "What is LangGraph?"}))
|
app.py
CHANGED
|
@@ -8,16 +8,17 @@ import pandas as pd
|
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 10 |
|
| 11 |
-
# ---
|
| 12 |
-
|
|
|
|
| 13 |
class BasicAgent:
|
| 14 |
def __init__(self):
|
| 15 |
print("BasicAgent initialized.")
|
| 16 |
def __call__(self, question: str) -> str:
|
| 17 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
| 18 |
-
|
| 19 |
-
print(f"Agent returning
|
| 20 |
-
return
|
| 21 |
|
| 22 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 23 |
"""
|
|
|
|
| 8 |
# --- Constants ---
|
| 9 |
DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
| 10 |
|
| 11 |
+
# --- Agent Definition ---
|
| 12 |
+
from agent.agent import run as agent_run
|
| 13 |
+
|
| 14 |
class BasicAgent:
    """Adapter between the Gradio scoring harness and the web-search agent.

    The harness instantiates this class once and calls it per question;
    each call delegates to ``agent.agent.run``.
    """

    def __init__(self):
        print("BasicAgent initialized.")

    def __call__(self, question: str) -> str:
        """Return the agent's answer for *question* (logs truncated previews)."""
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        answer = agent_run(question)
        print(f"Agent returning answer (first 50 chars): {answer[:50]}...")
        return answer
|
| 22 |
|
| 23 |
def run_and_submit_all( profile: gr.OAuthProfile | None):
|
| 24 |
"""
|
main.py
DELETED
|
@@ -1,6 +0,0 @@
|
|
| 1 |
-
def main():
|
| 2 |
-
print("Hello from final-assignment-template!")
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
if __name__ == "__main__":
|
| 6 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pyproject.toml
DELETED
|
@@ -1,7 +0,0 @@
|
|
| 1 |
-
[project]
|
| 2 |
-
name = "final-assignment-template"
|
| 3 |
-
version = "0.1.0"
|
| 4 |
-
description = "Add your description here"
|
| 5 |
-
readme = "README.md"
|
| 6 |
-
requires-python = ">=3.14"
|
| 7 |
-
dependencies = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
requirements.txt
CHANGED
|
@@ -1,2 +1,9 @@
|
|
| 1 |
-
gradio
|
| 2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio==6.11.0
|
| 2 |
+
gradio[oauth]==6.11.0
|
| 3 |
+
requests==2.33.1
|
| 4 |
+
python-dotenv==1.2.2
|
| 5 |
+
langchain
|
| 6 |
+
langchain-core
|
| 7 |
+
langchain-google-genai
|
| 8 |
+
langgraph
|
| 9 |
+
tavily-python
|
uv.lock
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|