|
|
""" |
|
|
Example: DocumentAgent with ReAct-style Processing

Demonstrates:
1. Loading and processing documents
2. Field extraction with evidence
3. Document classification
4. Question answering with grounding
5. Running a multi-step task with a ReAct-style reasoning trace
|
|
""" |
|
|
|
|
|
import asyncio |
|
|
from pathlib import Path |
|
|
from loguru import logger |
|
|
|
|
|
|
|
|
from src.agents.document_agent import ( |
|
|
DocumentAgent, |
|
|
AgentConfig, |
|
|
) |
|
|
from src.document.schemas.extraction import ( |
|
|
ExtractionSchema, |
|
|
FieldDefinition, |
|
|
) |
|
|
|
|
|
|
|
|
async def example_basic_agent():
    """Load the sample PDF with an explicitly configured agent.

    Builds a ``DocumentAgent`` from an ``AgentConfig``, loads
    ``./data/sample.pdf`` and prints the filename, page count and chunk
    count of the loaded document. When the file is missing, a hint is
    printed and nothing is loaded.
    """
    banner = "=" * 50
    print(banner)
    print("Basic DocumentAgent Usage")
    print(banner)

    # The agent is created before the file check, as in every other example.
    agent = DocumentAgent(
        AgentConfig(
            default_model="llama3.2:3b",
            max_iterations=10,
            temperature=0.1,
        )
    )

    pdf_path = Path("./data/sample.pdf")
    if pdf_path.exists():
        print(f"\nLoading document: {pdf_path}")
        await agent.load_document(str(pdf_path))

        print(f"Document loaded: {agent.document.metadata.filename}")
        print(f"Pages: {agent.document.metadata.num_pages}")
        print(f"Chunks: {len(agent.document.chunks)}")
    else:
        print(f"Sample document not found: {pdf_path}")
        print("Create a sample PDF at ./data/sample.pdf")
|
|
|
|
|
|
|
|
async def example_field_extraction():
    """Extract structured fields with evidence.

    Loads ``./data/sample.pdf`` (printing a message and returning early
    when it is missing), asks the agent to extract the ``title``,
    ``author``, ``date`` and ``summary`` fields, then prints each
    extracted field, the overall confidence and up to three evidence
    entries.
    """
    print("\n" + "=" * 50)
    print("Field Extraction with Evidence")
    print("=" * 50)

    agent = DocumentAgent()

    sample_doc = Path("./data/sample.pdf")
    if not sample_doc.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(sample_doc))

    # title and summary are mandatory; author and date are optional.
    schema = ExtractionSchema(
        name="document_info",
        description="Extract key document information",
        fields=[
            FieldDefinition(
                name="title",
                field_type="string",
                description="Document title",
                required=True,
            ),
            FieldDefinition(
                name="author",
                field_type="string",
                description="Document author or organization",
                required=False,
            ),
            FieldDefinition(
                name="date",
                field_type="string",
                description="Document date",
                required=False,
            ),
            FieldDefinition(
                name="summary",
                field_type="string",
                description="Brief summary of document content",
                required=True,
            ),
        ],
    )

    print("\nExtracting fields...")
    result = await agent.extract_fields(schema)

    print("\nExtracted Fields:")
    for field, value in result.fields.items():
        print(f"  {field}: {value}")

    print(f"\nConfidence: {result.confidence:.2f}")

    if result.evidence:
        print(f"\nEvidence ({len(result.evidence)} sources):")
        for ev in result.evidence[:3]:
            # Only mark the preview with "..." when text was actually cut;
            # previously short snippets were shown as if truncated.
            snippet = ev.snippet
            preview = snippet if len(snippet) <= 80 else snippet[:80] + "..."
            # NOTE(review): "+ 1" presumably converts a zero-based page
            # index to a human-readable page number - confirm.
            print(f"  - Page {ev.page + 1}: {preview}")
|
|
|
|
|
|
|
|
async def example_classification():
    """Classify the sample document and print the outcome.

    Prints the document type, confidence and reasoning returned by
    ``agent.classify()``, followed by any extra metadata entries.
    Returns early with a message when ``./data/sample.pdf`` is missing.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Document Classification")
    print(rule)

    agent = DocumentAgent()

    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(pdf_path))

    print("\nClassifying document...")
    outcome = await agent.classify()

    print(f"\nDocument Type: {outcome.document_type.value}")
    print(f"Confidence: {outcome.confidence:.2f}")
    print(f"Reasoning: {outcome.reasoning}")

    # Extra metadata is only listed when the classifier supplied some.
    if outcome.metadata:
        print("\nAdditional metadata:")
        for name, detail in outcome.metadata.items():
            print(f"  {name}: {detail}")
|
|
|
|
|
|
|
|
async def example_question_answering():
    """Answer questions about document with evidence.

    Loads ``./data/sample.pdf`` (printing a message and returning early
    when it is missing) and asks the agent three fixed questions in turn.
    For each one the answer is printed, followed by up to two evidence
    entries when any were returned.
    """
    print("\n" + "=" * 50)
    print("Question Answering with Evidence")
    print("=" * 50)

    agent = DocumentAgent()

    sample_doc = Path("./data/sample.pdf")
    if not sample_doc.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(sample_doc))

    questions = [
        "What is this document about?",
        "What are the main findings or conclusions?",
        "Are there any tables or figures? What do they show?",
    ]

    for question in questions:
        print(f"\nQ: {question}")
        print("-" * 40)

        # answer_question yields an (answer, evidence) pair.
        answer, evidence = await agent.answer_question(question)

        print(f"A: {answer}")

        if evidence:
            print("\nEvidence:")
            for ev in evidence[:2]:
                # Only mark the preview with "..." when text was actually
                # cut; previously short snippets looked truncated.
                snippet = ev.snippet
                preview = snippet if len(snippet) <= 60 else snippet[:60] + "..."
                # NOTE(review): "+ 1" presumably converts a zero-based
                # page index to a human-readable page number - confirm.
                print(f"  - Page {ev.page + 1} ({ev.source_type}): {preview}")
|
|
|
|
|
|
|
|
async def example_react_task():
    """Run a complex task with ReAct-style reasoning.

    Loads ``./data/sample.pdf`` (printing a message and returning early
    when it is missing), passes a four-point analysis prompt to
    ``agent.run`` and prints the result. It then prints a summary of the
    returned trace: number of steps, tools used, total time, and the
    action, thought and observation of the first five steps.
    """

    def _clip(text, limit=100):
        """Return ``text`` cut to ``limit`` chars, adding "..." only if cut."""
        return text if len(text) <= limit else text[:limit] + "..."

    print("\n" + "=" * 50)
    print("ReAct-style Task Execution")
    print("=" * 50)

    agent = DocumentAgent()

    sample_doc = Path("./data/sample.pdf")
    if not sample_doc.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(sample_doc))

    # Build the prompt line by line so that the text handed to the agent
    # never picks up indentation from how this source file is laid out.
    # The empty first/last entries keep the leading and trailing newline.
    task = "\n".join(
        (
            "",
            "Analyze this document and provide:",
            "1. A brief summary of the content",
            "2. The document type and purpose",
            "3. Any key data points or figures mentioned",
            "4. Your confidence in the analysis",
            "",
        )
    )

    print(f"\nTask: {task}")
    print("-" * 40)

    # agent.run yields a (result, trace) pair.
    result, trace = await agent.run(task)

    print(f"\nResult:\n{result}")

    print("\n--- Agent Trace ---")
    print(f"Steps: {len(trace.steps)}")
    print(f"Tools used: {trace.tools_used}")
    print(f"Total time: {trace.total_time:.2f}s")

    print("\nReasoning trace:")
    # Only the first five steps are shown to keep the output short.
    for i, step in enumerate(trace.steps[:5], 1):
        print(f"\n[Step {i}] {step.action}")
        if step.thought:
            print(f"  Thought: {_clip(step.thought)}")
        if step.observation:
            print(f"  Observation: {_clip(step.observation)}")
|
|
|
|
|
|
|
|
async def main():
    """Await each example coroutine in turn, in a fixed order."""
    examples = (
        example_basic_agent,
        example_field_extraction,
        example_classification,
        example_question_answering,
        example_react_task,
    )
    # Run strictly one after another; each example finishes before the
    # next one starts.
    for example in examples:
        await example()
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script (not imported): drive all examples via asyncio.
    asyncio.run(main())
|
|
|