"""
Example: DocumentAgent with ReAct-style Processing

Demonstrates:
1. Loading and processing documents
2. Field extraction with evidence
3. Document classification
4. Question answering with grounding
"""

import asyncio
from pathlib import Path
from typing import Optional

from loguru import logger

# Import DocumentAgent
from src.agents.document_agent import (
    DocumentAgent,
    AgentConfig,
)
from src.document.schemas.extraction import (
    ExtractionSchema,
    FieldDefinition,
)

# Single source of truth for the document every example operates on.
SAMPLE_DOC = Path("./data/sample.pdf")


def _print_header(title: str, *, leading_newline: bool = True) -> None:
    """Print a section banner: a 50-character rule, the title, another rule.

    Args:
        title: Text shown between the two rules.
        leading_newline: When True, a newline precedes the first rule so the
            banner is separated from whatever was printed before it.
    """
    rule = "=" * 50
    print(f"\n{rule}" if leading_newline else rule)
    print(title)
    print(rule)


def _preview(text: str, limit: int) -> str:
    """Return ``text`` cut to at most ``limit`` characters for display.

    The "..." marker is appended only when characters were actually dropped,
    so short values are not shown as if they had been truncated.
    """
    if len(text) <= limit:
        return f"{text}"
    return f"{text[:limit]}..."


async def _load_sample_agent() -> Optional[DocumentAgent]:
    """Create a default DocumentAgent and load ``SAMPLE_DOC`` into it.

    Returns:
        The agent with the document loaded, or ``None`` (after printing a
        notice) when the sample document does not exist on disk.
    """
    # The agent is built before the existence check, matching the order the
    # individual examples used before this helper was factored out.
    agent = DocumentAgent()
    if not SAMPLE_DOC.exists():
        print("Sample document not found")
        return None

    await agent.load_document(str(SAMPLE_DOC))
    return agent


async def example_basic_agent() -> None:
    """Basic agent usage: build a configured agent and load a document."""
    _print_header("Basic DocumentAgent Usage", leading_newline=False)

    # Create agent with custom config
    config = AgentConfig(
        default_model="llama3.2:3b",
        max_iterations=10,
        temperature=0.1,
    )
    agent = DocumentAgent(config)

    # Load document
    if not SAMPLE_DOC.exists():
        print(f"Sample document not found: {SAMPLE_DOC}")
        print("Create a sample PDF at ./data/sample.pdf")
        return

    print(f"\nLoading document: {SAMPLE_DOC}")
    await agent.load_document(str(SAMPLE_DOC))

    print(f"Document loaded: {agent.document.metadata.filename}")
    print(f"Pages: {agent.document.metadata.num_pages}")
    print(f"Chunks: {len(agent.document.chunks)}")


async def example_field_extraction() -> None:
    """Extract structured fields with evidence."""
    _print_header("Field Extraction with Evidence")

    agent = await _load_sample_agent()
    if agent is None:
        return

    # Define extraction schema
    schema = ExtractionSchema(
        name="document_info",
        description="Extract key document information",
        fields=[
            FieldDefinition(
                name="title",
                field_type="string",
                description="Document title",
                required=True,
            ),
            FieldDefinition(
                name="author",
                field_type="string",
                description="Document author or organization",
                required=False,
            ),
            FieldDefinition(
                name="date",
                field_type="string",
                description="Document date",
                required=False,
            ),
            FieldDefinition(
                name="summary",
                field_type="string",
                description="Brief summary of document content",
                required=True,
            ),
        ],
    )

    # Extract fields
    print("\nExtracting fields...")
    result = await agent.extract_fields(schema)

    print("\nExtracted Fields:")
    for field, value in result.fields.items():
        print(f" {field}: {value}")

    print(f"\nConfidence: {result.confidence:.2f}")

    if result.evidence:
        print(f"\nEvidence ({len(result.evidence)} sources):")
        # Only the first three evidence items are shown to keep output short.
        for ev in result.evidence[:3]:
            # ev.page is shown with +1 so the printed page number starts at 1.
            print(f" - Page {ev.page + 1}: {_preview(ev.snippet, 80)}")


async def example_classification() -> None:
    """Classify document type."""
    _print_header("Document Classification")

    agent = await _load_sample_agent()
    if agent is None:
        return

    # Classify
    print("\nClassifying document...")
    classification = await agent.classify()

    print(f"\nDocument Type: {classification.document_type.value}")
    print(f"Confidence: {classification.confidence:.2f}")
    print(f"Reasoning: {classification.reasoning}")

    if classification.metadata:
        print("\nAdditional metadata:")
        for key, value in classification.metadata.items():
            print(f" {key}: {value}")


async def example_question_answering() -> None:
    """Answer questions about document with evidence."""
    _print_header("Question Answering with Evidence")

    agent = await _load_sample_agent()
    if agent is None:
        return

    # Questions to ask
    questions = [
        "What is this document about?",
        "What are the main findings or conclusions?",
        "Are there any tables or figures? What do they show?",
    ]

    for question in questions:
        print(f"\nQ: {question}")
        print("-" * 40)

        answer, evidence = await agent.answer_question(question)
        print(f"A: {answer}")

        if evidence:
            print("\nEvidence:")
            # Show at most two supporting items per answer.
            for ev in evidence[:2]:
                print(
                    f" - Page {ev.page + 1} ({ev.source_type}): "
                    f"{_preview(ev.snippet, 60)}"
                )


async def example_react_task() -> None:
    """Run a complex task with ReAct-style reasoning."""
    _print_header("ReAct-style Task Execution")

    agent = await _load_sample_agent()
    if agent is None:
        return

    # Complex task. The pieces below concatenate to the original prompt text,
    # including its leading and trailing space.
    task = (
        " Analyze this document and provide:"
        " 1. A brief summary of the content"
        " 2. The document type and purpose"
        " 3. Any key data points or figures mentioned"
        " 4. Your confidence in the analysis "
    )

    print(f"\nTask: {task}")
    print("-" * 40)

    # Run with trace
    result, trace = await agent.run(task)

    print(f"\nResult:\n{result}")

    print("\n--- Agent Trace ---")
    print(f"Steps: {len(trace.steps)}")
    print(f"Tools used: {trace.tools_used}")
    print(f"Total time: {trace.total_time:.2f}s")

    # Show thinking process (first five steps only)
    print("\nReasoning trace:")
    for i, step in enumerate(trace.steps[:5], 1):
        print(f"\n[Step {i}] {step.action}")
        if step.thought:
            print(f" Thought: {_preview(step.thought, 100)}")
        if step.observation:
            print(f" Observation: {_preview(step.observation, 100)}")


async def main() -> None:
    """Run all examples in sequence."""
    await example_basic_agent()
    await example_field_extraction()
    await example_classification()
    await example_question_answering()
    await example_react_task()


if __name__ == "__main__":
    asyncio.run(main())