Spaces:

MHamdan
/

SPARKNET

Sleeping

File size: 6,497 Bytes

d520909

"""
Example: DocumentAgent with ReAct-style Processing

Demonstrates:
1. Loading and processing documents
2. Field extraction with evidence
3. Document classification
4. Question answering with grounding
"""

import asyncio
from pathlib import Path
from loguru import logger

# Import DocumentAgent
from src.agents.document_agent import (
    DocumentAgent,
    AgentConfig,
)
from src.document.schemas.extraction import (
    ExtractionSchema,
    FieldDefinition,
)


async def example_basic_agent():
    """Load the sample PDF with an explicitly configured agent.

    Builds a ``DocumentAgent`` from a custom ``AgentConfig``, loads
    ``./data/sample.pdf`` and prints the document's filename, page count
    and number of chunks. When the sample file is missing, a hint is
    printed and the function returns without loading anything.
    """
    rule = "=" * 50
    print(rule)
    print("Basic DocumentAgent Usage")
    print(rule)

    # Explicit settings instead of the agent's defaults.
    agent = DocumentAgent(
        AgentConfig(
            default_model="llama3.2:3b",
            max_iterations=10,
            temperature=0.1,
        )
    )

    # Nothing to demonstrate unless the sample file is on disk.
    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print(f"Sample document not found: {pdf_path}")
        print("Create a sample PDF at ./data/sample.pdf")
        return

    print(f"\nLoading document: {pdf_path}")
    await agent.load_document(str(pdf_path))

    # Summarise what the agent now holds for the loaded document.
    print(f"Document loaded: {agent.document.metadata.filename}")
    print(f"Pages: {agent.document.metadata.num_pages}")
    print(f"Chunks: {len(agent.document.chunks)}")


async def example_field_extraction():
    """Extract structured fields with evidence.

    Loads ``./data/sample.pdf`` into a default ``DocumentAgent``, asks it to
    extract a four-field schema (title, author, date, summary) and prints
    each extracted field, the overall confidence (two decimals) and up to
    three evidence snippets. Returns early with a message when the sample
    file does not exist.
    """
    print("\n" + "=" * 50)
    print("Field Extraction with Evidence")
    print("=" * 50)

    agent = DocumentAgent()

    sample_doc = Path("./data/sample.pdf")
    if not sample_doc.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(sample_doc))

    # (name, description, required) for every field; all are plain strings.
    field_specs = (
        ("title", "Document title", True),
        ("author", "Document author or organization", False),
        ("date", "Document date", False),
        ("summary", "Brief summary of document content", True),
    )
    schema = ExtractionSchema(
        name="document_info",
        description="Extract key document information",
        fields=[
            FieldDefinition(
                name=field_name,
                field_type="string",
                description=field_description,
                required=is_required,
            )
            for field_name, field_description, is_required in field_specs
        ],
    )

    # Extract fields
    print("\nExtracting fields...")
    result = await agent.extract_fields(schema)

    print("\nExtracted Fields:")
    for field_name, value in result.fields.items():
        print(f"  {field_name}: {value}")

    print(f"\nConfidence: {result.confidence:.2f}")

    if result.evidence:
        snippet_limit = 80
        print(f"\nEvidence ({len(result.evidence)} sources):")
        # Only the first three evidence items are shown to keep output short.
        for ev in result.evidence[:3]:
            snippet = ev.snippet
            # Append "..." only when the snippet was actually cut; otherwise
            # the output would wrongly suggest that text is missing.
            if len(snippet) > snippet_limit:
                snippet = f"{snippet[:snippet_limit]}..."
            # Page is shown +1 (presumably ev.page is zero-based -- confirm).
            print(f"  - Page {ev.page + 1}: {snippet}")


async def example_classification():
    """Classify the sample document and print the outcome.

    Loads ``./data/sample.pdf`` into a default ``DocumentAgent``, calls
    ``agent.classify()`` and prints the document type, the confidence
    (two decimals), the reasoning and, when present, every metadata
    key/value pair. Returns early with a message if the file is missing.
    """
    rule = "=" * 50
    print("\n" + rule)
    print("Document Classification")
    print(rule)

    agent = DocumentAgent()

    pdf_path = Path("./data/sample.pdf")
    if not pdf_path.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(pdf_path))

    print("\nClassifying document...")
    outcome = await agent.classify()

    print(f"\nDocument Type: {outcome.document_type.value}")
    print(f"Confidence: {outcome.confidence:.2f}")
    print(f"Reasoning: {outcome.reasoning}")

    # The metadata section is optional; stop here when there is none.
    if not outcome.metadata:
        return

    print("\nAdditional metadata:")
    for meta_key, meta_value in outcome.metadata.items():
        print(f"  {meta_key}: {meta_value}")


async def example_question_answering():
    """Answer questions about document with evidence.

    Loads ``./data/sample.pdf`` into a default ``DocumentAgent`` and asks a
    fixed list of three questions via ``agent.answer_question``. For each
    question the answer is printed, followed by up to two evidence snippets
    when evidence is returned. Returns early with a message if the sample
    file does not exist.
    """
    print("\n" + "=" * 50)
    print("Question Answering with Evidence")
    print("=" * 50)

    agent = DocumentAgent()

    sample_doc = Path("./data/sample.pdf")
    if not sample_doc.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(sample_doc))

    # Questions to ask
    questions = [
        "What is this document about?",
        "What are the main findings or conclusions?",
        "Are there any tables or figures? What do they show?",
    ]
    snippet_limit = 60

    for question in questions:
        print(f"\nQ: {question}")
        print("-" * 40)

        answer, evidence = await agent.answer_question(question)

        print(f"A: {answer}")

        if not evidence:
            continue

        print("\nEvidence:")
        # Show at most two evidence items per question.
        for ev in evidence[:2]:
            snippet = ev.snippet
            # Append "..." only when the snippet was actually cut; otherwise
            # the output would wrongly suggest that text is missing.
            if len(snippet) > snippet_limit:
                snippet = f"{snippet[:snippet_limit]}..."
            # Page is shown +1 (presumably ev.page is zero-based -- confirm).
            print(f"  - Page {ev.page + 1} ({ev.source_type}): {snippet}")


async def example_react_task():
    """Run a complex task with ReAct-style reasoning.

    Loads ``./data/sample.pdf`` into a default ``DocumentAgent``, submits a
    multi-part analysis prompt through ``agent.run`` and prints the result
    together with trace statistics (step count, tools used, total time) and
    a preview of the first five reasoning steps. Returns early with a
    message if the sample file does not exist.
    """

    def preview(text, limit=100):
        """Return *text* cut to *limit* items, adding "..." only if cut."""
        return text if len(text) <= limit else f"{text[:limit]}..."

    print("\n" + "=" * 50)
    print("ReAct-style Task Execution")
    print("=" * 50)

    agent = DocumentAgent()

    sample_doc = Path("./data/sample.pdf")
    if not sample_doc.exists():
        print("Sample document not found")
        return

    await agent.load_document(str(sample_doc))

    # Complex task
    task = """
    Analyze this document and provide:
    1. A brief summary of the content
    2. The document type and purpose
    3. Any key data points or figures mentioned
    4. Your confidence in the analysis
    """

    print(f"\nTask: {task}")
    print("-" * 40)

    # Run with trace
    result, trace = await agent.run(task)

    print(f"\nResult:\n{result}")

    print("\n--- Agent Trace ---")
    print(f"Steps: {len(trace.steps)}")
    print(f"Tools used: {trace.tools_used}")
    print(f"Total time: {trace.total_time:.2f}s")

    # Show thinking process (first five steps only, to keep output short)
    print("\nReasoning trace:")
    for i, step in enumerate(trace.steps[:5], 1):
        print(f"\n[Step {i}] {step.action}")
        # Thought and observation are optional per step; the ellipsis is
        # added by preview() only when the text really was shortened.
        if step.thought:
            print(f"  Thought: {preview(step.thought)}")
        if step.observation:
            print(f"  Observation: {preview(step.observation)}")


async def main():
    """Run every example coroutine sequentially, in a fixed order."""
    demos = (
        example_basic_agent,
        example_field_extraction,
        example_classification,
        example_question_answering,
        example_react_task,
    )
    # Await one at a time so the printed sections never interleave.
    for demo in demos:
        await demo()


if __name__ == "__main__":
    # Executed as a script (not imported): run the async entry point.
    asyncio.run(main())