File size: 2,320 Bytes
a9dc537
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
"""
Debug script to test document analysis extraction
"""
import asyncio
import sys
from pathlib import Path

# Add this script's directory to sys.path so the `src` package can be imported
sys.path.insert(0, str(Path(__file__).parent))

from src.llm.langchain_ollama_client import get_langchain_client
from src.agents.scenario1.document_analysis_agent import DocumentAnalysisAgent
from loguru import logger

async def main():
    """Run the document-analysis extraction steps against one sample patent.

    Picks the first PDF (in sorted path order) found in ``uploads/patents``,
    extracts its text via ``DocumentAnalysisAgent._extract_patent_text``, then
    passes the first 8000 characters to the agent's ``structure_chain`` and
    logs a summary of the returned structure.

    Returns:
        None. Results are reported through the logger only. Any failure in
        the structure-extraction step is logged together with its traceback
        instead of being raised.
    """
    # Relative path, so it is resolved against the current working directory.
    patent_path = "uploads/patents"

    # Sort the matches so the same file is selected on every run; the order
    # in which glob() yields entries depends on the filesystem.
    patent_files = sorted(Path(patent_path).glob("*.pdf"))
    if not patent_files:
        logger.error(f"No patent PDFs found in {patent_path}")
        return

    test_patent = str(patent_files[0])
    logger.info(f"Testing with patent: {test_patent}")

    # Initialize LLM client and agent
    llm_client = get_langchain_client(default_complexity='standard')
    agent = DocumentAnalysisAgent(llm_client)

    # Extract text
    logger.info("Step 1: Extracting text...")
    patent_text = await agent._extract_patent_text(test_patent)
    if not patent_text:
        # Nothing to analyse; skip the LLM call rather than prompting it
        # with empty input (or crashing on len(None)).
        logger.error(f"No text could be extracted from {test_patent}")
        return
    logger.info(f"Extracted text length: {len(patent_text)} characters")
    logger.info(f"First 500 chars: {patent_text[:500]}")

    # Test structure extraction
    logger.info("\nStep 2: Extracting structure...")
    from langchain_core.output_parsers import JsonOutputParser
    parser = JsonOutputParser()

    try:
        structure = await agent.structure_chain.ainvoke({
            # Only the first 8000 characters are sent to the chain.
            "patent_text": patent_text[:8000],
            "format_instructions": parser.get_format_instructions()
        })

        # Use `or` fallbacks instead of .get() defaults: a key that is
        # present but null/empty must be treated the same as a missing key.
        title = structure.get('title') or 'NOT FOUND'
        abstract = structure.get('abstract')
        abstract_preview = abstract[:200] if abstract else 'NOT FOUND'
        patent_id = structure.get('patent_id') or 'NOT FOUND'
        independent_claims = structure.get('independent_claims') or []
        dependent_claims = structure.get('dependent_claims') or []

        logger.info("\nExtracted structure:")
        logger.info(f"  Title: {title}")
        logger.info(f"  Abstract: {abstract_preview}")
        logger.info(f"  Patent ID: {patent_id}")
        logger.info(f"  Independent claims: {len(independent_claims)}")
        logger.info(f"  Dependent claims: {len(dependent_claims)}")
        logger.info(f"\nFull structure keys: {structure.keys()}")

    except Exception as e:
        # Deliberately broad: this is a debug script, so report whatever went
        # wrong (with its traceback, via the logger) and exit normally.
        logger.exception(f"Structure extraction failed: {e}")

# Run the async debug routine only when executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())