|
|
""" |
|
|
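Debug script that exercises the document analysis agent's patent text
extraction and structure extraction on one PDF from uploads/patents,
logging the intermediate results.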
|
|
""" |
|
|
import asyncio |
|
|
import sys |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
sys.path.insert(0, str(Path(__file__).parent)) |
|
|
|
|
|
from src.llm.langchain_ollama_client import get_langchain_client |
|
|
from src.agents.scenario1.document_analysis_agent import DocumentAnalysisAgent |
|
|
from loguru import logger |
|
|
|
|
|
async def main():
    """Run the extraction steps on one sample patent and log what comes back.

    Looks for PDFs under ``uploads/patents`` (relative to the current working
    directory), takes the first one in sorted order, extracts its text through
    the agent, then invokes the agent's structure chain on the first 8000
    characters of that text and logs selected fields of the result.

    Returns:
        None. All output goes through the loguru ``logger``. A failure in the
        structure step is logged together with its traceback instead of being
        raised.
    """
    patent_path = "uploads/patents"

    # glob() yields entries in directory order, which is not stable across
    # machines; sorting makes repeated debug runs pick the same file.
    patent_files = sorted(Path(patent_path).glob("*.pdf"))
    if not patent_files:
        logger.error(f"No patent PDFs found in {patent_path}")
        return

    test_patent = str(patent_files[0])
    logger.info(f"Testing with patent: {test_patent}")

    llm_client = get_langchain_client(default_complexity='standard')
    agent = DocumentAnalysisAgent(llm_client)

    # Step 1: raw text extraction.
    logger.info("Step 1: Extracting text...")
    patent_text = await agent._extract_patent_text(test_patent)
    if not patent_text:
        # Covers both None and "": nothing useful can be sent to the LLM, and
        # len(None) below would otherwise crash outside the try block.
        logger.error(f"No text extracted from {test_patent}")
        return
    logger.info(f"Extracted text length: {len(patent_text)} characters")
    logger.info(f"First 500 chars: {patent_text[:500]}")

    # Step 2: structured extraction through the agent's chain.
    logger.info("\nStep 2: Extracting structure...")
    from langchain_core.output_parsers import JsonOutputParser
    parser = JsonOutputParser()

    try:
        structure = await agent.structure_chain.ainvoke({
            # Only the first 8000 characters are passed to the chain.
            "patent_text": patent_text[:8000],
            "format_instructions": parser.get_format_instructions(),
        })

        # The model may return a key with a JSON null; "or" falls back to a
        # safe default so a null field is reported instead of raising.
        abstract = structure.get('abstract')
        abstract_preview = str(abstract)[:200] if abstract else 'NOT FOUND'
        independent_claims = structure.get('independent_claims') or []
        dependent_claims = structure.get('dependent_claims') or []

        logger.info("\nExtracted structure:")
        logger.info(f"  Title: {structure.get('title', 'NOT FOUND')}")
        logger.info(f"  Abstract: {abstract_preview}")
        logger.info(f"  Patent ID: {structure.get('patent_id', 'NOT FOUND')}")
        logger.info(f"  Independent claims: {len(independent_claims)}")
        logger.info(f"  Dependent claims: {len(dependent_claims)}")
        logger.info(f"\nFull structure keys: {structure.keys()}")

    except Exception as e:
        # Top-level boundary of a debug script: report everything, including
        # the traceback, through the same logger as the rest of the output.
        logger.exception(f"Structure extraction failed: {e}")
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: drive the async debug routine to completion on a
    # fresh event loop. Nothing runs when the module is merely imported.
    asyncio.run(main())
|
|
|