"""
Document type validation utility
Helps identify if uploaded documents are actually patents
"""
import re
from pathlib import Path
from typing import Tuple, List
from loguru import logger


class DocumentValidator:
    """
    Heuristic checks that decide whether extracted text looks like a patent

    All methods are static; the class groups the keyword tables, the
    thresholds and the checks that use them.
    """

    # Keywords that should appear in patent documents.
    # NOTE: matching is a plain substring test on lower-cased text, so the
    # word "claims" is counted for both 'claim' and 'claims'.
    PATENT_KEYWORDS = [
        'patent', 'claim', 'claims', 'invention', 'abstract',
        'field of invention', 'background', 'detailed description',
        'inventor', 'assignee', 'filing date', 'application'
    ]

    # Required sections in patents
    REQUIRED_SECTIONS = ['abstract', 'claim']

    # Texts shorter than this many characters are reported as too short
    MIN_TEXT_LENGTH = 500

    # Minimum number of distinct PATENT_KEYWORDS entries that must match
    MIN_KEYWORD_MATCHES = 3

    # Numbered claim reference such as "claim 1" (applied to lower-cased text)
    _CLAIM_PATTERN = re.compile(r'claim\s+\d+')

    @staticmethod
    def _evaluate(text: str) -> Tuple[bool, List[str]]:
        """
        Run the patent heuristics without logging anything

        Args:
            text: Extracted document text (None is treated as empty text)

        Returns:
            Tuple of (is_valid, issues_found)
        """
        text = text or ""
        text_lower = text.lower()
        issues = []

        # Check minimum length
        if len(text) < DocumentValidator.MIN_TEXT_LENGTH:
            issues.append(
                f"Document too short (< {DocumentValidator.MIN_TEXT_LENGTH} characters)"
            )

        # Check for patent keywords
        keyword_matches = sum(
            1 for kw in DocumentValidator.PATENT_KEYWORDS if kw in text_lower
        )
        if keyword_matches < DocumentValidator.MIN_KEYWORD_MATCHES:
            issues.append(
                f"Only {keyword_matches} patent keywords found "
                f"(expected at least {DocumentValidator.MIN_KEYWORD_MATCHES})"
            )

        # Check for required sections
        missing_sections = [
            section for section in DocumentValidator.REQUIRED_SECTIONS
            if section not in text_lower
        ]
        if missing_sections:
            issues.append(f"Missing required sections: {', '.join(missing_sections)}")

        # Check for claim structure (claims usually numbered)
        claims_found = len(DocumentValidator._CLAIM_PATTERN.findall(text_lower))
        if claims_found == 0:
            issues.append("No numbered claims found")

        # Enough keywords plus at least one numbered claim is sufficient.
        # An empty issue list always satisfies both conditions, so no
        # separate "no issues" test is needed. A short text or a missing
        # 'abstract' section is still reported but does not fail validation.
        is_valid = (
            keyword_matches >= DocumentValidator.MIN_KEYWORD_MATCHES
            and claims_found > 0
        )
        return is_valid, issues

    @staticmethod
    def validate_patent_document(text: str) -> Tuple[bool, List[str]]:
        """
        Validate if document text appears to be a patent

        The document is accepted when it contains at least
        MIN_KEYWORD_MATCHES patent keywords and at least one numbered
        claim reference. A warning is logged when validation fails.

        Args:
            text: Extracted document text (None is treated as empty text)

        Returns:
            Tuple of (is_valid, issues_found). issues_found can be
            non-empty even when is_valid is True: a short text or a
            missing 'abstract' section is reported without failing
            validation.
        """
        is_valid, issues = DocumentValidator._evaluate(text)

        if not is_valid:
            logger.warning(f"Document validation failed: {issues}")

        return is_valid, issues

    @staticmethod
    def identify_document_type(text: str) -> str:
        """
        Try to identify what type of document this is

        The patent heuristics are evaluated first so that the result
        agrees with validate_patent_document; the substring checks for
        non-patent types are only consulted for texts that fail them.
        This method does not log.

        Args:
            text: Extracted document text (None is treated as empty text)

        Returns:
            Document type description
        """
        # Check if it's a patent before any keyword guess: a patent that
        # merely mentions e.g. "news" or "conference" must not be
        # relabelled as a press release or research paper.
        is_patent, _ = DocumentValidator._evaluate(text)
        if is_patent:
            return "Patent document"

        text_lower = (text or "").lower()

        # Check for common non-patent document types
        if 'microsoft' in text_lower and 'windows' in text_lower:
            return "Microsoft Windows documentation"

        if any(term in text_lower for term in ['press release', 'news', 'announcement']):
            return "Press release or news article"

        if any(term in text_lower for term in ['whitepaper', 'white paper', 'technical report']):
            return "Technical whitepaper or report"

        if any(term in text_lower for term in ['terms of service', 'privacy policy', 'license agreement']):
            return "Legal agreement or policy document"

        if 'research paper' in text_lower or 'ieee' in text_lower or 'conference' in text_lower:
            return "Academic research paper"

        return "Unknown document type (not a patent)"


def validate_and_log(text: str, document_name: str = "document") -> bool:
    """
    Validate a document and report the outcome through the logger

    Args:
        text: Extracted document text to check
        document_name: Label used for the document in log messages

    Returns:
        True when the text passes patent validation, False otherwise
    """
    passed, problems = DocumentValidator.validate_patent_document(text)

    # Success path first: a single confirmation message, nothing else to do
    if passed:
        logger.success(f"✅ {document_name} appears to be a valid patent")
        return True

    # Failure path: report a best-guess document type alongside the issues
    detected_type = DocumentValidator.identify_document_type(text)
    joined_problems = ', '.join(problems)

    logger.error(f"❌ {document_name} is NOT a valid patent")
    logger.error(f"   Detected type: {detected_type}")
    logger.error(f"   Issues: {joined_problems}")
    return False