Spaces:

MHamdan
/

SPARKNET

Sleeping

File size: 63,911 Bytes

d520909

#!/usr/bin/env python3
"""
SPARKNET Progress Report & Future Work PDF Generator
Generates a comprehensive stakeholder presentation document.
"""

from reportlab.lib import colors
from reportlab.lib.pagesizes import A4, landscape
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch, cm
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY, TA_RIGHT
from reportlab.platypus import (
    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
    PageBreak, Image, ListFlowable, ListItem, KeepTogether,
    Flowable, HRFlowable
)
from reportlab.graphics.shapes import Drawing, Rect, String, Line, Polygon
from reportlab.graphics.charts.barcharts import VerticalBarChart
from reportlab.graphics.charts.piecharts import Pie
from reportlab.graphics import renderPDF
from reportlab.pdfgen import canvas
from datetime import datetime
import os

# Color Scheme - Professional Blue Theme
# Shared palette used by the diagrams, table styles and paragraph styles below.
# Blues: primary/secondary fills and heading text, accent for borders and
# arrows, light blue for panel and highlight backgrounds.
PRIMARY_BLUE = colors.HexColor('#1e3a5f')
SECONDARY_BLUE = colors.HexColor('#2d5a87')
ACCENT_BLUE = colors.HexColor('#4a90d9')
LIGHT_BLUE = colors.HexColor('#e8f4fc')
# Semantic colors: green for complete/success, orange for in-progress/warning,
# red as the fallback for any other status.
SUCCESS_GREEN = colors.HexColor('#28a745')
WARNING_ORANGE = colors.HexColor('#fd7e14')
DANGER_RED = colors.HexColor('#dc3545')
# Neutrals: dark gray for body text and default arrows, light gray for
# alternating table rows, white for text drawn on filled boxes.
GRAY_DARK = colors.HexColor('#343a40')
GRAY_LIGHT = colors.HexColor('#f8f9fa')
WHITE = colors.white


class DiagramFlowable(Flowable):
    """Flowable that draws one of the report's hand-built diagrams.

    ``diagram_type`` selects the renderer: ``'architecture'``,
    ``'rag_pipeline'``, ``'document_pipeline'``, ``'agent_interaction'`` or
    ``'data_flow'``. Coordinates are measured from the flowable's own origin
    with y growing upward, so ``self.height - n`` means "n points below the
    top edge" of the drawing area.
    """

    def __init__(self, width, height, diagram_type='architecture'):
        """Store the drawing area size (in points) and the diagram to render."""
        Flowable.__init__(self)
        self.width = width
        self.height = height
        self.diagram_type = diagram_type

    def draw(self):
        """Render the selected diagram onto ``self.canv``.

        Raises:
            ValueError: if ``diagram_type`` is not a known diagram name.
                Previously an unknown name silently produced an empty figure.
        """
        renderers = {
            'architecture': self._draw_architecture,
            'rag_pipeline': self._draw_rag_pipeline,
            'document_pipeline': self._draw_document_pipeline,
            'agent_interaction': self._draw_agent_interaction,
            'data_flow': self._draw_data_flow,
        }
        try:
            render = renderers[self.diagram_type]
        except KeyError:
            raise ValueError(
                f'Unknown diagram_type {self.diagram_type!r}; '
                f'expected one of {sorted(renderers)}'
            ) from None
        render()

    def _draw_title(self, text):
        """Draw the diagram heading centred 20pt below the top edge."""
        self.canv.setFillColor(PRIMARY_BLUE)
        self.canv.setFont('Helvetica-Bold', 12)
        self.canv.drawCentredString(self.width / 2, self.height - 20, text)

    def _draw_panel(self, y, h, fill_color, label, label_y, label_color):
        """Draw a rounded background panel (30pt side insets) with a heading."""
        self.canv.setFillColor(fill_color)
        self.canv.roundRect(30, y, self.width - 60, h, 8, fill=1, stroke=0)
        self.canv.setFillColor(label_color)
        self.canv.setFont('Helvetica-Bold', 10)
        self.canv.drawString(40, label_y, label)

    def _draw_box(self, x, y, w, h, text, fill_color, text_color=WHITE, font_size=9):
        """Draw a filled rounded box at (x, y) with ``text`` centred inside it."""
        self.canv.setFillColor(fill_color)
        self.canv.roundRect(x, y, w, h, 5, fill=1, stroke=0)
        self.canv.setFillColor(text_color)
        self.canv.setFont('Helvetica-Bold', font_size)
        # The -3 offset nudges the baseline so the glyphs look vertically centred.
        self.canv.drawCentredString(x + w / 2, y + h / 2 - 3, text)

    def _draw_arrow(self, x1, y1, x2, y2, color=GRAY_DARK):
        """Draw a 2pt line from (x1, y1) to (x2, y2) with an arrow head at the end."""
        import math

        self.canv.setStrokeColor(color)
        self.canv.setLineWidth(2)
        self.canv.line(x1, y1, x2, y2)
        # Arrow head: two 8pt strokes swept back 0.4 rad either side of the shaft.
        angle = math.atan2(y2 - y1, x2 - x1)
        arrow_len = 8
        for offset in (-0.4, 0.4):
            self.canv.line(
                x2, y2,
                x2 - arrow_len * math.cos(angle + offset),
                y2 - arrow_len * math.sin(angle + offset),
            )

    def _draw_architecture(self):
        """Draw the high-level SPARKNET architecture (UI, demo, core, storage)."""
        top = self.height
        mid_x = self.width / 2

        self._draw_title('SPARKNET Architecture Overview')

        # User layer
        self._draw_box(mid_x - 60, top - 70, 120, 35, 'User Interface', ACCENT_BLUE)

        # Demo layer: one box per Streamlit page, spread across the panel.
        self._draw_panel(top - 160, 70, LIGHT_BLUE,
                         'Streamlit Demo Application', top - 100, PRIMARY_BLUE)
        pages = ['Live Processing', 'Interactive RAG', 'Doc Comparison',
                 'Evidence Viewer', 'Doc Viewer']
        page_width = (self.width - 100) / len(pages)
        for i, page in enumerate(pages):
            self._draw_box(45 + i * page_width, top - 150, page_width - 10, 35,
                           page, SECONDARY_BLUE, font_size=7)
        self._draw_arrow(mid_x, top - 70, mid_x, top - 90, ACCENT_BLUE)

        # Core services layer: each column is a service with its backing
        # technology underneath. Column width is derived from the flowable
        # width so the last column stays inside the panel instead of sticking
        # out past its right edge.
        self._draw_panel(top - 280, 100, LIGHT_BLUE,
                         'Core Services', top - 190, PRIMARY_BLUE)
        services = [
            ('Document Intel', 'OCR + Layout'),
            ('Multi-Agent RAG', '5 Agents'),
            ('Vector Store', 'ChromaDB'),
            ('LLM Layer', 'Ollama'),
        ]
        col_left = 50
        col_gap = 20
        col_width = (self.width - 2 * col_left - col_gap * (len(services) - 1)) / len(services)
        for i, (service, backend) in enumerate(services):
            x = col_left + i * (col_width + col_gap)
            self._draw_box(x, top - 230, col_width, 30, service, PRIMARY_BLUE, font_size=8)
            self._draw_box(x, top - 270, col_width, 30, backend, SECONDARY_BLUE, font_size=7)
        self._draw_arrow(mid_x, top - 160, mid_x, top - 180, ACCENT_BLUE)

        # Storage layer
        self._draw_panel(top - 340, 45, GRAY_LIGHT,
                         'Persistent Storage', top - 310, GRAY_DARK)
        for x, label in ((150, 'Embeddings'), (250, 'Documents'), (350, 'Cache')):
            self._draw_box(x, top - 335, 80, 25, label, GRAY_DARK, font_size=7)
        self._draw_arrow(mid_x, top - 280, mid_x, top - 295, GRAY_DARK)

    def _draw_rag_pipeline(self):
        """Draw the Multi-Agent RAG pipeline: query, five agents, response.

        Box width is derived from ``self.width`` (capped at 80pt) so the whole
        row, including the final Response box, stays inside the flowable. With
        fixed 80pt boxes the row needed about 680pt and ran off the page.
        """
        self._draw_title('Multi-Agent RAG Pipeline')

        agents = [
            ('QueryPlanner', PRIMARY_BLUE, 'Intent Classification\nQuery Decomposition'),
            ('Retriever', SECONDARY_BLUE, 'Hybrid Search\nDense + Sparse'),
            ('Reranker', SECONDARY_BLUE, 'Cross-Encoder\nMMR Diversity'),
            ('Synthesizer', PRIMARY_BLUE, 'Answer Generation\nCitation Tracking'),
            ('Critic', WARNING_ORANGE, 'Hallucination Check\nValidation'),
        ]

        margin = 20      # left/right inset of the whole row
        edge_gap = 20    # gap between query/response boxes and the agent chain
        spacing = 10     # gap between neighbouring agents
        fixed = 2 * margin + 2 * edge_gap + (len(agents) - 1) * spacing
        # Query box + agents + response box share the remaining width equally.
        box_width = min(80, (self.width - fixed) / (len(agents) + 2))
        box_y = self.height - 70
        arrow_y = self.height - 55   # vertical centre of the 30pt boxes

        # Query input
        query_x = margin
        self._draw_box(query_x, box_y, box_width, 30, 'User Query', ACCENT_BLUE, font_size=8)

        # Agents in sequence
        first_x = query_x + box_width + edge_gap
        agent_xs = [first_x + i * (box_width + spacing) for i in range(len(agents))]
        for i, ((name, color, desc), x) in enumerate(zip(agents, agent_xs)):
            self._draw_box(x, box_y, box_width, 30, name, color, font_size=7)
            # Two-line description underneath the box
            self.canv.setFillColor(GRAY_DARK)
            self.canv.setFont('Helvetica', 6)
            for j, line in enumerate(desc.split('\n')):
                self.canv.drawCentredString(x + box_width / 2, self.height - 85 - j * 8, line)
            # Arrow to the next agent
            if i < len(agents) - 1:
                self._draw_arrow(x + box_width, arrow_y,
                                 x + box_width + spacing, arrow_y, GRAY_DARK)

        # Arrow from query to first agent
        self._draw_arrow(query_x + box_width, arrow_y, first_x, arrow_y, ACCENT_BLUE)

        # Revision loop: dashed line from the Critic's centre back to the
        # Synthesizer's centre, drawn below the description text.
        critic_x = agent_xs[-1] + box_width   # right edge of Critic
        synth_x = agent_xs[-2]                # left edge of Synthesizer
        self.canv.setStrokeColor(WARNING_ORANGE)
        self.canv.setLineWidth(1.5)
        self.canv.setDash(3, 3)
        self.canv.line(critic_x - box_width / 2, self.height - 100,
                       synth_x + box_width / 2, self.height - 100)
        self.canv.setDash()   # restore solid lines for later drawing

        self.canv.setFillColor(WARNING_ORANGE)
        self.canv.setFont('Helvetica-Oblique', 7)
        self.canv.drawCentredString((critic_x + synth_x) / 2, self.height - 115,
                                    'Revision Loop (if validation fails)')

        # Final output
        self._draw_box(critic_x + edge_gap, box_y, box_width, 30,
                       'Response', SUCCESS_GREEN, font_size=8)
        self._draw_arrow(critic_x, arrow_y, critic_x + edge_gap, arrow_y, SUCCESS_GREEN)

        # State tracking bar
        self.canv.setFillColor(LIGHT_BLUE)
        self.canv.roundRect(20, self.height - 160, self.width - 40, 35, 5, fill=1, stroke=0)
        self.canv.setFillColor(PRIMARY_BLUE)
        self.canv.setFont('Helvetica-Bold', 8)
        self.canv.drawString(30, self.height - 145, 'RAGState: Query → Plan → Retrieved Chunks → Reranked → Answer → Validation → Citations')

    def _draw_document_pipeline(self):
        """Draw the document processing pipeline as a centred row of stages."""
        self._draw_title('Document Processing Pipeline')

        stages = [
            ('Input', 'PDF/Image\nUpload', ACCENT_BLUE),
            ('OCR', 'PaddleOCR\nTesseract', PRIMARY_BLUE),
            ('Layout', 'Region\nDetection', PRIMARY_BLUE),
            ('Reading Order', 'Sequence\nReconstruction', SECONDARY_BLUE),
            ('Chunking', 'Semantic\nSplitting', SECONDARY_BLUE),
            ('Indexing', 'ChromaDB\nEmbedding', SUCCESS_GREEN),
        ]

        box_width = 70
        box_height = 45
        spacing = 15
        # Centre the whole row horizontally inside the flowable.
        total_width = len(stages) * box_width + (len(stages) - 1) * spacing
        x_start = (self.width - total_width) / 2
        y_pos = self.height - 90
        arrow_y = y_pos + box_height / 2

        for i, (name, desc, color) in enumerate(stages):
            x = x_start + i * (box_width + spacing)
            self._draw_box(x, y_pos, box_width, box_height, name, color, font_size=8)
            # Two-line description underneath the box
            self.canv.setFillColor(GRAY_DARK)
            self.canv.setFont('Helvetica', 6)
            for j, line in enumerate(desc.split('\n')):
                self.canv.drawCentredString(x + box_width / 2, y_pos - 15 - j * 8, line)
            # Arrow to the next stage
            if i < len(stages) - 1:
                self._draw_arrow(x + box_width, arrow_y, x + box_width + spacing, arrow_y)

        # Output description
        self.canv.setFillColor(PRIMARY_BLUE)
        self.canv.setFont('Helvetica-Bold', 9)
        self.canv.drawCentredString(self.width/2, self.height - 160, 'Output: ProcessedDocument with chunks, OCR regions, layout data, bounding boxes')

    def _draw_agent_interaction(self):
        """Draw the orchestrator with its agents and external services."""
        self._draw_title('Agent Interaction & Data Flow')

        # Central orchestrator (100x40 box centred on center_x, center_y)
        center_x, center_y = self.width / 2, self.height / 2 - 20
        self._draw_box(center_x - 50, center_y - 20, 100, 40, 'Orchestrator', PRIMARY_BLUE, font_size=9)

        # Surrounding agents as (name, dx, dy) offsets of each box centre
        # from the orchestrator centre.
        agents = [
            ('QueryPlanner', -120, 60),
            ('Retriever', 0, 90),
            ('Reranker', 120, 60),
            ('Synthesizer', 120, -60),
            ('Critic', 0, -90),
        ]

        for name, dx, dy in agents:
            self._draw_box(center_x + dx - 45, center_y + dy - 15, 90, 30,
                           name, SECONDARY_BLUE, font_size=8)
            if dy > 0:
                # Agents above: arrow leaves the orchestrator's top edge.
                self._draw_arrow(center_x, center_y + 20,
                                 center_x + dx * 0.3, center_y + dy - 15, ACCENT_BLUE)
            else:
                # Agents below: arrow arrives at the orchestrator's bottom edge.
                self._draw_arrow(center_x + dx * 0.3, center_y + dy + 15,
                                 center_x, center_y - 20, ACCENT_BLUE)

        # External connections
        # Vector store on the left
        self._draw_box(30, center_y - 15, 70, 30, 'ChromaDB', SUCCESS_GREEN, font_size=8)
        self._draw_arrow(100, center_y, center_x - 50, center_y, SUCCESS_GREEN)

        # LLM on the right
        self._draw_box(self.width - 100, center_y - 15, 70, 30, 'Ollama LLM', WARNING_ORANGE, font_size=8)
        self._draw_arrow(self.width - 100, center_y, center_x + 50, center_y, WARNING_ORANGE)

    def _draw_data_flow(self):
        """Draw the end-to-end data flow as a vertical stack of boxes.

        The stack normally starts 50pt below the top. When it would not fit
        above the bottom edge it is shifted up (to at most 30pt below the top)
        so the last box no longer spills into whatever follows the flowable.
        """
        self._draw_title('End-to-End Data Flow')

        # Vertical flow
        items = [
            ('Document Upload', ACCENT_BLUE, 'PDF, Images, Text files'),
            ('Document Processor', PRIMARY_BLUE, 'OCR → Layout → Chunking'),
            ('State Manager', SECONDARY_BLUE, 'ProcessedDocument storage'),
            ('Embedder', SECONDARY_BLUE, 'mxbai-embed-large (1024d)'),
            ('ChromaDB', SUCCESS_GREEN, 'Vector indexing & storage'),
            ('RAG Query', WARNING_ORANGE, 'User question processing'),
            ('Multi-Agent Pipeline', PRIMARY_BLUE, '5-agent collaboration'),
            ('Response', SUCCESS_GREEN, 'Answer with citations'),
        ]

        box_height = 28
        spacing = 8
        box_width = 160
        total_height = len(items) * box_height + (len(items) - 1) * spacing
        # Keep a 5pt margin at the bottom, but never move into the title area.
        y_start = min(max(self.height - 50, total_height + 5), self.height - 30)
        x_left = self.width / 2 - box_width / 2
        x_mid = x_left + box_width / 2

        for i, (name, color, desc) in enumerate(items):
            y = y_start - i * (box_height + spacing)   # top edge of this box
            self._draw_box(x_left, y - box_height, box_width, box_height, name, color, font_size=8)
            # Description to the right of the box
            self.canv.setFillColor(GRAY_DARK)
            self.canv.setFont('Helvetica', 7)
            self.canv.drawString(x_left + box_width + 15, y - box_height / 2 - 3, desc)

            # Arrow down to the next box (stops 2pt short of its top edge)
            if i < len(items) - 1:
                self._draw_arrow(x_mid, y - box_height, x_mid, y - box_height - spacing + 2)


def create_styles():
    """Return the sample stylesheet extended with the report's custom styles.

    Adds MainTitle, Subtitle, SectionHeader, SubsectionHeader, CustomBody,
    BulletText, Caption and HighlightText, each derived from one of the
    stock sample styles.
    """
    sheet = getSampleStyleSheet()

    # (style name, parent style name, attribute overrides)
    specs = (
        # Title style
        ('MainTitle', 'Title', dict(
            fontSize=28, textColor=PRIMARY_BLUE, spaceAfter=30,
            alignment=TA_CENTER, fontName='Helvetica-Bold')),
        # Subtitle
        ('Subtitle', 'Normal', dict(
            fontSize=16, textColor=SECONDARY_BLUE, spaceAfter=20,
            alignment=TA_CENTER, fontName='Helvetica')),
        # Section header (bordered)
        ('SectionHeader', 'Heading1', dict(
            fontSize=18, textColor=PRIMARY_BLUE, spaceBefore=25, spaceAfter=15,
            fontName='Helvetica-Bold', borderColor=ACCENT_BLUE,
            borderWidth=2, borderPadding=5)),
        # Subsection header
        ('SubsectionHeader', 'Heading2', dict(
            fontSize=14, textColor=SECONDARY_BLUE, spaceBefore=15,
            spaceAfter=10, fontName='Helvetica-Bold')),
        # Body text
        ('CustomBody', 'Normal', dict(
            fontSize=10, textColor=GRAY_DARK, spaceAfter=8,
            alignment=TA_JUSTIFY, leading=14)),
        # Bullet style
        ('BulletText', 'Normal', dict(
            fontSize=10, textColor=GRAY_DARK, leftIndent=20,
            spaceAfter=5, leading=13)),
        # Caption
        ('Caption', 'Normal', dict(
            fontSize=9, textColor=GRAY_DARK, alignment=TA_CENTER,
            spaceAfter=15, fontName='Helvetica-Oblique')),
        # Highlight box text
        ('HighlightText', 'Normal', dict(
            fontSize=10, textColor=PRIMARY_BLUE, spaceAfter=5,
            fontName='Helvetica-Bold')),
    )

    for style_name, parent_name, overrides in specs:
        sheet.add(ParagraphStyle(name=style_name, parent=sheet[parent_name], **overrides))

    return sheet


def create_highlight_box(text, styles, color=LIGHT_BLUE):
    """Create a highlighted, bordered text box.

    Args:
        text: Paragraph markup to show inside the box.
        styles: Stylesheet providing the 'HighlightText' paragraph style.
        color: Background fill of the box.

    Returns:
        A single-cell, 450pt-wide Table wrapping the paragraph.
    """
    padding = 12
    whole_table = ((0, 0), (-1, -1))
    table = Table([[Paragraph(text, styles['HighlightText'])]], colWidths=[450])
    table.setStyle(TableStyle([
        ('BACKGROUND', *whole_table, color),
        ('BOX', *whole_table, 1, ACCENT_BLUE),
        # TableStyle has no combined 'PADDING' command (it is silently
        # ignored), so each side must be set explicitly.
        ('LEFTPADDING', *whole_table, padding),
        ('RIGHTPADDING', *whole_table, padding),
        ('TOPPADDING', *whole_table, padding),
        ('BOTTOMPADDING', *whole_table, padding),
        ('VALIGN', *whole_table, 'MIDDLE'),
    ]))
    return table


def create_status_table(items, styles):
    """Create a status table whose Status cells are colour-coded.

    Args:
        items: Iterable of (component, status, completion) triples. A status
            of 'Complete' is shown in green, 'In Progress' in orange and
            anything else in red.
        styles: Unused; kept so existing callers keep working.

    Returns:
        A Table with a header row followed by one row per item.
    """
    status_colors = {
        'Complete': SUCCESS_GREEN,
        'In Progress': WARNING_ORANGE,
    }
    padding = 8

    data = [['Component', 'Status', 'Completion']]
    commands = [
        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        # TableStyle has no combined 'PADDING' command (it is silently
        # ignored), so each side must be set explicitly.
        ('LEFTPADDING', (0, 0), (-1, -1), padding),
        ('RIGHTPADDING', (0, 0), (-1, -1), padding),
        ('TOPPADDING', (0, 0), (-1, -1), padding),
        ('BOTTOMPADDING', (0, 0), (-1, -1), padding),
    ]

    # Row 0 is the header, so data rows start at index 1.
    for row, (item, status, completion) in enumerate(items, start=1):
        data.append([item, status, completion])
        # Apply the status colour to this row's Status cell; the colour used
        # to be computed and then discarded.
        status_color = status_colors.get(status, DANGER_RED)
        commands.append(('TEXTCOLOR', (1, row), (1, row), status_color))

    table = Table(data, colWidths=[250, 100, 100])
    table.setStyle(TableStyle(commands))
    return table


def create_metrics_table(metrics, styles):
    """Create a three-column metrics table (no header row).

    Args:
        metrics: Iterable of (metric, value, change) triples, one per row.
        styles: Unused; kept so existing callers keep working.

    Returns:
        A Table with bold metric names, blue centred values and alternating
        light-blue / white row backgrounds.
    """
    padding = 10
    data = [[metric, value, change] for metric, value, change in metrics]

    table = Table(data, colWidths=[200, 150, 100])
    table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 11),
        ('TEXTCOLOR', (1, 0), (1, -1), PRIMARY_BLUE),
        ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        # TableStyle has no combined 'PADDING' command (it is silently
        # ignored), so each side must be set explicitly.
        ('LEFTPADDING', (0, 0), (-1, -1), padding),
        ('RIGHTPADDING', (0, 0), (-1, -1), padding),
        ('TOPPADDING', (0, 0), (-1, -1), padding),
        ('BOTTOMPADDING', (0, 0), (-1, -1), padding),
        ('ROWBACKGROUNDS', (0, 0), (-1, -1), [LIGHT_BLUE, WHITE]),
    ]))
    return table


def generate_report():
    """Generate the complete SPARKNET progress report PDF."""

    filename = '/home/mhamdan/SPARKNET/docs/SPARKNET_Progress_Report.pdf'
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    doc = SimpleDocTemplate(
        filename,
        pagesize=A4,
        rightMargin=50,
        leftMargin=50,
        topMargin=60,
        bottomMargin=60
    )

    styles = create_styles()
    story = []

    # ========== TITLE PAGE ==========
    story.append(Spacer(1, 100))
    story.append(Paragraph('SPARKNET', styles['MainTitle']))
    story.append(Paragraph('Multi-Agentic Document Intelligence Framework', styles['Subtitle']))
    story.append(Spacer(1, 30))
    story.append(Paragraph('Progress Report & Future Roadmap', styles['Subtitle']))
    story.append(Spacer(1, 50))

    # Version info box
    version_data = [
        ['Version', '1.0.0-beta'],
        ['Report Date', datetime.now().strftime('%B %d, %Y')],
        ['Document Type', 'Stakeholder Progress Report'],
        ['Classification', 'Internal / Confidential'],
    ]
    version_table = Table(version_data, colWidths=[150, 200])
    version_table.setStyle(TableStyle([
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('TEXTCOLOR', (0, 0), (-1, -1), GRAY_DARK),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('GRID', (0, 0), (-1, -1), 0.5, ACCENT_BLUE),
        ('PADDING', (0, 0), (-1, -1), 8),
        ('BACKGROUND', (0, 0), (-1, -1), LIGHT_BLUE),
    ]))
    story.append(version_table)

    story.append(PageBreak())

    # ========== TABLE OF CONTENTS ==========
    story.append(Paragraph('Table of Contents', styles['SectionHeader']))
    story.append(Spacer(1, 20))

    toc_items = [
        ('1. Executive Summary', '3'),
        ('2. Project Overview', '4'),
        ('3. Technical Architecture', '5'),
        ('4. Component Deep Dive', '8'),
        ('5. Current Progress & Achievements', '12'),
        ('6. Gap Analysis', '14'),
        ('7. Future Work & Roadmap', '17'),
        ('8. Risk Assessment', '20'),
        ('9. Resource Requirements', '21'),
        ('10. Conclusion & Recommendations', '22'),
    ]

    toc_data = [[Paragraph(f'<b>{item}</b>', styles['CustomBody']), page] for item, page in toc_items]
    toc_table = Table(toc_data, colWidths=[400, 50])
    toc_table.setStyle(TableStyle([
        ('FONTSIZE', (0, 0), (-1, -1), 11),
        ('ALIGN', (1, 0), (1, -1), 'RIGHT'),
        ('BOTTOMPADDING', (0, 0), (-1, -1), 8),
        ('LINEBELOW', (0, 0), (-1, -2), 0.5, colors.lightgrey),
    ]))
    story.append(toc_table)

    story.append(PageBreak())

    # ========== 1. EXECUTIVE SUMMARY ==========
    story.append(Paragraph('1. Executive Summary', styles['SectionHeader']))

    story.append(Paragraph(
        '''SPARKNET represents a next-generation document intelligence platform that combines
        advanced OCR capabilities, sophisticated layout analysis, and a state-of-the-art
        Multi-Agent Retrieval-Augmented Generation (RAG) system. This report provides a
        comprehensive overview of the project's current state, technical achievements,
        identified gaps, and the strategic roadmap for future development.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>Key Highlights</b>', styles['SubsectionHeader']))

    highlights = [
        '<b>Multi-Agent RAG Architecture:</b> Successfully implemented a 5-agent pipeline (QueryPlanner, Retriever, Reranker, Synthesizer, Critic) with self-correction capabilities.',
        '<b>Document Processing Pipeline:</b> Complete end-to-end document processing with OCR, layout detection, and semantic chunking.',
        '<b>Production-Ready Demo:</b> Fully functional Streamlit application with 5 interactive modules for document intelligence workflows.',
        '<b>Hallucination Detection:</b> Built-in validation and criticism system to ensure factual accuracy of generated responses.',
        '<b>Unified State Management:</b> Cross-module communication enabling seamless user experience across all application components.',
    ]

    for h in highlights:
        story.append(Paragraph(f'• {h}', styles['BulletText']))

    story.append(Spacer(1, 20))

    # Key Metrics
    story.append(Paragraph('<b>Current System Metrics</b>', styles['SubsectionHeader']))
    metrics = [
        ('RAG Pipeline Agents', '5 Specialized Agents', '✓ Complete'),
        ('Document Formats Supported', 'PDF, Images', '2 formats'),
        ('Vector Dimensions', '1024 (mxbai-embed-large)', 'Production'),
        ('Demo Application Pages', '5 Interactive Modules', '✓ Complete'),
        ('LLM Integration', 'Ollama (Local)', 'Self-hosted'),
    ]
    story.append(create_metrics_table(metrics, styles))

    story.append(PageBreak())

    # ========== 2. PROJECT OVERVIEW ==========
    story.append(Paragraph('2. Project Overview', styles['SectionHeader']))

    story.append(Paragraph('<b>2.1 Vision & Objectives</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''SPARKNET aims to revolutionize document intelligence by providing an integrated
        platform that can understand, process, and intelligently query complex documents.
        The system leverages cutting-edge AI techniques including multi-agent collaboration,
        hybrid retrieval, and sophisticated answer synthesis with built-in validation.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>Core Objectives:</b>', styles['CustomBody']))

    objectives = [
        '<b>Intelligent Document Understanding:</b> Extract and structure information from diverse document formats with high accuracy.',
        '<b>Conversational Intelligence:</b> Enable natural language querying over document collections with citation-backed responses.',
        '<b>Reliability & Trust:</b> Implement hallucination detection and self-correction to ensure factual accuracy.',
        '<b>Scalability:</b> Design for enterprise-scale document processing and retrieval workloads.',
        '<b>Extensibility:</b> Modular architecture allowing easy integration of new capabilities and models.',
    ]

    for obj in objectives:
        story.append(Paragraph(f'• {obj}', styles['BulletText']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>2.2 Target Use Cases</b>', styles['SubsectionHeader']))

    use_cases = [
        ['Use Case', 'Description', 'Status'],
        ['Legal Document Analysis', 'Contract review, clause extraction, compliance checking', 'Supported'],
        ['Research Paper Synthesis', 'Multi-paper querying, citation tracking, summary generation', 'Supported'],
        ['Technical Documentation', 'API docs, manuals, knowledge base querying', 'Supported'],
        ['Financial Reports', 'Annual reports, SEC filings, financial data extraction', 'Planned'],
        ['Medical Records', 'Clinical notes, diagnostic reports (HIPAA compliance needed)', 'Future'],
    ]

    uc_table = Table(use_cases, colWidths=[130, 230, 90])
    uc_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 6),
        ('ALIGN', (2, 0), (2, -1), 'CENTER'),
    ]))
    story.append(uc_table)

    story.append(PageBreak())

    # ========== 3. TECHNICAL ARCHITECTURE ==========
    story.append(Paragraph('3. Technical Architecture', styles['SectionHeader']))

    story.append(Paragraph('<b>3.1 High-Level Architecture</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''SPARKNET follows a layered microservices-inspired architecture with clear separation
        of concerns. The system is organized into presentation, service, and persistence layers,
        with a central orchestration mechanism coordinating multi-agent workflows.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))

    # Architecture Diagram
    arch_diagram = DiagramFlowable(500, 350, 'architecture')
    story.append(arch_diagram)
    story.append(Paragraph('Figure 1: SPARKNET High-Level Architecture', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>3.2 Multi-Agent RAG Pipeline</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The heart of SPARKNET is its Multi-Agent RAG system, which orchestrates five
        specialized agents in a sophisticated pipeline with self-correction capabilities.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))

    # RAG Pipeline Diagram
    rag_diagram = DiagramFlowable(500, 180, 'rag_pipeline')
    story.append(rag_diagram)
    story.append(Paragraph('Figure 2: Multi-Agent RAG Pipeline with Revision Loop', styles['Caption']))

    story.append(PageBreak())

    story.append(Paragraph('<b>3.3 Document Processing Pipeline</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''Documents undergo a multi-stage processing pipeline that extracts text, identifies
        layout structure, establishes reading order, and creates semantically coherent chunks
        optimized for retrieval.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))

    # Document Pipeline Diagram
    doc_diagram = DiagramFlowable(500, 180, 'document_pipeline')
    story.append(doc_diagram)
    story.append(Paragraph('Figure 3: Document Processing Pipeline', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>3.4 Agent Interaction Model</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The orchestrator coordinates all agents, managing state transitions and ensuring
        proper data flow between components. External services (Vector Store, LLM) are
        accessed through well-defined interfaces.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))

    # Agent Interaction Diagram
    agent_diagram = DiagramFlowable(500, 250, 'agent_interaction')
    story.append(agent_diagram)
    story.append(Paragraph('Figure 4: Agent Interaction Model', styles['Caption']))

    story.append(PageBreak())

    story.append(Paragraph('<b>3.5 Data Flow Architecture</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The end-to-end data flow illustrates how documents are processed from upload
        through indexing, and how queries are handled through the multi-agent pipeline
        to produce validated, citation-backed responses.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))

    # Data Flow Diagram
    flow_diagram = DiagramFlowable(500, 320, 'data_flow')
    story.append(flow_diagram)
    story.append(Paragraph('Figure 5: End-to-End Data Flow', styles['Caption']))

    story.append(PageBreak())

    # ========== 4. COMPONENT DEEP DIVE ==========
    story.append(Paragraph('4. Component Deep Dive', styles['SectionHeader']))

    story.append(Paragraph('<b>4.1 Query Planning Agent</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The QueryPlannerAgent is responsible for understanding user intent, classifying
        query types, and decomposing complex queries into manageable sub-queries.''',
        styles['CustomBody']
    ))

    # Query types table
    query_types = [
        ['Intent Type', 'Description', 'Example'],
        ['FACTOID', 'Simple fact lookup', '"What is the revenue for Q4?"'],
        ['COMPARISON', 'Multi-entity comparison', '"Compare product A vs B features"'],
        ['AGGREGATION', 'Cross-document summary', '"Summarize all quarterly reports"'],
        ['CAUSAL', 'Why/how explanations', '"Why did revenue decline?"'],
        ['PROCEDURAL', 'Step-by-step instructions', '"How to configure the system?"'],
        ['MULTI_HOP', 'Multi-step reasoning', '"Which supplier has the lowest cost for product X?"'],
    ]

    qt_table = Table(query_types, colWidths=[90, 180, 180])
    qt_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 5),
    ]))
    story.append(qt_table)
    story.append(Paragraph('Table 1: Supported Query Intent Types', styles['Caption']))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>4.2 Hybrid Retrieval System</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The RetrieverAgent implements a sophisticated hybrid search combining dense
        semantic retrieval with sparse keyword matching, using Reciprocal Rank Fusion (RRF)
        to merge results optimally.''',
        styles['CustomBody']
    ))

    retrieval_features = [
        '<b>Dense Retrieval:</b> Embedding-based semantic search using mxbai-embed-large (1024 dimensions)',
        '<b>Sparse Retrieval:</b> BM25-style keyword matching for precise term matching',
        '<b>RRF Fusion:</b> Combines rankings using formula: RRF = Σ(1 / (k + rank))',
        '<b>Intent-Adaptive Weights:</b> Adjusts dense/sparse balance based on query type (e.g., 80/20 for definitions, 50/50 for comparisons)',
    ]

    for feat in retrieval_features:
        story.append(Paragraph(f'• {feat}', styles['BulletText']))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>4.3 Cross-Encoder Reranking</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The RerankerAgent applies LLM-based cross-encoder scoring to refine retrieval
        results, implementing deduplication and Maximal Marginal Relevance (MMR) for
        diversity promotion.''',
        styles['CustomBody']
    ))

    reranker_config = [
        ['Parameter', 'Value', 'Purpose'],
        ['top_k', '5', 'Final result count'],
        ['min_relevance_score', '0.3', 'Quality threshold'],
        ['dedup_threshold', '0.9', 'Similarity for duplicate detection'],
        ['MMR lambda', '0.7', 'Relevance vs diversity balance'],
    ]

    rr_table = Table(reranker_config, colWidths=[140, 80, 230])
    rr_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('PADDING', (0, 0), (-1, -1), 6),
    ]))
    story.append(rr_table)
    story.append(Paragraph('Table 2: Reranker Configuration', styles['Caption']))

    story.append(PageBreak())

    story.append(Paragraph('<b>4.4 Answer Synthesis</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The SynthesizerAgent generates comprehensive answers with automatic citation
        tracking, supporting multiple output formats and implementing intelligent abstention
        when evidence is insufficient.''',
        styles['CustomBody']
    ))

    story.append(Paragraph('<b>Supported Answer Formats:</b>', styles['CustomBody']))
    formats = ['PROSE - Flowing paragraph narrative', 'BULLET_POINTS - Enumerated key points',
               'TABLE - Comparative tabular format', 'STEP_BY_STEP - Procedural instructions']
    for fmt in formats:
        story.append(Paragraph(f'• {fmt}', styles['BulletText']))

    story.append(Paragraph('<b>Confidence Calculation:</b>', styles['CustomBody']))
    story.append(Paragraph('confidence = 0.5 × source_relevance + 0.3 × source_count_factor + 0.2 × consistency', styles['BulletText']))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>4.5 Validation & Hallucination Detection</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The CriticAgent performs comprehensive validation including hallucination detection,
        citation verification, and factual consistency checking. It can trigger revision
        cycles when issues are detected.''',
        styles['CustomBody']
    ))

    issue_types = [
        ['Issue Type', 'Description', 'Severity'],
        ['HALLUCINATION', 'Information not supported by sources', 'Critical'],
        ['UNSUPPORTED_CLAIM', 'Statement without citation', 'High'],
        ['INCORRECT_CITATION', 'Citation references wrong source', 'High'],
        ['CONTRADICTION', 'Internal inconsistency in answer', 'Medium'],
        ['INCOMPLETE', 'Missing important information', 'Medium'],
        ['FACTUAL_ERROR', 'Verifiable factual mistake', 'Critical'],
    ]

    it_table = Table(issue_types, colWidths=[130, 230, 90])
    it_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 5),
    ]))
    story.append(it_table)
    story.append(Paragraph('Table 3: Validation Issue Types', styles['Caption']))

    story.append(PageBreak())

    story.append(Paragraph('<b>4.6 Document Processing Components</b>', styles['SubsectionHeader']))

    story.append(Paragraph('<b>OCR Engines:</b>', styles['CustomBody']))
    ocr_comparison = [
        ['Feature', 'PaddleOCR', 'Tesseract'],
        ['GPU Acceleration', '✓ Yes', '✗ No'],
        ['Multi-language', '✓ 80+ languages', '✓ 100+ languages'],
        ['Accuracy (Clean)', '~95%', '~90%'],
        ['Accuracy (Complex)', '~85%', '~75%'],
        ['Speed', 'Fast', 'Moderate'],
        ['Confidence Scores', '✓ Per-region', '✓ Per-word'],
    ]

    ocr_table = Table(ocr_comparison, colWidths=[130, 160, 160])
    ocr_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('PADDING', (0, 0), (-1, -1), 5),
    ]))
    story.append(ocr_table)
    story.append(Paragraph('Table 4: OCR Engine Comparison', styles['Caption']))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>Layout Detection:</b>', styles['CustomBody']))
    layout_types = ['TEXT, TITLE, HEADING, PARAGRAPH - Text regions',
                    'TABLE, FIGURE, CHART - Visual elements',
                    'CAPTION, FOOTNOTE - Supplementary text',
                    'HEADER, FOOTER - Page elements',
                    'FORMULA - Mathematical expressions']
    for lt in layout_types:
        story.append(Paragraph(f'• {lt}', styles['BulletText']))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>Chunking Configuration:</b>', styles['CustomBody']))
    chunk_config = [
        ['Parameter', 'Default', 'Description'],
        ['max_chunk_chars', '1000', 'Maximum characters per chunk'],
        ['min_chunk_chars', '50', 'Minimum viable chunk size'],
        ['overlap_chars', '100', 'Overlap between consecutive chunks'],
        ['Strategy', 'Semantic', 'Respects layout boundaries'],
    ]

    cc_table = Table(chunk_config, colWidths=[120, 80, 250])
    cc_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('PADDING', (0, 0), (-1, -1), 5),
    ]))
    story.append(cc_table)
    story.append(Paragraph('Table 5: Chunking Configuration', styles['Caption']))

    story.append(PageBreak())

    # ========== 5. CURRENT PROGRESS ==========
    story.append(Paragraph('5. Current Progress & Achievements', styles['SectionHeader']))

    story.append(Paragraph('<b>5.1 Development Milestones</b>', styles['SubsectionHeader']))

    milestones = [
        ['Milestone', 'Status', 'Completion'],
        ['Core RAG Pipeline', 'Complete', '100%'],
        ['5-Agent Architecture', 'Complete', '100%'],
        ['Document Processing Pipeline', 'Complete', '100%'],
        ['ChromaDB Integration', 'Complete', '100%'],
        ['Ollama LLM Integration', 'Complete', '100%'],
        ['Streamlit Demo Application', 'Complete', '100%'],
        ['State Management System', 'Complete', '100%'],
        ['Hallucination Detection', 'Complete', '100%'],
        ['PDF Processing', 'Complete', '100%'],
        ['Self-Correction Loop', 'Complete', '100%'],
    ]

    ms_table = Table(milestones, colWidths=[220, 120, 110])
    ms_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 6),
        ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
    ]))
    story.append(ms_table)
    story.append(Paragraph('Table 6: Development Milestones', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>5.2 Demo Application Features</b>', styles['SubsectionHeader']))

    demo_features = [
        ['Page', 'Features', 'Status'],
        ['Live Processing', 'Real-time document processing, progress tracking, auto-indexing', '✓ Complete'],
        ['Interactive RAG', 'Query interface, document filtering, chunk preview, citations', '✓ Complete'],
        ['Document Comparison', 'Semantic similarity, structure analysis, content diff', '✓ Complete'],
        ['Evidence Viewer', 'Confidence coloring, bounding boxes, OCR regions, export', '✓ Complete'],
        ['Document Viewer', 'Multi-tab view, chunk display, layout visualization', '✓ Complete'],
    ]

    df_table = Table(demo_features, colWidths=[110, 270, 70])
    df_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 5),
        ('ALIGN', (2, 0), (2, -1), 'CENTER'),
    ]))
    story.append(df_table)
    story.append(Paragraph('Table 7: Demo Application Features', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>5.3 Technical Achievements</b>', styles['SubsectionHeader']))

    achievements = [
        '<b>Hybrid Retrieval:</b> Successfully combined dense and sparse retrieval with RRF fusion, achieving better recall than either method alone.',
        '<b>Self-Correction:</b> Implemented revision loop allowing the system to automatically fix issues detected by the Critic agent.',
        '<b>Citation Tracking:</b> Automatic citation generation with [N] notation linking answers to source documents.',
        '<b>Confidence Scoring:</b> Multi-factor confidence calculation providing transparency into answer reliability.',
        '<b>Streaming Support:</b> Real-time response streaming for improved user experience during long generations.',
        '<b>Cross-Module Communication:</b> Unified state manager enabling seamless navigation between application modules.',
    ]

    for ach in achievements:
        story.append(Paragraph(f'• {ach}', styles['BulletText']))

    story.append(PageBreak())

    # ========== 6. GAP ANALYSIS ==========
    story.append(Paragraph('6. Gap Analysis', styles['SectionHeader']))

    story.append(Paragraph(
        '''This section identifies current limitations and gaps in the SPARKNET system
        that represent opportunities for improvement and future development.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>6.1 Functional Gaps</b>', styles['SubsectionHeader']))

    functional_gaps = [
        ['Gap ID', 'Category', 'Description', 'Impact', 'Priority'],
        ['FG-001', 'Document Support', 'Limited to PDF and images; no Word, Excel, PowerPoint support', 'High', 'P1'],
        ['FG-002', 'Table Extraction', 'Table structure not preserved during chunking', 'High', 'P1'],
        ['FG-003', 'Multi-modal', 'No image/chart understanding within documents', 'Medium', 'P2'],
        ['FG-004', 'Languages', 'Primarily English; limited multi-language support', 'Medium', 'P2'],
        ['FG-005', 'Batch Processing', 'No bulk document upload/processing capability', 'Medium', 'P2'],
        ['FG-006', 'Document Updates', 'No incremental update; full reprocessing required', 'Medium', 'P2'],
        ['FG-007', 'User Feedback', 'No mechanism to learn from user corrections', 'Low', 'P3'],
    ]

    fg_table = Table(functional_gaps, colWidths=[50, 85, 200, 55, 55])
    fg_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), DANGER_RED),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 4),
        ('ALIGN', (0, 0), (0, -1), 'CENTER'),
        ('ALIGN', (3, 0), (-1, -1), 'CENTER'),
    ]))
    story.append(fg_table)
    story.append(Paragraph('Table 8: Functional Gaps', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>6.2 Technical Gaps</b>', styles['SubsectionHeader']))

    technical_gaps = [
        ['Gap ID', 'Category', 'Description', 'Impact', 'Priority'],
        ['TG-001', 'Scalability', 'Single-node architecture; no distributed processing', 'High', 'P1'],
        ['TG-002', 'Authentication', 'No user authentication or access control', 'High', 'P1'],
        ['TG-003', 'API', 'No REST API for external integration', 'High', 'P1'],
        ['TG-004', 'Caching', 'Limited query result caching; redundant LLM calls', 'Medium', 'P2'],
        ['TG-005', 'Monitoring', 'Basic logging only; no metrics/alerting system', 'Medium', 'P2'],
        ['TG-006', 'Testing', 'Limited test coverage; no integration tests', 'Medium', 'P2'],
        ['TG-007', 'Cloud Deploy', 'Not containerized; no Kubernetes manifests', 'Medium', 'P2'],
        ['TG-008', 'GPU Sharing', 'Single GPU utilization; no multi-GPU support', 'Low', 'P3'],
    ]

    tg_table = Table(technical_gaps, colWidths=[50, 80, 205, 55, 55])
    tg_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 4),
        ('ALIGN', (0, 0), (0, -1), 'CENTER'),
        ('ALIGN', (3, 0), (-1, -1), 'CENTER'),
    ]))
    story.append(tg_table)
    story.append(Paragraph('Table 9: Technical Gaps', styles['Caption']))

    story.append(PageBreak())

    story.append(Paragraph('<b>6.3 Performance Gaps</b>', styles['SubsectionHeader']))

    perf_gaps = [
        ['Gap ID', 'Metric', 'Current', 'Target', 'Gap'],
        ['PG-001', 'Query Latency (simple)', '3-5 seconds', '<2 seconds', '~2x improvement needed'],
        ['PG-002', 'Query Latency (complex)', '10-20 seconds', '<5 seconds', '~3x improvement needed'],
        ['PG-003', 'Document Processing', '30-60 sec/page', '<10 sec/page', '~4x improvement needed'],
        ['PG-004', 'Concurrent Users', '1-5', '50+', 'Major scaling required'],
        ['PG-005', 'Index Size', '10K chunks', '1M+ chunks', 'Architecture redesign'],
        ['PG-006', 'Accuracy (hallucination)', '~85%', '>95%', '~10% improvement'],
    ]

    pg_table = Table(perf_gaps, colWidths=[50, 120, 90, 90, 100])
    pg_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 4),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ]))
    story.append(pg_table)
    story.append(Paragraph('Table 10: Performance Gaps', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>6.4 Security & Compliance Gaps</b>', styles['SubsectionHeader']))

    security_gaps = [
        '<b>No Authentication:</b> Currently no user login or session management',
        '<b>No Authorization:</b> Missing role-based access control (RBAC) for documents',
        '<b>Data Encryption:</b> Documents and embeddings stored unencrypted at rest',
        '<b>Audit Logging:</b> No comprehensive audit trail for compliance requirements',
        '<b>PII Detection:</b> No automatic detection/redaction of personally identifiable information',
        '<b>GDPR/HIPAA:</b> Not compliant with major data protection regulations',
    ]

    for sg in security_gaps:
        story.append(Paragraph(f'• {sg}', styles['BulletText']))

    story.append(PageBreak())

    # ========== 7. FUTURE WORK & ROADMAP ==========
    story.append(Paragraph('7. Future Work & Roadmap', styles['SectionHeader']))

    story.append(Paragraph('<b>7.1 Strategic Roadmap Overview</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''The SPARKNET roadmap is organized into three phases, each building upon the
        previous to transform the current prototype into a production-ready enterprise
        solution.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))

    # Roadmap phases
    roadmap = [
        ['Phase', 'Timeline', 'Focus Areas', 'Key Deliverables'],
        ['Phase 1:\nFoundation', 'Q1-Q2 2026',
         'Stability, Core Features,\nBasic Security',
         '• REST API\n• Authentication\n• Extended document formats\n• Basic containerization'],
        ['Phase 2:\nScale', 'Q3-Q4 2026',
         'Performance, Scalability,\nEnterprise Features',
         '• Distributed processing\n• Advanced caching\n• Multi-tenancy\n• Monitoring & alerting'],
        ['Phase 3:\nInnovation', 'Q1-Q2 2027',
         'Advanced AI, Compliance,\nEcosystem',
         '• Multi-modal understanding\n• Compliance frameworks\n• Plugin architecture\n• Advanced analytics'],
    ]

    rm_table = Table(roadmap, colWidths=[70, 80, 130, 170])
    rm_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [LIGHT_BLUE, WHITE]),
        ('PADDING', (0, 0), (-1, -1), 6),
        ('VALIGN', (0, 0), (-1, -1), 'TOP'),
    ]))
    story.append(rm_table)
    story.append(Paragraph('Table 11: Strategic Roadmap', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>7.2 Phase 1: Foundation (Q1-Q2 2026)</b>', styles['SubsectionHeader']))

    phase1_items = [
        ['Item', 'Description', 'Effort', 'Dependencies'],
        ['REST API Development', 'FastAPI-based API for all core functions', '4 weeks', 'None'],
        ['User Authentication', 'JWT-based auth with OAuth2 support', '3 weeks', 'API'],
        ['Document Format Extension', 'Add Word, Excel, PowerPoint support', '4 weeks', 'None'],
        ['Table Extraction', 'Preserve table structure in processing', '3 weeks', 'None'],
        ['Docker Containerization', 'Production-ready Docker images', '2 weeks', 'None'],
        ['Basic CI/CD Pipeline', 'Automated testing and deployment', '2 weeks', 'Docker'],
        ['Query Result Caching', 'Redis-based caching layer', '2 weeks', 'API'],
        ['Unit Test Coverage', 'Achieve 80% code coverage', '3 weeks', 'Ongoing'],
    ]

    p1_table = Table(phase1_items, colWidths=[130, 180, 60, 80])
    p1_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SUCCESS_GREEN),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 4),
    ]))
    story.append(p1_table)
    story.append(Paragraph('Table 12: Phase 1 Deliverables', styles['Caption']))

    story.append(PageBreak())

    story.append(Paragraph('<b>7.3 Phase 2: Scale (Q3-Q4 2026)</b>', styles['SubsectionHeader']))

    phase2_items = [
        ['Item', 'Description', 'Effort', 'Dependencies'],
        ['Distributed Processing', 'Celery/Ray for parallel document processing', '6 weeks', 'Phase 1'],
        ['Vector Store Scaling', 'Milvus/Pinecone for large-scale indices', '4 weeks', 'Phase 1'],
        ['Multi-tenancy', 'Organization-based data isolation', '4 weeks', 'Auth'],
        ['Kubernetes Deployment', 'Full K8s manifests and Helm charts', '3 weeks', 'Docker'],
        ['Monitoring Stack', 'Prometheus, Grafana, ELK integration', '3 weeks', 'K8s'],
        ['Batch Processing', 'Bulk document upload and processing', '3 weeks', 'Distributed'],
        ['Advanced Caching', 'Semantic caching for similar queries', '3 weeks', 'Cache'],
        ['Performance Optimization', 'Achieve <2s simple query latency', '4 weeks', 'Caching'],
    ]

    p2_table = Table(phase2_items, colWidths=[130, 180, 60, 80])
    p2_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 4),
    ]))
    story.append(p2_table)
    story.append(Paragraph('Table 13: Phase 2 Deliverables', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>7.4 Phase 3: Innovation (Q1-Q2 2027)</b>', styles['SubsectionHeader']))

    phase3_items = [
        ['Item', 'Description', 'Effort', 'Dependencies'],
        ['Multi-modal Understanding', 'GPT-4V/Claude Vision for image analysis', '6 weeks', 'Phase 2'],
        ['Advanced Table QA', 'SQL-like queries over extracted tables', '4 weeks', 'Table Extract'],
        ['PII Detection/Redaction', 'Automatic sensitive data handling', '4 weeks', 'None'],
        ['Compliance Framework', 'GDPR, HIPAA, SOC2 compliance', '8 weeks', 'PII'],
        ['Plugin Architecture', 'Extensible agent and tool system', '4 weeks', 'Phase 2'],
        ['Analytics Dashboard', 'Usage analytics and insights', '3 weeks', 'Monitoring'],
        ['Multi-language Support', 'Full support for top 10 languages', '4 weeks', 'None'],
        ['Feedback Learning', 'Learn from user corrections', '4 weeks', 'Analytics'],
    ]

    p3_table = Table(phase3_items, colWidths=[130, 180, 60, 80])
    p3_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), ACCENT_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 4),
    ]))
    story.append(p3_table)
    story.append(Paragraph('Table 14: Phase 3 Deliverables', styles['Caption']))

    story.append(PageBreak())

    # ========== 8. RISK ASSESSMENT ==========
    story.append(Paragraph('8. Risk Assessment', styles['SectionHeader']))

    story.append(Paragraph('<b>8.1 Technical Risks</b>', styles['SubsectionHeader']))

    tech_risks = [
        ['Risk', 'Probability', 'Impact', 'Mitigation'],
        ['LLM API Changes', 'Medium', 'High', 'Abstract LLM interface; support multiple providers'],
        ['Scaling Bottlenecks', 'High', 'High', 'Early load testing; phased rollout'],
        ['Model Accuracy Plateau', 'Medium', 'Medium', 'Ensemble approaches; fine-tuning capability'],
        ['Dependency Vulnerabilities', 'Medium', 'Medium', 'Regular dependency audits; Dependabot'],
        ['Data Loss', 'Low', 'Critical', 'Automated backups; disaster recovery plan'],
    ]

    tr_table = Table(tech_risks, colWidths=[120, 70, 70, 190])
    tr_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), DANGER_RED),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 5),
        ('ALIGN', (1, 0), (2, -1), 'CENTER'),
    ]))
    story.append(tr_table)
    story.append(Paragraph('Table 15: Technical Risks', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>8.2 Project Risks</b>', styles['SubsectionHeader']))

    proj_risks = [
        ['Risk', 'Probability', 'Impact', 'Mitigation'],
        ['Scope Creep', 'High', 'Medium', 'Strict phase gates; change control process'],
        ['Resource Constraints', 'Medium', 'High', 'Prioritized backlog; MVP focus'],
        ['Timeline Slippage', 'Medium', 'Medium', 'Buffer time; parallel workstreams'],
        ['Knowledge Silos', 'Medium', 'Medium', 'Documentation; pair programming; code reviews'],
        ['Stakeholder Alignment', 'Low', 'High', 'Regular demos; feedback cycles'],
    ]

    pr_table = Table(proj_risks, colWidths=[120, 70, 70, 190])
    pr_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 5),
        ('ALIGN', (1, 0), (2, -1), 'CENTER'),
    ]))
    story.append(pr_table)
    story.append(Paragraph('Table 16: Project Risks', styles['Caption']))

    story.append(PageBreak())

    # ========== 9. RESOURCE REQUIREMENTS ==========
    story.append(Paragraph('9. Resource Requirements', styles['SectionHeader']))

    story.append(Paragraph('<b>9.1 Team Structure (Recommended)</b>', styles['SubsectionHeader']))

    team = [
        ['Role', 'Count', 'Phase 1', 'Phase 2', 'Phase 3'],
        ['Senior ML Engineer', '2', '✓', '✓', '✓'],
        ['Backend Developer', '2', '✓', '✓', '✓'],
        ['Frontend Developer', '1', '✓', '✓', '✓'],
        ['DevOps Engineer', '1', '✓', '✓', '✓'],
        ['QA Engineer', '1', '—', '✓', '✓'],
        ['Technical Lead', '1', '✓', '✓', '✓'],
        ['Product Manager', '1', '✓', '✓', '✓'],
    ]

    team_table = Table(team, colWidths=[130, 60, 70, 70, 70])
    team_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 9),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 6),
        ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
    ]))
    story.append(team_table)
    story.append(Paragraph('Table 17: Team Structure', styles['Caption']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>9.2 Infrastructure Requirements</b>', styles['SubsectionHeader']))

    infra = [
        ['Component', 'Development', 'Staging', 'Production'],
        ['GPU Servers', '1x A100 40GB', '2x A100 40GB', '4x A100 80GB'],
        ['CPU Servers', '4 vCPU, 16GB', '8 vCPU, 32GB', '16 vCPU, 64GB x3'],
        ['Storage', '500GB SSD', '2TB SSD', '10TB SSD + S3'],
        ['Vector DB', 'ChromaDB local', 'Milvus single', 'Milvus cluster'],
        ['Cache', 'In-memory', 'Redis single', 'Redis cluster'],
        ['Load Balancer', 'None', 'Nginx', 'AWS ALB / GCP LB'],
    ]

    infra_table = Table(infra, colWidths=[100, 120, 120, 110])
    infra_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTSIZE', (0, 0), (-1, -1), 8),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
        ('PADDING', (0, 0), (-1, -1), 5),
    ]))
    story.append(infra_table)
    story.append(Paragraph('Table 18: Infrastructure Requirements', styles['Caption']))

    story.append(PageBreak())

    # ========== 10. CONCLUSION ==========
    story.append(Paragraph('10. Conclusion & Recommendations', styles['SectionHeader']))

    story.append(Paragraph('<b>10.1 Summary</b>', styles['SubsectionHeader']))
    story.append(Paragraph(
        '''SPARKNET has achieved significant progress as a proof-of-concept for multi-agentic
        document intelligence. The core RAG pipeline is functional, demonstrating the viability
        of the 5-agent architecture with self-correction capabilities. The system successfully
        processes documents, performs hybrid retrieval, and generates citation-backed responses.''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 10))
    story.append(Paragraph('<b>10.2 Key Recommendations</b>', styles['SubsectionHeader']))

    recommendations = [
        '<b>Prioritize API Development:</b> Enable external integrations and unlock enterprise adoption.',
        '<b>Invest in Security:</b> Authentication and authorization are prerequisites for any production deployment.',
        '<b>Focus on Performance:</b> Current latency is acceptable for demos but needs significant improvement for production use.',
        '<b>Expand Document Support:</b> Office formats (Word, Excel, PowerPoint) are critical for enterprise adoption.',
        '<b>Implement Monitoring:</b> Observability is essential for maintaining and scaling the system.',
        '<b>Plan for Scale Early:</b> Architectural decisions made now will impact scalability; consider distributed architecture.',
    ]

    for rec in recommendations:
        story.append(Paragraph(f'• {rec}', styles['BulletText']))

    story.append(Spacer(1, 15))
    story.append(Paragraph('<b>10.3 Immediate Next Steps</b>', styles['SubsectionHeader']))

    next_steps = [
        '1. Finalize Phase 1 scope and create detailed sprint plans',
        '2. Set up development infrastructure and CI/CD pipeline',
        '3. Begin REST API development (target: 4 weeks)',
        '4. Initiate security assessment and authentication design',
        '5. Start documentation and knowledge transfer activities',
        '6. Schedule bi-weekly stakeholder demos for continuous feedback',
    ]

    for step in next_steps:
        story.append(Paragraph(step, styles['BulletText']))

    story.append(Spacer(1, 30))

    # Final signature block
    story.append(HRFlowable(width='100%', thickness=1, color=PRIMARY_BLUE))
    story.append(Spacer(1, 15))

    story.append(Paragraph(
        f'''<b>Document prepared by:</b> SPARKNET Development Team<br/>
        <b>Report Date:</b> {datetime.now().strftime('%B %d, %Y')}<br/>
        <b>Version:</b> 1.0<br/>
        <b>Classification:</b> Internal / Confidential''',
        styles['CustomBody']
    ))

    story.append(Spacer(1, 20))
    story.append(Paragraph(
        '<i>This document contains confidential information intended for stakeholder review. '
        'Please do not distribute without authorization.</i>',
        styles['Caption']
    ))

    # Build PDF
    doc.build(story)
    print(f"Report generated: {filename}")
    return filename


# Script entry point: build the report only when this file is executed
# directly, so that importing the module does not trigger PDF generation.
# The filename returned by generate_report() is not used here; the function
# itself prints the path of the generated report.
if __name__ == '__main__':
    generate_report()