Spaces:

MHamdan
/

SPARKNET

Sleeping

App Files Files Community

SPARKNET / docs /SPARKNET_Progress_Report.py

MHamdan

Initial commit: SPARKNET framework

d520909 12 days ago

raw

history blame contribute delete

63.9 kB

	#!/usr/bin/env python3
	"""
	SPARKNET Progress Report & Future Work PDF Generator
	Generates a comprehensive stakeholder presentation document.
	"""

	import math
	import os
	from datetime import datetime

	from reportlab.lib import colors
	from reportlab.lib.pagesizes import A4, landscape
	from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
	from reportlab.lib.units import inch, cm
	from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY, TA_RIGHT
	from reportlab.platypus import (
	    SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle,
	    PageBreak, Image, ListFlowable, ListItem, KeepTogether,
	    Flowable, HRFlowable
	)
	from reportlab.graphics.shapes import Drawing, Rect, String, Line, Polygon
	from reportlab.graphics.charts.barcharts import VerticalBarChart
	from reportlab.graphics.charts.piecharts import Pie
	from reportlab.graphics import renderPDF
	from reportlab.pdfgen import canvas

	# Color Scheme - Professional Blue Theme
	# Shared palette for the diagrams, paragraph styles and tables in this file.
	# Diagram titles, heading text and table header backgrounds.
	PRIMARY_BLUE = colors.HexColor('#1e3a5f')
	# Secondary diagram boxes, subtitle/subsection text and some table headers.
	SECONDARY_BLUE = colors.HexColor('#2d5a87')
	# Borders, grid lines and connector arrows.
	ACCENT_BLUE = colors.HexColor('#4a90d9')
	# Panel backgrounds in diagrams and the default highlight-box background.
	LIGHT_BLUE = colors.HexColor('#e8f4fc')
	# 'Complete' status and final/output boxes in diagrams.
	SUCCESS_GREEN = colors.HexColor('#28a745')
	# 'In Progress' status, the Critic agent and the revision loop.
	WARNING_ORANGE = colors.HexColor('#fd7e14')
	# Fallback colour for any status other than 'Complete' / 'In Progress'.
	DANGER_RED = colors.HexColor('#dc3545')
	# Body text and the default arrow colour.
	GRAY_DARK = colors.HexColor('#343a40')
	# Alternating table row background and the storage panel background.
	GRAY_LIGHT = colors.HexColor('#f8f9fa')
	# Default text colour inside filled diagram boxes.
	WHITE = colors.white


	class DiagramFlowable(Flowable):
	    """Custom flowable for drawing architecture diagrams.

	    The flowable reserves a ``width`` x ``height`` area and, when rendered,
	    draws the diagram selected by ``diagram_type`` directly on the canvas.
	    Supported types: ``'architecture'``, ``'rag_pipeline'``,
	    ``'document_pipeline'``, ``'agent_interaction'`` and ``'data_flow'``.
	    Any other value draws nothing (the reserved area stays blank).
	    """

	    # Length of each arrow-head stroke and its angle (radians) from the shaft.
	    _ARROW_HEAD_LENGTH = 8
	    _ARROW_HEAD_ANGLE = 0.4

	    def __init__(self, width, height, diagram_type='architecture'):
	        """Store the drawing area size (in points) and the diagram to draw."""
	        Flowable.__init__(self)
	        self.width = width
	        self.height = height
	        self.diagram_type = diagram_type

	    def draw(self):
	        """Render the diagram selected by ``self.diagram_type``.

	        Unknown diagram types are ignored, leaving the reserved area empty.
	        """
	        renderers = {
	            'architecture': self._draw_architecture,
	            'rag_pipeline': self._draw_rag_pipeline,
	            'document_pipeline': self._draw_document_pipeline,
	            'agent_interaction': self._draw_agent_interaction,
	            'data_flow': self._draw_data_flow,
	        }
	        renderer = renderers.get(self.diagram_type)
	        if renderer is not None:
	            renderer()

	    def _draw_box(self, x, y, w, h, text, fill_color, text_color=WHITE, font_size=9):
	        """Draw a filled rounded box at (x, y) of size w x h with centred bold text."""
	        canv = self.canv
	        canv.setFillColor(fill_color)
	        canv.roundRect(x, y, w, h, 5, fill=1, stroke=0)
	        canv.setFillColor(text_color)
	        canv.setFont('Helvetica-Bold', font_size)
	        # Baseline sits 3pt below the vertical middle so the glyphs look centred.
	        canv.drawCentredString(x + w / 2, y + h / 2 - 3, text)

	    def _draw_arrow(self, x1, y1, x2, y2, color=GRAY_DARK):
	        """Draw a 2pt line from (x1, y1) to (x2, y2) with a two-stroke head at (x2, y2)."""
	        canv = self.canv
	        canv.setStrokeColor(color)
	        canv.setLineWidth(2)
	        canv.line(x1, y1, x2, y2)
	        # Arrow head: two short strokes fanning back from the tip.
	        shaft_angle = math.atan2(y2 - y1, x2 - x1)
	        for offset in (-self._ARROW_HEAD_ANGLE, self._ARROW_HEAD_ANGLE):
	            canv.line(
	                x2, y2,
	                x2 - self._ARROW_HEAD_LENGTH * math.cos(shaft_angle + offset),
	                y2 - self._ARROW_HEAD_LENGTH * math.sin(shaft_angle + offset),
	            )

	    def _draw_architecture(self):
	        """Draw the high-level SPARKNET architecture.

	        Layers, top to bottom: user interface, Streamlit demo pages, core
	        services (each with its backing component) and persistent storage.
	        """
	        canv = self.canv
	        width, height = self.width, self.height
	        mid_x = width / 2
	        panel_x = 30
	        panel_w = width - 60

	        # Title
	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 12)
	        canv.drawCentredString(mid_x, height - 20, 'SPARKNET Architecture Overview')

	        # User Layer
	        self._draw_box(mid_x - 60, height - 70, 120, 35, 'User Interface', ACCENT_BLUE)

	        # Demo Layer
	        canv.setFillColor(LIGHT_BLUE)
	        canv.roundRect(panel_x, height - 160, panel_w, 70, 8, fill=1, stroke=0)
	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 10)
	        canv.drawString(40, height - 100, 'Streamlit Demo Application')

	        # Demo pages
	        pages = ['Live Processing', 'Interactive RAG', 'Doc Comparison', 'Evidence Viewer', 'Doc Viewer']
	        page_width = (width - 100) / 5
	        for i, page in enumerate(pages):
	            self._draw_box(45 + i * page_width, height - 150, page_width - 10, 35,
	                           page, SECONDARY_BLUE, font_size=7)

	        # Arrow from UI to Demo
	        self._draw_arrow(mid_x, height - 70, mid_x, height - 90, ACCENT_BLUE)

	        # Core Services Layer
	        canv.setFillColor(LIGHT_BLUE)
	        canv.roundRect(panel_x, height - 280, panel_w, 100, 8, fill=1, stroke=0)
	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 10)
	        canv.drawString(40, height - 190, 'Core Services')

	        # Core boxes with their sub-components. Column width is derived from
	        # the panel width so the last column cannot stick out of the panel
	        # (fixed x positions put it 20pt past the right edge at width=500).
	        core_columns = [
	            ('Document Intel', 'OCR + Layout'),
	            ('Multi-Agent RAG', '5 Agents'),
	            ('Vector Store', 'ChromaDB'),
	            ('LLM Layer', 'Ollama'),
	        ]
	        col_pad = 20
	        col_gap = 20
	        col_w = (panel_w - 2 * col_pad - (len(core_columns) - 1) * col_gap) / len(core_columns)
	        for i, (service, component) in enumerate(core_columns):
	            col_x = panel_x + col_pad + i * (col_w + col_gap)
	            self._draw_box(col_x, height - 230, col_w, 30, service, PRIMARY_BLUE, font_size=8)
	            self._draw_box(col_x, height - 270, col_w, 30, component, SECONDARY_BLUE, font_size=7)

	        # Arrow from Demo to Core
	        self._draw_arrow(mid_x, height - 160, mid_x, height - 180, ACCENT_BLUE)

	        # Storage Layer
	        canv.setFillColor(GRAY_LIGHT)
	        canv.roundRect(panel_x, height - 340, panel_w, 45, 8, fill=1, stroke=0)
	        canv.setFillColor(GRAY_DARK)
	        canv.setFont('Helvetica-Bold', 10)
	        canv.drawString(40, height - 310, 'Persistent Storage')

	        for store_x, store in ((150, 'Embeddings'), (250, 'Documents'), (350, 'Cache')):
	            self._draw_box(store_x, height - 335, 80, 25, store, GRAY_DARK, font_size=7)

	        # Arrow from Core to Storage
	        self._draw_arrow(mid_x, height - 280, mid_x, height - 295, GRAY_DARK)

	    def _draw_rag_pipeline(self):
	        """Draw the Multi-Agent RAG Pipeline.

	        One row of boxes (query, five agents, response) joined by arrows,
	        a dashed revision loop between Critic and Synthesizer, and a state
	        tracking bar underneath.
	        """
	        canv = self.canv
	        width, height = self.width, self.height

	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 12)
	        canv.drawCentredString(width / 2, height - 20, 'Multi-Agent RAG Pipeline')

	        # Agents in sequence
	        agents = [
	            ('QueryPlanner', PRIMARY_BLUE, 'Intent Classification\nQuery Decomposition'),
	            ('Retriever', SECONDARY_BLUE, 'Hybrid Search\nDense + Sparse'),
	            ('Reranker', SECONDARY_BLUE, 'Cross-Encoder\nMMR Diversity'),
	            ('Synthesizer', PRIMARY_BLUE, 'Answer Generation\nCitation Tracking'),
	            ('Critic', WARNING_ORANGE, 'Hallucination Check\nValidation'),
	        ]

	        # The row holds len(agents) + 2 boxes (query + agents + response).
	        # Boxes are at most 80pt wide and shrink when the flowable is too
	        # narrow; with fixed 80pt boxes the row ended at x=660 and ran off
	        # a 500pt-wide flowable.
	        margin = 20
	        outer_gap = 20   # gap around the query and response boxes
	        inner_gap = 10   # gap between neighbouring agents
	        reserved = 2 * margin + 2 * outer_gap + (len(agents) - 1) * inner_gap
	        box_width = min(80, (width - reserved) / (len(agents) + 2))
	        pitch = box_width + inner_gap
	        box_y = height - 70
	        arrow_y = height - 55
	        first_agent_x = margin + box_width + outer_gap

	        # Query input and its arrow to the first agent
	        self._draw_box(margin, box_y, box_width, 30, 'User Query', ACCENT_BLUE, font_size=8)
	        self._draw_arrow(margin + box_width, arrow_y, first_agent_x, arrow_y, ACCENT_BLUE)

	        for i, (name, color, desc) in enumerate(agents):
	            x = first_agent_x + i * pitch
	            self._draw_box(x, box_y, box_width, 30, name, color, font_size=7)
	            # Description below the box, one line per '\n'-separated part
	            canv.setFillColor(GRAY_DARK)
	            canv.setFont('Helvetica', 6)
	            for j, line in enumerate(desc.split('\n')):
	                canv.drawCentredString(x + box_width / 2, height - 85 - j * 8, line)

	            # Arrow to next agent
	            if i < len(agents) - 1:
	                self._draw_arrow(x + box_width, arrow_y, x + pitch, arrow_y, GRAY_DARK)

	        # Revision loop: dashed line from the centre of the last agent
	        # (Critic) back to the centre of the one before it (Synthesizer).
	        synth_x = first_agent_x + (len(agents) - 2) * pitch
	        critic_x = first_agent_x + (len(agents) - 1) * pitch + box_width
	        canv.setStrokeColor(WARNING_ORANGE)
	        canv.setLineWidth(1.5)
	        canv.setDash(3, 3)
	        canv.line(critic_x - box_width / 2, height - 100, synth_x + box_width / 2, height - 100)
	        canv.setDash()

	        canv.setFillColor(WARNING_ORANGE)
	        canv.setFont('Helvetica-Oblique', 7)
	        canv.drawCentredString((critic_x + synth_x) / 2, height - 115, 'Revision Loop (if validation fails)')

	        # Final output
	        self._draw_box(critic_x + outer_gap, box_y, box_width, 30, 'Response', SUCCESS_GREEN, font_size=8)
	        self._draw_arrow(critic_x, arrow_y, critic_x + outer_gap, arrow_y, SUCCESS_GREEN)

	        # State tracking bar
	        canv.setFillColor(LIGHT_BLUE)
	        canv.roundRect(20, height - 160, width - 40, 35, 5, fill=1, stroke=0)
	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 8)
	        canv.drawString(30, height - 145, 'RAGState: Query → Plan → Retrieved Chunks → Reranked → Answer → Validation → Citations')

	    def _draw_document_pipeline(self):
	        """Draw Document Processing Pipeline.

	        Six stage boxes centred horizontally, each with a two-line
	        description beneath it, plus a summary line of the output.
	        """
	        canv = self.canv
	        width, height = self.width, self.height

	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 12)
	        canv.drawCentredString(width / 2, height - 20, 'Document Processing Pipeline')

	        stages = [
	            ('Input', 'PDF/Image\nUpload', ACCENT_BLUE),
	            ('OCR', 'PaddleOCR\nTesseract', PRIMARY_BLUE),
	            ('Layout', 'Region\nDetection', PRIMARY_BLUE),
	            ('Reading Order', 'Sequence\nReconstruction', SECONDARY_BLUE),
	            ('Chunking', 'Semantic\nSplitting', SECONDARY_BLUE),
	            ('Indexing', 'ChromaDB\nEmbedding', SUCCESS_GREEN),
	        ]

	        box_width = 70
	        box_height = 45
	        spacing = 15
	        total_width = len(stages) * box_width + (len(stages) - 1) * spacing
	        x_start = (width - total_width) / 2  # centre the whole row
	        y_pos = height - 90
	        arrow_y = y_pos + box_height / 2

	        for i, (name, desc, color) in enumerate(stages):
	            x = x_start + i * (box_width + spacing)
	            # Main box
	            self._draw_box(x, y_pos, box_width, box_height, name, color, font_size=8)
	            # Description
	            canv.setFillColor(GRAY_DARK)
	            canv.setFont('Helvetica', 6)
	            for j, line in enumerate(desc.split('\n')):
	                canv.drawCentredString(x + box_width / 2, y_pos - 15 - j * 8, line)

	            # Arrow to the next stage
	            if i < len(stages) - 1:
	                self._draw_arrow(x + box_width, arrow_y, x + box_width + spacing, arrow_y)

	        # Output description
	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 9)
	        canv.drawCentredString(width / 2, height - 160, 'Output: ProcessedDocument with chunks, OCR regions, layout data, bounding boxes')

	    def _draw_agent_interaction(self):
	        """Draw Agent Interaction Diagram.

	        A central Orchestrator box surrounded by the five agents, with the
	        vector store on the left and the LLM on the right.
	        """
	        canv = self.canv
	        width = self.width

	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 12)
	        canv.drawCentredString(width / 2, self.height - 20, 'Agent Interaction & Data Flow')

	        # Central orchestrator
	        center_x, center_y = width / 2, self.height / 2 - 20
	        self._draw_box(center_x - 50, center_y - 20, 100, 40, 'Orchestrator', PRIMARY_BLUE, font_size=9)

	        # Surrounding agents as (name, dx, dy) offsets of the box centre
	        # from the orchestrator centre.
	        agents = [
	            ('QueryPlanner', -120, 60),
	            ('Retriever', 0, 90),
	            ('Reranker', 120, 60),
	            ('Synthesizer', 120, -60),
	            ('Critic', 0, -90),
	        ]

	        for name, dx, dy in agents:
	            self._draw_box(center_x + dx - 45, center_y + dy - 15, 90, 30, name, SECONDARY_BLUE, font_size=8)
	            # NOTE(review): the connector's far end sits at 30% of dx, which
	            # is outside the 90pt-wide box for the off-centre agents —
	            # confirm whether the arrows are meant to touch the boxes.
	            link_x = center_x + dx * 0.3
	            if dy > 0:
	                # Agent above: arrow from orchestrator top towards the agent.
	                self._draw_arrow(center_x, center_y + 20, link_x, center_y + dy - 15, ACCENT_BLUE)
	            else:
	                # Agent below: arrow from the agent towards orchestrator bottom.
	                self._draw_arrow(link_x, center_y + dy + 15, center_x, center_y - 20, ACCENT_BLUE)

	        # External connections
	        # Vector Store
	        self._draw_box(30, center_y - 15, 70, 30, 'ChromaDB', SUCCESS_GREEN, font_size=8)
	        self._draw_arrow(100, center_y, center_x - 50, center_y, SUCCESS_GREEN)

	        # LLM
	        self._draw_box(width - 100, center_y - 15, 70, 30, 'Ollama LLM', WARNING_ORANGE, font_size=8)
	        self._draw_arrow(width - 100, center_y, center_x + 50, center_y, WARNING_ORANGE)

	    def _draw_data_flow(self):
	        """Draw Data Flow Diagram.

	        A vertical stack of boxes joined by downward arrows, each with a
	        short description to its right.
	        """
	        canv = self.canv
	        width, height = self.width, self.height

	        canv.setFillColor(PRIMARY_BLUE)
	        canv.setFont('Helvetica-Bold', 12)
	        canv.drawCentredString(width / 2, height - 20, 'End-to-End Data Flow')

	        # Vertical flow
	        items = [
	            ('Document Upload', ACCENT_BLUE, 'PDF, Images, Text files'),
	            ('Document Processor', PRIMARY_BLUE, 'OCR → Layout → Chunking'),
	            ('State Manager', SECONDARY_BLUE, 'ProcessedDocument storage'),
	            ('Embedder', SECONDARY_BLUE, 'mxbai-embed-large (1024d)'),
	            ('ChromaDB', SUCCESS_GREEN, 'Vector indexing & storage'),
	            ('RAG Query', WARNING_ORANGE, 'User question processing'),
	            ('Multi-Agent Pipeline', PRIMARY_BLUE, '5-agent collaboration'),
	            ('Response', SUCCESS_GREEN, 'Answer with citations'),
	        ]

	        box_height = 28
	        spacing = 8
	        y_start = height - 50
	        total_height = len(items) * box_height + (len(items) - 1) * spacing
	        # Shrink the stack when it is taller than the space below the title;
	        # otherwise the last box drops below the flowable (10pt at
	        # height=320) and overlaps whatever follows it on the page.
	        if 0 < y_start < total_height:
	            scale = y_start / total_height
	            box_height *= scale
	            spacing *= scale
	        box_width = 160
	        box_x = width / 2 - box_width / 2
	        arrow_x = box_x + box_width / 2

	        for i, (name, color, desc) in enumerate(items):
	            top = y_start - i * (box_height + spacing)
	            bottom = top - box_height
	            self._draw_box(box_x, bottom, box_width, box_height, name, color, font_size=8)
	            # Description on right
	            canv.setFillColor(GRAY_DARK)
	            canv.setFont('Helvetica', 7)
	            canv.drawString(box_x + box_width + 15, top - box_height / 2 - 3, desc)

	            # Arrow to the next box; stops 2pt short of it
	            if i < len(items) - 1:
	                self._draw_arrow(arrow_x, bottom, arrow_x, bottom - spacing + 2)


	def create_styles():
	    """Build the report stylesheet.

	    Starts from ReportLab's sample stylesheet and registers the custom
	    paragraph styles used by the report (titles, headers, body, bullets,
	    captions and highlight text). Returns the extended stylesheet.
	    """
	    sheet = getSampleStyleSheet()

	    # (style name, parent style name, attribute overrides), in registration order.
	    custom_specs = (
	        # Title style
	        ('MainTitle', 'Title', dict(
	            fontSize=28, textColor=PRIMARY_BLUE, spaceAfter=30,
	            alignment=TA_CENTER, fontName='Helvetica-Bold')),
	        # Subtitle
	        ('Subtitle', 'Normal', dict(
	            fontSize=16, textColor=SECONDARY_BLUE, spaceAfter=20,
	            alignment=TA_CENTER, fontName='Helvetica')),
	        # Section Header
	        ('SectionHeader', 'Heading1', dict(
	            fontSize=18, textColor=PRIMARY_BLUE, spaceBefore=25, spaceAfter=15,
	            fontName='Helvetica-Bold', borderColor=ACCENT_BLUE,
	            borderWidth=2, borderPadding=5)),
	        # Subsection Header
	        ('SubsectionHeader', 'Heading2', dict(
	            fontSize=14, textColor=SECONDARY_BLUE, spaceBefore=15,
	            spaceAfter=10, fontName='Helvetica-Bold')),
	        # Body text
	        ('CustomBody', 'Normal', dict(
	            fontSize=10, textColor=GRAY_DARK, spaceAfter=8,
	            alignment=TA_JUSTIFY, leading=14)),
	        # Bullet style
	        ('BulletText', 'Normal', dict(
	            fontSize=10, textColor=GRAY_DARK, leftIndent=20,
	            spaceAfter=5, leading=13)),
	        # Caption
	        ('Caption', 'Normal', dict(
	            fontSize=9, textColor=GRAY_DARK, alignment=TA_CENTER,
	            spaceAfter=15, fontName='Helvetica-Oblique')),
	        # Highlight box text
	        ('HighlightText', 'Normal', dict(
	            fontSize=10, textColor=PRIMARY_BLUE, spaceAfter=5,
	            fontName='Helvetica-Bold')),
	    )

	    for style_name, parent_name, overrides in custom_specs:
	        sheet.add(ParagraphStyle(name=style_name, parent=sheet[parent_name], **overrides))

	    return sheet


	def create_highlight_box(text, styles, color=LIGHT_BLUE):
	    """Create a highlighted text box.

	    Wraps ``text`` (rendered with the 'HighlightText' style from ``styles``)
	    in a single-cell, 450pt-wide table with a ``color`` background and an
	    accent-blue border, and returns that table.
	    """
	    cell = Paragraph(text, styles['HighlightText'])
	    table = Table([[cell]], colWidths=[450])
	    everywhere = ((0, 0), (-1, -1))
	    table.setStyle(TableStyle([
	        ('BACKGROUND', *everywhere, color),
	        ('BOX', *everywhere, 1, ACCENT_BLUE),
	        # The documented TableStyle cell commands only include per-side
	        # padding; a bare 'PADDING' command appears to be ignored, so set
	        # all four sides explicitly.
	        *[(side, *everywhere, 12)
	          for side in ('LEFTPADDING', 'RIGHTPADDING', 'TOPPADDING', 'BOTTOMPADDING')],
	        ('VALIGN', *everywhere, 'MIDDLE'),
	    ]))
	    return table


	def create_status_table(items, styles):
	    """Create a status table with colored indicators.

	    ``items`` is an iterable of ``(component, status, completion)`` triples.
	    The returned table has a header row followed by one row per item; the
	    status cell text is green for 'Complete', orange for 'In Progress' and
	    red for any other status. ``styles`` is accepted for signature
	    compatibility and is not used.
	    """
	    status_colors = {
	        'Complete': SUCCESS_GREEN,
	        'In Progress': WARNING_ORANGE,
	    }

	    data = [['Component', 'Status', 'Completion']]
	    # Per-row commands colouring the status column. The status colour used
	    # to be computed and then discarded, so no indicator was ever shown.
	    indicator_cmds = []
	    for row, (item, status, completion) in enumerate(items, start=1):
	        data.append([item, status, completion])
	        status_color = status_colors.get(status, DANGER_RED)
	        indicator_cmds.append(('TEXTCOLOR', (1, row), (1, row), status_color))
	        indicator_cmds.append(('FONTNAME', (1, row), (1, row), 'Helvetica-Bold'))

	    table = Table(data, colWidths=[250, 100, 100])
	    table.setStyle(TableStyle([
	        ('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
	        ('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	        ('FONTSIZE', (0, 0), (-1, -1), 10),
	        ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
	        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	        # The documented TableStyle cell commands only include per-side
	        # padding; a bare 'PADDING' command appears to be ignored.
	        *[(side, (0, 0), (-1, -1), 8)
	          for side in ('LEFTPADDING', 'RIGHTPADDING', 'TOPPADDING', 'BOTTOMPADDING')],
	        *indicator_cmds,
	    ]))
	    return table


	def create_metrics_table(metrics, styles):
	    """Create a metrics display table.

	    ``metrics`` is an iterable of ``(metric, value, change)`` triples; each
	    becomes one row (no header row). The metric name is bold, the value is
	    primary blue, and rows alternate light-blue / white backgrounds.
	    ``styles`` is accepted for signature compatibility and is not used.
	    """
	    data = [[metric, value, change] for metric, value, change in metrics]

	    table = Table(data, colWidths=[200, 150, 100])
	    table.setStyle(TableStyle([
	        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
	        ('FONTSIZE', (0, 0), (-1, -1), 11),
	        ('TEXTCOLOR', (1, 0), (1, -1), PRIMARY_BLUE),
	        ('ALIGN', (1, 0), (-1, -1), 'CENTER'),
	        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	        # The documented TableStyle cell commands only include per-side
	        # padding; a bare 'PADDING' command appears to be ignored.
	        *[(side, (0, 0), (-1, -1), 10)
	          for side in ('LEFTPADDING', 'RIGHTPADDING', 'TOPPADDING', 'BOTTOMPADDING')],
	        ('ROWBACKGROUNDS', (0, 0), (-1, -1), [LIGHT_BLUE, WHITE]),
	    ]))
	    return table


	def generate_report():
	"""Generate the complete SPARKNET progress report PDF."""

	filename = '/home/mhamdan/SPARKNET/docs/SPARKNET_Progress_Report.pdf'
	os.makedirs(os.path.dirname(filename), exist_ok=True)

	doc = SimpleDocTemplate(
	filename,
	pagesize=A4,
	rightMargin=50,
	leftMargin=50,
	topMargin=60,
	bottomMargin=60
	)

	styles = create_styles()
	story = []

	# ========== TITLE PAGE ==========
	story.append(Spacer(1, 100))
	story.append(Paragraph('SPARKNET', styles['MainTitle']))
	story.append(Paragraph('Multi-Agentic Document Intelligence Framework', styles['Subtitle']))
	story.append(Spacer(1, 30))
	story.append(Paragraph('Progress Report & Future Roadmap', styles['Subtitle']))
	story.append(Spacer(1, 50))

	# Version info box
	version_data = [
	['Version', '1.0.0-beta'],
	['Report Date', datetime.now().strftime('%B %d, %Y')],
	['Document Type', 'Stakeholder Progress Report'],
	['Classification', 'Internal / Confidential'],
	]
	version_table = Table(version_data, colWidths=[150, 200])
	version_table.setStyle(TableStyle([
	('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 10),
	('TEXTCOLOR', (0, 0), (-1, -1), GRAY_DARK),
	('ALIGN', (0, 0), (-1, -1), 'CENTER'),
	('GRID', (0, 0), (-1, -1), 0.5, ACCENT_BLUE),
	('PADDING', (0, 0), (-1, -1), 8),
	('BACKGROUND', (0, 0), (-1, -1), LIGHT_BLUE),
	]))
	story.append(version_table)

	story.append(PageBreak())

	# ========== TABLE OF CONTENTS ==========
	story.append(Paragraph('Table of Contents', styles['SectionHeader']))
	story.append(Spacer(1, 20))

	toc_items = [
	('1. Executive Summary', '3'),
	('2. Project Overview', '4'),
	('3. Technical Architecture', '5'),
	('4. Component Deep Dive', '8'),
	('5. Current Progress & Achievements', '12'),
	('6. Gap Analysis', '14'),
	('7. Future Work & Roadmap', '17'),
	('8. Risk Assessment', '20'),
	('9. Resource Requirements', '21'),
	('10. Conclusion & Recommendations', '22'),
	]

	toc_data = [[Paragraph(f'<b>{item}</b>', styles['CustomBody']), page] for item, page in toc_items]
	toc_table = Table(toc_data, colWidths=[400, 50])
	toc_table.setStyle(TableStyle([
	('FONTSIZE', (0, 0), (-1, -1), 11),
	('ALIGN', (1, 0), (1, -1), 'RIGHT'),
	('BOTTOMPADDING', (0, 0), (-1, -1), 8),
	('LINEBELOW', (0, 0), (-1, -2), 0.5, colors.lightgrey),
	]))
	story.append(toc_table)

	story.append(PageBreak())

	# ========== 1. EXECUTIVE SUMMARY ==========
	story.append(Paragraph('1. Executive Summary', styles['SectionHeader']))

	story.append(Paragraph(
	'''SPARKNET represents a next-generation document intelligence platform that combines
	advanced OCR capabilities, sophisticated layout analysis, and a state-of-the-art
	Multi-Agent Retrieval-Augmented Generation (RAG) system. This report provides a
	comprehensive overview of the project's current state, technical achievements,
	identified gaps, and the strategic roadmap for future development.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>Key Highlights</b>', styles['SubsectionHeader']))

	highlights = [
	'<b>Multi-Agent RAG Architecture:</b> Successfully implemented a 5-agent pipeline (QueryPlanner, Retriever, Reranker, Synthesizer, Critic) with self-correction capabilities.',
	'<b>Document Processing Pipeline:</b> Complete end-to-end document processing with OCR, layout detection, and semantic chunking.',
	'<b>Production-Ready Demo:</b> Fully functional Streamlit application with 5 interactive modules for document intelligence workflows.',
	'<b>Hallucination Detection:</b> Built-in validation and criticism system to ensure factual accuracy of generated responses.',
	'<b>Unified State Management:</b> Cross-module communication enabling seamless user experience across all application components.',
	]

	for h in highlights:
	story.append(Paragraph(f'• {h}', styles['BulletText']))

	story.append(Spacer(1, 20))

	# Key Metrics
	story.append(Paragraph('<b>Current System Metrics</b>', styles['SubsectionHeader']))
	metrics = [
	('RAG Pipeline Agents', '5 Specialized Agents', '✓ Complete'),
	('Document Formats Supported', 'PDF, Images', '2 formats'),
	('Vector Dimensions', '1024 (mxbai-embed-large)', 'Production'),
	('Demo Application Pages', '5 Interactive Modules', '✓ Complete'),
	('LLM Integration', 'Ollama (Local)', 'Self-hosted'),
	]
	story.append(create_metrics_table(metrics, styles))

	story.append(PageBreak())

	# ========== 2. PROJECT OVERVIEW ==========
	story.append(Paragraph('2. Project Overview', styles['SectionHeader']))

	story.append(Paragraph('<b>2.1 Vision & Objectives</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''SPARKNET aims to revolutionize document intelligence by providing an integrated
	platform that can understand, process, and intelligently query complex documents.
	The system leverages cutting-edge AI techniques including multi-agent collaboration,
	hybrid retrieval, and sophisticated answer synthesis with built-in validation.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>Core Objectives:</b>', styles['CustomBody']))

	objectives = [
	'<b>Intelligent Document Understanding:</b> Extract and structure information from diverse document formats with high accuracy.',
	'<b>Conversational Intelligence:</b> Enable natural language querying over document collections with citation-backed responses.',
	'<b>Reliability & Trust:</b> Implement hallucination detection and self-correction to ensure factual accuracy.',
	'<b>Scalability:</b> Design for enterprise-scale document processing and retrieval workloads.',
	'<b>Extensibility:</b> Modular architecture allowing easy integration of new capabilities and models.',
	]

	for obj in objectives:
	story.append(Paragraph(f'• {obj}', styles['BulletText']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>2.2 Target Use Cases</b>', styles['SubsectionHeader']))

	use_cases = [
	['Use Case', 'Description', 'Status'],
	['Legal Document Analysis', 'Contract review, clause extraction, compliance checking', 'Supported'],
	['Research Paper Synthesis', 'Multi-paper querying, citation tracking, summary generation', 'Supported'],
	['Technical Documentation', 'API docs, manuals, knowledge base querying', 'Supported'],
	['Financial Reports', 'Annual reports, SEC filings, financial data extraction', 'Planned'],
	['Medical Records', 'Clinical notes, diagnostic reports (HIPAA compliance needed)', 'Future'],
	]

	uc_table = Table(use_cases, colWidths=[130, 230, 90])
	uc_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 6),
	('ALIGN', (2, 0), (2, -1), 'CENTER'),
	]))
	story.append(uc_table)

	story.append(PageBreak())

	# ========== 3. TECHNICAL ARCHITECTURE ==========
	story.append(Paragraph('3. Technical Architecture', styles['SectionHeader']))

	story.append(Paragraph('<b>3.1 High-Level Architecture</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''SPARKNET follows a layered microservices-inspired architecture with clear separation
	of concerns. The system is organized into presentation, service, and persistence layers,
	with a central orchestration mechanism coordinating multi-agent workflows.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))

	# Architecture Diagram
	arch_diagram = DiagramFlowable(500, 350, 'architecture')
	story.append(arch_diagram)
	story.append(Paragraph('Figure 1: SPARKNET High-Level Architecture', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>3.2 Multi-Agent RAG Pipeline</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The heart of SPARKNET is its Multi-Agent RAG system, which orchestrates five
	specialized agents in a sophisticated pipeline with self-correction capabilities.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))

	# RAG Pipeline Diagram
	rag_diagram = DiagramFlowable(500, 180, 'rag_pipeline')
	story.append(rag_diagram)
	story.append(Paragraph('Figure 2: Multi-Agent RAG Pipeline with Revision Loop', styles['Caption']))

	story.append(PageBreak())

	story.append(Paragraph('<b>3.3 Document Processing Pipeline</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''Documents undergo a multi-stage processing pipeline that extracts text, identifies
	layout structure, establishes reading order, and creates semantically coherent chunks
	optimized for retrieval.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))

	# Document Pipeline Diagram
	doc_diagram = DiagramFlowable(500, 180, 'document_pipeline')
	story.append(doc_diagram)
	story.append(Paragraph('Figure 3: Document Processing Pipeline', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>3.4 Agent Interaction Model</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The orchestrator coordinates all agents, managing state transitions and ensuring
	proper data flow between components. External services (Vector Store, LLM) are
	accessed through well-defined interfaces.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))

	# Agent Interaction Diagram
	agent_diagram = DiagramFlowable(500, 250, 'agent_interaction')
	story.append(agent_diagram)
	story.append(Paragraph('Figure 4: Agent Interaction Model', styles['Caption']))

	story.append(PageBreak())

	story.append(Paragraph('<b>3.5 Data Flow Architecture</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The end-to-end data flow illustrates how documents are processed from upload
	through indexing, and how queries are handled through the multi-agent pipeline
	to produce validated, citation-backed responses.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))

	# Data Flow Diagram
	flow_diagram = DiagramFlowable(500, 320, 'data_flow')
	story.append(flow_diagram)
	story.append(Paragraph('Figure 5: End-to-End Data Flow', styles['Caption']))

	story.append(PageBreak())

	# ========== 4. COMPONENT DEEP DIVE ==========
	story.append(Paragraph('4. Component Deep Dive', styles['SectionHeader']))

	story.append(Paragraph('<b>4.1 Query Planning Agent</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The QueryPlannerAgent is responsible for understanding user intent, classifying
	query types, and decomposing complex queries into manageable sub-queries.''',
	styles['CustomBody']
	))

	# Query types table
	query_types = [
	['Intent Type', 'Description', 'Example'],
	['FACTOID', 'Simple fact lookup', '"What is the revenue for Q4?"'],
	['COMPARISON', 'Multi-entity comparison', '"Compare product A vs B features"'],
	['AGGREGATION', 'Cross-document summary', '"Summarize all quarterly reports"'],
	['CAUSAL', 'Why/how explanations', '"Why did revenue decline?"'],
	['PROCEDURAL', 'Step-by-step instructions', '"How to configure the system?"'],
	['MULTI_HOP', 'Multi-step reasoning', '"Which supplier has the lowest cost for product X?"'],
	]

	qt_table = Table(query_types, colWidths=[90, 180, 180])
	qt_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 5),
	]))
	story.append(qt_table)
	story.append(Paragraph('Table 1: Supported Query Intent Types', styles['Caption']))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>4.2 Hybrid Retrieval System</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The RetrieverAgent implements a sophisticated hybrid search combining dense
	semantic retrieval with sparse keyword matching, using Reciprocal Rank Fusion (RRF)
	to merge results optimally.''',
	styles['CustomBody']
	))

	retrieval_features = [
	'<b>Dense Retrieval:</b> Embedding-based semantic search using mxbai-embed-large (1024 dimensions)',
	'<b>Sparse Retrieval:</b> BM25-style keyword matching for precise term matching',
	'<b>RRF Fusion:</b> Combines rankings using formula: RRF = Σ(1 / (k + rank))',
	'<b>Intent-Adaptive Weights:</b> Adjusts dense/sparse balance based on query type (e.g., 80/20 for definitions, 50/50 for comparisons)',
	]

	for feat in retrieval_features:
	story.append(Paragraph(f'• {feat}', styles['BulletText']))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>4.3 Cross-Encoder Reranking</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The RerankerAgent applies LLM-based cross-encoder scoring to refine retrieval
	results, implementing deduplication and Maximal Marginal Relevance (MMR) for
	diversity promotion.''',
	styles['CustomBody']
	))

	reranker_config = [
	['Parameter', 'Value', 'Purpose'],
	['top_k', '5', 'Final result count'],
	['min_relevance_score', '0.3', 'Quality threshold'],
	['dedup_threshold', '0.9', 'Similarity for duplicate detection'],
	['MMR lambda', '0.7', 'Relevance vs diversity balance'],
	]

	rr_table = Table(reranker_config, colWidths=[140, 80, 230])
	rr_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('PADDING', (0, 0), (-1, -1), 6),
	]))
	story.append(rr_table)
	story.append(Paragraph('Table 2: Reranker Configuration', styles['Caption']))

	story.append(PageBreak())

	story.append(Paragraph('<b>4.4 Answer Synthesis</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The SynthesizerAgent generates comprehensive answers with automatic citation
	tracking, supporting multiple output formats and implementing intelligent abstention
	when evidence is insufficient.''',
	styles['CustomBody']
	))

	story.append(Paragraph('<b>Supported Answer Formats:</b>', styles['CustomBody']))
	formats = ['PROSE - Flowing paragraph narrative', 'BULLET_POINTS - Enumerated key points',
	'TABLE - Comparative tabular format', 'STEP_BY_STEP - Procedural instructions']
	for fmt in formats:
	story.append(Paragraph(f'• {fmt}', styles['BulletText']))

	story.append(Paragraph('<b>Confidence Calculation:</b>', styles['CustomBody']))
	story.append(Paragraph('confidence = 0.5 × source_relevance + 0.3 × source_count_factor + 0.2 × consistency', styles['BulletText']))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>4.5 Validation & Hallucination Detection</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The CriticAgent performs comprehensive validation including hallucination detection,
	citation verification, and factual consistency checking. It can trigger revision
	cycles when issues are detected.''',
	styles['CustomBody']
	))

	issue_types = [
	['Issue Type', 'Description', 'Severity'],
	['HALLUCINATION', 'Information not supported by sources', 'Critical'],
	['UNSUPPORTED_CLAIM', 'Statement without citation', 'High'],
	['INCORRECT_CITATION', 'Citation references wrong source', 'High'],
	['CONTRADICTION', 'Internal inconsistency in answer', 'Medium'],
	['INCOMPLETE', 'Missing important information', 'Medium'],
	['FACTUAL_ERROR', 'Verifiable factual mistake', 'Critical'],
	]

	it_table = Table(issue_types, colWidths=[130, 230, 90])
	it_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 5),
	]))
	story.append(it_table)
	story.append(Paragraph('Table 3: Validation Issue Types', styles['Caption']))

	story.append(PageBreak())

	story.append(Paragraph('<b>4.6 Document Processing Components</b>', styles['SubsectionHeader']))

	story.append(Paragraph('<b>OCR Engines:</b>', styles['CustomBody']))
	ocr_comparison = [
	['Feature', 'PaddleOCR', 'Tesseract'],
	['GPU Acceleration', '✓ Yes', '✗ No'],
	['Multi-language', '✓ 80+ languages', '✓ 100+ languages'],
	['Accuracy (Clean)', '~95%', '~90%'],
	['Accuracy (Complex)', '~85%', '~75%'],
	['Speed', 'Fast', 'Moderate'],
	['Confidence Scores', '✓ Per-region', '✓ Per-word'],
	]

	ocr_table = Table(ocr_comparison, colWidths=[130, 160, 160])
	ocr_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('PADDING', (0, 0), (-1, -1), 5),
	]))
	story.append(ocr_table)
	story.append(Paragraph('Table 4: OCR Engine Comparison', styles['Caption']))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>Layout Detection:</b>', styles['CustomBody']))
	layout_types = ['TEXT, TITLE, HEADING, PARAGRAPH - Text regions',
	'TABLE, FIGURE, CHART - Visual elements',
	'CAPTION, FOOTNOTE - Supplementary text',
	'HEADER, FOOTER - Page elements',
	'FORMULA - Mathematical expressions']
	for lt in layout_types:
	story.append(Paragraph(f'• {lt}', styles['BulletText']))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>Chunking Configuration:</b>', styles['CustomBody']))
	chunk_config = [
	['Parameter', 'Default', 'Description'],
	['max_chunk_chars', '1000', 'Maximum characters per chunk'],
	['min_chunk_chars', '50', 'Minimum viable chunk size'],
	['overlap_chars', '100', 'Overlap between consecutive chunks'],
	['Strategy', 'Semantic', 'Respects layout boundaries'],
	]

	cc_table = Table(chunk_config, colWidths=[120, 80, 250])
	cc_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('PADDING', (0, 0), (-1, -1), 5),
	]))
	story.append(cc_table)
	story.append(Paragraph('Table 5: Chunking Configuration', styles['Caption']))

	story.append(PageBreak())

	# ========== 5. CURRENT PROGRESS ==========
	story.append(Paragraph('5. Current Progress & Achievements', styles['SectionHeader']))

	story.append(Paragraph('<b>5.1 Development Milestones</b>', styles['SubsectionHeader']))

	milestones = [
	['Milestone', 'Status', 'Completion'],
	['Core RAG Pipeline', 'Complete', '100%'],
	['5-Agent Architecture', 'Complete', '100%'],
	['Document Processing Pipeline', 'Complete', '100%'],
	['ChromaDB Integration', 'Complete', '100%'],
	['Ollama LLM Integration', 'Complete', '100%'],
	['Streamlit Demo Application', 'Complete', '100%'],
	['State Management System', 'Complete', '100%'],
	['Hallucination Detection', 'Complete', '100%'],
	['PDF Processing', 'Complete', '100%'],
	['Self-Correction Loop', 'Complete', '100%'],
	]

	ms_table = Table(milestones, colWidths=[220, 120, 110])
	ms_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 6),
	('ALIGN', (1, 0), (-1, -1), 'CENTER'),
	]))
	story.append(ms_table)
	story.append(Paragraph('Table 6: Development Milestones', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>5.2 Demo Application Features</b>', styles['SubsectionHeader']))

	demo_features = [
	['Page', 'Features', 'Status'],
	['Live Processing', 'Real-time document processing, progress tracking, auto-indexing', '✓ Complete'],
	['Interactive RAG', 'Query interface, document filtering, chunk preview, citations', '✓ Complete'],
	['Document Comparison', 'Semantic similarity, structure analysis, content diff', '✓ Complete'],
	['Evidence Viewer', 'Confidence coloring, bounding boxes, OCR regions, export', '✓ Complete'],
	['Document Viewer', 'Multi-tab view, chunk display, layout visualization', '✓ Complete'],
	]

	df_table = Table(demo_features, colWidths=[110, 270, 70])
	df_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 5),
	('ALIGN', (2, 0), (2, -1), 'CENTER'),
	]))
	story.append(df_table)
	story.append(Paragraph('Table 7: Demo Application Features', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>5.3 Technical Achievements</b>', styles['SubsectionHeader']))

	achievements = [
	'<b>Hybrid Retrieval:</b> Successfully combined dense and sparse retrieval with RRF fusion, achieving better recall than either method alone.',
	'<b>Self-Correction:</b> Implemented revision loop allowing the system to automatically fix issues detected by the Critic agent.',
	'<b>Citation Tracking:</b> Automatic citation generation with [N] notation linking answers to source documents.',
	'<b>Confidence Scoring:</b> Multi-factor confidence calculation providing transparency into answer reliability.',
	'<b>Streaming Support:</b> Real-time response streaming for improved user experience during long generations.',
	'<b>Cross-Module Communication:</b> Unified state manager enabling seamless navigation between application modules.',
	]

	for ach in achievements:
	story.append(Paragraph(f'• {ach}', styles['BulletText']))

	story.append(PageBreak())

	# ========== 6. GAP ANALYSIS ==========
	story.append(Paragraph('6. Gap Analysis', styles['SectionHeader']))

	story.append(Paragraph(
	'''This section identifies current limitations and gaps in the SPARKNET system
	that represent opportunities for improvement and future development.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>6.1 Functional Gaps</b>', styles['SubsectionHeader']))

	functional_gaps = [
	['Gap ID', 'Category', 'Description', 'Impact', 'Priority'],
	['FG-001', 'Document Support', 'Limited to PDF and images; no Word, Excel, PowerPoint support', 'High', 'P1'],
	['FG-002', 'Table Extraction', 'Table structure not preserved during chunking', 'High', 'P1'],
	['FG-003', 'Multi-modal', 'No image/chart understanding within documents', 'Medium', 'P2'],
	['FG-004', 'Languages', 'Primarily English; limited multi-language support', 'Medium', 'P2'],
	['FG-005', 'Batch Processing', 'No bulk document upload/processing capability', 'Medium', 'P2'],
	['FG-006', 'Document Updates', 'No incremental update; full reprocessing required', 'Medium', 'P2'],
	['FG-007', 'User Feedback', 'No mechanism to learn from user corrections', 'Low', 'P3'],
	]

	fg_table = Table(functional_gaps, colWidths=[50, 85, 200, 55, 55])
	fg_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), DANGER_RED),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 4),
	('ALIGN', (0, 0), (0, -1), 'CENTER'),
	('ALIGN', (3, 0), (-1, -1), 'CENTER'),
	]))
	story.append(fg_table)
	story.append(Paragraph('Table 8: Functional Gaps', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>6.2 Technical Gaps</b>', styles['SubsectionHeader']))

	technical_gaps = [
	['Gap ID', 'Category', 'Description', 'Impact', 'Priority'],
	['TG-001', 'Scalability', 'Single-node architecture; no distributed processing', 'High', 'P1'],
	['TG-002', 'Authentication', 'No user authentication or access control', 'High', 'P1'],
	['TG-003', 'API', 'No REST API for external integration', 'High', 'P1'],
	['TG-004', 'Caching', 'Limited query result caching; redundant LLM calls', 'Medium', 'P2'],
	['TG-005', 'Monitoring', 'Basic logging only; no metrics/alerting system', 'Medium', 'P2'],
	['TG-006', 'Testing', 'Limited test coverage; no integration tests', 'Medium', 'P2'],
	['TG-007', 'Cloud Deploy', 'Not containerized; no Kubernetes manifests', 'Medium', 'P2'],
	['TG-008', 'GPU Sharing', 'Single GPU utilization; no multi-GPU support', 'Low', 'P3'],
	]

	tg_table = Table(technical_gaps, colWidths=[50, 80, 205, 55, 55])
	tg_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 4),
	('ALIGN', (0, 0), (0, -1), 'CENTER'),
	('ALIGN', (3, 0), (-1, -1), 'CENTER'),
	]))
	story.append(tg_table)
	story.append(Paragraph('Table 9: Technical Gaps', styles['Caption']))

	story.append(PageBreak())

	story.append(Paragraph('<b>6.3 Performance Gaps</b>', styles['SubsectionHeader']))

	perf_gaps = [
	['Gap ID', 'Metric', 'Current', 'Target', 'Gap'],
	['PG-001', 'Query Latency (simple)', '3-5 seconds', '<2 seconds', '~2x improvement needed'],
	['PG-002', 'Query Latency (complex)', '10-20 seconds', '<5 seconds', '~3x improvement needed'],
	['PG-003', 'Document Processing', '30-60 sec/page', '<10 sec/page', '~4x improvement needed'],
	['PG-004', 'Concurrent Users', '1-5', '50+', 'Major scaling required'],
	['PG-005', 'Index Size', '10K chunks', '1M+ chunks', 'Architecture redesign'],
	['PG-006', 'Accuracy (hallucination)', '~85%', '>95%', '~10% improvement'],
	]

	pg_table = Table(perf_gaps, colWidths=[50, 120, 90, 90, 100])
	pg_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 4),
	('ALIGN', (0, 0), (-1, -1), 'CENTER'),
	]))
	story.append(pg_table)
	story.append(Paragraph('Table 10: Performance Gaps', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>6.4 Security & Compliance Gaps</b>', styles['SubsectionHeader']))

	security_gaps = [
	'<b>No Authentication:</b> Currently no user login or session management',
	'<b>No Authorization:</b> Missing role-based access control (RBAC) for documents',
	'<b>Data Encryption:</b> Documents and embeddings stored unencrypted at rest',
	'<b>Audit Logging:</b> No comprehensive audit trail for compliance requirements',
	'<b>PII Detection:</b> No automatic detection/redaction of personally identifiable information',
	'<b>GDPR/HIPAA:</b> Not compliant with major data protection regulations',
	]

	for sg in security_gaps:
	story.append(Paragraph(f'• {sg}', styles['BulletText']))

	story.append(PageBreak())

	# ========== 7. FUTURE WORK & ROADMAP ==========
	story.append(Paragraph('7. Future Work & Roadmap', styles['SectionHeader']))

	story.append(Paragraph('<b>7.1 Strategic Roadmap Overview</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''The SPARKNET roadmap is organized into three phases, each building upon the
	previous to transform the current prototype into a production-ready enterprise
	solution.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))

	# Roadmap phases
	roadmap = [
	['Phase', 'Timeline', 'Focus Areas', 'Key Deliverables'],
	['Phase 1:\nFoundation', 'Q1-Q2 2026',
	'Stability, Core Features,\nBasic Security',
	'• REST API\n• Authentication\n• Extended document formats\n• Basic containerization'],
	['Phase 2:\nScale', 'Q3-Q4 2026',
	'Performance, Scalability,\nEnterprise Features',
	'• Distributed processing\n• Advanced caching\n• Multi-tenancy\n• Monitoring & alerting'],
	['Phase 3:\nInnovation', 'Q1-Q2 2027',
	'Advanced AI, Compliance,\nEcosystem',
	'• Multi-modal understanding\n• Compliance frameworks\n• Plugin architecture\n• Advanced analytics'],
	]

	rm_table = Table(roadmap, colWidths=[70, 80, 130, 170])
	rm_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [LIGHT_BLUE, WHITE]),
	('PADDING', (0, 0), (-1, -1), 6),
	('VALIGN', (0, 0), (-1, -1), 'TOP'),
	]))
	story.append(rm_table)
	story.append(Paragraph('Table 11: Strategic Roadmap', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>7.2 Phase 1: Foundation (Q1-Q2 2026)</b>', styles['SubsectionHeader']))

	phase1_items = [
	['Item', 'Description', 'Effort', 'Dependencies'],
	['REST API Development', 'FastAPI-based API for all core functions', '4 weeks', 'None'],
	['User Authentication', 'JWT-based auth with OAuth2 support', '3 weeks', 'API'],
	['Document Format Extension', 'Add Word, Excel, PowerPoint support', '4 weeks', 'None'],
	['Table Extraction', 'Preserve table structure in processing', '3 weeks', 'None'],
	['Docker Containerization', 'Production-ready Docker images', '2 weeks', 'None'],
	['Basic CI/CD Pipeline', 'Automated testing and deployment', '2 weeks', 'Docker'],
	['Query Result Caching', 'Redis-based caching layer', '2 weeks', 'API'],
	['Unit Test Coverage', 'Achieve 80% code coverage', '3 weeks', 'Ongoing'],
	]

	p1_table = Table(phase1_items, colWidths=[130, 180, 60, 80])
	p1_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SUCCESS_GREEN),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 4),
	]))
	story.append(p1_table)
	story.append(Paragraph('Table 12: Phase 1 Deliverables', styles['Caption']))

	story.append(PageBreak())

	story.append(Paragraph('<b>7.3 Phase 2: Scale (Q3-Q4 2026)</b>', styles['SubsectionHeader']))

	phase2_items = [
	['Item', 'Description', 'Effort', 'Dependencies'],
	['Distributed Processing', 'Celery/Ray for parallel document processing', '6 weeks', 'Phase 1'],
	['Vector Store Scaling', 'Milvus/Pinecone for large-scale indices', '4 weeks', 'Phase 1'],
	['Multi-tenancy', 'Organization-based data isolation', '4 weeks', 'Auth'],
	['Kubernetes Deployment', 'Full K8s manifests and Helm charts', '3 weeks', 'Docker'],
	['Monitoring Stack', 'Prometheus, Grafana, ELK integration', '3 weeks', 'K8s'],
	['Batch Processing', 'Bulk document upload and processing', '3 weeks', 'Distributed'],
	['Advanced Caching', 'Semantic caching for similar queries', '3 weeks', 'Cache'],
	['Performance Optimization', 'Achieve <2s simple query latency', '4 weeks', 'Caching'],
	]

	p2_table = Table(phase2_items, colWidths=[130, 180, 60, 80])
	p2_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 4),
	]))
	story.append(p2_table)
	story.append(Paragraph('Table 13: Phase 2 Deliverables', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>7.4 Phase 3: Innovation (Q1-Q2 2027)</b>', styles['SubsectionHeader']))

	phase3_items = [
	['Item', 'Description', 'Effort', 'Dependencies'],
	['Multi-modal Understanding', 'GPT-4V/Claude Vision for image analysis', '6 weeks', 'Phase 2'],
	['Advanced Table QA', 'SQL-like queries over extracted tables', '4 weeks', 'Table Extract'],
	['PII Detection/Redaction', 'Automatic sensitive data handling', '4 weeks', 'None'],
	['Compliance Framework', 'GDPR, HIPAA, SOC2 compliance', '8 weeks', 'PII'],
	['Plugin Architecture', 'Extensible agent and tool system', '4 weeks', 'Phase 2'],
	['Analytics Dashboard', 'Usage analytics and insights', '3 weeks', 'Monitoring'],
	['Multi-language Support', 'Full support for top 10 languages', '4 weeks', 'None'],
	['Feedback Learning', 'Learn from user corrections', '4 weeks', 'Analytics'],
	]

	p3_table = Table(phase3_items, colWidths=[130, 180, 60, 80])
	p3_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), ACCENT_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 4),
	]))
	story.append(p3_table)
	story.append(Paragraph('Table 14: Phase 3 Deliverables', styles['Caption']))

	story.append(PageBreak())

	# ========== 8. RISK ASSESSMENT ==========
	story.append(Paragraph('8. Risk Assessment', styles['SectionHeader']))

	story.append(Paragraph('<b>8.1 Technical Risks</b>', styles['SubsectionHeader']))

	tech_risks = [
	['Risk', 'Probability', 'Impact', 'Mitigation'],
	['LLM API Changes', 'Medium', 'High', 'Abstract LLM interface; support multiple providers'],
	['Scaling Bottlenecks', 'High', 'High', 'Early load testing; phased rollout'],
	['Model Accuracy Plateau', 'Medium', 'Medium', 'Ensemble approaches; fine-tuning capability'],
	['Dependency Vulnerabilities', 'Medium', 'Medium', 'Regular dependency audits; Dependabot'],
	['Data Loss', 'Low', 'Critical', 'Automated backups; disaster recovery plan'],
	]

	tr_table = Table(tech_risks, colWidths=[120, 70, 70, 190])
	tr_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), DANGER_RED),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 5),
	('ALIGN', (1, 0), (2, -1), 'CENTER'),
	]))
	story.append(tr_table)
	story.append(Paragraph('Table 15: Technical Risks', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>8.2 Project Risks</b>', styles['SubsectionHeader']))

	proj_risks = [
	['Risk', 'Probability', 'Impact', 'Mitigation'],
	['Scope Creep', 'High', 'Medium', 'Strict phase gates; change control process'],
	['Resource Constraints', 'Medium', 'High', 'Prioritized backlog; MVP focus'],
	['Timeline Slippage', 'Medium', 'Medium', 'Buffer time; parallel workstreams'],
	['Knowledge Silos', 'Medium', 'Medium', 'Documentation; pair programming; code reviews'],
	['Stakeholder Alignment', 'Low', 'High', 'Regular demos; feedback cycles'],
	]

	pr_table = Table(proj_risks, colWidths=[120, 70, 70, 190])
	pr_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), WARNING_ORANGE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 5),
	('ALIGN', (1, 0), (2, -1), 'CENTER'),
	]))
	story.append(pr_table)
	story.append(Paragraph('Table 16: Project Risks', styles['Caption']))

	story.append(PageBreak())

	# ========== 9. RESOURCE REQUIREMENTS ==========
	story.append(Paragraph('9. Resource Requirements', styles['SectionHeader']))

	story.append(Paragraph('<b>9.1 Team Structure (Recommended)</b>', styles['SubsectionHeader']))

	team = [
	['Role', 'Count', 'Phase 1', 'Phase 2', 'Phase 3'],
	['Senior ML Engineer', '2', '✓', '✓', '✓'],
	['Backend Developer', '2', '✓', '✓', '✓'],
	['Frontend Developer', '1', '✓', '✓', '✓'],
	['DevOps Engineer', '1', '✓', '✓', '✓'],
	['QA Engineer', '1', '—', '✓', '✓'],
	['Technical Lead', '1', '✓', '✓', '✓'],
	['Product Manager', '1', '✓', '✓', '✓'],
	]

	team_table = Table(team, colWidths=[130, 60, 70, 70, 70])
	team_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), PRIMARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 9),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 6),
	('ALIGN', (1, 0), (-1, -1), 'CENTER'),
	]))
	story.append(team_table)
	story.append(Paragraph('Table 17: Team Structure', styles['Caption']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>9.2 Infrastructure Requirements</b>', styles['SubsectionHeader']))

	infra = [
	['Component', 'Development', 'Staging', 'Production'],
	['GPU Servers', '1x A100 40GB', '2x A100 40GB', '4x A100 80GB'],
	['CPU Servers', '4 vCPU, 16GB', '8 vCPU, 32GB', '16 vCPU, 64GB x3'],
	['Storage', '500GB SSD', '2TB SSD', '10TB SSD + S3'],
	['Vector DB', 'ChromaDB local', 'Milvus single', 'Milvus cluster'],
	['Cache', 'In-memory', 'Redis single', 'Redis cluster'],
	['Load Balancer', 'None', 'Nginx', 'AWS ALB / GCP LB'],
	]

	infra_table = Table(infra, colWidths=[100, 120, 120, 110])
	infra_table.setStyle(TableStyle([
	('BACKGROUND', (0, 0), (-1, 0), SECONDARY_BLUE),
	('TEXTCOLOR', (0, 0), (-1, 0), WHITE),
	('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
	('FONTSIZE', (0, 0), (-1, -1), 8),
	('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
	('ROWBACKGROUNDS', (0, 1), (-1, -1), [WHITE, GRAY_LIGHT]),
	('PADDING', (0, 0), (-1, -1), 5),
	]))
	story.append(infra_table)
	story.append(Paragraph('Table 18: Infrastructure Requirements', styles['Caption']))

	story.append(PageBreak())

	# ========== 10. CONCLUSION ==========
	story.append(Paragraph('10. Conclusion & Recommendations', styles['SectionHeader']))

	story.append(Paragraph('<b>10.1 Summary</b>', styles['SubsectionHeader']))
	story.append(Paragraph(
	'''SPARKNET has achieved significant progress as a proof-of-concept for multi-agentic
	document intelligence. The core RAG pipeline is functional, demonstrating the viability
	of the 5-agent architecture with self-correction capabilities. The system successfully
	processes documents, performs hybrid retrieval, and generates citation-backed responses.''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 10))
	story.append(Paragraph('<b>10.2 Key Recommendations</b>', styles['SubsectionHeader']))

	recommendations = [
	'<b>Prioritize API Development:</b> Enable external integrations and unlock enterprise adoption.',
	'<b>Invest in Security:</b> Authentication and authorization are prerequisites for any production deployment.',
	'<b>Focus on Performance:</b> Current latency is acceptable for demos but needs significant improvement for production use.',
	'<b>Expand Document Support:</b> Office formats (Word, Excel, PowerPoint) are critical for enterprise adoption.',
	'<b>Implement Monitoring:</b> Observability is essential for maintaining and scaling the system.',
	'<b>Plan for Scale Early:</b> Architectural decisions made now will impact scalability; consider distributed architecture.',
	]

	for rec in recommendations:
	story.append(Paragraph(f'• {rec}', styles['BulletText']))

	story.append(Spacer(1, 15))
	story.append(Paragraph('<b>10.3 Immediate Next Steps</b>', styles['SubsectionHeader']))

	next_steps = [
	'1. Finalize Phase 1 scope and create detailed sprint plans',
	'2. Set up development infrastructure and CI/CD pipeline',
	'3. Begin REST API development (target: 4 weeks)',
	'4. Initiate security assessment and authentication design',
	'5. Start documentation and knowledge transfer activities',
	'6. Schedule bi-weekly stakeholder demos for continuous feedback',
	]

	for step in next_steps:
	story.append(Paragraph(step, styles['BulletText']))

	story.append(Spacer(1, 30))

	# Final signature block
	story.append(HRFlowable(width='100%', thickness=1, color=PRIMARY_BLUE))
	story.append(Spacer(1, 15))

	story.append(Paragraph(
	f'''<b>Document prepared by:</b> SPARKNET Development Team<br/>
	<b>Report Date:</b> {datetime.now().strftime('%B %d, %Y')}<br/>
	<b>Version:</b> 1.0<br/>
	<b>Classification:</b> Internal / Confidential''',
	styles['CustomBody']
	))

	story.append(Spacer(1, 20))
	story.append(Paragraph(
	'<i>This document contains confidential information intended for stakeholder review. '
	'Please do not distribute without authorization.</i>',
	styles['Caption']
	))

	# Build PDF
	doc.build(story)
	print(f"Report generated: {filename}")
	return filename


	# Script entry point: generate the report only when this file is executed
	# directly, not when it is imported as a module.
	if __name__ == "__main__":
	    generate_report()