import io
import mimetypes
import os
import tempfile
from html import escape
from pathlib import Path
from string import Template

import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv

load_dotenv()

# LlamaIndex imports for RAG retrieval
try:
    from llama_index.core import Settings, StorageContext, load_index_from_storage
    from llama_index.embeddings.huggingface import HuggingFaceEmbedding

    LLAMA_INDEX_AVAILABLE = True
except ImportError:
    LLAMA_INDEX_AVAILABLE = False

# GitHub repo that hosts study materials via Releases + manifest.json
# Format: "owner/repo"
MATERIALS_REPO = os.getenv("MATERIALS_REPO", "KunalGupta25/plexi-materials")
MANIFEST_BRANCH = "main"

# Session-state keys: one holds the normalised mode, the other backs the widget.
THEME_MODE_STATE_KEY = "plexi_theme_mode"
THEME_MODE_WIDGET_KEY = "_plexi_theme_mode_widget"

APP_ICON_PATH = str(Path(__file__).resolve().with_name("plexi-mcp-square-logo.svg"))

# Colour tokens for the light appearance. Every key is turned into a
# ``--plexi-*`` CSS custom property by ``_css_vars_block``.
LIGHT_PALETTE = {
    "ink": "#16312c",
    "muted": "#5b6c66",
    "bg": "#f5f0e8",
    "panel": "rgba(255, 252, 247, 0.88)",
    "panel_strong": "#fffaf1",
    "line": "rgba(22, 49, 44, 0.11)",
    "accent": "#1d7a63",
    "accent_soft": "#d7efe4",
    "highlight": "#f4b860",
    "shadow": "0 18px 60px rgba(30, 48, 43, 0.08)",
    "app_background": (
        " radial-gradient(circle at top left, rgba(244, 184, 96, 0.18), transparent 28%),"
        " radial-gradient(circle at top right, rgba(29, 122, 99, 0.14), transparent 30%),"
        " linear-gradient(180deg, #fbf7ef 0%, #f4ecde 100%) "
    ),
    "hero_background": (
        " linear-gradient(135deg, rgba(29, 122, 99, 0.08), rgba(255, 250, 241, 0.92)),"
        " rgba(255, 252, 247, 0.88) "
    ),
    "chip_background": "rgba(29, 122, 99, 0.08)",
    "chip_border": "rgba(29, 122, 99, 0.12)",
    "button_border": "rgba(29, 122, 99, 0.14)",
    "button_surface": "#f8fbfa",
    "button_hover": "#eef7f2",
    "primary_button": "linear-gradient(135deg, #1d7a63, #245e74)",
    "sidebar_background": (
        " linear-gradient(180deg, rgba(255, 251, 245, 0.98), rgba(246, 238, 224, 0.96)) "
    ),
    "expander_background": "rgba(255, 251, 245, 0.72)",
    "meta_background": "rgba(255, 251, 245, 0.72)",
    "divider": "linear-gradient(90deg, rgba(29, 122, 99, 0.25), transparent)",
    "meta_row_border": "rgba(22, 49, 44, 0.08)",
    "bottom_background": "#fbf7ef",
}

# Colour tokens for the dark appearance; same keys as LIGHT_PALETTE.
DARK_PALETTE = {
    "ink": "#eef4ef",
    "muted": "#b8c6c0",
    "bg": "#0d1715",
    "panel": "rgba(20, 31, 29, 0.9)",
    "panel_strong": "#15211f",
    "line": "rgba(196, 223, 211, 0.14)",
    "accent": "#54c6a2",
    "accent_soft": "#17392f",
    "highlight": "#f0b564",
    "shadow": "0 22px 70px rgba(0, 0, 0, 0.32)",
    "app_background": (
        " radial-gradient(circle at top left, rgba(240, 181, 100, 0.12), transparent 28%),"
        " radial-gradient(circle at top right, rgba(84, 198, 162, 0.12), transparent 32%),"
        " linear-gradient(180deg, #0f1b19 0%, #09110f 100%) "
    ),
    "hero_background": (
        " linear-gradient(135deg, rgba(84, 198, 162, 0.12), rgba(16, 28, 25, 0.92)),"
        " rgba(20, 31, 29, 0.9) "
    ),
    "chip_background": "rgba(84, 198, 162, 0.12)",
    "chip_border": "rgba(84, 198, 162, 0.18)",
    "button_border": "rgba(84, 198, 162, 0.18)",
    "button_surface": "rgba(84, 198, 162, 0.14)",
    "button_hover": "rgba(84, 198, 162, 0.22)",
    "primary_button": "linear-gradient(135deg, #2ea483, #245e74)",
    "sidebar_background": (
        " linear-gradient(180deg, rgba(17, 28, 26, 0.98), rgba(12, 20, 18, 0.97)) "
    ),
    "expander_background": "rgba(17, 28, 26, 0.84)",
    "meta_background": "rgba(19, 31, 28, 0.84)",
    "divider": "linear-gradient(90deg, rgba(84, 198, 162, 0.32), transparent)",
    "meta_row_border": "rgba(196, 223, 211, 0.1)",
    "bottom_background": "#09110f",
}


def get_theme_mode():
    """Return the stored appearance mode, seeding it with "system" on first use."""
    state = st.session_state
    if THEME_MODE_STATE_KEY in state:
        return state[THEME_MODE_STATE_KEY]
    state[THEME_MODE_STATE_KEY] = "system"
    return state[THEME_MODE_STATE_KEY]


def sync_theme_mode():
    """Copy the selector widget's value (lower-cased) into the persistent state key."""
    chosen_label = st.session_state.get(THEME_MODE_WIDGET_KEY, "System")
    st.session_state[THEME_MODE_STATE_KEY] = chosen_label.lower()


def _css_vars_block(palette):
    """Build the newline-joined ``--plexi-*`` declarations for *palette*.

    Raises ``KeyError`` if the palette lacks any of the expected keys.
    """
    # Fixed key order keeps the emitted CSS independent of the dict's own order.
    palette_keys = (
        "ink",
        "muted",
        "bg",
        "panel",
        "panel_strong",
        "line",
        "accent",
        "accent_soft",
        "highlight",
        "shadow",
        "app_background",
        "hero_background",
        "chip_background",
        "chip_border",
        "button_border",
        "button_surface",
        "button_hover",
        "primary_button",
        "sidebar_background",
        "expander_background",
        "meta_background",
        "divider",
        "meta_row_border",
        "bottom_background",
    )
    declarations = []
    for key in palette_keys:
        css_name = key.replace("_", "-")
        declarations.append(f" --plexi-{css_name}: {palette[key]};")
    return "\n".join(declarations)


def inject_theme():
    """Emit the app-wide theme markup through ``st.markdown``."""
    theme_mode = get_theme_mode()
    is_dark = theme_mode == "dark"
    palette = DARK_PALETTE if is_dark else LIGHT_PALETTE
    color_scheme = "dark" if is_dark else "light"

    # In "system" mode the light palette is the base and a media query swaps in
    # the dark variables when the device prefers a dark scheme.
    system_css = ""
    if theme_mode == "system":
        system_css = (
            " @media (prefers-color-scheme: dark) { :root { "
            f"{_css_vars_block(DARK_PALETTE)}"
            " } html { color-scheme: dark; } } "
        )

    # NOTE(review): the template body contains no placeholders in this revision,
    # so none of the substituted values reach the output — confirm the
    # stylesheet markup was not lost from this literal.
    css = Template(""" """).substitute(
        {
            "palette_vars": _css_vars_block(palette),
            "color_scheme": color_scheme,
            "system_css": system_css,
        }
    )
    st.markdown(css, unsafe_allow_html=True)


def summarize_manifest(manifest):
    """Count semesters, subjects and files, and list the material types, in *manifest*.

    The manifest is walked as semester -> subject -> material type -> file list.
    """
    subject_total = 0
    file_total = 0
    seen_types = set()
    for subjects in manifest.values():
        subject_total += len(subjects)
        for types in subjects.values():
            seen_types.update(types.keys())
            for files in types.values():
                file_total += len(files)

    return {
        "semester_count": len(manifest),
        "subject_count": subject_total,
        "file_count": file_total,
        "material_types": sorted(seen_types),
    }


def summarize_subject_catalog(subject_data):
    """Count material types and files for a single subject's catalog."""
    file_lists = subject_data.values()
    return {
        "type_count": len(subject_data),
        "file_count": sum(len(files) for files in file_lists),
        "types": sorted(subject_data.keys()),
    }


def render_page_header(kicker, title, subtitle, badges=None):
    """Render the page hero: kicker, title, subtitle and optional badges.

    All text is HTML-escaped before it is interpolated; falsy badges are skipped.
    """
    # NOTE(review): the literals below carry no HTML tags in this revision even
    # though unsafe_allow_html is set — confirm the wrapper markup was not lost.
    badge_html = ""
    if badges:
        rendered_badges = [escape(str(badge)) for badge in badges if badge]
        badge_html = "\n" + "".join(rendered_badges) + "\n"

    st.markdown(
        f"\n{escape(kicker)}\n\n{escape(title)}\n\n{escape(subtitle)}\n\n{badge_html}\n",
        unsafe_allow_html=True,
    )


def render_stat_cards(cards):
    """Render one column per card, each showing its label, value and note."""
    if not cards:
        return

    for column, card in zip(st.columns(len(cards)), cards):
        label = escape(str(card.get("label", "")))
        value = escape(str(card.get("value", "")))
        note = escape(str(card.get("note", "")))
        with column:
            st.markdown(f"\n{label}\n{value}\n{note}\n", unsafe_allow_html=True)


def render_panel(title, body, tone="default"):
    """Render an informational panel made of an escaped title and body."""
    # NOTE(review): panel_class is computed but not referenced by the literal
    # below in this revision — presumably it belonged to stripped markup.
    panel_class = "plexi-callout" if tone == "callout" else "plexi-panel"
    st.markdown(
        f"\n{escape(title)}\n{escape(body)}\n",
        unsafe_allow_html=True,
    )


def _manifest_url():
    """Return the raw.githubusercontent.com URL of manifest.json."""
    return (
        "https://raw.githubusercontent.com/"
        f"{MATERIALS_REPO}/{MANIFEST_BRANCH}/manifest.json"
    )


@st.cache_data(ttl=300, show_spinner=False)
def get_manifest():
    """Download and parse the materials manifest (cached with a 300 s TTL).

    Raises a ``requests`` exception on network failure or a non-2xx status.
    """
    response = requests.get(_manifest_url(), timeout=15)
    response.raise_for_status()
    return response.json()


def download_github_file(download_url, max_retries=3):
    """Fetch *download_url* and return the response body as bytes.

    Request failures are printed and retried up to *max_retries* times; the
    error from the final attempt is re-raised. Returns ``None`` only when
    *max_retries* is less than one, because no attempt is made.
    """
    final_attempt = max_retries - 1
    attempt = 0
    while attempt < max_retries:
        try:
            resp = requests.get(download_url, timeout=60)
            resp.raise_for_status()
            return resp.content
        except requests.RequestException as err:
            print(f"Download error (attempt {attempt + 1}): {err}")
            if attempt == final_attempt:
                raise
        attempt += 1
    return None


def get_mime_type(filename):
    """Guess the MIME type from *filename*, defaulting to a generic binary type."""
    guessed = mimetypes.guess_type(filename)[0]
    return guessed or "application/octet-stream"


# ── Lightweight Office → PDF conversion (pure Python) ──────────────────
#
# Uses python-pptx / python-docx to extract content and fpdf2 to render
# PDF pages. No system packages (LibreOffice, etc.) required.
# The output is a readable *preview*, not a pixel-perfect replica.
# Typographic characters that Office applications commonly insert, mapped to
# plain stand-ins that exist in Latin-1.
_LATIN1_FALLBACKS = str.maketrans(
    {
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
        "\u2026": "...",
        "\u2022": "-",
    }
)


def _latin1_safe(text):
    """Return *text* restricted to characters the built-in PDF fonts can draw."""
    # The built-in Helvetica font only covers Latin-1; anything else would make
    # fpdf2 raise and abort the whole preview, so unknown characters become "?".
    return text.translate(_LATIN1_FALLBACKS).encode("latin-1", "replace").decode("latin-1")


def _pptx_to_pdf(file_bytes):
    """Convert PPTX bytes to PDF bytes using python-pptx + fpdf2.

    Each slide becomes one or more PDF pages holding a slide-number line, any
    embedded pictures, the detected title and the remaining text paragraphs.
    Text outside Latin-1 is substituted so the built-in font can render it.

    Raises whatever python-pptx or fpdf2 raise for unreadable input; the public
    wrapper ``convert_office_to_pdf`` turns those into ``None``.
    """
    from pptx import Presentation as PptxPresentation
    from pptx.util import Emu
    from fpdf import FPDF

    prs = PptxPresentation(io.BytesIO(file_bytes))

    # Slide dimensions in mm (default is 10" × 7.5")
    slide_w_mm = prs.slide_width / Emu(914400) * 25.4  # EMU → inches → mm
    slide_h_mm = prs.slide_height / Emu(914400) * 25.4

    # NOTE(review): the (height, width) order appears intended to pair with the
    # landscape orientation — verify against fpdf2's page-format handling.
    pdf = FPDF(orientation="L", unit="mm", format=(slide_h_mm, slide_w_mm))
    pdf.set_auto_page_break(auto=True, margin=12)

    # Use built-in Helvetica (no font file needed)
    TITLE_SIZE = 18
    BODY_SIZE = 11
    MARGIN = 14

    for slide_idx, slide in enumerate(prs.slides, start=1):
        pdf.add_page()
        pdf.set_left_margin(MARGIN)
        pdf.set_right_margin(MARGIN)
        pdf.set_y(MARGIN)

        # ── Slide number chip ──
        pdf.set_font("Helvetica", "I", 8)
        pdf.set_text_color(120, 120, 120)
        pdf.cell(0, 5, f"Slide {slide_idx}", ln=True)
        pdf.ln(2)

        # ── Extract text from shapes ──
        title_text = ""
        body_parts = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                for para in shape.text_frame.paragraphs:
                    text = para.text.strip()
                    if not text:
                        continue
                    # Heuristic: first non-empty text in a title placeholder.
                    # Check is_placeholder first: python-pptx raises ValueError
                    # from placeholder_format on ordinary shapes, which
                    # hasattr() does not absorb.
                    if (
                        not title_text
                        and shape.is_placeholder
                        and shape.placeholder_format.idx in (0, 1)
                    ):
                        title_text = text
                        continue
                    body_parts.append(text)

            # ── Embedded images ──
            if shape.shape_type == 13:  # MSO_SHAPE_TYPE.PICTURE
                try:
                    img_stream = io.BytesIO(shape.image.blob)
                    # Scale image to fit page width (with margins)
                    max_w = slide_w_mm - 2 * MARGIN
                    pdf.image(img_stream, x=MARGIN, w=min(max_w, 120))
                    pdf.ln(4)
                except Exception:
                    # Best effort: an unreadable image must not lose the slide.
                    pass

        # ── Render title ──
        if title_text:
            pdf.set_font("Helvetica", "B", TITLE_SIZE)
            pdf.set_text_color(22, 49, 44)  # plexi-ink dark
            pdf.multi_cell(0, TITLE_SIZE * 0.5, _latin1_safe(title_text))
            pdf.ln(4)

        # ── Render body text ──
        if body_parts:
            pdf.set_font("Helvetica", "", BODY_SIZE)
            pdf.set_text_color(50, 50, 50)
            for part in body_parts:
                pdf.multi_cell(0, BODY_SIZE * 0.45, _latin1_safe(part))
                pdf.ln(2)

    if len(prs.slides) == 0:
        pdf.add_page()
        pdf.set_font("Helvetica", "I", 12)
        pdf.cell(0, 10, "This presentation has no slides.", ln=True)

    return bytes(pdf.output())


def _docx_to_pdf(file_bytes):
    """Convert DOCX bytes to PDF bytes using python-docx + fpdf2.

    Paragraphs are rendered in document order on A4 pages. "Heading 1-3" styles
    get larger bold type, and a body paragraph is bold if any run is bold.
    Images found in the document relationships are appended after all text.
    Text outside Latin-1 is substituted so the built-in font can render it.
    """
    from docx import Document as DocxDocument
    from fpdf import FPDF

    doc = DocxDocument(io.BytesIO(file_bytes))

    pdf = FPDF(orientation="P", unit="mm", format="A4")
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    MARGIN = 16
    pdf.set_left_margin(MARGIN)
    pdf.set_right_margin(MARGIN)

    HEADING_SIZES = {"Heading 1": 20, "Heading 2": 16, "Heading 3": 14}
    BODY_SIZE = 11

    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Keep a little vertical space for blank paragraphs.
            pdf.ln(3)
            continue

        safe_text = _latin1_safe(text)
        style_name = para.style.name if para.style else ""
        if style_name in HEADING_SIZES:
            size = HEADING_SIZES[style_name]
            pdf.set_font("Helvetica", "B", size)
            pdf.set_text_color(22, 49, 44)
            pdf.ln(4)
            pdf.multi_cell(0, size * 0.5, safe_text)
            pdf.ln(3)
        else:
            is_bold = any(run.bold for run in para.runs if run.bold is not None)
            pdf.set_font("Helvetica", "B" if is_bold else "", BODY_SIZE)
            pdf.set_text_color(50, 50, 50)
            pdf.multi_cell(0, BODY_SIZE * 0.45, safe_text)
            pdf.ln(1.5)

    # ── Inline images ──
    for rel in doc.part.rels.values():
        if "image" in rel.reltype:
            try:
                img_stream = io.BytesIO(rel.target_part.blob)
                pdf.image(img_stream, x=MARGIN, w=100)
                pdf.ln(4)
            except Exception:
                # Best effort: skip images that cannot be read or embedded.
                pass

    return bytes(pdf.output())


def convert_office_to_pdf(file_bytes, filename):
    """Convert an Office document (PPTX/DOCX/PPT/DOC) to PDF bytes.

    Uses pure-Python libraries (python-pptx, python-docx, fpdf2) so no
    system packages like LibreOffice are needed. The output is a readable
    preview rather than a pixel-perfect replica.

    The converter is chosen from the extension of *filename*. Legacy ``.ppt``
    and ``.doc`` files go to the same converters as the modern formats.
    NOTE(review): python-pptx and python-docx presumably read only the modern
    formats, so legacy files likely end in the ``None`` path — confirm.

    Returns
    -------
    bytes | None
        PDF bytes on success, or ``None`` when the extension is not handled
        or the conversion raises (the error is printed).
    """
    ext = Path(filename).suffix.lower()
    try:
        if ext in (".pptx", ".ppt"):
            return _pptx_to_pdf(file_bytes)
        elif ext in (".docx", ".doc"):
            return _docx_to_pdf(file_bytes)
    except Exception as err:
        print(f"Office-to-PDF conversion error ({filename}): {err}")
    return None


def render_sidebar_intro():
    """Render the shared sidebar intro card."""
    # NOTE(review): the literal carries no HTML tags in this revision even
    # though unsafe_allow_html is set — confirm the card markup was not lost.
    with st.sidebar:
        st.markdown(
            "\nPlexi\nGrounded study assistant\nBrowse materials, preview files, and ask questions backed by the currently loaded course content.\n",
            unsafe_allow_html=True,
        )


def render_sidebar_footer():
    """Render shared appearance controls and outbound links at the end of the sidebar."""
    with st.sidebar:
        current_mode = get_theme_mode()
        widget_value = current_mode.capitalize()
        # Seed the widget key before the selectbox is created so the control
        # shows the stored mode after a page switch.
        if st.session_state.get(THEME_MODE_WIDGET_KEY) != widget_value:
            st.session_state[THEME_MODE_WIDGET_KEY] = widget_value

        st.markdown(
            "\nAppearance\n",
            unsafe_allow_html=True,
        )
        st.selectbox(
            "Theme",
            ["System", "Light", "Dark"],
            key=THEME_MODE_WIDGET_KEY,
            on_change=sync_theme_mode,
            help="System follows your device preference unless you override it here.",
        )

        st.caption("Built by **Kunal Gupta** (LazyHuman)")
        cols = st.columns(3)
        with cols[0]:
            st.link_button("Web", "https://lazyhideout.tech", use_container_width=True)
        with cols[1]:
            st.link_button(
                "GitHub", "https://github.com/kunalgupta25", use_container_width=True
            )
        with cols[2]:
            st.link_button(
                "Ko-fi", "https://ko-fi.com/lazy_human", use_container_width=True
            )
        st.markdown("\n", unsafe_allow_html=True)


def render_sidebar():
    """Render the shared sidebar for pages without extra sidebar sections."""
    render_sidebar_intro()
    render_sidebar_footer()


def read_pdf_text(pdf_bytes):
    """Extract text from PDF bytes with error handling.

    Pages that fail to extract are skipped. If the document cannot be opened
    at all, the raw bytes are decoded as UTF-8 (undecodable bytes dropped), so
    plain-text content passed here still comes back as text. Empty input
    returns an empty string.
    """
    text = []
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
        for page in reader.pages:
            try:
                page_text = page.extract_text()
                if page_text:
                    # Round-trip through UTF-16 so lone surrogates are dropped
                    # instead of breaking later encoding of the text.
                    filtered = page_text.encode("utf-16", "surrogatepass").decode(
                        "utf-16", "ignore"
                    )
                    text.append(filtered)
            except Exception:
                # Best effort: one unreadable page must not lose the others.
                continue
        return "\n".join(text)
    except Exception:
        return pdf_bytes.decode("utf-8", errors="ignore") if pdf_bytes else ""


def load_subject_context(manifest, semester, subject):
    """Download and extract text from all files for a given semester + subject.

    Only files whose guessed MIME type is ``text/*`` or ``application/pdf`` are
    used. A file that fails to download or parse is reported with ``print`` and
    skipped, and files that yield no text are left out.

    Returns (context_string, source_list) where:
    - context_string: numbered source blocks for the system prompt
    - source_list: list of dicts with 'id', 'name', 'type' for citation display
    """
    subject_data = manifest.get(semester, {}).get(subject, {})
    parts = []
    sources = []
    source_id = 0

    for file_type, file_list in subject_data.items():
        for file_entry in file_list:
            name = file_entry["name"]
            mime = get_mime_type(name)
            if not (mime.startswith("text/") or mime == "application/pdf"):
                continue

            try:
                content = download_github_file(file_entry["download_url"])
                if not content:
                    continue

                if mime == "application/pdf":
                    text = read_pdf_text(content)
                else:
                    text = content.decode("utf-8", errors="ignore")

                if text.strip():
                    # IDs are assigned only to sources that made it into the
                    # context, so citations stay contiguous.
                    source_id += 1
                    sources.append({"id": source_id, "name": name, "type": file_type})
                    parts.append(
                        f"[Source {source_id}: {name} ({file_type})]\n{text}\n[End Source {source_id}]"
                    )
            except Exception as err:
                print(f"Error loading {name}: {err}")

    return "\n\n".join(parts), sources


# RAG index loading from GitHub
# The index is pre-built by GitHub Actions (build_index.py) and
# committed to the materials repo. We just download and load it.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # must match build_index.py

# Files that make up the persisted index directory; all of them are required.
INDEX_FILES = [
    "default__vector_store.json",
    "docstore.json",
    "graph_store.json",
    "image__vector_store.json",
    "index_store.json",
]


@st.cache_resource(show_spinner="Loading RAG index...")
def fetch_rag_index():
    """Download the pre-built LlamaIndex from the materials repo and load it.

    The index files are fetched into a temporary directory, loaded, and the
    directory is removed before returning, whether loading succeeded or not.

    The result is cached by ``st.cache_resource``, which is shared across
    sessions and reruns of the running app rather than kept per session.
    NOTE(review): failures are returned, not raised, so an error result is
    presumably cached too until the cache is cleared — confirm this is intended.

    Returns (index, error_msg) - index is None if loading failed, and
    error_msg is None on success.
    """
    if not LLAMA_INDEX_AVAILABLE:
        return (
            None,
            "LlamaIndex not installed - install llama-index-core and dependencies.",
        )

    index_base_url = (
        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/index"
    )

    # The context manager removes the directory on every exit path, so repeated
    # or failed loads no longer leave plexi_index_* directories behind.
    with tempfile.TemporaryDirectory(prefix="plexi_index_") as index_dir:
        try:
            for filename in INDEX_FILES:
                url = f"{index_base_url}/{filename}"
                resp = requests.get(url, timeout=30)
                resp.raise_for_status()
                with open(os.path.join(index_dir, filename), "wb") as file_handle:
                    file_handle.write(resp.content)
        except Exception as err:
            return None, f"Failed to download index files: {err}"

        try:
            embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
            Settings.embed_model = embed_model
            # Only retrieval is needed here, so no LLM is configured.
            Settings.llm = None

            # NOTE(review): assumes the stores are read fully into memory while
            # loading, so the files are not needed afterwards — confirm against
            # the installed LlamaIndex version.
            storage_context = StorageContext.from_defaults(persist_dir=index_dir)
            index = load_index_from_storage(storage_context)
            return index, None
        except Exception as err:
            return None, f"Failed to load index: {err}"