plexi / utils.py
LazyHuman10
sync: fix chat reset and payload errors
49bf421
import io
import mimetypes
import os
import tempfile
from html import escape
from pathlib import Path
from string import Template
import PyPDF2
import requests
import streamlit as st
from dotenv import load_dotenv
load_dotenv()
# LlamaIndex imports for RAG retrieval
try:
from llama_index.core import Settings, StorageContext, load_index_from_storage
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
LLAMA_INDEX_AVAILABLE = True
except ImportError:
LLAMA_INDEX_AVAILABLE = False
# GitHub repo that hosts study materials via Releases + manifest.json
# Format: "owner/repo". Can be overridden with the MATERIALS_REPO environment variable.
MATERIALS_REPO = os.getenv("MATERIALS_REPO", "KunalGupta25/plexi-materials")
# Branch used when building raw.githubusercontent.com URLs in this module.
MANIFEST_BRANCH = "main"
# Session-state key that stores the persisted theme mode (lowercase string).
THEME_MODE_STATE_KEY = "plexi_theme_mode"
# Session-state key bound to the sidebar "Theme" selectbox widget.
THEME_MODE_WIDGET_KEY = "_plexi_theme_mode_widget"
# Absolute path of the logo file expected to sit next to this module.
APP_ICON_PATH = str(Path(__file__).resolve().with_name("plexi-mcp-square-logo.svg"))
# Light theme palette. Each key is read by _css_vars_block() and emitted as a
# "--plexi-*" CSS custom property; values are raw CSS fragments.
LIGHT_PALETTE = {
    "ink": "#16312c",
    "muted": "#5b6c66",
    "bg": "#f5f0e8",
    "panel": "rgba(255, 252, 247, 0.88)",
    "panel_strong": "#fffaf1",
    "line": "rgba(22, 49, 44, 0.11)",
    "accent": "#1d7a63",
    "accent_soft": "#d7efe4",
    "highlight": "#f4b860",
    "shadow": "0 18px 60px rgba(30, 48, 43, 0.08)",
    "app_background": """
radial-gradient(circle at top left, rgba(244, 184, 96, 0.18), transparent 28%),
radial-gradient(circle at top right, rgba(29, 122, 99, 0.14), transparent 30%),
linear-gradient(180deg, #fbf7ef 0%, #f4ecde 100%)
""",
    "hero_background": """
linear-gradient(135deg, rgba(29, 122, 99, 0.08), rgba(255, 250, 241, 0.92)),
rgba(255, 252, 247, 0.88)
""",
    "chip_background": "rgba(29, 122, 99, 0.08)",
    "chip_border": "rgba(29, 122, 99, 0.12)",
    "button_border": "rgba(29, 122, 99, 0.14)",
    "button_surface": "#f8fbfa",
    "button_hover": "#eef7f2",
    "primary_button": "linear-gradient(135deg, #1d7a63, #245e74)",
    "sidebar_background": """
linear-gradient(180deg, rgba(255, 251, 245, 0.98), rgba(246, 238, 224, 0.96))
""",
    "expander_background": "rgba(255, 251, 245, 0.72)",
    "meta_background": "rgba(255, 251, 245, 0.72)",
    "divider": "linear-gradient(90deg, rgba(29, 122, 99, 0.25), transparent)",
    "meta_row_border": "rgba(22, 49, 44, 0.08)",
    "bottom_background": "#fbf7ef",
}
# Dark theme palette. Uses the same key set as LIGHT_PALETTE so either dict can
# be passed to _css_vars_block(); values are raw CSS fragments.
DARK_PALETTE = {
    "ink": "#eef4ef",
    "muted": "#b8c6c0",
    "bg": "#0d1715",
    "panel": "rgba(20, 31, 29, 0.9)",
    "panel_strong": "#15211f",
    "line": "rgba(196, 223, 211, 0.14)",
    "accent": "#54c6a2",
    "accent_soft": "#17392f",
    "highlight": "#f0b564",
    "shadow": "0 22px 70px rgba(0, 0, 0, 0.32)",
    "app_background": """
radial-gradient(circle at top left, rgba(240, 181, 100, 0.12), transparent 28%),
radial-gradient(circle at top right, rgba(84, 198, 162, 0.12), transparent 32%),
linear-gradient(180deg, #0f1b19 0%, #09110f 100%)
""",
    "hero_background": """
linear-gradient(135deg, rgba(84, 198, 162, 0.12), rgba(16, 28, 25, 0.92)),
rgba(20, 31, 29, 0.9)
""",
    "chip_background": "rgba(84, 198, 162, 0.12)",
    "chip_border": "rgba(84, 198, 162, 0.18)",
    "button_border": "rgba(84, 198, 162, 0.18)",
    "button_surface": "rgba(84, 198, 162, 0.14)",
    "button_hover": "rgba(84, 198, 162, 0.22)",
    "primary_button": "linear-gradient(135deg, #2ea483, #245e74)",
    "sidebar_background": """
linear-gradient(180deg, rgba(17, 28, 26, 0.98), rgba(12, 20, 18, 0.97))
""",
    "expander_background": "rgba(17, 28, 26, 0.84)",
    "meta_background": "rgba(19, 31, 28, 0.84)",
    "divider": "linear-gradient(90deg, rgba(84, 198, 162, 0.32), transparent)",
    "meta_row_border": "rgba(196, 223, 211, 0.1)",
    "bottom_background": "#09110f",
}
def get_theme_mode():
    """Return the stored appearance mode, seeding it with "system" on first use."""
    state = st.session_state
    if THEME_MODE_STATE_KEY in state:
        return state[THEME_MODE_STATE_KEY]
    # First call in this session: record the default before handing it back.
    state[THEME_MODE_STATE_KEY] = "system"
    return state[THEME_MODE_STATE_KEY]
def sync_theme_mode():
    """Copy the selectbox value into the persistent theme key, lowercased.

    Falls back to "System" when the widget key is not present in session state.
    """
    selected_label = st.session_state.get(THEME_MODE_WIDGET_KEY, "System")
    st.session_state[THEME_MODE_STATE_KEY] = selected_label.lower()
def _css_vars_block(palette):
"""Return CSS custom property definitions for a palette."""
return "\n".join(
[
f" --plexi-ink: {palette['ink']};",
f" --plexi-muted: {palette['muted']};",
f" --plexi-bg: {palette['bg']};",
f" --plexi-panel: {palette['panel']};",
f" --plexi-panel-strong: {palette['panel_strong']};",
f" --plexi-line: {palette['line']};",
f" --plexi-accent: {palette['accent']};",
f" --plexi-accent-soft: {palette['accent_soft']};",
f" --plexi-highlight: {palette['highlight']};",
f" --plexi-shadow: {palette['shadow']};",
f" --plexi-app-background: {palette['app_background']};",
f" --plexi-hero-background: {palette['hero_background']};",
f" --plexi-chip-background: {palette['chip_background']};",
f" --plexi-chip-border: {palette['chip_border']};",
f" --plexi-button-border: {palette['button_border']};",
f" --plexi-button-surface: {palette['button_surface']};",
f" --plexi-button-hover: {palette['button_hover']};",
f" --plexi-primary-button: {palette['primary_button']};",
f" --plexi-sidebar-background: {palette['sidebar_background']};",
f" --plexi-expander-background: {palette['expander_background']};",
f" --plexi-meta-background: {palette['meta_background']};",
f" --plexi-divider: {palette['divider']};",
f" --plexi-meta-row-border: {palette['meta_row_border']};",
f" --plexi-bottom-background: {palette['bottom_background']};",
]
)
def inject_theme():
    """Inject the shared visual language for the Streamlit app.

    Reads the current theme mode, fills the stylesheet template with the
    matching palette variables, and writes the resulting ``<style>`` element
    to the page via ``st.markdown(..., unsafe_allow_html=True)``.

    Mode handling:
    - "dark": DARK_PALETTE is the base palette and ``color-scheme`` is dark.
    - anything else: LIGHT_PALETTE is the base palette and ``color-scheme``
      is light.
    - "system": additionally emits a ``prefers-color-scheme: dark`` media
      query that redefines the variables from DARK_PALETTE.
    """
    theme_mode = get_theme_mode()
    # Only an explicit "dark" selection switches the base palette.
    palette = DARK_PALETTE if theme_mode == "dark" else LIGHT_PALETTE
    system_css = ""
    color_scheme = "dark" if theme_mode == "dark" else "light"
    if theme_mode == "system":
        # f-string: doubled braces produce literal CSS braces.
        system_css = f"""
@media (prefers-color-scheme: dark) {{
:root {{
{_css_vars_block(DARK_PALETTE)}
}}
html {{
color-scheme: dark;
}}
}}
"""
    # string.Template is used (rather than an f-string) so the CSS braces need
    # no escaping; only $palette_vars, $color_scheme and $system_css are filled.
    css = Template(
        """
<style>
@import url('https://fonts.googleapis.com/css2?family=DM+Serif+Display:ital@0;1&family=Space+Grotesk:wght@400;500;700&display=swap');
:root {
$palette_vars
}
html, body, [class*="css"] {
font-family: "Space Grotesk", "Segoe UI", sans-serif;
}
html {
color-scheme: $color_scheme;
}
.stApp {
background: var(--plexi-app-background);
color: var(--plexi-ink);
}
header[data-testid="stHeader"] {
background: transparent !important;
}
div[data-testid="stToolbar"] {
background: transparent !important;
}
div[data-testid="stAppViewContainer"] {
background: transparent;
}
.block-container {
padding-top: 2.2rem;
padding-bottom: 3rem;
}
h1, h2, h3 {
color: var(--plexi-ink);
}
h1, .plexi-title {
font-family: "DM Serif Display", Georgia, serif;
letter-spacing: -0.03em;
}
p, li, .stMarkdown, .stCaption, .stChatMessage {
color: var(--plexi-ink);
}
.plexi-hero,
.plexi-panel,
.plexi-stat,
.plexi-sidecard,
.plexi-callout {
background: var(--plexi-panel);
border: 1px solid var(--plexi-line);
border-radius: 24px;
box-shadow: var(--plexi-shadow);
}
.plexi-hero {
padding: 1.8rem 1.9rem;
margin-bottom: 1.1rem;
background: var(--plexi-hero-background);
}
.plexi-kicker {
text-transform: uppercase;
letter-spacing: 0.16em;
font-size: 0.72rem;
font-weight: 700;
color: var(--plexi-accent);
margin-bottom: 0.65rem;
}
.plexi-title {
font-size: clamp(2.2rem, 5vw, 4.2rem);
margin: 0;
line-height: 0.95;
}
.plexi-subtitle {
max-width: 48rem;
margin: 0.8rem 0 0;
color: var(--plexi-muted);
font-size: 1rem;
line-height: 1.65;
}
.plexi-chip-row {
display: flex;
gap: 0.55rem;
flex-wrap: wrap;
margin-top: 1rem;
}
.plexi-chip {
display: inline-flex;
align-items: center;
gap: 0.35rem;
padding: 0.45rem 0.8rem;
border-radius: 999px;
background: var(--plexi-chip-background);
border: 1px solid var(--plexi-chip-border);
font-size: 0.82rem;
color: var(--plexi-ink);
}
.plexi-panel,
.plexi-callout,
.plexi-sidecard {
padding: 1.15rem 1.2rem;
margin-bottom: 1rem;
}
.plexi-stat {
padding: 1rem 1.15rem;
min-height: 8.5rem;
overflow: hidden;
}
.plexi-stat-label {
color: var(--plexi-muted);
font-size: 0.82rem;
text-transform: uppercase;
letter-spacing: 0.08em;
}
.plexi-stat-value {
font-family: "DM Serif Display", Georgia, serif;
font-size: clamp(1.5rem, 2.1vw, 2.1rem);
line-height: 1.08;
margin: 0.35rem 0 0.4rem;
overflow-wrap: anywhere;
}
.plexi-stat-note,
.plexi-muted {
color: var(--plexi-muted);
font-size: 0.92rem;
line-height: 1.55;
}
.plexi-section-label {
margin: 1.6rem 0 0.8rem;
text-transform: uppercase;
letter-spacing: 0.12em;
color: var(--plexi-accent);
font-size: 0.74rem;
font-weight: 700;
}
.plexi-list {
margin: 0;
padding-left: 1rem;
color: var(--plexi-muted);
line-height: 1.7;
}
.plexi-cta-grid {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 1rem;
margin: 1rem 0 1.4rem;
align-items: stretch;
}
.plexi-cta-button {
display: flex;
align-items: center;
justify-content: center;
width: 100%;
min-height: 3.6rem;
padding: 0.9rem 1.2rem;
border-radius: 999px;
text-decoration: none !important;
font-weight: 700;
font-size: 1.02rem;
color: #ffffff !important;
background: linear-gradient(135deg, #3bb192, #2b728a);
box-shadow: 0 16px 40px rgba(38, 109, 107, 0.22);
border: none;
transition: transform 120ms ease, box-shadow 120ms ease, opacity 120ms ease;
}
.plexi-cta-button:hover {
color: #ffffff !important;
transform: translateY(-1px);
box-shadow: 0 18px 44px rgba(38, 109, 107, 0.28);
}
.plexi-prompt button,
.stButton > button,
.stDownloadButton > button,
.stLinkButton > a {
border-radius: 999px !important;
}
.stButton > button,
.stDownloadButton > button,
.stLinkButton > a {
border: 1px solid var(--plexi-button-border);
min-height: 2.85rem;
background: var(--plexi-button-surface);
color: var(--plexi-ink) !important;
box-shadow: none !important;
}
.stButton > button[kind="primary"],
.stDownloadButton > button[kind="primary"] {
background: var(--plexi-primary-button);
color: white;
border: none;
}
.stLinkButton > a {
display: flex;
align-items: center;
justify-content: center;
text-decoration: none !important;
}
.stLinkButton > a:hover,
.stButton > button:hover,
.stDownloadButton > button:hover {
border-color: var(--plexi-accent);
background: var(--plexi-button-hover);
color: var(--plexi-ink) !important;
}
.stButton > button:disabled,
.stDownloadButton > button:disabled,
.stLinkButton > a[disabled] {
opacity: 0.55;
color: var(--plexi-muted) !important;
}
.stTextInput input,
.stSelectbox [data-baseweb="select"] > div,
.stTextArea textarea,
.stChatInput textarea {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border-color: var(--plexi-line) !important;
}
.stSelectbox [data-baseweb="select"] *,
.stTextInput input::placeholder,
.stTextArea textarea::placeholder,
.stChatInput textarea::placeholder {
color: var(--plexi-muted) !important;
}
div[data-baseweb="select"] svg,
div[data-baseweb="select"] path {
color: var(--plexi-accent) !important;
fill: var(--plexi-accent) !important;
}
.stChatInputContainer,
div[data-testid="stChatMessage"] {
border-radius: 22px;
}
div[data-testid="stBottomBlockContainer"],
div[data-testid="stBottomBlockContainer"] > div,
div[data-testid="stBottomBlockContainer"] > div > div,
div[data-testid="stChatInput"],
div[data-testid="stChatInput"] > div,
div[data-testid="stChatInput"] form,
div[data-testid="stChatInput"] form > div,
.stChatInputContainer {
background: var(--plexi-bottom-background) !important;
}
div[data-testid="stChatInput"] {
border-top: none !important;
padding-top: 0.5rem;
}
div[data-testid="stChatInput"] textarea,
div[data-testid="stChatInput"] section,
div[data-testid="stChatInput"] [data-baseweb="textarea"] {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border-color: var(--plexi-line) !important;
}
div[data-testid="stChatInput"] button {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-accent) !important;
border: 1px solid var(--plexi-button-border) !important;
}
div[data-testid="stChatInput"] button svg,
div[data-testid="stChatInput"] button path {
fill: currentColor !important;
}
div[data-testid="stSidebar"],
div[data-testid="stSidebar"] > div,
section[data-testid="stSidebar"] {
background: var(--plexi-sidebar-background);
border-right: 1px solid var(--plexi-line);
}
div[data-testid="stSidebar"] .block-container {
padding-top: 1.2rem;
}
div[data-testid="stSidebarNav"],
div[data-testid="stSidebarNav"] ul,
div[data-testid="stSidebarNav"] li,
div[data-testid="stSidebarUserContent"] {
background: transparent !important;
}
div[data-testid="stSidebarNav"] a,
div[data-testid="stSidebarNav"] span,
div[data-testid="stSidebarNav"] button {
color: var(--plexi-ink) !important;
}
div[data-testid="stSidebarNav"] a:hover {
background: rgba(255, 255, 255, 0.04);
}
div[data-testid="stExpander"] {
border-radius: 18px;
border-color: var(--plexi-line);
background: var(--plexi-expander-background);
}
div[data-testid="stToast"] {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border: 1px solid var(--plexi-line) !important;
box-shadow: var(--plexi-shadow) !important;
}
div[data-testid="toastContainer"] * {
color: var(--plexi-ink) !important;
}
div[data-testid="stSpinner"] > div,
div[data-testid="stSpinner"] * {
color: var(--plexi-ink) !important;
}
div[data-baseweb="popover"],
div[data-baseweb="popover"] > div,
div[data-baseweb="popover"] > div > div,
div[data-baseweb="popover"] > div > div > div,
div[data-baseweb="menu"],
div[data-baseweb="menu"] > div,
div[role="listbox"],
ul[role="listbox"] {
background: var(--plexi-panel-strong) !important;
color: var(--plexi-ink) !important;
border: 1px solid var(--plexi-line) !important;
box-shadow: var(--plexi-shadow) !important;
}
div[data-baseweb="popover"] *,
div[data-baseweb="menu"] *,
div[role="listbox"] *,
ul[role="listbox"] * {
color: var(--plexi-ink) !important;
}
div[data-baseweb="popover"] ul,
div[data-baseweb="popover"] li,
div[data-baseweb="popover"] li > div,
div[data-baseweb="menu"] ul,
div[data-baseweb="menu"] li,
div[data-baseweb="menu"] li > div {
background: var(--plexi-panel-strong) !important;
}
li[role="option"],
li[role="option"] > div,
li[role="option"] * {
background: transparent !important;
color: var(--plexi-ink) !important;
}
li[role="option"]:hover,
li[role="option"]:hover > div,
li[role="option"]:hover *,
li[role="option"][aria-selected="true"] {
background: var(--plexi-accent-soft) !important;
color: var(--plexi-ink) !important;
}
li[role="option"][aria-selected="true"] > div,
li[role="option"][aria-selected="true"] * {
background: var(--plexi-accent-soft) !important;
color: var(--plexi-ink) !important;
}
.plexi-sidecard-title {
font-family: "DM Serif Display", Georgia, serif;
font-size: 1.25rem;
margin-bottom: 0.35rem;
}
.plexi-meta {
background: var(--plexi-meta-background);
border: 1px solid var(--plexi-line);
border-radius: 18px;
padding: 0.85rem 1rem;
margin-bottom: 1rem;
}
.plexi-meta-row {
display: flex;
justify-content: space-between;
gap: 1rem;
align-items: flex-start;
padding: 0.65rem 0;
border-bottom: 1px solid var(--plexi-meta-row-border);
}
.plexi-meta-row:last-child {
border-bottom: none;
padding-bottom: 0;
}
.plexi-meta-row:first-child {
padding-top: 0;
}
.plexi-meta-key {
color: var(--plexi-muted);
font-size: 0.78rem;
text-transform: uppercase;
letter-spacing: 0.08em;
flex: 0 0 38%;
}
.plexi-meta-value {
text-align: right;
color: var(--plexi-ink);
font-size: 0.96rem;
line-height: 1.5;
overflow-wrap: anywhere;
}
.plexi-filelist {
margin: 0;
padding-left: 1.1rem;
color: var(--plexi-muted);
line-height: 1.7;
}
.plexi-filelist li.current {
color: var(--plexi-ink);
font-weight: 600;
}
.plexi-divider {
height: 1px;
background: var(--plexi-divider);
margin: 1rem 0 1.1rem;
}
$system_css
@media (max-width: 900px) {
.block-container {
padding-top: 1.2rem;
}
.plexi-hero {
padding: 1.35rem 1.2rem;
border-radius: 20px;
}
.plexi-stat {
min-height: 0;
}
.plexi-meta-row {
display: block;
}
.plexi-meta-value {
text-align: left;
margin-top: 0.2rem;
}
.plexi-cta-grid {
grid-template-columns: 1fr;
}
}
</style>
"""
    ).substitute(
        {
            "palette_vars": _css_vars_block(palette),
            "color_scheme": color_scheme,
            "system_css": system_css,
        }
    )
    # unsafe_allow_html is required for the <style> element to be emitted as markup.
    st.markdown(css, unsafe_allow_html=True)
def summarize_manifest(manifest):
    """Compute catalog-wide totals from the materials manifest.

    The manifest is walked as semester -> subject -> material type -> file
    list. The result holds the semester, subject, and file counts plus the
    sorted set of material type names seen anywhere in the catalog.
    """
    subject_count = 0
    file_count = 0
    seen_types = set()
    for subjects in manifest.values():
        subject_count += len(subjects)
        for types in subjects.values():
            seen_types.update(types.keys())
            for files in types.values():
                file_count += len(files)
    return {
        "semester_count": len(manifest),
        "subject_count": subject_count,
        "file_count": file_count,
        "material_types": sorted(seen_types),
    }
def summarize_subject_catalog(subject_data):
    """Summarize one subject's catalog (material type -> file list).

    Returns the number of material types, the total number of files across
    them, and the type names in sorted order.
    """
    type_names = sorted(subject_data.keys())
    total_files = 0
    for files in subject_data.values():
        total_files += len(files)
    return {
        "type_count": len(subject_data),
        "file_count": total_files,
        "types": type_names,
    }
def render_page_header(kicker, title, subtitle, badges=None):
    """Render a shared hero block for each page.

    Args:
        kicker: Short label rendered above the title.
        title: Main heading text.
        subtitle: Supporting paragraph rendered under the title.
        badges: Optional iterable of values rendered as chips. Falsy entries
            are skipped; the chip row is omitted when no chip remains.

    Every value is converted with ``str`` and HTML-escaped before being
    interpolated, because the markup is emitted with
    ``unsafe_allow_html=True``.
    """
    chips = "".join(
        f'<span class="plexi-chip">{escape(str(badge))}</span>'
        for badge in (badges or ())
        if badge
    )
    # Emit the wrapper only when at least one badge survived the filter, so a
    # list of empty values does not leave an empty, margin-bearing row behind.
    badge_html = f'<div class="plexi-chip-row">{chips}</div>' if chips else ""
    st.markdown(
        f"""
<section class="plexi-hero">
<div class="plexi-kicker">{escape(str(kicker))}</div>
<h1 class="plexi-title">{escape(str(title))}</h1>
<p class="plexi-subtitle">{escape(str(subtitle))}</p>
{badge_html}
</section>
""",
        unsafe_allow_html=True,
    )
def render_stat_cards(cards):
    """Lay out one metric card per entry, each in its own Streamlit column.

    Each card is a mapping read with ``.get`` for "label", "value" and
    "note" (missing keys render as empty strings). Values are stringified and
    HTML-escaped. Nothing is rendered when ``cards`` is empty or falsy.
    """
    if not cards:
        return
    columns = st.columns(len(cards))
    for column, card in zip(columns, cards):
        label_html = escape(str(card.get("label", "")))
        value_html = escape(str(card.get("value", "")))
        note_html = escape(str(card.get("note", "")))
        card_markup = f"""
<div class="plexi-stat">
<div class="plexi-stat-label">{label_html}</div>
<div class="plexi-stat-value">{value_html}</div>
<div class="plexi-stat-note">{note_html}</div>
</div>
"""
        with column:
            st.markdown(card_markup, unsafe_allow_html=True)
def render_panel(title, body, tone="default"):
    """Render a titled informational panel.

    ``tone="callout"`` selects the ``plexi-callout`` CSS class; any other
    value uses ``plexi-panel``. ``title`` and ``body`` are HTML-escaped.
    """
    css_class = "plexi-panel"
    if tone == "callout":
        css_class = "plexi-callout"
    markup = f"""
<section class="{css_class}">
<div class="plexi-sidecard-title">{escape(title)}</div>
<div class="plexi-muted">{escape(body)}</div>
</section>
"""
    st.markdown(markup, unsafe_allow_html=True)
def _manifest_url():
    """Build the raw.githubusercontent.com URL of manifest.json."""
    repo, branch = MATERIALS_REPO, MANIFEST_BRANCH
    return f"https://raw.githubusercontent.com/{repo}/{branch}/manifest.json"
@st.cache_data(ttl=300, show_spinner=False)
def get_manifest():
    """Download and parse the materials manifest (cached with a 300 s TTL).

    Raises whatever ``requests`` raises for connection problems, timeouts
    (15 s), or non-success HTTP status codes.
    """
    response = requests.get(_manifest_url(), timeout=15)
    response.raise_for_status()
    return response.json()
def download_github_file(download_url, max_retries=3):
    """Fetch ``download_url`` and return the response body as bytes.

    Up to ``max_retries`` attempts are made. Each failed attempt is printed;
    the exception from the final attempt is re-raised. ``None`` is returned
    only when no attempt is made at all (``max_retries`` <= 0).
    """
    attempt = 0
    while attempt < max_retries:
        attempt += 1
        try:
            response = requests.get(download_url, timeout=60)
            response.raise_for_status()
            return response.content
        except requests.RequestException as err:
            print(f"Download error (attempt {attempt}): {err}")
            if attempt == max_retries:
                raise
    return None
def get_mime_type(filename):
    """Return the MIME type guessed from ``filename``'s extension.

    Falls back to ``application/octet-stream`` when no type can be guessed.
    """
    guessed = mimetypes.guess_type(filename)[0]
    if not guessed:
        return "application/octet-stream"
    return guessed
# ── Lightweight Office → PDF conversion (pure Python) ──────────────────
#
# Uses python-pptx / python-docx to extract content and fpdf2 to render
# PDF pages. No system packages (LibreOffice, etc.) required.
# The output is a readable *preview*, not a pixel-perfect replica.
def _pptx_to_pdf(file_bytes):
    """Convert PPTX bytes to PDF bytes using python-pptx + fpdf2.

    Each slide becomes one PDF page sized like the slide. Pictures are drawn
    as they are encountered; the detected title and the remaining text are
    rendered afterwards. The result is a text-oriented preview, not a
    faithful rendering of the slide layout.

    Args:
        file_bytes: Raw content of a .pptx file.

    Returns:
        The generated PDF as ``bytes``.

    Raises:
        Whatever python-pptx raises for unreadable input, or fpdf2 raises
        while rendering.
    """
    from pptx import Presentation as PptxPresentation
    from pptx.enum.shapes import MSO_SHAPE_TYPE
    from pptx.util import Emu
    from fpdf import FPDF

    def pdf_safe(text):
        # The built-in Helvetica core font only covers Latin-1. Bullets,
        # curly quotes, dashes etc. would otherwise abort the whole
        # conversion, so unsupported characters are replaced with "?".
        return text.encode("latin-1", "replace").decode("latin-1")

    prs = PptxPresentation(io.BytesIO(file_bytes))
    # Slide dimensions in mm (default is 10" × 7.5")
    slide_w_mm = prs.slide_width / Emu(914400) * 25.4  # EMU → inches → mm
    slide_h_mm = prs.slide_height / Emu(914400) * 25.4
    pdf = FPDF(orientation="L", unit="mm", format=(slide_h_mm, slide_w_mm))
    pdf.set_auto_page_break(auto=True, margin=12)
    # Use built-in Helvetica (no font file needed)
    TITLE_SIZE = 18
    BODY_SIZE = 11
    MARGIN = 14
    for slide_idx, slide in enumerate(prs.slides, start=1):
        pdf.add_page()
        pdf.set_left_margin(MARGIN)
        pdf.set_right_margin(MARGIN)
        pdf.set_y(MARGIN)
        # ── Slide number chip ──
        pdf.set_font("Helvetica", "I", 8)
        pdf.set_text_color(120, 120, 120)
        pdf.cell(0, 5, f"Slide {slide_idx}", ln=True)
        pdf.ln(2)
        # ── Extract text from shapes ──
        title_text = ""
        body_parts = []
        for shape in slide.shapes:
            if shape.has_text_frame:
                # ``placeholder_format`` raises ValueError on shapes that are
                # not placeholders (which ``hasattr`` does not swallow), so
                # check ``is_placeholder`` before touching it.
                is_title_candidate = (
                    shape.is_placeholder
                    and shape.placeholder_format.idx in (0, 1)
                )
                for para in shape.text_frame.paragraphs:
                    text = para.text.strip()
                    if not text:
                        continue
                    # Heuristic: first non-empty text in a title placeholder
                    if not title_text and is_title_candidate:
                        title_text = text
                        continue
                    body_parts.append(text)
            # ── Embedded images ──
            if shape.shape_type == MSO_SHAPE_TYPE.PICTURE:
                try:
                    img_stream = io.BytesIO(shape.image.blob)
                    # Scale image to fit page width (with margins)
                    max_w = slide_w_mm - 2 * MARGIN
                    pdf.image(img_stream, x=MARGIN, w=min(max_w, 120))
                    pdf.ln(4)
                except Exception:
                    # Best effort: an unreadable image must not abort the preview.
                    pass
        # ── Render title ──
        if title_text:
            pdf.set_font("Helvetica", "B", TITLE_SIZE)
            pdf.set_text_color(22, 49, 44)  # plexi-ink dark
            pdf.multi_cell(0, TITLE_SIZE * 0.5, pdf_safe(title_text))
            pdf.ln(4)
        # ── Render body text ──
        if body_parts:
            pdf.set_font("Helvetica", "", BODY_SIZE)
            pdf.set_text_color(50, 50, 50)
            for part in body_parts:
                pdf.multi_cell(0, BODY_SIZE * 0.45, pdf_safe(part))
                pdf.ln(2)
    if len(prs.slides) == 0:
        pdf.add_page()
        pdf.set_font("Helvetica", "I", 12)
        pdf.cell(0, 10, "This presentation has no slides.", ln=True)
    return bytes(pdf.output())
def _docx_to_pdf(file_bytes):
    """Convert DOCX bytes to PDF bytes using python-docx + fpdf2.

    Paragraphs are rendered in document order on A4 pages; "Heading 1-3"
    styles get larger bold type, and a paragraph containing any explicitly
    bold run is rendered bold. Images found in the document relationships are
    appended after all text, not at their original position.

    Args:
        file_bytes: Raw content of a .docx file.

    Returns:
        The generated PDF as ``bytes``.

    Raises:
        Whatever python-docx raises for unreadable input, or fpdf2 raises
        while rendering.
    """
    from docx import Document as DocxDocument
    from fpdf import FPDF

    def pdf_safe(text):
        # The built-in Helvetica core font only covers Latin-1. Curly quotes,
        # dashes, bullets etc. would otherwise abort the whole conversion, so
        # unsupported characters are replaced with "?".
        return text.encode("latin-1", "replace").decode("latin-1")

    doc = DocxDocument(io.BytesIO(file_bytes))
    pdf = FPDF(orientation="P", unit="mm", format="A4")
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()
    MARGIN = 16
    pdf.set_left_margin(MARGIN)
    pdf.set_right_margin(MARGIN)
    HEADING_SIZES = {"Heading 1": 20, "Heading 2": 16, "Heading 3": 14}
    BODY_SIZE = 11
    for para in doc.paragraphs:
        text = para.text.strip()
        if not text:
            # Keep a little vertical space for empty paragraphs.
            pdf.ln(3)
            continue
        style_name = para.style.name if para.style else ""
        if style_name in HEADING_SIZES:
            size = HEADING_SIZES[style_name]
            pdf.set_font("Helvetica", "B", size)
            pdf.set_text_color(22, 49, 44)
            pdf.ln(4)
            pdf.multi_cell(0, size * 0.5, pdf_safe(text))
            pdf.ln(3)
        else:
            # ``run.bold`` is None when bold is inherited; only explicit
            # settings are considered.
            is_bold = any(
                run.bold for run in para.runs if run.bold is not None
            )
            pdf.set_font("Helvetica", "B" if is_bold else "", BODY_SIZE)
            pdf.set_text_color(50, 50, 50)
            pdf.multi_cell(0, BODY_SIZE * 0.45, pdf_safe(text))
            pdf.ln(1.5)
    # ── Images (appended after the text) ──
    for rel in doc.part.rels.values():
        if "image" in rel.reltype:
            try:
                img_stream = io.BytesIO(rel.target_part.blob)
                pdf.image(img_stream, x=MARGIN, w=100)
                pdf.ln(4)
            except Exception:
                # Best effort: an unreadable image must not abort the preview.
                pass
    return bytes(pdf.output())
def convert_office_to_pdf(file_bytes, filename):
    """Turn an Office document into preview PDF bytes.

    The converter is picked from the file extension (case-insensitive):
    ``.pptx``/``.ppt`` go to the presentation converter, ``.docx``/``.doc`` to
    the document converter. Only pure-Python libraries are involved, so the
    result is a readable preview rather than a pixel-perfect replica.

    Returns
    -------
    bytes | None
        PDF bytes on success; ``None`` when the extension is not handled or
        the conversion raised (the error is printed).
    """
    suffix = Path(filename).suffix.lower()
    is_presentation = suffix in (".pptx", ".ppt")
    is_document = suffix in (".docx", ".doc")
    if not (is_presentation or is_document):
        return None
    try:
        if is_presentation:
            return _pptx_to_pdf(file_bytes)
        return _docx_to_pdf(file_bytes)
    except Exception as err:
        print(f"Office-to-PDF conversion error ({filename}): {err}")
        return None
def render_sidebar_intro():
    """Write the static Plexi intro card into the sidebar."""
    intro_markup = """
<section class="plexi-sidecard">
<div class="plexi-kicker">Plexi</div>
<div class="plexi-sidecard-title">Grounded study assistant</div>
<div class="plexi-muted">
Browse materials, preview files, and ask questions backed by the
currently loaded course content.
</div>
</section>
"""
    with st.sidebar:
        st.markdown(intro_markup, unsafe_allow_html=True)
def render_sidebar_footer():
    """Render the theme selector, author credit and outbound links in the sidebar."""
    outbound_links = (
        ("Web", "https://lazyhideout.tech"),
        ("GitHub", "https://github.com/kunalgupta25"),
        ("Ko-fi", "https://ko-fi.com/lazy_human"),
    )
    with st.sidebar:
        # Bring the widget key in line with the persisted mode before the
        # selectbox is created, so the selector shows the stored choice.
        expected_label = get_theme_mode().capitalize()
        if st.session_state.get(THEME_MODE_WIDGET_KEY) != expected_label:
            st.session_state[THEME_MODE_WIDGET_KEY] = expected_label
        st.markdown(
            '<div class="plexi-section-label">Appearance</div>',
            unsafe_allow_html=True,
        )
        st.selectbox(
            "Theme",
            ["System", "Light", "Dark"],
            key=THEME_MODE_WIDGET_KEY,
            on_change=sync_theme_mode,
            help="System follows your device preference unless you override it here.",
        )
        st.caption("Built by **Kunal Gupta** (LazyHuman)")
        for column, (label, url) in zip(st.columns(3), outbound_links):
            with column:
                st.link_button(label, url, use_container_width=True)
        st.markdown('<div class="plexi-divider"></div>', unsafe_allow_html=True)
def render_sidebar():
    """Render the default sidebar: intro card first, then the footer controls."""
    for render_section in (render_sidebar_intro, render_sidebar_footer):
        render_section()
def read_pdf_text(pdf_bytes):
    """Return the text of a PDF, one page per line-joined chunk.

    Pages whose extraction fails or yields nothing are skipped. If the
    document cannot be processed at all, the raw bytes are decoded as UTF-8
    (ignoring errors) instead, or an empty string is returned for empty input.
    """
    try:
        reader = PyPDF2.PdfReader(io.BytesIO(pdf_bytes))
        chunks = []
        for page in reader.pages:
            try:
                raw_text = page.extract_text()
                if not raw_text:
                    continue
                # Round-trip through UTF-16 to drop characters that cannot be
                # decoded back (e.g. lone surrogates).
                cleaned = raw_text.encode("utf-16", "surrogatepass").decode(
                    "utf-16", "ignore"
                )
            except Exception:
                continue
            chunks.append(cleaned)
        return "\n".join(chunks)
    except Exception:
        if pdf_bytes:
            return pdf_bytes.decode("utf-8", errors="ignore")
        return ""
def load_subject_context(manifest, semester, subject):
    """Collect the text of every readable file of one semester + subject.

    Only files whose guessed MIME type is ``text/*`` or ``application/pdf``
    are downloaded. Files that fail to load are reported with ``print`` and
    skipped; files without any non-whitespace text are skipped silently.

    Returns (context_string, source_list) where:
    - context_string: numbered source blocks for the system prompt
    - source_list: list of dicts with 'id', 'name', 'type' for citation display
    """
    catalog = manifest.get(semester, {}).get(subject, {})
    blocks = []
    citations = []
    for file_type, entries in catalog.items():
        for entry in entries:
            name = entry["name"]
            mime = get_mime_type(name)
            is_pdf = mime == "application/pdf"
            if not (is_pdf or mime.startswith("text/")):
                continue
            try:
                payload = download_github_file(entry["download_url"])
                if not payload:
                    continue
                if is_pdf:
                    text = read_pdf_text(payload)
                else:
                    text = payload.decode("utf-8", errors="ignore")
                if not text.strip():
                    continue
                # Source ids are consecutive and start at 1.
                source_id = len(citations) + 1
                citations.append({"id": source_id, "name": name, "type": file_type})
                blocks.append(
                    f"[Source {source_id}: {name} ({file_type})]\n{text}\n[End Source {source_id}]"
                )
            except Exception as err:
                print(f"Error loading {name}: {err}")
    return "\n\n".join(blocks), citations
# RAG index loading from GitHub
# The index is pre-built by GitHub Actions (build_index.py) and
# committed to the materials repo. We just download and load it.
# Model id passed to HuggingFaceEmbedding when the index is loaded.
EMBED_MODEL_ID = "sentence-transformers/all-MiniLM-L6-v2"  # must match build_index.py
# File names downloaded from the repo's "index" directory before loading.
INDEX_FILES = [
    "default__vector_store.json",
    "docstore.json",
    "graph_store.json",
    "image__vector_store.json",
    "index_store.json",
]
@st.cache_resource(show_spinner="Loading RAG index...")
def fetch_rag_index():
    """
    Download the pre-built LlamaIndex from the materials repo and return
    a ready-to-use index.

    The result is cached by ``st.cache_resource``, i.e. shared by the whole
    Streamlit process rather than per session. The cached value is the
    returned tuple, so an error result stays cached as well until the cache
    is cleared.

    The index files are written to a temporary directory that is removed as
    soon as loading finishes or fails, so no files are left behind.
    NOTE(review): this assumes the loaded index keeps its data in memory and
    does not read from the directory afterwards - confirm for the store
    types in use.

    Returns (index, error_msg) - index is None if loading failed.
    """
    if not LLAMA_INDEX_AVAILABLE:
        return (
            None,
            "LlamaIndex not installed - install llama-index-core and dependencies.",
        )
    index_base_url = (
        f"https://raw.githubusercontent.com/{MATERIALS_REPO}/{MANIFEST_BRANCH}/index"
    )
    # The context manager deletes the directory on every exit path.
    with tempfile.TemporaryDirectory(prefix="plexi_index_") as index_dir:
        try:
            for filename in INDEX_FILES:
                url = f"{index_base_url}/{filename}"
                resp = requests.get(url, timeout=30)
                resp.raise_for_status()
                with open(os.path.join(index_dir, filename), "wb") as file_handle:
                    file_handle.write(resp.content)
        except Exception as err:
            return None, f"Failed to download index files: {err}"
        try:
            embed_model = HuggingFaceEmbedding(model_name=EMBED_MODEL_ID)
            # Global LlamaIndex settings: use the local embedding model and
            # disable the LLM, since only retrieval is needed here.
            Settings.embed_model = embed_model
            Settings.llm = None
            storage_context = StorageContext.from_defaults(persist_dir=index_dir)
            index = load_index_from_storage(storage_context)
            return index, None
        except Exception as err:
            return None, f"Failed to load index: {err}"