"""Gradio app that transcribes, summarizes and quizzes YouTube lectures.

Flow: download the audio (directly with yt-dlp, or through an optional
extraction proxy), transcribe it, then generate a summary and a quiz,
streaming partial results and a step indicator to the UI as they arrive.
"""

import html
import os
import re
import shutil
import tempfile
import time
import traceback

import gradio as gr
import httpx
import yt_dlp

# NOTE(review): `spaces` is imported before the model modules below on
# purpose; Hugging Face ZeroGPU presumably needs it loaded before anything
# initialises CUDA -- confirm before reordering imports.
try:
    import spaces
except ImportError:
    class spaces:
        """No-op stand-in used when the `spaces` package is not installed."""

        @staticmethod
        def GPU(duration=60):
            """Return a decorator that leaves the wrapped function unchanged."""
            def decorator(fn):
                return fn
            return decorator

PROXY_BASE = os.environ.get("PROXY_BASE", "").rstrip("/")
PROXY_TOKEN = os.environ.get("PROXY_TOKEN", "")

from transcribe import transcribe_audio, unload_model as unload_whisper
from lecture_processor import summarize_lecture, generate_quiz

# Additional language options, currently disabled:
# LANGUAGES = {
#     "Auto-detect": None,
#     "English": "en",
#     "Korean": "ko",
#     "Japanese": "ja",
#     "Chinese": "zh",
#     "Spanish": "es",
#     "French": "fr",
#     "German": "de",
#     "Italian": "it",
#     "Portuguese": "pt",
#     "Russian": "ru",
#     "Arabic": "ar",
#     "Hindi": "hi",
# }

# Tried in order; the first pattern is the original one, so every URL that
# used to be accepted still yields the same ID.
_YOUTUBE_ID_PATTERNS = (
    r"(?:youtube\.com/watch\?v=|youtu\.be/|youtube\.com/embed/)([a-zA-Z0-9_-]{11})",
    # `v` is not the first query parameter (e.g. watch?feature=share&v=...).
    r"youtube\.com/watch\?[^#\s]*?&v=([a-zA-Z0-9_-]{11})",
    r"youtube\.com/(?:shorts|live)/([a-zA-Z0-9_-]{11})",
)


def get_youtube_video_id(url: str) -> str | None:
    """Extract the 11-character video ID from a YouTube URL.

    Handles watch, youtu.be, embed, shorts and live URLs.

    Returns:
        The video ID, or None when no supported pattern matches.
    """
    for pattern in _YOUTUBE_ID_PATTERNS:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    return None


def make_embed_html(video_id: str) -> str:
    """Return an iframe that embeds the given YouTube video.

    NOTE(review): the original markup was not recoverable from this view;
    the iframe attributes below are a reconstruction -- confirm the sizing.
    """
    return (
        '<iframe width="100%" height="400" '
        f'src="https://www.youtube.com/embed/{video_id}" '
        'frameborder="0" allowfullscreen></iframe>'
    )


def download_youtube_audio(url: str) -> str:
    """Download audio from a YouTube URL and return the path to the wav file.

    The file is written to a fresh temporary directory that the caller is
    responsible for deleting. If the download raises, the directory is
    removed before the exception propagates.
    """
    tmp_dir = tempfile.mkdtemp()
    ydl_opts = {
        "format": "bestaudio/best",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
        }],
        # No extension here: the wav post-processor produces `audio.wav`.
        "outtmpl": os.path.join(tmp_dir, "audio"),
        "quiet": True,
        "no_warnings": True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
    except Exception:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    return os.path.join(tmp_dir, "audio.wav")


LANGUAGES = {
    "English": "en",
}


def make_status_html(step: int = 0, timing: str = "", error: str = "") -> str:
    """Render the step progress indicator as HTML.

    Args:
        step: 0=idle, 1=download, 2=transcribe, 3=summarize, 4=quiz, 5=done.
        timing: Optional timing text shown below the steps.
        error: When non-empty, an error banner is rendered instead of steps.

    Returns:
        An HTML fragment, or "" when idle and there is no error.
    """
    if error:
        # Error text can carry exception messages, so escape it before it is
        # placed into markup.
        return f'<div class="status-bar error">{html.escape(error)}</div>'
    if step == 0:
        return ""

    labels = ["Download", "Transcribe", "Summarize", "Quiz"]
    items = []
    for number, label in enumerate(labels, start=1):
        if number < step or step == 5:
            cls, icon = "done", "✓"
        elif number == step:
            cls, icon = "active", "↻"
        else:
            cls, icon = "pending", str(number)
        items.append(
            f'<div class="step {cls}"><span class="num">{icon}</span>{label}</div>'
        )

    steps_html = '<div class="conn"></div>'.join(items)
    timing_html = f'<div class="timing">{timing}</div>' if timing else ""
    return (
        f'<div class="status-bar"><div class="steps">{steps_html}</div>'
        f'{timing_html}</div>'
    )


def _format_timings(timings: dict[str, float]) -> str:
    """Join per-stage durations as "Stage: 1.2s | Stage: 3.4s"."""
    return " | ".join(f"{name}: {secs:.1f}s" for name, secs in timings.items())


@spaces.GPU(duration=120)
def _run_pipeline(audio_path: str, language: str):
    """Transcribe, summarize and quiz, yielding progress after each stage.

    Yields:
        Tuples of (transcript, summary, quiz, step, timing). A transcription
        failure or an empty transcript yields once with step 0 and stops.
        Summarization and quiz failures are reported inline in their own
        field and do not stop the pipeline.
    """
    lang_code = LANGUAGES.get(language)
    timings = {}

    gr.Info("Transcribing audio with WhisperX...")
    try:
        started = time.perf_counter()
        raw_text = transcribe_audio(audio_path, language=lang_code)
        timings["Transcription"] = time.perf_counter() - started
    except Exception as e:
        print(f"[ERROR] Transcription failed: {e}")
        traceback.print_exc()
        yield f"[Transcription error] {e}", "", "", 0, ""
        return

    if not raw_text:
        yield "(no speech detected)", "", "", 0, ""
        return

    yield raw_text, "", "", 3, _format_timings(timings)

    unload_whisper()

    gr.Info("Generating summary with Gemma...")
    try:
        started = time.perf_counter()
        summary = summarize_lecture(raw_text)
        timings["Summarization"] = time.perf_counter() - started
    except Exception as e:
        print(f"[ERROR] Summarization failed: {e}")
        traceback.print_exc()
        summary = f"[Summarization error] {e}"

    yield raw_text, summary, "", 4, _format_timings(timings)

    gr.Info("Generating quiz with Gemma...")
    try:
        started = time.perf_counter()
        quiz = generate_quiz(raw_text)
        timings["Quiz Generation"] = time.perf_counter() - started
    except Exception as e:
        print(f"[ERROR] Quiz generation failed: {e}")
        traceback.print_exc()
        quiz = f"[Quiz generation error] {e}"

    timing_str = _format_timings(timings)
    timing_str += f" | Total: {sum(timings.values()):.1f}s"
    yield raw_text, summary, quiz, 5, timing_str


def fetch_audio_from_proxy(url: str) -> str:
    """Stream extracted audio from the proxy into a temp file; return its path.

    The file lives in a fresh temporary directory that the caller is
    responsible for deleting. If the request or the streaming fails, the
    directory is removed before the exception propagates.

    Raises:
        httpx.HTTPStatusError: The proxy answered with an error status.
    """
    headers = {"x-proxy-token": PROXY_TOKEN} if PROXY_TOKEN else {}
    tmp_dir = tempfile.mkdtemp()
    # NOTE(review): saved as `audio.wav` although "best" is requested, so the
    # payload may not be wav -- confirm the transcriber ignores the extension.
    audio_path = os.path.join(tmp_dir, "audio.wav")
    try:
        with httpx.stream(
            "POST",
            f"{PROXY_BASE}/extract",
            json={"url": url, "audio_format": "best"},
            headers=headers,
            timeout=600,
        ) as resp:
            resp.raise_for_status()
            with open(audio_path, "wb") as f:
                for chunk in resp.iter_bytes(chunk_size=8192):
                    f.write(chunk)
    except Exception:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    return audio_path


def process_youtube(url: str, language: str):
    """Run the full lecture pipeline for a YouTube URL.

    Yields:
        Tuples of (embed, transcript, summary, quiz, status_html), emitted
        progressively so the UI updates after every stage.
    """
    if not url or not url.strip():
        yield "", "", "", "", ""
        return

    url = url.strip()
    video_id = get_youtube_video_id(url)
    if not video_id:
        yield "", "", "", "", make_status_html(error="Please enter a valid YouTube URL")
        return

    embed_html = make_embed_html(video_id)
    yield embed_html, "", "", "", make_status_html(1)

    try:
        started = time.perf_counter()
        if PROXY_BASE:
            audio_path = fetch_audio_from_proxy(url)
        else:
            gr.Info("Downloading audio from YouTube...")
            audio_path = download_youtube_audio(url)
        dl_time = time.perf_counter() - started
    except Exception as e:
        print(f"[ERROR] Download failed: {e}")
        traceback.print_exc()
        yield embed_html, "", "", "", make_status_html(error=f"Download failed: {e}")
        return

    download_timing = f"Download: {dl_time:.1f}s"
    try:
        yield embed_html, "", "", "", make_status_html(2, download_timing)

        for raw_text, summary, quiz, step, timing_str in _run_pipeline(audio_path, language):
            full_timing = f"{download_timing} | {timing_str}" if timing_str else ""
            yield embed_html, raw_text, summary, quiz, make_status_html(step, full_timing)
    finally:
        # Both download paths create a dedicated temp dir; remove it once the
        # pipeline ends, including when the generator is closed early.
        shutil.rmtree(os.path.dirname(audio_path), ignore_errors=True)


EXAMPLES = {
    "MIT OpenCourseWare": "https://www.youtube.com/watch?v=7Pq-S557XQU",
    "Stanford CS229": "https://www.youtube.com/watch?v=jGwO_UgTS7I",
}

# ---------------------------------------------------------------------------
# ICL Gradio Theme
# ---------------------------------------------------------------------------
_icl_blue = gr.themes.Color(
    c50="#F0F7FC",
    c100="#D4EFFC",
    c200="#A8DFFA",
    c300="#5CC4F0",
    c400="#00ACD7",
    c500="#0091D4",
    c600="#003E74",
    c700="#002147",
    c800="#001A38",
    c900="#001029",
    c950="#000A1A",
    name="icl-blue",
)

_icl_tangerine = gr.themes.Color(
    c50="#FFF5EB",
    c100="#FFE6CC",
    c200="#FFCC99",
    c300="#FFB366",
    c400="#FF9933",
    c500="#EC7300",
    c600="#CC6300",
    c700="#A35000",
    c800="#7A3C00",
    c900="#522800",
    c950="#331900",
    name="icl-tangerine",
)

_icl_grey = gr.themes.Color(
    c50="#F7F8F8",
    c100="#EBEEEE",
    c200="#D5D9D9",
    c300="#B8BCBC",
    c400="#9D9D9D",
    c500="#7A7A7A",
    c600="#5C5C5C",
    c700="#4A4A4A",
    c800="#373A36",
    c900="#2A2D2A",
    c950="#1A1C1A",
    name="icl-grey",
)

ICL_THEME = gr.themes.Base(
    primary_hue=_icl_blue,
    secondary_hue=_icl_tangerine,
    neutral_hue=_icl_grey,
    font=[gr.themes.GoogleFont("Source Sans Pro"), "Arial", "sans-serif"],
    font_mono=[gr.themes.GoogleFont("Source Code Pro"), "monospace"],
).set(
    # Primary buttons – Navy background
    button_primary_background_fill="#002147",
    button_primary_background_fill_dark="#003E74",
    button_primary_background_fill_hover="#003E74",
    button_primary_background_fill_hover_dark="#0091D4",
    button_primary_border_color="#002147",
    button_primary_border_color_dark="#003E74",
    button_primary_border_color_hover="#003E74",
    button_primary_text_color="white",
    button_primary_text_color_dark="white",
    # Secondary buttons – white bg, blue border/text
    button_secondary_background_fill="white",
    button_secondary_background_fill_dark="#1A1C1A",
    button_secondary_background_fill_hover="#D4EFFC",
    button_secondary_background_fill_hover_dark="#001A38",
    button_secondary_border_color="#003E74",
    button_secondary_border_color_dark="#0091D4",
    button_secondary_border_color_hover="#002147",
    button_secondary_text_color="#003E74",
    button_secondary_text_color_dark="#D4EFFC",
    button_secondary_text_color_hover="#002147",
    # Focus & loader
    input_border_color_focus="#00ACD7",
    input_border_color_focus_dark="#00ACD7",
    loader_color="#003E74",
    loader_color_dark="#0091D4",
)

# ---------------------------------------------------------------------------
# CSS – custom properties + minimal overrides
# ---------------------------------------------------------------------------
CSS = """
:root {
    --icl-navy: #002147;
    --icl-blue: #003E74;
    --icl-process-blue: #0091D4;
    --icl-pool: #00ACD7;
    --icl-light-blue: #D4EFFC;
    --icl-tangerine: #EC7300;
    --icl-violet: #653098;
    --icl-green: #02893B;
    --icl-lime: #BBCE00;
    --icl-red: #B22234;
    --icl-grey: #EBEEEE;
    --icl-cool-grey: #9D9D9D;
    --icl-dark-grey: #373A36;
    --sp-1: 4px;
    --sp-2: 8px;
    --sp-3: 12px;
    --sp-4: 16px;
    --sp-5: 24px;
    --sp-6: 32px;
    --sp-7: 48px;
    --sp-8: 64px;
}

/* Header brand bar */
.icl-header {
    text-align: center;
    padding: var(--sp-5) var(--sp-4);
    border-bottom: 3px solid var(--icl-navy);
    margin-bottom: var(--sp-5);
}
.icl-header img {
    height: 60px;
    margin-bottom: var(--sp-2);
}
.dark .icl-header {
    border-bottom-color: var(--icl-pool);
}

/* Title & subtitle */
.main-title {
    text-align: center;
    color: var(--icl-navy);
    margin-bottom: 0 !important;
}
.subtitle {
    text-align: center;
    color: var(--icl-blue);
    margin-top: 0 !important;
}
.dark .main-title { color: var(--icl-light-blue); }
.dark .subtitle { color: var(--icl-pool); }

/* Tab selected override (Gradio tabs need !important) */
.tabs .tab-nav button.selected {
    border-color: var(--icl-navy) !important;
    color: var(--icl-navy) !important;
}
.dark .tabs .tab-nav button.selected {
    border-color: var(--icl-pool) !important;
    color: var(--icl-pool) !important;
}

/* Focus & active states */
button:focus-visible,
input:focus-visible,
textarea:focus-visible,
select:focus-visible {
    outline: 3px solid var(--icl-pool);
    outline-offset: 2px;
}
button:active {
    transform: scale(0.97);
}

/* Example buttons – compact inside bordered card */
.examples-row {
    justify-content: center !important;
    gap: var(--sp-2);
    border: 1px solid var(--icl-light-blue);
    border-radius: 8px;
    padding: var(--sp-3) var(--sp-4);
    background: var(--icl-grey);
}
.examples-row > * {
    flex: 0 0 auto !important;
    max-width: fit-content !important;
}
.dark .examples-row {
    background: #1f2937;
    border-color: var(--icl-blue);
}

/* Step progress indicator */
.status-bar {
    padding: var(--sp-3) var(--sp-4);
    border-radius: 8px;
    background: var(--icl-grey);
    border: 1px solid var(--icl-light-blue);
}
.status-bar.error {
    background: #f8d7da;
    border-color: #f5c6cb;
    color: #721c24;
    text-align: center;
    font-weight: 500;
}
.status-bar .steps {
    display: flex;
    align-items: center;
    justify-content: center;
    gap: 0;
}
.status-bar .step {
    display: flex;
    align-items: center;
    gap: 6px;
    padding: 6px 14px;
    border-radius: 20px;
    font-size: 14px;
    font-weight: 500;
    background: var(--icl-light-blue);
    color: var(--icl-blue);
    white-space: nowrap;
    transition: all 0.3s ease;
}
.status-bar .step.active {
    background: var(--icl-blue);
    color: white;
    animation: pulse 1.5s ease-in-out infinite;
}
.status-bar .step.done {
    background: var(--icl-navy);
    color: white;
}
.status-bar .step .num {
    font-weight: 700;
    min-width: 18px;
    text-align: center;
}
.status-bar .conn {
    width: 24px;
    height: 2px;
    background: var(--icl-light-blue);
    flex-shrink: 0;
}
.status-bar .timing {
    text-align: center;
    margin-top: var(--sp-2);
    font-size: 13px;
    color: var(--icl-blue);
}
@keyframes pulse {
    0%, 100% { opacity: 1; }
    50% { opacity: 0.6; }
}

/* Dark mode – status bar */
.dark .status-bar {
    background: #1f2937;
    border-color: var(--icl-blue);
}
.dark .status-bar.error {
    background: #7f1d1d;
    border-color: #991b1b;
    color: #fca5a5;
}
.dark .status-bar .step {
    background: var(--icl-blue);
    color: var(--icl-light-blue);
}
.dark .status-bar .step.active {
    background: var(--icl-tangerine);
    color: white;
}
.dark .status-bar .step.done {
    background: var(--icl-navy);
    color: var(--icl-light-blue);
}
.dark .status-bar .conn {
    background: var(--icl-blue);
}
.dark .status-bar .timing {
    color: var(--icl-light-blue);
}

/* Footer */
.footer {
    text-align: center;
    color: var(--icl-dark-grey);
    font-size: 0.85em;
    margin-top: var(--sp-4);
}
.dark .footer {
    color: var(--icl-cool-grey);
}

/* Reduced motion */
@media (prefers-reduced-motion: reduce) {
    *, *::before, *::after {
        animation-duration: 0.01ms !important;
        animation-iteration-count: 1 !important;
        transition-duration: 0.01ms !important;
    }
}

/* Responsive */
@media (max-width: 768px) {
    .icl-header img { height: 40px; }
    .status-bar .step { padding: 4px 10px; font-size: 12px; }
    .status-bar .conn { width: 12px; }
}
@media (max-width: 480px) {
    .icl-header img { height: 32px; }
    .icl-header { padding: var(--sp-3) var(--sp-2); }
}
"""

with gr.Blocks(
    title="Lecture Processor",
    css=CSS,
    theme=ICL_THEME,
) as demo:
    # NOTE(review): the header markup (presumably a crest <img>) was not
    # recoverable from this view; restore the image source here.
    gr.HTML("""
    <div class="icl-header">
        ICL Crest
    </div>
    """)
    gr.Markdown("# Lecture Processor", elem_classes="main-title")
    gr.Markdown(
        "Transcribe, summarize, and generate quizzes from lectures",
        elem_classes="subtitle",
    )

    with gr.Row():
        youtube_input = gr.Textbox(
            label="🔗 YouTube URL",
            placeholder="https://www.youtube.com/watch?v=...",
            scale=3,
        )
        language_dropdown = gr.Dropdown(
            choices=list(LANGUAGES.keys()),
            value="English",
            label="Language",
            scale=1,
        )
    youtube_btn = gr.Button("▶ Process Lecture", variant="primary", size="lg")

    gr.Markdown("**Examples:**")
    with gr.Row(elem_classes="examples-row"):
        for name, url in EXAMPLES.items():
            # `u=url` binds the current URL; a plain closure would see only
            # the last loop value.
            gr.Button(name, variant="secondary", size="sm", min_width=160).click(
                fn=lambda u=url: u, outputs=[youtube_input]
            )

    status_output = gr.HTML()
    video_embed = gr.HTML()

    with gr.Tabs():
        with gr.TabItem("Transcript"):
            raw_output = gr.Textbox(
                label="Raw Transcription", lines=12
            )
        with gr.TabItem("Summary"):
            summary_output = gr.Textbox(label="Lecture Summary", lines=12)
        with gr.TabItem("Quiz"):
            quiz_output = gr.Textbox(label="Quiz Questions", lines=12)

    gr.Markdown(
        "Powered by **WhisperX** & **Gemma 3 4B** | Fine-tuned LoRA adapter",
        elem_classes="footer",
    )

    outputs = [video_embed, raw_output, summary_output, quiz_output, status_output]

    youtube_btn.click(
        fn=process_youtube,
        inputs=[youtube_input, language_dropdown],
        outputs=outputs,
    )
    youtube_input.submit(
        fn=process_youtube,
        inputs=[youtube_input, language_dropdown],
        outputs=outputs,
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=True)