"""`vgm` command-line interface. Three subcommands: - `build` — full pipeline: video (+ optional transcript) → HTML / zip / review HTML. - `export-metadata` — run the pipeline and dump the unified metadata JSON. - `render-from-metadata` — re-render HTML from a previously exported metadata JSON. """ from __future__ import annotations import contextlib import logging from pathlib import Path from typing import Optional import typer from rich.progress import ( BarColumn, Progress, TaskProgressColumn, TextColumn, TimeElapsedColumn, ) from .pipeline import bundle_zip, dump_metadata, load_metadata, render_guide, render_review from .pipeline.html_gen import metadata_to_segment, render_from_metadata from .pipeline.orchestrator import ( PipelineError, PipelineInputs, ProgressEvent, run_pipeline, ) @contextlib.contextmanager def _progress_bar(): """Yield a (callable progress_cb, finalize) pair backed by a rich Progress. The progress_cb signature matches orchestrator.ProgressCallback. """ progress = Progress( TextColumn("[bold]{task.fields[stage]:<13}", justify="left"), BarColumn(bar_width=None), TaskProgressColumn(), TextColumn("{task.fields[msg]}"), TimeElapsedColumn(), transient=False, ) progress.start() task_id = progress.add_task("vgm", total=100, stage="starting", msg="") def cb(ev: ProgressEvent) -> None: progress.update(task_id, completed=ev.percent, stage=ev.stage, msg=ev.message) try: yield cb finally: progress.stop() app = typer.Typer( add_completion=False, help="VideoGuideMaker — generate WCAG-ready study guides from video + transcript.", no_args_is_help=True, ) log = logging.getLogger("videoguidemaker.cli") def _setup_logging(verbose: bool) -> None: # Default to WARNING so the rich progress bar isn't disrupted by INFO logs. # `--verbose` opts into DEBUG output. logging.basicConfig( level=logging.DEBUG if verbose else logging.WARNING, format="%(asctime)s %(levelname)s %(name)s: %(message)s", ) def _resolve_format(fmt: str, output: Path) -> str: fmt = fmt.lower() if fmt not in ("review", "single", "zip", "guide"): raise typer.BadParameter("format must be one of: review, single, zip, guide") return fmt def _safe_filename(title: str) -> str: safe = "".join(c if c.isalnum() or c in "-_ " else "-" for c in title).strip() safe = safe.replace(" ", "-") return safe or "study-guide" @app.command() def build( video: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True), transcript: Optional[Path] = typer.Argument(None, exists=False, dir_okay=False), title: str = typer.Option("Untitled Study Guide", "--title"), subtitle: Optional[str] = typer.Option(None, "--subtitle"), module: Optional[str] = typer.Option(None, "--module"), output: Path = typer.Option(Path("study_guide.html"), "--output", "-o"), frames_dir: Path = typer.Option(Path("static"), "--frames-dir"), auto_transcribe: bool = typer.Option(False, "--auto-transcribe"), whisper_model: str = typer.Option("small", "--whisper-model"), threshold: float = typer.Option(27.0, "--threshold"), min_gap: float = typer.Option(0.0, "--min-gap", help="Drop frames closer than N seconds to the previous one."), max_frames: Optional[int] = typer.Option(None, "--max-frames", help="Cap total frames; uniformly downsamples preserving first + last."), skip_ocr: bool = typer.Option(False, "--skip-ocr", help="Skip OCR pass; on-screen text fields will be empty."), skip_inverted_ocr: bool = typer.Option(False, "--skip-inverted-ocr", help="Skip the inverted-binarization OCR pass. Halves OCR time and avoids inverted-pass garbling on slides without coloured callouts."), face_threshold: float = typer.Option(0.12, "--face-threshold"), lang: str = typer.Option("en", "--lang"), fmt: str = typer.Option("single", "--format", help="review | single | zip | guide"), export_metadata: Optional[Path] = typer.Option(None, "--export-metadata"), verbose: bool = typer.Option(False, "--verbose", "-v"), ) -> None: """Run the full pipeline: video + transcript → HTML.""" _setup_logging(verbose) fmt = _resolve_format(fmt, output) inputs = PipelineInputs( video_path=video, transcript_path=transcript if transcript and transcript.exists() else None, frames_dir=frames_dir, title=title, subtitle=subtitle, module=module, lang=lang, threshold=threshold, min_gap_seconds=min_gap, max_frames=max_frames, skip_ocr=skip_ocr, skip_inverted_ocr=skip_inverted_ocr, face_threshold=face_threshold, auto_transcribe=auto_transcribe, whisper_model=whisper_model, inline_images=(fmt in ("single", "review")), ) try: with _progress_bar() as cb: result = run_pipeline(inputs, progress=cb) except PipelineError as exc: typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True) raise typer.Exit(2) # Dump metadata BEFORE rendering: a render failure (template bug, # disk full mid-write) would otherwise discard the LLM/OCR work # the user just paid for. if export_metadata: dump_metadata(export_metadata, result.page) typer.echo(f"wrote {export_metadata}") common = dict( title=title, segments=result.segments, lang=lang, subtitle=subtitle, module=module, meta_lines=result.page.meta_lines or None, eyebrow=result.page.eyebrow, ) if fmt == "review": html = render_review(**common) else: inline = fmt == "single" if inline: # Inline audio data URIs alongside images so the single HTML # stays self-contained (no broken audio/foo.mp3 references). import base64 for seg, ap in zip(result.segments, result.audio_paths): if ap and ap.exists(): seg.audio_data_uri = ( "data:audio/mpeg;base64," + base64.b64encode(ap.read_bytes()).decode("ascii") ) html = render_guide(inline_images=inline, **common) if fmt == "zip": audio_disk_paths = [p for p in result.audio_paths if p is not None] zip_bytes = bundle_zip( html, [f.image_path for f in result.kept_frames], audio_paths=audio_disk_paths, ) if output.suffix.lower() != ".zip": output = output.with_suffix(".zip") output.write_bytes(zip_bytes) else: output.write_text(html, encoding="utf-8") typer.echo(f"wrote {output}") @app.command("export-metadata") def export_metadata_cmd( video: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True), transcript: Optional[Path] = typer.Argument(None, exists=False, dir_okay=False), title: str = typer.Option("Untitled Study Guide", "--title"), subtitle: Optional[str] = typer.Option(None, "--subtitle"), module: Optional[str] = typer.Option(None, "--module"), output: Path = typer.Option(Path("study_guide_metadata.json"), "--output", "-o"), frames_dir: Path = typer.Option(Path("static"), "--frames-dir"), auto_transcribe: bool = typer.Option(False, "--auto-transcribe"), whisper_model: str = typer.Option("small", "--whisper-model"), threshold: float = typer.Option(27.0, "--threshold"), min_gap: float = typer.Option(0.0, "--min-gap", help="Drop frames closer than N seconds to the previous one."), max_frames: Optional[int] = typer.Option(None, "--max-frames", help="Cap total frames; uniformly downsamples preserving first + last."), skip_ocr: bool = typer.Option(False, "--skip-ocr", help="Skip OCR pass; on-screen text fields will be empty."), skip_inverted_ocr: bool = typer.Option(False, "--skip-inverted-ocr", help="Skip the inverted-binarization OCR pass. Halves OCR time and avoids inverted-pass garbling on slides without coloured callouts."), face_threshold: float = typer.Option(0.12, "--face-threshold"), lang: str = typer.Option("en", "--lang"), verbose: bool = typer.Option(False, "--verbose", "-v"), ) -> None: """Run the pipeline and dump the metadata JSON only (no HTML).""" _setup_logging(verbose) inputs = PipelineInputs( video_path=video, transcript_path=transcript if transcript and transcript.exists() else None, frames_dir=frames_dir, title=title, subtitle=subtitle, module=module, lang=lang, threshold=threshold, min_gap_seconds=min_gap, max_frames=max_frames, skip_ocr=skip_ocr, skip_inverted_ocr=skip_inverted_ocr, face_threshold=face_threshold, auto_transcribe=auto_transcribe, whisper_model=whisper_model, inline_images=False, ) try: with _progress_bar() as cb: result = run_pipeline(inputs, progress=cb) except PipelineError as exc: typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True) raise typer.Exit(2) dump_metadata(output, result.page) typer.echo(f"wrote {output} ({len(result.page.segments)} segments, frames in {frames_dir})") @app.command("render-from-metadata") def render_from_metadata_cmd( metadata_json: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True), output: Path = typer.Option(Path("study_guide.html"), "--output", "-o"), frames_dir: Optional[Path] = typer.Option( None, "--frames-dir", help="Override the frames_dir recorded in the metadata JSON.", ), fmt: str = typer.Option("single", "--format", help="review | single | guide"), verbose: bool = typer.Option(False, "--verbose", "-v"), ) -> None: """Re-render HTML from a previously exported metadata JSON.""" _setup_logging(verbose) fmt = _resolve_format(fmt, output) if fmt == "zip": raise typer.BadParameter("zip format requires source frames; use 'build' instead.") page = load_metadata(metadata_json) resolved_frames_dir = ( frames_dir if frames_dir is not None else (metadata_json.parent / page.frames_dir).resolve() ) html = render_from_metadata(page, Path(resolved_frames_dir), mode=fmt) output.write_text(html, encoding="utf-8") typer.echo(f"wrote {output}") if __name__ == "__main__": # pragma: no cover app()