"""`vgm` command-line interface.

Three subcommands:

- `build` — full pipeline: video (+ optional transcript) → HTML / zip / review HTML.
- `export-metadata` — run the pipeline and dump the unified metadata JSON.
- `render-from-metadata` — re-render HTML from a previously exported metadata JSON.
"""
from __future__ import annotations
import contextlib
import logging
from pathlib import Path
from typing import Optional
import typer
from rich.progress import (
BarColumn,
Progress,
TaskProgressColumn,
TextColumn,
TimeElapsedColumn,
)
from .pipeline import bundle_zip, dump_metadata, load_metadata, render_guide, render_review
from .pipeline.html_gen import metadata_to_segment, render_from_metadata
from .pipeline.orchestrator import (
PipelineError,
PipelineInputs,
ProgressEvent,
run_pipeline,
)
@contextlib.contextmanager
def _progress_bar():
    """Yield a progress callback backed by a live rich ``Progress`` display.

    A single task is created with ``total=100``.  Each call to the yielded
    callback copies ``ev.percent``, ``ev.stage`` and ``ev.message`` from the
    given ``ProgressEvent`` onto that task.  The display is stopped when the
    caller's ``with`` block exits, whether normally or through an exception.

    The callback is intended to match ``orchestrator.ProgressCallback``.

    Yields:
        A callable accepting one ``ProgressEvent`` and returning ``None``.
    """
    progress = Progress(
        TextColumn("[bold]{task.fields[stage]:<13}", justify="left"),
        BarColumn(bar_width=None),
        TaskProgressColumn(),
        TextColumn("{task.fields[msg]}"),
        TimeElapsedColumn(),
        transient=False,
    )
    # Entering the Progress starts the live display and guarantees stop() on
    # exit, including the case where add_task itself raises.
    with progress:
        task_id = progress.add_task("vgm", total=100, stage="starting", msg="")

        def cb(ev: ProgressEvent) -> None:
            progress.update(task_id, completed=ev.percent, stage=ev.stage, msg=ev.message)

        yield cb
# Module logger; the name is a fixed string rather than derived from __name__.
log = logging.getLogger("videoguidemaker.cli")

# Root Typer application; the subcommands in this module register on it
# through the @app.command decorator.
app = typer.Typer(
    help="VideoGuideMaker — generate WCAG-ready study guides from video + transcript.",
    no_args_is_help=True,
    add_completion=False,
)
def _setup_logging(verbose: bool) -> None:
    """Configure root logging for one CLI invocation.

    The level is WARNING unless ``verbose`` is true, so that INFO records do
    not interleave with the rich progress bar; ``--verbose`` opts into DEBUG.
    """
    if verbose:
        level = logging.DEBUG
    else:
        level = logging.WARNING
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(name)s: %(message)s",
        level=level,
    )
def _resolve_format(fmt: str, output: Path) -> str:
    """Normalise and validate an output-format name.

    ``fmt`` is lower-cased and returned when it is one of ``review``,
    ``single``, ``zip`` or ``guide``; any other value raises
    ``typer.BadParameter``.  ``output`` is accepted but not consulted.
    """
    normalized = fmt.lower()
    if normalized in ("review", "single", "zip", "guide"):
        return normalized
    raise typer.BadParameter("format must be one of: review, single, zip, guide")
def _safe_filename(title: str) -> str:
    """Turn a title into a filename-friendly slug.

    Characters that are neither alphanumeric nor one of ``-``, ``_`` or a
    space become ``-``.  Surrounding whitespace is then stripped and the
    remaining spaces become ``-``.  An empty result falls back to
    ``"study-guide"``.
    """
    keep = "-_ "
    pieces = [ch if (ch.isalnum() or ch in keep) else "-" for ch in title]
    slug = "".join(pieces).strip().replace(" ", "-")
    return slug if slug else "study-guide"
@app.command()
def build(
    video: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True),
    transcript: Optional[Path] = typer.Argument(None, exists=False, dir_okay=False),
    title: str = typer.Option("Untitled Study Guide", "--title"),
    subtitle: Optional[str] = typer.Option(None, "--subtitle"),
    module: Optional[str] = typer.Option(None, "--module"),
    output: Path = typer.Option(Path("study_guide.html"), "--output", "-o"),
    frames_dir: Path = typer.Option(Path("static"), "--frames-dir"),
    auto_transcribe: bool = typer.Option(False, "--auto-transcribe"),
    whisper_model: str = typer.Option("small", "--whisper-model"),
    threshold: float = typer.Option(27.0, "--threshold"),
    min_gap: float = typer.Option(0.0, "--min-gap", help="Drop frames closer than N seconds to the previous one."),
    max_frames: Optional[int] = typer.Option(None, "--max-frames", help="Cap total frames; uniformly downsamples preserving first + last."),
    skip_ocr: bool = typer.Option(False, "--skip-ocr", help="Skip OCR pass; on-screen text fields will be empty."),
    skip_inverted_ocr: bool = typer.Option(False, "--skip-inverted-ocr", help="Skip the inverted-binarization OCR pass. Halves OCR time and avoids inverted-pass garbling on slides without coloured callouts."),
    face_threshold: float = typer.Option(0.12, "--face-threshold"),
    lang: str = typer.Option("en", "--lang"),
    fmt: str = typer.Option("single", "--format", help="review | single | zip | guide"),
    export_metadata: Optional[Path] = typer.Option(None, "--export-metadata"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Run the full pipeline: video + transcript → HTML."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # longer explanation lives in comments.
    #
    # Flow: validate --format, run the pipeline behind a progress bar,
    # optionally dump the metadata JSON, render review/guide HTML, then write
    # either the HTML file or a zip bundle.  A PipelineError is reported on
    # stderr and exits with status 2.
    _setup_logging(verbose)
    fmt = _resolve_format(fmt, output)

    # A transcript path that does not exist is tolerated (the pipeline simply
    # runs without one), but tell the user: silently ignoring a mistyped path
    # is indistinguishable from the transcript having been used.
    transcript_path = (
        transcript if transcript is not None and transcript.exists() else None
    )
    if transcript is not None and transcript_path is None:
        typer.secho(
            f"warning: transcript {transcript} not found; continuing without it",
            fg=typer.colors.YELLOW,
            err=True,
        )

    inputs = PipelineInputs(
        video_path=video,
        transcript_path=transcript_path,
        frames_dir=frames_dir,
        title=title,
        subtitle=subtitle,
        module=module,
        lang=lang,
        threshold=threshold,
        min_gap_seconds=min_gap,
        max_frames=max_frames,
        skip_ocr=skip_ocr,
        skip_inverted_ocr=skip_inverted_ocr,
        face_threshold=face_threshold,
        auto_transcribe=auto_transcribe,
        whisper_model=whisper_model,
        inline_images=(fmt in ("single", "review")),
    )
    try:
        with _progress_bar() as cb:
            result = run_pipeline(inputs, progress=cb)
    except PipelineError as exc:
        typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True)
        # Chain the cause so the original failure stays attached to the exit.
        raise typer.Exit(2) from exc

    # Dump metadata BEFORE rendering: a render failure (template bug,
    # disk full mid-write) would otherwise discard the LLM/OCR work
    # the user just paid for.
    if export_metadata:
        dump_metadata(export_metadata, result.page)
        typer.echo(f"wrote {export_metadata}")

    # Keyword arguments shared by both renderers.
    common = dict(
        title=title,
        segments=result.segments,
        lang=lang,
        subtitle=subtitle,
        module=module,
        meta_lines=result.page.meta_lines or None,
        eyebrow=result.page.eyebrow,
    )

    if fmt == "review":
        html = render_review(**common)
    else:
        inline = fmt == "single"
        if inline:
            # Inline audio data URIs alongside images so the single HTML
            # stays self-contained (no broken audio/foo.mp3 references).
            import base64

            for seg, ap in zip(result.segments, result.audio_paths):
                if ap and ap.exists():
                    seg.audio_data_uri = (
                        "data:audio/mpeg;base64,"
                        + base64.b64encode(ap.read_bytes()).decode("ascii")
                    )
        html = render_guide(inline_images=inline, **common)

    if fmt == "zip":
        audio_disk_paths = [p for p in result.audio_paths if p is not None]
        zip_bytes = bundle_zip(
            html,
            [f.image_path for f in result.kept_frames],
            audio_paths=audio_disk_paths,
        )
        # A zip bundle is always written with a .zip suffix.
        if output.suffix.lower() != ".zip":
            output = output.with_suffix(".zip")

    # Create the destination directory if needed so a finished pipeline run
    # is not lost to a missing output folder at the final write.
    output.parent.mkdir(parents=True, exist_ok=True)
    if fmt == "zip":
        output.write_bytes(zip_bytes)
    else:
        output.write_text(html, encoding="utf-8")
    typer.echo(f"wrote {output}")
@app.command("export-metadata")
def export_metadata_cmd(
    video: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True),
    transcript: Optional[Path] = typer.Argument(None, exists=False, dir_okay=False),
    title: str = typer.Option("Untitled Study Guide", "--title"),
    subtitle: Optional[str] = typer.Option(None, "--subtitle"),
    module: Optional[str] = typer.Option(None, "--module"),
    output: Path = typer.Option(Path("study_guide_metadata.json"), "--output", "-o"),
    frames_dir: Path = typer.Option(Path("static"), "--frames-dir"),
    auto_transcribe: bool = typer.Option(False, "--auto-transcribe"),
    whisper_model: str = typer.Option("small", "--whisper-model"),
    threshold: float = typer.Option(27.0, "--threshold"),
    min_gap: float = typer.Option(0.0, "--min-gap", help="Drop frames closer than N seconds to the previous one."),
    max_frames: Optional[int] = typer.Option(None, "--max-frames", help="Cap total frames; uniformly downsamples preserving first + last."),
    skip_ocr: bool = typer.Option(False, "--skip-ocr", help="Skip OCR pass; on-screen text fields will be empty."),
    skip_inverted_ocr: bool = typer.Option(False, "--skip-inverted-ocr", help="Skip the inverted-binarization OCR pass. Halves OCR time and avoids inverted-pass garbling on slides without coloured callouts."),
    face_threshold: float = typer.Option(0.12, "--face-threshold"),
    lang: str = typer.Option("en", "--lang"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Run the pipeline and dump the metadata JSON only (no HTML)."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # longer explanation lives in comments.
    #
    # Flow: run the pipeline behind a progress bar with inline_images=False,
    # then write result.page to `output` via dump_metadata.  A PipelineError
    # is reported on stderr and exits with status 2.
    _setup_logging(verbose)

    # A transcript path that does not exist is tolerated (the pipeline simply
    # runs without one), but tell the user: silently ignoring a mistyped path
    # is indistinguishable from the transcript having been used.
    transcript_path = (
        transcript if transcript is not None and transcript.exists() else None
    )
    if transcript is not None and transcript_path is None:
        typer.secho(
            f"warning: transcript {transcript} not found; continuing without it",
            fg=typer.colors.YELLOW,
            err=True,
        )

    inputs = PipelineInputs(
        video_path=video,
        transcript_path=transcript_path,
        frames_dir=frames_dir,
        title=title,
        subtitle=subtitle,
        module=module,
        lang=lang,
        threshold=threshold,
        min_gap_seconds=min_gap,
        max_frames=max_frames,
        skip_ocr=skip_ocr,
        skip_inverted_ocr=skip_inverted_ocr,
        face_threshold=face_threshold,
        auto_transcribe=auto_transcribe,
        whisper_model=whisper_model,
        inline_images=False,
    )
    try:
        with _progress_bar() as cb:
            result = run_pipeline(inputs, progress=cb)
    except PipelineError as exc:
        typer.secho(f"error: {exc}", fg=typer.colors.RED, err=True)
        # Chain the cause so the original failure stays attached to the exit.
        raise typer.Exit(2) from exc

    dump_metadata(output, result.page)
    typer.echo(f"wrote {output} ({len(result.page.segments)} segments, frames in {frames_dir})")
@app.command("render-from-metadata")
def render_from_metadata_cmd(
    metadata_json: Path = typer.Argument(..., exists=True, dir_okay=False, readable=True),
    output: Path = typer.Option(Path("study_guide.html"), "--output", "-o"),
    frames_dir: Optional[Path] = typer.Option(
        None, "--frames-dir",
        help="Override the frames_dir recorded in the metadata JSON.",
    ),
    fmt: str = typer.Option("single", "--format", help="review | single | guide"),
    verbose: bool = typer.Option(False, "--verbose", "-v"),
) -> None:
    """Re-render HTML from a previously exported metadata JSON."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # longer explanation lives in comments.
    _setup_logging(verbose)
    fmt = _resolve_format(fmt, output)

    # "zip" passes the generic format check but is rejected for this command.
    if fmt == "zip":
        raise typer.BadParameter("zip format requires source frames; use 'build' instead.")

    page = load_metadata(metadata_json)

    if frames_dir is None:
        # No override: take the frames_dir recorded in the metadata, resolved
        # against the directory that holds the metadata file.
        frames_root = (metadata_json.parent / page.frames_dir).resolve()
    else:
        frames_root = frames_dir

    rendered = render_from_metadata(page, Path(frames_root), mode=fmt)
    output.write_text(rendered, encoding="utf-8")
    typer.echo(f"wrote {output}")
if __name__ == "__main__":  # pragma: no cover
    # Allow running this module directly as a script: hand off to the Typer app.
    app()