import {
  startTransition,
  useDeferredValue,
  useEffect,
  useEffectEvent,
  useRef,
  useState,
} from "react";
import { ArrowLeft, Camera, Film, Pause, Play } from "lucide-react";
import { BrandMark } from "./BrandMark";
import { useVLM } from "../context/VLMContext";

/**
 * Media input for the capture scene: either a live webcam `MediaStream`
 * or a video file addressed by URL. `kind` is the discriminant.
 */
export type CaptureSource =
  | {
      kind: "webcam";
      label: string;
      stream: MediaStream;
    }
  | {
      kind: "file";
      label: string;
      url: string;
    };

/** One finished caption kept in the caption history. */
type CaptionEntry = {
  id: string;
  text: string;
};

/** Props accepted by `CaptureScene`. */
type CaptureSceneProps = {
  mediaError: string | null;
  onChooseVideo: () => void;
  // `Promise` is generic and needs a type argument to compile.
  // NOTE(review): `void` assumes the resolved value is not consumed by this
  // component — confirm against the parent that supplies the callback.
  onChooseWebcam: () => Promise<void>;
  onDismissMediaError: () => void;
  onExit: () => void;
  onPromptChange: (prompt: string) => void;
  prompt: string;
  promptPresets: readonly {
    display: string;
    prompt: string;
  }[];
  source: CaptureSource;
};

/** Maximum number of entries retained in the caption history. */
const CAPTION_LIMIT = 4;

/**
 * Returns a promise that resolves (with no value) once `milliseconds`
 * have elapsed, scheduled with `window.setTimeout`.
 */
function wait(milliseconds: number): Promise<void> {
  return new Promise<void>((resolve) => {
    window.setTimeout(resolve, milliseconds);
  });
}

/**
 * Produces an identifier for a caption entry.
 *
 * Uses `crypto.randomUUID()` when it exists on `globalThis`; otherwise falls
 * back to a string built from the current timestamp and `Math.random()`.
 */
function createCaptionId(): string {
  return (
    globalThis.crypto?.randomUUID?.() ??
    `caption-${Date.now()}-${Math.random()}`
  );
}

/** Collapses every run of whitespace into a single space and trims both ends. */
function normalizePrompt(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Converts a caught value into a displayable message: the `message` of an
 * `Error` instance, or a generic captioning-failure sentence for anything else.
 */
function getErrorMessage(error: unknown): string {
  if (error instanceof Error) {
    return error.message;
  }

  return "Something went wrong while captioning the current frame.";
}

/**
 * Capture scene component: attaches `source` to a video element and
 * repeatedly captions frames drawn from it using `generateCaption` from the
 * VLM context.
 */
export function CaptureScene({
  mediaError,
  onChooseVideo,
  onChooseWebcam,
  onDismissMediaError,
  onExit,
  onPromptChange,
  prompt,
  promptPresets,
  source,
}: CaptureSceneProps) {
  const { generateCaption } = useVLM();
  // Explicit element types so `.current` narrows to a usable video/canvas
  // element after the null checks further down, instead of the literal `null`.
  const videoRef = useRef<HTMLVideoElement>(null);
  const canvasRef = useRef<HTMLCanvasElement>(null);
  // Incremented to invalidate caption passes that are still in flight.
  const loopIdRef = useRef(0);
  const [activeCaption, setActiveCaption] = useState("");
  // Without the type argument the empty array literal infers `never[]`.
  const [captionHistory, setCaptionHistory] = useState<CaptionEntry[]>([]);
  const [isGenerating, setIsGenerating] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  // Holds either no error (`null`) or the message from `getErrorMessage`.
  const [runtimeError, setRuntimeError] = useState<string | null>(null);
  const [videoReady, setVideoReady] = useState(false);
  // Falls back to the first preset when the normalized prompt is empty.
  const deferredPrompt = useDeferredValue(
    normalizePrompt(prompt) ||
promptPresets[0].prompt, ); useEffect(() => { const video = videoRef.current; if (!video) { return; } setVideoReady(false); setRuntimeError(null); if (source.kind === "webcam") { video.srcObject = source.stream; video.removeAttribute("src"); void video.play().catch(() => undefined); return () => { video.pause(); video.srcObject = null; }; } video.srcObject = null; video.src = source.url; video.load(); void video.play().catch(() => undefined); return () => { video.pause(); video.removeAttribute("src"); video.load(); }; }, [source]); useEffect(() => { setCaptionHistory([]); setActiveCaption(""); setIsGenerating(false); setIsPaused(false); }, [source]); useEffect(() => { if (!isPaused) { return; } setActiveCaption(""); setIsGenerating(false); }, [isPaused]); const handleCanPlay = () => { setVideoReady(true); void videoRef.current?.play().catch(() => undefined); }; const captureFrame = useEffectEvent(() => { const video = videoRef.current; const canvas = canvasRef.current; if ( !video || !canvas || !videoReady || video.paused || video.ended || video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA || video.videoWidth === 0 || video.videoHeight === 0 ) { return null; } const maxDimension = 960; const scale = Math.min( 1, maxDimension / Math.max(video.videoWidth, video.videoHeight), ); const width = Math.max(1, Math.round(video.videoWidth * scale)); const height = Math.max(1, Math.round(video.videoHeight * scale)); if (canvas.width !== width) { canvas.width = width; } if (canvas.height !== height) { canvas.height = height; } const context = canvas.getContext("2d", { willReadFrequently: true }); if (!context) { return null; } context.drawImage(video, 0, 0, width, height); return context.getImageData(0, 0, width, height); }); const runCaptionPass = useEffectEvent(async (loopId: number) => { if (isPaused) { await wait(120); return; } const frame = captureFrame(); if (!frame) { await wait(120); return; } setRuntimeError(null); setIsGenerating(true); setActiveCaption(""); try 
{ const finalCaption = await generateCaption({ frame, onStream: (text) => { if (loopIdRef.current !== loopId) { return; } setActiveCaption(text); }, prompt: deferredPrompt, }); if (loopIdRef.current !== loopId) { return; } const normalizedCaption = normalizePrompt(finalCaption); if (normalizedCaption.length === 0) { return; } startTransition(() => { setCaptionHistory((current) => { if (current[0]?.text === normalizedCaption) { return current; } return [ { id: createCaptionId(), text: normalizedCaption }, ...current, ].slice(0, CAPTION_LIMIT); }); }); } catch (error) { if (loopIdRef.current !== loopId) { return; } setRuntimeError(getErrorMessage(error)); await wait(240); } finally { if (loopIdRef.current === loopId) { setActiveCaption(""); setIsGenerating(false); } } }); useEffect(() => { loopIdRef.current += 1; const currentLoopId = loopIdRef.current; let mounted = true; const loop = async () => { while (mounted && loopIdRef.current === currentLoopId) { await runCaptionPass(currentLoopId); await wait(72); } }; void loop(); return () => { mounted = false; loopIdRef.current += 1; }; }, [source]); const displayedHistory = [...captionHistory].reverse(); return (