LFM2-VL-WebGPU / src /components /CaptureScene.tsx
mlabonne's picture
upload demo files (#1)
01488bc
import {
startTransition,
useDeferredValue,
useEffect,
useEffectEvent,
useRef,
useState,
} from "react";
import { ArrowLeft, Camera, Film, Pause, Play } from "lucide-react";
import { BrandMark } from "./BrandMark";
import { useVLM } from "../context/VLMContext";
/**
 * The video input currently being captioned, discriminated on `kind`.
 *
 * - `"webcam"`: carries a live `MediaStream`, which the capture scene attaches
 *   to the video element's `srcObject`.
 * - `"file"`: carries a URL, which the capture scene assigns to the video
 *   element's `src`; its `label` is what the status pill displays.
 */
export type CaptureSource =
| {
kind: "webcam";
label: string;
stream: MediaStream;
}
| {
kind: "file";
label: string;
url: string;
};
/**
 * One finished caption kept in the on-screen history.
 *
 * `id` is used as the React list key for the history bubble; `text` is the
 * whitespace-normalized caption that the bubble renders.
 */
type CaptionEntry = {
id: string;
text: string;
};
/** Props accepted by the `CaptureScene` component. */
type CaptureSceneProps = {
// Message rendered in the dismissible alert; `null` hides the alert.
mediaError: string | null;
// Invoked when the "Video file" button is clicked.
onChooseVideo: () => void;
// Invoked when the "Webcam" button is clicked; the returned promise is not awaited.
onChooseWebcam: () => Promise<void>;
// Invoked when the alert's "Dismiss" button is clicked.
onDismissMediaError: () => void;
// Invoked when the "Back" button is clicked.
onExit: () => void;
// Receives the raw textarea value on every edit, or a preset's prompt when its chip is clicked.
onPromptChange: (prompt: string) => void;
// Current prompt text; it is the controlled value of the textarea.
prompt: string;
// Preset chips (`display` is the chip label). The first preset's `prompt` is
// used for captioning whenever `prompt` is blank after normalization.
promptPresets: readonly {
display: string;
prompt: string;
}[];
// The webcam stream or video file to play and caption.
source: CaptureSource;
};
/** Maximum number of finished captions kept in the history; older entries are dropped. */
const CAPTION_LIMIT = 4;
/**
 * Returns a promise that settles once a browser timer of `milliseconds` fires.
 *
 * @param milliseconds Delay handed to `window.setTimeout`.
 * @returns A promise that resolves with no value and never rejects.
 */
function wait(milliseconds: number): Promise<void> {
  const scheduleResolve = (done: () => void) => {
    window.setTimeout(done, milliseconds);
  };
  return new Promise<void>(scheduleResolve);
}
/**
 * Produces an identifier for a new caption history entry.
 *
 * Uses `crypto.randomUUID()` when the runtime exposes it; otherwise builds an
 * id from the current timestamp plus a random number.
 */
function createCaptionId() {
  // Keep the call on `crypto` itself so the method runs with its owner as `this`.
  const uuid = globalThis.crypto?.randomUUID?.();
  if (uuid !== undefined && uuid !== null) {
    return uuid;
  }

  return `caption-${Date.now()}-${Math.random()}`;
}
/**
 * Collapses every run of whitespace in `text` to a single space and strips
 * leading and trailing whitespace.
 *
 * @param text Raw prompt or caption text.
 * @returns The single-spaced text, or `""` when `text` is blank.
 */
function normalizePrompt(text: string) {
  // After trimming, splitting on whitespace runs yields only the words
  // (or a single empty string for blank input), so joining restores
  // single-space separation.
  const words = text.trim().split(/\s+/);
  return words.join(" ");
}
/**
 * Converts a caught value into text suitable for the runtime error alert.
 *
 * The result is never blank: the alert is only rendered for a truthy message,
 * so an `Error` with an empty `message` would otherwise hide the failure.
 *
 * @param error The value caught from a failed caption pass.
 * @returns The error's own message (for `Error` instances and thrown strings)
 *   when it contains visible text, otherwise a generic fallback sentence.
 */
function getErrorMessage(error: unknown) {
  const fallback = "Something went wrong while captioning the current frame.";

  let detail = "";
  if (error instanceof Error) {
    detail = error.message;
  } else if (typeof error === "string") {
    // Some runtimes reject with a bare string; keep its text.
    detail = error;
  }

  return detail.trim().length > 0 ? detail : fallback;
}
/**
 * Full-screen capture view.
 *
 * Plays the selected source (webcam stream or video file) in a `<video>`
 * element and runs a continuous caption loop: grab a downscaled frame, pass
 * it with the current prompt to `generateCaption`, show the streamed text in
 * the "Live caption" bubble, and push the finished caption onto a short
 * history (at most `CAPTION_LIMIT` entries).
 *
 * Robustness notes:
 * - An empty `promptPresets` list no longer throws during render; the
 *   fallback prompt is then the empty string.
 * - A failure while grabbing the frame is reported through the runtime error
 *   alert instead of terminating the caption loop.
 * - While paused, text from a generation that was already in flight is
 *   ignored, so the live bubble stays hidden and nothing is added to history.
 *
 * @param props See `CaptureSceneProps`.
 */
export function CaptureScene({
  mediaError,
  onChooseVideo,
  onChooseWebcam,
  onDismissMediaError,
  onExit,
  onPromptChange,
  prompt,
  promptPresets,
  source,
}: CaptureSceneProps) {
  const { generateCaption } = useVLM();
  const videoRef = useRef<HTMLVideoElement>(null);
  const canvasRef = useRef<HTMLCanvasElement>(null);
  // Token identifying the live caption loop. It is bumped whenever a loop is
  // started or torn down, so async work belonging to an older loop can detect
  // that it is stale and stop touching state.
  const loopIdRef = useRef(0);
  // Mirror of `isPaused` that in-flight async callbacks can read; the state
  // value they closed over may be out of date by the time they run.
  const isPausedRef = useRef(false);
  const [activeCaption, setActiveCaption] = useState("");
  const [captionHistory, setCaptionHistory] = useState<CaptionEntry[]>([]);
  const [isGenerating, setIsGenerating] = useState(false);
  const [isPaused, setIsPaused] = useState(false);
  const [runtimeError, setRuntimeError] = useState<string | null>(null);
  const [videoReady, setVideoReady] = useState(false);

  // Guarded lookup: an empty preset list must not crash the render.
  const fallbackPrompt = promptPresets[0]?.prompt ?? "";
  // `||` is intentional: a prompt that is blank after normalization falls
  // back to the first preset.
  const deferredPrompt = useDeferredValue(
    normalizePrompt(prompt) || fallbackPrompt,
  );

  // Attach the current source to the <video> element and detach it on change
  // or unmount.
  useEffect(() => {
    const video = videoRef.current;
    if (!video) {
      return;
    }

    setVideoReady(false);
    setRuntimeError(null);

    if (source.kind === "webcam") {
      video.srcObject = source.stream;
      video.removeAttribute("src");
      // Playback may be rejected (e.g. by autoplay rules); `handleCanPlay`
      // tries again, so the rejection is deliberately ignored.
      void video.play().catch(() => undefined);

      return () => {
        video.pause();
        video.srcObject = null;
      };
    }

    video.srcObject = null;
    video.src = source.url;
    video.load();
    void video.play().catch(() => undefined);

    return () => {
      video.pause();
      video.removeAttribute("src");
      // Reload with no source so the element drops the previous media.
      video.load();
    };
  }, [source]);

  // A new source starts with a clean caption state and un-paused.
  useEffect(() => {
    setCaptionHistory([]);
    setActiveCaption("");
    setIsGenerating(false);
    setIsPaused(false);
  }, [source]);

  // Keep the ref in sync and hide the live bubble as soon as we pause.
  useEffect(() => {
    isPausedRef.current = isPaused;
    if (!isPaused) {
      return;
    }

    setActiveCaption("");
    setIsGenerating(false);
  }, [isPaused]);

  const handleCanPlay = () => {
    setVideoReady(true);
    void videoRef.current?.play().catch(() => undefined);
  };

  // Draws the current video frame onto the canvas and returns its pixels, or
  // `null` when no usable frame is available yet.
  const captureFrame = useEffectEvent(() => {
    const video = videoRef.current;
    const canvas = canvasRef.current;

    if (
      !video ||
      !canvas ||
      !videoReady ||
      video.paused ||
      video.ended ||
      video.readyState < HTMLMediaElement.HAVE_CURRENT_DATA ||
      video.videoWidth === 0 ||
      video.videoHeight === 0
    ) {
      return null;
    }

    // Downscale so the longer edge is at most `maxDimension`; never upscale.
    const maxDimension = 960;
    const scale = Math.min(
      1,
      maxDimension / Math.max(video.videoWidth, video.videoHeight),
    );
    const width = Math.max(1, Math.round(video.videoWidth * scale));
    const height = Math.max(1, Math.round(video.videoHeight * scale));

    // Only assign when the size changed: setting a canvas dimension resets
    // its bitmap.
    if (canvas.width !== width) {
      canvas.width = width;
    }
    if (canvas.height !== height) {
      canvas.height = height;
    }

    const context = canvas.getContext("2d", { willReadFrequently: true });
    if (!context) {
      return null;
    }

    context.drawImage(video, 0, 0, width, height);
    return context.getImageData(0, 0, width, height);
  });

  // Runs one capture-and-caption cycle for the loop identified by `loopId`.
  const runCaptionPass = useEffectEvent(async (loopId: number) => {
    if (isPaused) {
      await wait(120);
      return;
    }

    let frame: ImageData | null;
    try {
      frame = captureFrame();
    } catch (error) {
      // Drawing or reading pixels can throw; report it and let the loop retry
      // rather than letting the rejection end the loop.
      if (loopIdRef.current === loopId) {
        setRuntimeError(getErrorMessage(error));
      }
      await wait(240);
      return;
    }

    if (!frame) {
      await wait(120);
      return;
    }

    setRuntimeError(null);
    setIsGenerating(true);
    setActiveCaption("");

    try {
      const finalCaption = await generateCaption({
        frame,
        onStream: (text) => {
          // Drop tokens from a superseded loop, and tokens that arrive after
          // the user paused (the pause effect already cleared the bubble).
          if (loopIdRef.current !== loopId || isPausedRef.current) {
            return;
          }
          // Each streamed value replaces the live caption text.
          setActiveCaption(text);
        },
        prompt: deferredPrompt,
      });

      if (loopIdRef.current !== loopId || isPausedRef.current) {
        return;
      }

      const normalizedCaption = normalizePrompt(finalCaption);
      if (normalizedCaption.length === 0) {
        return;
      }

      startTransition(() => {
        setCaptionHistory((current) => {
          // Skip a caption identical to the newest entry.
          if (current[0]?.text === normalizedCaption) {
            return current;
          }

          // Newest first, capped at CAPTION_LIMIT entries.
          return [
            { id: createCaptionId(), text: normalizedCaption },
            ...current,
          ].slice(0, CAPTION_LIMIT);
        });
      });
    } catch (error) {
      if (loopIdRef.current !== loopId) {
        return;
      }

      setRuntimeError(getErrorMessage(error));
      // Brief back-off before the loop tries again.
      await wait(240);
    } finally {
      if (loopIdRef.current === loopId) {
        setActiveCaption("");
        setIsGenerating(false);
      }
    }
  });

  // Drive the caption loop for the current source; restart it on change.
  useEffect(() => {
    loopIdRef.current += 1;
    const currentLoopId = loopIdRef.current;
    let mounted = true;

    const loop = async () => {
      while (mounted && loopIdRef.current === currentLoopId) {
        await runCaptionPass(currentLoopId);
        await wait(72);
      }
    };

    void loop();

    return () => {
      mounted = false;
      // Invalidate any pass still awaiting a result.
      loopIdRef.current += 1;
    };
  }, [source]);

  // History is stored newest-first; render it oldest-first so the newest
  // entry sits right before the live bubble.
  const displayedHistory = [...captionHistory].reverse();

  return (
    <main className="capture-scene">
      <video
        ref={videoRef}
        autoPlay
        className="capture-video"
        loop={source.kind === "file"}
        muted
        onCanPlay={handleCanPlay}
        playsInline
      />
      <canvas ref={canvasRef} className="capture-canvas" />
      <div className="capture-scrim" />
      <header className="capture-toolbar">
        <div className="capture-toolbar__left">
          <BrandMark />
          <div className="status-pill">
            <span className={`status-dot ${videoReady ? "is-live" : ""}`} />
            {source.kind === "webcam" ? "Webcam" : source.label}
          </div>
        </div>
      </header>
      {mediaError ? (
        <div className="floating-alert" role="alert">
          <span>{mediaError}</span>
          <button
            className="ghost-button ghost-button--small"
            onClick={onDismissMediaError}
            type="button"
          >
            Dismiss
          </button>
        </div>
      ) : null}
      {runtimeError ? (
        <div className="floating-alert floating-alert--secondary" role="alert">
          <span>{runtimeError}</span>
        </div>
      ) : null}
      <section className="prompt-dock">
        <span className="dock-label">Prompt</span>
        <div className="prompt-chip-row">
          {promptPresets.map((preset) => (
            <button
              key={preset.display}
              className={`prompt-chip ${prompt === preset.prompt ? "is-active" : ""}`}
              onClick={() => onPromptChange(preset.prompt)}
              type="button"
            >
              {preset.display}
            </button>
          ))}
        </div>
        <textarea
          className="prompt-input"
          onChange={(event) => onPromptChange(event.target.value)}
          placeholder="Ask the model anything about the current frame."
          rows={3}
          spellCheck={false}
          value={prompt}
        />
      </section>
      <section className="capture-side-rail">
        <div className="capture-actions">
          <button
            className="ghost-button"
            onClick={() => setIsPaused((current) => !current)}
            type="button"
          >
            {isPaused ? (
              <Play className="button-icon" size={16} strokeWidth={1.8} />
            ) : (
              <Pause className="button-icon" size={16} strokeWidth={1.8} />
            )}
            {isPaused ? "Resume" : "Pause"}
          </button>
          <button
            className="ghost-button"
            onClick={() => void onChooseWebcam()}
            type="button"
          >
            <Camera className="button-icon" size={16} strokeWidth={1.8} />
            Webcam
          </button>
          <button
            className="ghost-button"
            onClick={onChooseVideo}
            type="button"
          >
            <Film className="button-icon" size={16} strokeWidth={1.8} />
            Video file
          </button>
          <button className="ghost-button" onClick={onExit} type="button">
            <ArrowLeft className="button-icon" size={16} strokeWidth={1.8} />
            Back
          </button>
        </div>
        <section className="caption-dock">
          {displayedHistory.map((caption, index) => {
            // Older entries get a larger depth, so they are fainter, smaller
            // and shifted further up.
            const depth = displayedHistory.length - index;
            const opacity = Math.max(0.18, 1 - depth * 0.18);
            const scale = 1 - depth * 0.04;
            return (
              <article
                key={caption.id}
                className="caption-bubble caption-bubble--history"
                style={{
                  opacity,
                  transform: `translateY(${-depth * 8}px) scale(${scale})`,
                }}
              >
                {caption.text}
              </article>
            );
          })}
          {activeCaption || isGenerating ? (
            <article className="caption-bubble caption-bubble--active">
              <div className="caption-meta">Live caption</div>
              {activeCaption || (
                <span className="caption-placeholder">
                  Scanning current frame...
                </span>
              )}
            </article>
          ) : null}
        </section>
      </section>
    </main>
  );
}