import React, { useEffect, useState } from "react";
import { Button } from "@/components/ui/button";
import { Input } from "@/components/ui/input";
import { NumberInput } from "@/components/ui/number-input";
import { Label } from "@/components/ui/label";
import {
Dialog,
DialogContent,
DialogHeader,
DialogTitle,
DialogDescription,
} from "@/components/ui/dialog";
import { Alert, AlertDescription } from "@/components/ui/alert";
import {
Select,
SelectContent,
SelectItem,
SelectTrigger,
SelectValue,
} from "@/components/ui/select";
import { AlertTriangle, CheckCircle, Loader2, Play, VideoOff } from "lucide-react";
import { RobotRecord } from "@/hooks/useRobots";
import { useApi } from "@/contexts/ApiContext";
import { useToast } from "@/hooks/use-toast";
import { useNavigate } from "react-router-dom";
import {
JobCheckpoint,
PolicyConfigSummary,
getCheckpointPolicyConfig,
listJobCheckpoints,
} from "@/lib/checkpointsApi";
import { startInference } from "@/lib/inferenceApi";
import CheckpointDropdown from "@/components/jobs/CheckpointDropdown";
import { useAvailableCameras } from "@/hooks/useAvailableCameras";
import { useCameraStream } from "@/hooks/useCameraStream";
/**
 * Small preview tile for a single camera.
 *
 * Props:
 * - `deviceId`: identifier handed to `useCameraStream` to select the camera.
 * - `paused`: forwarded to `useCameraStream`; while true the tile shows the
 *   "Released" placeholder instead of a preview.
 *
 * NOTE(review): the markup of both return statements is not visible in this
 * view, so the element that consumes `videoRef` is not documented here —
 * presumably a video element; confirm against the full file.
 */
const CameraThumbnail: React.FC<{ deviceId: string; paused: boolean }> = ({
deviceId,
paused,
}) => {
// The hook is called unconditionally (before any early return) and receives
// the paused flag along with the device id.
const { videoRef, hasError } = useCameraStream(deviceId, paused);
// Placeholder branch: taken when paused, when the hook reports an error, or
// when no device id was supplied. Only the paused case reads "Released";
// the other two read "No preview".
if (paused || hasError || !deviceId) {
return (
{paused ? "Released" : "No preview"}
);
}
// Live-preview branch.
return (
);
};
/** Inputs read by the InferenceModal component. */
interface Props {
// Whether the modal is open; the data-loading effects do nothing while false.
open: boolean;
// Open-state callback; the component calls it with `false` after inference
// has been started successfully.
onOpenChange: (open: boolean) => void;
// Robot to run on, or null if none is selected. Its follower port/config are
// sent with the start request and its cameras are used for auto-binding.
robot: RobotRecord | null;
// Training job whose checkpoints and policy config are fetched.
jobId: string;
// Initial value for the selected checkpoint step; when null, the last
// checkpoint in the loaded list is selected once the list arrives.
initialStep: number | null;
}
// Frame rate written into the `fps` field of every camera entry sent with the
// start-inference request; it is not user-configurable in the visible code.
const DEFAULT_FPS = 30;
/**
 * Modal for starting an inference run of one of a job's checkpoints on a robot.
 *
 * While `open`, it loads the job's checkpoints, fetches the policy config for
 * the selected checkpoint step, and maintains a binding from every camera name
 * in `policyConfig.image_features` to a physical camera index. `handleStart`
 * sends those bindings together with the task text and duration to
 * `startInference`, then closes the modal and navigates to "/inference".
 *
 * NOTE(review): the rendered markup is not visible in this view, so how the
 * state and handlers below are wired to form controls is not documented here.
 */
const InferenceModal: React.FC = ({
open,
onOpenChange,
robot,
jobId,
initialStep,
}) => {
const { baseUrl, fetchWithHeaders } = useApi();
const { toast } = useToast();
const navigate = useNavigate();
// Checkpoints returned by listJobCheckpoints for `jobId`.
const [checkpoints, setCheckpoints] = useState([]);
// Currently selected checkpoint step.
// NOTE(review): `initialStep` is only the useState initial value, so a later
// change of that prop is not picked up — confirm this is intended.
const [selectedStep, setSelectedStep] = useState(initialStep);
// Task text forwarded verbatim as `task` in the start request.
const [task, setTask] = useState("");
// Forwarded as `duration_s` in the start request (presumably seconds).
const [durationS, setDurationS] = useState(60);
// True from the moment Start is triggered; only reset if the request fails.
const [submitting, setSubmitting] = useState(false);
// Policy config of the selected checkpoint, plus its load/error status.
const [policyConfig, setPolicyConfig] = useState(null);
const [policyConfigLoading, setPolicyConfigLoading] = useState(false);
const [policyConfigError, setPolicyConfigError] = useState(null);
// Per expected camera name → user-selected physical camera index (or null).
const [cameraBindings, setCameraBindings] = useState>({});
// The hook is given `enabled: open`, tying camera discovery to the open state.
const { cameras: availableCameras } = useAvailableCameras({ enabled: open });
// Load checkpoints when modal opens.
useEffect(() => {
if (!open) return;
// Set by the cleanup so a response arriving after close or a dependency
// change is ignored instead of overwriting newer state.
let cancelled = false;
listJobCheckpoints(baseUrl, fetchWithHeaders, jobId)
.then((cks) => {
if (cancelled) return;
setCheckpoints(cks);
if (cks.length > 0) {
// Default to the last entry's step — assumes the list is ordered
// oldest → newest; TODO confirm against listJobCheckpoints.
const latest = cks[cks.length - 1].step;
// Keep an existing selection (e.g. from initialStep); only fill in when
// nothing is selected yet.
setSelectedStep((prev) => (prev != null ? prev : latest));
}
})
.catch(() => {
if (cancelled) return;
// NOTE(review): a failed load is shown as an empty list; the error itself
// is not surfaced or logged.
setCheckpoints([]);
});
return () => {
cancelled = true;
};
}, [open, baseUrl, fetchWithHeaders, jobId]);
// Load policy config when step changes.
useEffect(() => {
if (!open || selectedStep == null) {
// Nothing to load: clear the config and any stale error.
// NOTE(review): policyConfigLoading is not reset on this path, and the
// `finally` below skips the reset once cancelled, so closing the modal
// mid-request can leave the loading flag true — verify.
setPolicyConfig(null);
setPolicyConfigError(null);
return;
}
let cancelled = false;
setPolicyConfigLoading(true);
setPolicyConfigError(null);
getCheckpointPolicyConfig(baseUrl, fetchWithHeaders, jobId, selectedStep)
.then((cfg) => {
if (cancelled) return;
setPolicyConfig(cfg);
// Reset camera bindings to one entry per expected camera name.
// Preserve any prior selection that's still relevant.
setCameraBindings((prev) => {
const next: Record = {};
for (const name of Object.keys(cfg.image_features)) {
// Names no longer present in the new config are dropped; new names start
// unbound (null).
next[name] = prev[name] ?? null;
}
return next;
});
})
.catch((e) => {
if (cancelled) return;
setPolicyConfig(null);
setPolicyConfigError(e instanceof Error ? e.message : String(e));
})
.finally(() => {
if (!cancelled) setPolicyConfigLoading(false);
});
return () => {
cancelled = true;
};
}, [open, baseUrl, fetchWithHeaders, jobId, selectedStep]);
// If the selected robot has cameras whose names match a policy-expected
// camera, auto-bind them. Prefer matching by browser device_id (stable
// across cv2 index drift); fall back to the saved camera_index.
useEffect(() => {
if (!policyConfig) return;
const robotCams = robot?.cameras ?? [];
if (robotCams.length === 0 || availableCameras.length === 0) return;
setCameraBindings((prev) => {
let changed = false;
const next = { ...prev };
for (const policyName of Object.keys(policyConfig.image_features)) {
// Never override a binding that is already set (manual or earlier auto-bind).
if (next[policyName] != null) continue;
// Robot camera names are matched to policy names case-insensitively.
const robotCam = robotCams.find(
(c) => c.name.toLowerCase() === policyName.toLowerCase(),
);
if (!robotCam) continue;
// The saved camera must also be currently available; the binding stores
// the live camera's index, not the saved one.
const live =
(robotCam.device_id &&
availableCameras.find((c) => c.deviceId === robotCam.device_id)) ||
availableCameras.find((c) => c.index === robotCam.camera_index);
if (live) {
next[policyName] = live.index;
changed = true;
}
}
// Returning `prev` unchanged lets React skip the state update when nothing
// was bound.
return changed ? next : prev;
});
}, [policyConfig, robot, availableCameras]);
// `ref` of the checkpoint matching the selected step, or null when no step is
// selected or the step is not in the loaded list.
const selectedRef =
selectedStep != null
? checkpoints.find((c) => c.step === selectedStep)?.ref ?? null
: null;
// Camera names the loaded policy expects (empty until a config is loaded).
const expectedCameraNames = policyConfig
? Object.keys(policyConfig.image_features)
: [];
// `every` is true for an empty list, so a policy with no image features
// counts as fully bound.
const allCamerasBound = expectedCameraNames.every(
(name) => cameraBindings[name] != null,
);
// Start is allowed only with a robot whose `is_clean` flag is set, a resolved
// checkpoint ref, a loaded policy config, all cameras bound, and no
// submission in flight.
const canStart =
!!robot &&
robot.is_clean &&
selectedRef != null &&
!!policyConfig &&
allCamerasBound &&
!submitting;
// Builds the camera dictionary and sends the start-inference request. On
// success the modal is closed and the app navigates to "/inference"; on
// failure a destructive toast is shown and `submitting` is cleared.
const handleStart = async () => {
if (!robot || selectedRef == null || !policyConfig) return;
// Setting submitting=true is meant to make every camera preview drop its
// browser stream — required so the rollout subprocess can open the
// same camera index via OpenCV without colliding on the device.
// NOTE(review): the original comment named "CameraPreview", but this file
// defines CameraThumbnail (which shows "Released" while paused); the wiring
// from `submitting` to the previews is in markup not visible here — confirm.
setSubmitting(true);
// Fixed 300 ms pause before sending the request — presumably to give the
// browser time to release the streams; nothing here verifies that it did.
await new Promise((r) => setTimeout(r, 300));
const cameraDict: Record = {};
for (const [name, dims] of Object.entries(policyConfig.image_features)) {
const idx = cameraBindings[name];
// Unbound cameras are left out of the request rather than sent as null.
if (idx == null) continue;
cameraDict[name] = {
type: "opencv",
camera_index: idx,
width: dims.width,
height: dims.height,
fps: DEFAULT_FPS,
};
}
try {
await startInference(baseUrl, fetchWithHeaders, {
follower_port: robot.follower_port,
follower_config: robot.follower_config,
policy_ref: selectedRef,
task,
cameras: cameraDict,
duration_s: durationS,
});
// NOTE(review): `submitting` is not reset on this success path; if the
// component stays mounted after closing, it would reopen with Start
// disabled — verify.
onOpenChange(false);
navigate("/inference");
} catch (e) {
toast({
title: "Couldn't start inference",
description: e instanceof Error ? e.message : String(e),
variant: "destructive",
});
// Failure: bring the previews back so the user can adjust.
setSubmitting(false);
}
};
// Records the camera index chosen for one policy camera name. The value
// arrives as a string and is converted with Number().
// NOTE(review): a non-numeric string yields NaN, which passes the `!= null`
// checks above and would count as bound — confirm the caller only passes
// numeric strings.
const onCameraBindingChange = (name: string, value: string) => {
const idx = Number(value);
setCameraBindings((prev) => ({ ...prev, [name]: idx }));
};
return (
);
};
export default InferenceModal;