flood-diffusion-api / index.html
SamSak09's picture
Update index.html
c3b8c3c verified
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>FloodDiffusion: Live 3D Client</title>
<style>
  /* Full-window canvas page: no margins, no scrollbars. */
  body {
    margin: 0;
    overflow: hidden;
    background-color: #ffffff;
    color: #333;
    font-family: sans-serif;
  }

  /* Floating prompt/metrics card pinned to the top-left corner, above the canvas. */
  #ui-panel {
    position: absolute;
    top: 20px;
    left: 20px;
    z-index: 100;
    background: rgba(255, 255, 255, 0.95);
    padding: 20px;
    border: 1px solid #ccc;
    border-radius: 8px;
    width: 350px;
    box-shadow: 0 4px 15px rgba(0,0,0,0.1);
  }

  /* Prompt text box; the focus rule adds a soft teal glow. */
  input[type="text"] {
    width: 100%;
    padding: 12px;
    margin-bottom: 10px;
    font-size: 14px;
    border: 2px solid #008080;
    border-radius: 6px;
    box-sizing: border-box;
    outline: none;
    transition: box-shadow 0.3s;
  }
  input[type="text"]:focus {
    box-shadow: 0 0 8px rgba(0, 128, 128, 0.4);
  }

  /* Monospace read-outs (latency, payload size). */
  .metrics {
    font-family: monospace;
    font-size: 13px;
    margin-top: 15px;
    color: #555;
    line-height: 1.6;
  }

  /* One-line request status under the prompt box. */
  #status {
    font-weight: bold;
    margin-top: 5px;
    color: teal;
  }

  /* Mouse-control hint at the bottom of the card. */
  #instructions {
    font-size: 11px;
    color: #666;
    margin-top: 15px;
    text-align: center;
    font-style: italic;
    border-top: 1px solid #eee;
    padding-top: 10px;
  }

  /* Connection pill pinned to the top-right corner. */
  #socket-status {
    position: absolute;
    top: 20px;
    right: 20px;
    padding: 6px 12px;
    background: rgba(255,255,255,0.9);
    border: 1px solid #ccc;
    border-radius: 20px;
    font-size: 12px;
    font-weight: bold;
    color: #666;
    z-index: 100;
    box-shadow: 0 2px 5px rgba(0,0,0,0.1);
  }
</style>
</head>
<body>
<div id="socket-status">🔌 Disconnected</div>
<div id="ui-panel">
<h3 style="margin-top: 0; color: #222;">Live Motion Prompt</h3>
<input type="text" id="promptInput" placeholder="Start typing (e.g., 'running fast')..." />
<div id="status">Waiting for input...</div>
<div class="metrics" id="metricsPanel">
  <div id="latency">Inference Latency: --</div>
  <div id="payloadSize">Payload Size: --</div>
  <!-- The SMPL loader script writes its result ("Rigged", "Not rigged",
       "Load failed") into #smplStatus; without this element those status
       updates have nowhere to go. -->
  <div id="smplStatus">SMPL Mesh: --</div>
</div>
<div id="instructions">🖱️ Left Click to Rotate | 📜 Scroll to Zoom</div>
</div>
<!-- three.js r128 core plus the legacy (non-module) example add-ons that attach
     themselves to the global THREE object. GLTFLoader must be included here:
     the inline script below calls `new THREE.GLTFLoader()`, and without this
     tag that call throws and aborts the rest of the script. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/three.js/r128/three.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/three@0.128.0/examples/js/controls/OrbitControls.js"></script>
<script src="https://cdn.jsdelivr.net/npm/three@0.128.0/examples/js/loaders/GLTFLoader.js"></script>
<script>
// --- 1. SET UP THE 3D WORLD (WITH SHADOWS) ---
// Background and exponential fog share one colour so the floor fades into
// the horizon instead of ending at a hard edge.
const scene = new THREE.Scene();
scene.background = new THREE.Color(0xf0f0f0);
scene.fog = new THREE.FogExp2(0xf0f0f0, 0.05);

// Antialiased WebGL canvas sized to the window, with soft (PCF) shadow maps on.
const renderer = new THREE.WebGLRenderer({ antialias: true });
Object.assign(renderer.shadowMap, { enabled: true, type: THREE.PCFSoftShadowMap });
renderer.setSize(window.innerWidth, window.innerHeight);
document.body.appendChild(renderer.domElement);

// 75 degree perspective camera, starting 5 units back at a height of 1.
const camera = new THREE.PerspectiveCamera(75, window.innerWidth / window.innerHeight, 0.1, 1000);
camera.position.set(0, 1, 5);

// Mouse orbit/zoom with damping; damping only takes effect because the
// render loop calls controls.update() every frame.
const controls = new THREE.OrbitControls(camera, renderer.domElement);
Object.assign(controls, { enableDamping: true, dampingFactor: 0.05 });
// --- 2. LIGHTING & ENVIRONMENT ---
// Dim ambient fill so shadowed sides are not pure black.
const ambientLight = new THREE.AmbientLight(0xffffff, 0.4);

// Key light: the only shadow caster, rendered into a 2048x2048 shadow map.
const directionalLight = new THREE.DirectionalLight(0xffffff, 0.8);
directionalLight.position.set(5, 10, 5);
directionalLight.castShadow = true;
directionalLight.shadow.mapSize.set(2048, 2048);

// 100x100 ground plane, laid flat (rotated -90 degrees about X), that receives shadows.
const floorGeometry = new THREE.PlaneGeometry(100, 100);
const floorMaterial = new THREE.MeshStandardMaterial({ color: 0xdddddd, roughness: 0.8 });
const floor = new THREE.Mesh(floorGeometry, floorMaterial);
floor.rotation.set(-Math.PI / 2, 0, 0);
floor.receiveShadow = true;

// Light-grey reference grid with the same extent as the floor (1-unit cells).
const gridHelper = new THREE.GridHelper(100, 100, 0xcccccc, 0xcccccc);

// Added in the same order as before: ambient, key light, floor, grid.
scene.add(ambientLight, directionalLight, floor, gridHelper);
// ============================================================
// 3. SKELETON (placed at x = +1.5, right side)
// ============================================================
// Stick-figure view of the raw joint positions: one sphere per joint and one
// cylinder per bone. The meshes are created once here and only repositioned
// by the animation loop.
const SKELETON_OFFSET_X = 1.5;
const maxPoints = 22;

// Flat list of joint-index pairs (a0,b0, a1,b1, ...); each pair is one bone.
const boneConnections = [
  0,1, 0,2, 0,3,
  1,4, 2,5, 3,6,
  4,7, 5,8, 6,9,
  7,10, 8,11, 9,12,
  9,13, 9,14, 12,15,
  13,16,14,17,
  16,18,17,19,
  18,20,19,21
];

const jointMaterial = new THREE.MeshStandardMaterial({ color: 0x333333, roughness: 0.2, metalness: 0.8 });
const boneMaterial = new THREE.MeshStandardMaterial({ color: 0xe0e0e0, roughness: 0.5, metalness: 0.1 });

// All joints are identical spheres, so they share one geometry instead of
// allocating a separate (identical) SphereGeometry per joint.
const jointGeo = new THREE.SphereGeometry(0.08, 16, 16);
const jointMeshes = [];
for (let i = 0; i < maxPoints; i++) {
  const joint = new THREE.Mesh(jointGeo, jointMaterial);
  joint.castShadow = true;
  scene.add(joint);
  jointMeshes.push(joint);
}

// Unit-length cylinder rotated so its axis lies along Z: the animation loop
// stretches it with scale.z and aims it with lookAt(), which orients an
// object's Z axis toward the target.
const capsuleGeo = new THREE.CylinderGeometry(0.06, 0.06, 1, 16);
capsuleGeo.rotateX(Math.PI / 2);
const boneMeshes = [];
for (let i = 0; i < boneConnections.length; i += 2) {
  const bone = new THREE.Mesh(capsuleGeo, boneMaterial);
  bone.castShadow = true;
  scene.add(bone);
  boneMeshes.push(bone);
}
// ============================================================
// 4. BONE REMAPPING - GLB bone order -> FloodDiffusion joint index
// ============================================================
// The SMPL Blender addon exports bones depth-first (left leg, right
// leg, spine, arms), but FloodDiffusion uses breadth-first SMPL-22.
// Confirmed from console: 25 bones (root + 22 SMPL + 2 hand bones).
//
// GLB idx : bone name  -> FD joint
//  0: root       -> FD 0  (use pelvis pos)
//  1: Pelvis     -> FD 0
//  2: L_Hip      -> FD 1
//  3: L_Knee     -> FD 4
//  4: L_Ankle    -> FD 7
//  5: L_Foot     -> FD 10
//  6: R_Hip      -> FD 2
//  7: R_Knee     -> FD 5
//  8: R_Ankle    -> FD 8
//  9: R_Foot     -> FD 11
// 10: Spine1     -> FD 3
// 11: Spine2     -> FD 6
// 12: Spine3     -> FD 9
// 13: Neck       -> FD 12
// 14: Head       -> FD 15
// 15: L_Collar   -> FD 13
// 16: L_Shoulder -> FD 16
// 17: L_Elbow    -> FD 18
// 18: L_Wrist    -> FD 20
// 19: L_Hand     -> FD 20 (no FD equivalent, reuse wrist)
// 20: R_Collar   -> FD 14
// 21: R_Shoulder -> FD 17
// 22: R_Elbow    -> FD 19
// 23: R_Wrist    -> FD 21
// 24: R_Hand     -> FD 21 (no FD equivalent, reuse wrist)

// FloodDiffusion joint index for each GLB bone, one row per limb chain.
const BONE_TO_FD = [
  0, 0,                // root, Pelvis
  1, 4, 7, 10,         // left leg:  hip, knee, ankle, foot
  2, 5, 8, 11,         // right leg: hip, knee, ankle, foot
  3, 6, 9, 12, 15,     // spine1..3, neck, head
  13, 16, 18, 20, 20,  // left arm:  collar, shoulder, elbow, wrist, hand
  14, 17, 19, 21, 21   // right arm: collar, shoulder, elbow, wrist, hand
];

// Parent GLB bone index for each GLB bone (-1 = no parent), same row layout.
// Every parent index is smaller than its child's, so a single forward pass
// over the bones always visits a parent before its children.
const GLB_PARENTS = [
  -1, 0,               // root, Pelvis
  1, 2, 3, 4,          // left leg hangs off Pelvis
  1, 6, 7, 8,          // right leg hangs off Pelvis
  1, 10, 11, 12, 13,   // spine chain up to the head
  12, 15, 16, 17, 18,  // left arm branches from Spine3
  12, 20, 21, 22, 23   // right arm branches from Spine3
];
// ============================================================
// 5. SMPL RIGGED MESH (placed at x = -1.5, left side)
// ============================================================
const SMPL_OFFSET_X = -1.5;

// Scratch vectors reused by the pose solver to avoid per-bone allocations.
const _v1 = new THREE.Vector3();
const _v2 = new THREE.Vector3();

// Rotation that takes FloodDiffusion world-space joint positions into the
// SMPL scene's LOCAL space (compensates the Blender Z-up vs Y-up mismatch).
// Stays the identity until the GLB has loaded; it is filled in place.
const smplInvRotMatrix = new THREE.Matrix4();

// Populated by the GLB load callback; the pose solver is a no-op until
// smplLoaded becomes true.
let smplLoaded = false;
let smplSkinnedMesh = null;
let smplSkeleton = null;
let restLocalDirs = [];
/**
 * Write `text` into the optional #smplStatus element.
 * The element may be absent from the page, so this is a no-op in that case;
 * every status update (including the load-error path) goes through here so
 * none of them can throw on a missing element.
 * @param {string} text - status line to display.
 */
function setSmplStatus(text) {
  const el = document.getElementById('smplStatus');
  if (el) el.textContent = text;
}

/**
 * GLB onLoad callback: adopt the first SkinnedMesh as the SMPL body, place
 * the loaded scene at SMPL_OFFSET_X and capture the data the pose solver
 * needs (inverse scene rotation and rest-pose bone directions).
 * @param {object} gltf - result object passed by THREE.GLTFLoader; only `gltf.scene` is read.
 */
function onSmplLoaded(gltf) {
  // ── Find the SkinnedMesh ──────────────────────────────────
  // Only the first one found is used; its material is replaced with a flat
  // blue one. `skinning: true` is the material flag that enables skeletal
  // deformation in this three.js release (r128).
  gltf.scene.traverse((child) => {
    if (child.isSkinnedMesh && !smplSkinnedMesh) {
      smplSkinnedMesh = child;
      smplSkeleton = child.skeleton;
      child.material = new THREE.MeshStandardMaterial({
        color: 0x88bbff, roughness: 0.5, metalness: 0.1, skinning: true
      });
      child.castShadow = true;
    }
  });

  // The model is shown on the left whether or not it turned out to be rigged.
  gltf.scene.position.set(SMPL_OFFSET_X, 0, 0);
  scene.add(gltf.scene);

  if (!smplSkinnedMesh) {
    // Static (unrigged) model: displayed, but never posed.
    setSmplStatus("SMPL Mesh: ⚠️ Not rigged");
    return;
  }

  // ── Extract the scene's inverse rotation ──────────────────
  // Per the original author's notes, the Blender export carries a -90° X
  // rotation (Z-up -> Y-up). The rotation-only part of the inverse world
  // matrix is what brings FloodDiffusion's Y-up joint positions into the
  // space the bone offsets are expressed in.
  gltf.scene.updateWorldMatrix(true, true);
  const fullInv = new THREE.Matrix4().copy(gltf.scene.matrixWorld).invert();
  smplInvRotMatrix.extractRotation(fullInv);

  // ── Log bones to verify ordering ─────────────────────────
  console.log("[SMPL] Bones:", smplSkeleton.bones.map((b, i) => `${i}: ${b.name}`).join(', '));

  // ── Capture rest-pose local bone directions ───────────────
  // bone.position is the bone's offset from its parent, so the normalised
  // value is the rest direction of the parent->bone segment. A bone with a
  // zero offset yields a zero vector, which applyPoseToSMPL skips.
  restLocalDirs = smplSkeleton.bones.map(bone => bone.position.clone().normalize());
  smplLoaded = true;
  setSmplStatus(`SMPL Mesh: ✅ Rigged (${smplSkeleton.bones.length} bones)`);
}

// THREE.GLTFLoader is provided by a separate example script. If that script
// was not loaded, report it and carry on without the mesh rather than
// throwing here and aborting the rest of this script (animation loop,
// WebSocket pipeline, input handling).
const gltfLoader = typeof THREE.GLTFLoader === 'function' ? new THREE.GLTFLoader() : null;
if (gltfLoader) {
  gltfLoader.load('smpl.glb', onSmplLoaded, undefined, (err) => {
    console.error("[SMPL] Failed to load smpl.glb:", err);
    setSmplStatus("SMPL Mesh: ❌ Load failed");
  });
} else {
  console.error("[SMPL] THREE.GLTFLoader is not available; include examples/js/loaders/GLTFLoader.js to load smpl.glb.");
  setSmplStatus("SMPL Mesh: ❌ Load failed");
}
// ============================================================
// 6. APPLY POSE TO SMPL SKELETON FROM JOINT POSITIONS
// Forward-kinematics top-down pass.
// frameData shape: [22][3] - Y-up world-space joint positions.
//
// KEY FIX: Blender exports with a -90° X rotation baked into
// gltf.scene to convert its Z-up world to glTF Y-up.
// All bone local positions live in Blender's Z-up space BEFORE
// that scene rotation. So we must transform FloodDiffusion's
// Y-up positions through smplInvRotMatrix before comparing
// them to rest-pose bone directions.
// ============================================================

// Per-bone world rotations from the most recent solve, indexed by GLB bone.
const _worldRots = new Array(25);

/**
 * Convert a FloodDiffusion Y-up world joint into SMPL scene local space.
 * @param {number} x
 * @param {number} y
 * @param {number} z
 * @returns {THREE.Vector3} a freshly allocated vector holding (x, y, z)
 *   transformed by smplInvRotMatrix.
 */
function toLocal(x, y, z) {
  const point = new THREE.Vector3(x, y, z);
  point.applyMatrix4(smplInvRotMatrix);
  return point;
}
/**
 * Pose the SMPL rig from one frame of joint positions.
 *
 * Walks the GLB bones in index order (parents always precede children in
 * GLB_PARENTS). For each bone it rotates the rest direction of the
 * parent->bone segment onto the direction between the corresponding
 * FloodDiffusion joints and writes the result to bone.quaternion.
 *
 * Does nothing when the rig has not loaded, or when `frameData` has fewer
 * joints than the lookup tables reference. Bones beyond the end of the
 * lookup tables are left untouched, so a rig whose bone count differs from
 * the tables cannot throw inside the render loop.
 *
 * @param {number[][]} frameData - joint positions indexed by FloodDiffusion
 *   joint; each entry is [x, y, z].
 */
function applyPoseToSMPL(frameData) {
  if (!smplLoaded || !smplSkeleton) return;
  // BONE_TO_FD addresses joints 0..maxPoints-1; a shorter frame would make
  // localJoints[...] undefined below.
  if (!Array.isArray(frameData) || frameData.length < maxPoints) return;

  const bones = smplSkeleton.bones;
  // Only drive bones that every lookup table covers (expected: 25).
  const n = Math.min(bones.length, GLB_PARENTS.length, BONE_TO_FD.length, restLocalDirs.length);
  if (n === 0) return;

  // Pre-convert all FD joints to SMPL scene local space
  const localJoints = frameData.map(j => toLocal(j[0], j[1], j[2]));

  // ── ROOT BONE (no FD equivalent, park at pelvis position) ─
  bones[0].position.copy(localJoints[0]);
  bones[0].quaternion.identity();
  _worldRots[0] = new THREE.Quaternion();

  // ── CHILD BONES ──────────────────────────────────────────
  // NOTE(review): rotating a bone moves its children, not the bone itself,
  // yet Q_needed below is derived from the parent->this-bone segment and is
  // stored as this bone's world rotation without composing Q_parent. It looks
  // like each segment may lag one level behind the target pose; confirm
  // against the rendered mesh before changing the maths.
  for (let i = 1; i < n; i++) {
    const p = GLB_PARENTS[i];
    const Q_parent = _worldRots[p];
    const restLocal = restLocalDirs[i];

    if (restLocal.lengthSq() < 1e-6) {
      // Bone sits exactly on its parent: no direction to align, inherit.
      bones[i].quaternion.identity();
      _worldRots[i] = Q_parent.clone();
      continue;
    }

    // Rest direction of this bone in SMPL-local world space
    _v1.copy(restLocal).applyQuaternion(Q_parent);

    // Target: FD joint of this bone minus FD joint of parent
    const myFD = BONE_TO_FD[i];
    const parFD = BONE_TO_FD[p];
    _v2.copy(localJoints[myFD]).sub(localJoints[parFD]);

    if (_v2.lengthSq() < 1e-6) {
      // Zero-length (e.g. hand bone shares FD index with wrist)
      bones[i].quaternion.identity();
      _worldRots[i] = Q_parent.clone();
      continue;
    }
    _v2.normalize();

    // Shortest-arc rotation taking the current segment direction onto the
    // target direction, then expressed relative to the parent.
    const Q_needed = new THREE.Quaternion().setFromUnitVectors(_v1, _v2);
    const localRot = Q_parent.clone().invert().multiply(Q_needed);
    bones[i].quaternion.copy(localRot);
    _worldRots[i] = Q_needed;
  }

  smplSkeleton.update();
}
// ============================================================
// 7. ANIMATION LOOP
// ============================================================
// Playback state; motionData/currentFrame/isPlaying are written by the
// WebSocket message handler when a new clip arrives.
let motionData = [];
let currentFrame = 0;
let isPlaying = false;
let lastFrameTime = 0;

// Scratch vectors reused every frame so the loop does not allocate.
const pA = new THREE.Vector3();
const pB = new THREE.Vector3();
const boneMid = new THREE.Vector3();
const rootPos = new THREE.Vector3();
const cameraTarget = new THREE.Vector3(0, 1, 0);

/**
 * True when `frameData` holds at least `maxPoints` joints, each an array of
 * at least three components.
 * @param {*} frameData - candidate frame taken from motionData.
 * @returns {boolean}
 */
function isValidFrame(frameData) {
  if (!Array.isArray(frameData) || frameData.length < maxPoints) return false;
  for (let i = 0; i < maxPoints; i++) {
    if (!Array.isArray(frameData[i]) || frameData[i].length < 3) return false;
  }
  return true;
}

/**
 * Move the stick-figure joints and bones to one frame's joint positions,
 * shifted by SKELETON_OFFSET_X along X.
 * @param {number[][]} frameData - validated frame, shape [22][3].
 */
function updateSkeleton(frameData) {
  for (let i = 0; i < maxPoints; i++) {
    const [x, y, z] = frameData[i];
    jointMeshes[i].position.set(x + SKELETON_OFFSET_X, y, z);
  }
  for (let i = 0; i < boneConnections.length; i += 2) {
    const bone = boneMeshes[i / 2];
    const a = frameData[boneConnections[i]];
    const b = frameData[boneConnections[i + 1]];
    pA.set(a[0] + SKELETON_OFFSET_X, a[1], a[2]);
    pB.set(b[0] + SKELETON_OFFSET_X, b[1], b[2]);
    // Centre the unit cylinder between the joints, stretch it along its
    // Z axis to the joint distance, and aim that axis at the far joint.
    bone.position.copy(boneMid.lerpVectors(pA, pB, 0.5));
    bone.scale.set(1, 1, pA.distanceTo(pB));
    bone.lookAt(pB);
  }
}

/**
 * requestAnimationFrame callback: renders every display frame and advances
 * the motion clip by one frame whenever more than 33 ms have elapsed (~30 fps).
 * @param {number} timestamp - time in milliseconds supplied by the browser.
 */
function animate(timestamp) {
  requestAnimationFrame(animate);

  if (isPlaying && motionData.length > 0 && timestamp - lastFrameTime > 33) {
    const frameData = motionData[currentFrame]; // shape [22][3]

    if (isValidFrame(frameData)) {
      // ── Update skeleton (offset to the right) ────────
      updateSkeleton(frameData);

      // ── Drive the SMPL skinned mesh ───────────────────
      // frameData joints are raw (skeleton origin = 0,0,0).
      // The gltf.scene is positioned at SMPL_OFFSET_X so
      // we pass raw frameData - Three.js handles the offset.
      applyPoseToSMPL(frameData);

      // ── Camera follows root joint ─────────────────────
      rootPos.set(frameData[0][0], frameData[0][1], frameData[0][2]);
      cameraTarget.lerp(rootPos, 0.1);
      controls.target.copy(cameraTarget);

      currentFrame = (currentFrame + 1) % motionData.length;
      lastFrameTime = timestamp;
    } else {
      // A malformed frame used to throw here on every tick, before the
      // frame counter advanced and before render(), freezing the view for
      // good. Stop the clip once and keep rendering instead.
      console.error(`Stopping playback: frame ${currentFrame} is not a [${maxPoints}][3] array.`, frameData);
      isPlaying = false;
    }
  }

  controls.update();
  renderer.render(scene, camera);
}
animate(0);
// Keep the canvas and the camera projection in step with the window size.
window.addEventListener('resize', () => {
  const { innerWidth: width, innerHeight: height } = window;
  renderer.setSize(width, height);
  camera.aspect = width / height;
  camera.updateProjectionMatrix();
});
// --- 8. WEBSOCKET PIPELINE ---
const socketStatus = document.getElementById('socket-status');
const statusText = document.getElementById('status');

// Connect back to the host that served this page, using wss:// when the
// page itself was loaded over https:// and ws:// otherwise.
const wsProtocol = window.location.protocol === 'https:' ? 'wss://' : 'ws://';
const ws = new WebSocket(`${wsProtocol}${window.location.host}/api/generate_stream`);

// Mirror the connection state in the top-right pill. The labels are plain
// text, so textContent is used rather than innerHTML.
ws.onopen = () => {
  socketStatus.textContent = "🟢 Live TCP Connection";
  socketStatus.style.color = "green";
};
ws.onclose = () => {
  socketStatus.textContent = "🔴 Disconnected";
  socketStatus.style.color = "red";
};
// Ticket of the most recent request sent to the server. Responses echo the
// ticket they answer, so anything with a different ticket is stale.
let currentTicket = 0; // Keep track of the newest request

/**
 * Handle one server message: start playing the received motion clip when it
 * answers the latest request, and drop everything else without disturbing
 * the current playback.
 */
ws.onmessage = (event) => {
  let data;
  try {
    data = JSON.parse(event.data);
  } catch (err) {
    // A malformed message must not throw out of the handler.
    console.error("Ignoring WebSocket message that is not valid JSON:", err);
    return;
  }
  if (data === null || typeof data !== 'object') {
    console.warn("Ignoring WebSocket message that is not a JSON object:", data);
    return;
  }

  // STALENESS CHECK: only play the animation if the ticket matches our latest request.
  if (data.ticket !== currentTicket) {
    console.log(`Dropped outdated packet. Ticket: ${data.ticket}, Current: ${currentTicket}`);
    return;
  }

  // The animation loop indexes motionData as an array of frames, so anything
  // other than an array is rejected here.
  if (data.status === "success" && Array.isArray(data.data)) {
    motionData = data.data;
    currentFrame = 0;
    isPlaying = true;

    // Payload size is measured on the raw message as received.
    const kbSize = (new Blob([event.data]).size / 1024).toFixed(2);
    document.getElementById('latency').textContent = `Inference Latency: ${data.latency_ms} ms`;
    document.getElementById('payloadSize').textContent = `Payload Size: ${kbSize} KB`;
    statusText.textContent = "Rendering...";
    statusText.style.color = "teal";
  } else {
    // Current ticket but not a playable result: leave playback and the UI
    // alone, but record it instead of dropping it silently.
    console.warn(`Ignoring message for ticket ${data.ticket} with status "${data.status}" and no motion array.`);
  }
};
// Live prompt input: every keystroke restarts a 400 ms debounce timer, and
// the prompt is sent only once typing has paused for that long.
let typingTimer;
const inputField = document.getElementById('promptInput');

inputField.addEventListener('input', () => {
  clearTimeout(typingTimer);
  statusText.textContent = "Typing...";
  statusText.style.color = "#ff8c00";

  typingTimer = setTimeout(() => {
    const promptText = inputField.value.trim();

    if (promptText.length === 0) {
      statusText.textContent = "Waiting for input...";
      statusText.style.color = "teal";
      return;
    }

    if (ws.readyState !== WebSocket.OPEN) {
      // Previously this case matched neither branch and the status stayed
      // on "Typing..." indefinitely.
      statusText.textContent = "Not connected - prompt not sent";
      statusText.style.color = "red";
      return;
    }

    // New ticket per request, so the message handler can recognise and drop
    // responses to older prompts.
    currentTicket++;
    statusText.textContent = "Transmitting to Edge GPU...";
    // Send the text AND the ticket number
    ws.send(JSON.stringify({ prompt: promptText, ticket: currentTicket }));
  }, 400); // 400ms debounce: waits just long enough for you to finish a word
});
</script>
</body>
</html>