Spaces:

Solar-Prince
/

SentAI

Running

App Files Files Community

Solar-Prince committed on about 1 month ago

Commit

d8765bb

verified ·

1 Parent(s): f145864

Upload 2 files

Browse files

Files changed (2) hide show

README.md +4 -18
index.html +102 -29

README.md CHANGED Viewed

@@ -1,29 +1,15 @@
 ---
 title: SentAI
-emoji: 🤖
 colorFrom: blue
 colorTo: purple
 sdk: static
 pinned: false
-short_description: Live face expression age and gender estimates
 ---
 # SentAI
-SentAI is a browser-based live camera application for face detection, expression scoring, apparent age range, and male/female presentation estimate.
-## Phase 3 changes
-- Keeps the fast on-device face box loop.
-- Adds a background accuracy pack using Transformers.js-compatible ONNX models.
-- Uses temporal smoothing so expression scores and age range do not flicker frame-by-frame.
-- Reduces the old neutral-to-confused behavior so "Confused" is not overconfident just because the face is neutral.
-- Adds an age offset control for quick calibration during demos.
-- Supports mobile camera permission and front/rear camera switching when the browser exposes it.
-## Notes
-The app estimates visible facial expression and apparent age from camera frames. It cannot know a person's true internal feeling. Lighting, pose, camera quality, glasses, occlusion, and model bias can affect results.
-Phase 3B fixes the accuracy-pack loader by using the Transformers.js package version that the selected ONNX emotion model was published for.

 ---
 title: SentAI
+emoji: 🧠
 colorFrom: blue
 colorTo: purple
 sdk: static
 pinned: false
+short_description: Live facial expression and age estimates
 ---
 # SentAI
+Static browser-side live face analysis demo for Hugging Face Spaces.
+Phase 3C adds multi-crop transformer expression scoring and calibration to reduce Happy/Confused dominance, with better sensitivity for Sad, Fear, and Disgust.

index.html CHANGED Viewed

@@ -391,7 +391,7 @@
     <header class="hero" aria-label="SentAI heading">
       <div class="brand">
         <h1>SentAI</h1>
-        <p>Higher-accuracy live face analysis with stabilized expression scores, apparent age range, and male/female presentation estimate. The fast face box runs continuously; heavier transformer models run on cropped faces in the background.</p>
       </div>
       <div class="status-stack">
         <div class="status-pill" aria-live="polite"><span id="coreDot" class="dot"></span><span id="coreStatus">Loading core models...</span></div>
@@ -404,6 +404,11 @@
       <button id="switchBtn" disabled>Switch front/rear</button>
       <button id="accuracyBtn" class="accent" disabled>Load accuracy pack</button>
       <button id="stopBtn" class="danger" disabled>Stop</button>
       <select id="modeSelect" aria-label="Performance mode">
         <option value="fast">Fast mode</option>
         <option value="balanced">Balanced mode</option>
@@ -476,7 +481,7 @@
         </div>
         <div class="note wide">
-          The app estimates visible facial expression and apparent age from camera frames. It cannot know a person's true internal feeling. The accuracy pack improves expression and age estimates but may download larger ONNX model files the first time.
         </div>
       </aside>
     </section>
@@ -500,6 +505,7 @@
       switchBtn: document.getElementById("switchBtn"),
       accuracyBtn: document.getElementById("accuracyBtn"),
       stopBtn: document.getElementById("stopBtn"),
       modeSelect: document.getElementById("modeSelect"),
       cameraTag: document.getElementById("cameraTag"),
       video: document.getElementById("video"),
@@ -650,19 +656,50 @@
       for (const item of list) {
         const label = String(item.label || item.class || "").toLowerCase();
         const score = clamp01(item.score || item.probability || 0);
-        if (label.includes("happy") || label.includes("joy")) scores.Happy = Math.max(scores.Happy, score);
-        else if (label.includes("sad")) scores.Sad = Math.max(scores.Sad, score);
-        else if (label.includes("fear")) scores.Fear = Math.max(scores.Fear, score);
         else if (label.includes("angry") || label.includes("anger")) scores.Anger = Math.max(scores.Anger, score);
-        else if (label.includes("disgust")) scores.Disgust = Math.max(scores.Disgust, score);
         else if (label.includes("surprise") || label.includes("neutral")) {
-          const scaled = label.includes("neutral") ? score * 0.10 : score * 0.55;
-          scores.Confused = Math.max(scores.Confused, Math.min(0.48, scaled));
         }
       }
       return normalizeScores(scores);
     }
     function combineEmotionScores(faceScores) {
       const now = performance.now();
       const freshPro = pro.emotionScores && (now - pro.emotionAt < 6500);
@@ -670,33 +707,40 @@
       for (const label of emotionLabels) {
         const base = faceScores[label] || 0;
         const proValue = freshPro ? (pro.emotionScores[label] || 0) : 0;
-        combined[label] = freshPro ? (proValue * 0.76 + base * 0.24) : base;
       }
       const cfg = modes[els.modeSelect.value] || modes.accurate;
       if (!emotionEma) {
-        emotionEma = combined;
       } else {
         for (const label of emotionLabels) {
-          emotionEma[label] = emotionEma[label] * (1 - cfg.smoothing) + combined[label] * cfg.smoothing;
         }
       }
       return normalizeScores(emotionEma);
     }
     function topEmotion(scores) {
-      const sorted = Object.entries(scores).sort((a, b) => b[1] - a[1]);
-      const top = sorted[0] || ["Confused", 0];
-      const second = sorted[1] || ["", 0];
-      let label = top[0];
-      let score = top[1];
-      if (score < 0.18) {
         label = "Confused";
-        score = Math.max(scores.Confused || 0, 0.18);
-      } else if (score - second[1] < 0.035 && label !== "Confused") {
-        // When the visible expression is ambiguous, mark it as confused but keep confidence modest.
         label = "Confused";
-        score = Math.max(Math.min(scores.Confused || 0, 0.34), 0.22);
       }
       return { label, score: clamp01(score) };
     }
@@ -936,11 +980,10 @@
       if (lastDetections.length) drawDetections(lastDetections);
     }
-    function cropFaceCanvas(det, targetSize = 256) {
       const box = det.detection.box;
       const videoW = els.video.videoWidth || els.overlay.width;
       const videoH = els.video.videoHeight || els.overlay.height;
-      const pad = 0.32;
       const cx = box.x + box.width / 2;
       const cy = box.y + box.height / 2;
       const side = Math.max(box.width, box.height) * (1 + pad * 2);
@@ -955,7 +998,16 @@
       const c = canvas.getContext("2d", { willReadFrequently: true });
       c.fillStyle = "#000";
       c.fillRect(0, 0, targetSize, targetSize);
-      c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
       return canvas;
     }
@@ -1117,13 +1169,34 @@
         url = await canvasToBlobUrl(crop);
         if (pro.emotionPipe) {
-          let output;
           try {
-            output = await pro.emotionPipe(url, { topK: 7 });
-          } catch (_) {
-            output = await pro.emotionPipe(url);
           }
-          pro.emotionScores = normalizeExternalEmotion(output);
           pro.emotionAt = performance.now();
         }

     <header class="hero" aria-label="SentAI heading">
       <div class="brand">
         <h1>SentAI</h1>
+        <p>Higher-accuracy live face analysis with multi-crop expression scoring, apparent age range, and male/female presentation estimate. Phase 3C is tuned to reduce Happy/Confused dominance.</p>
       </div>
       <div class="status-stack">
         <div class="status-pill" aria-live="polite"><span id="coreDot" class="dot"></span><span id="coreStatus">Loading core models...</span></div>
       <button id="switchBtn" disabled>Switch front/rear</button>
       <button id="accuracyBtn" class="accent" disabled>Load accuracy pack</button>
       <button id="stopBtn" class="danger" disabled>Stop</button>
+      <select id="emotionMode" aria-label="Emotion scoring mode">
+        <option value="sensitive" selected>Boost sad/fear/disgust</option>
+        <option value="balanced">Balanced emotions</option>
+        <option value="raw">Raw model scores</option>
+      </select>
       <select id="modeSelect" aria-label="Performance mode">
         <option value="fast">Fast mode</option>
         <option value="balanced">Balanced mode</option>
         </div>
         <div class="note wide">
+          The app estimates visible facial expression and apparent age from camera frames. Phase 3C uses multi-crop transformer averaging and sad/fear/disgust calibration to reduce Happy/Confused dominance. It still cannot know a person's true internal feeling.
         </div>
       </aside>
     </section>
       switchBtn: document.getElementById("switchBtn"),
       accuracyBtn: document.getElementById("accuracyBtn"),
       stopBtn: document.getElementById("stopBtn"),
+      emotionMode: document.getElementById("emotionMode"),
       modeSelect: document.getElementById("modeSelect"),
       cameraTag: document.getElementById("cameraTag"),
       video: document.getElementById("video"),
       for (const item of list) {
         const label = String(item.label || item.class || "").toLowerCase();
         const score = clamp01(item.score || item.probability || 0);
+        if (label.includes("happy") || label.includes("happiness") || label.includes("joy")) scores.Happy = Math.max(scores.Happy, score);
+        else if (label.includes("sad") || label.includes("sadness")) scores.Sad = Math.max(scores.Sad, score);
+        else if (label.includes("fear") || label.includes("fearful")) scores.Fear = Math.max(scores.Fear, score);
         else if (label.includes("angry") || label.includes("anger")) scores.Anger = Math.max(scores.Anger, score);
+        else if (label.includes("disgust") || label.includes("disgusted")) scores.Disgust = Math.max(scores.Disgust, score);
         else if (label.includes("surprise") || label.includes("neutral")) {
+          // Neutral/surprise should not dominate. They only become Confused when no clear emotion wins.
+          const scaled = label.includes("neutral") ? score * 0.045 : score * 0.30;
+          scores.Confused = Math.max(scores.Confused, Math.min(0.30, scaled));
         }
       }
       return normalizeScores(scores);
     }
+    function emotionWeights() { // Returns the per-label score multipliers for the emotion mode currently selected in the UI.
+      const mode = els.emotionMode?.value || "sensitive"; // Falls back to "sensitive" when the select element is missing or its value is empty.
+      if (mode === "raw") {
+        return { Happy: 1.00, Sad: 1.00, Fear: 1.00, Anger: 1.00, Confused: 1.00, Disgust: 1.00 }; // Identity weights: every label passes through unscaled.
+      }
+      if (mode === "balanced") {
+        return { Happy: 0.88, Sad: 1.28, Fear: 1.38, Anger: 0.95, Confused: 0.58, Disgust: 1.48 }; // Milder than the default set: Sad/Fear/Disgust raised, Happy/Anger/Confused lowered.
+      }
+      // Default: compensate for webcam models over-predicting smile/neutral/anger and under-predicting subtle negative expressions.
+      return { Happy: 0.70, Sad: 1.62, Fear: 1.82, Anger: 0.86, Confused: 0.42, Disgust: 2.05 }; // Used for "sensitive" and for any unrecognized mode value.
+    }
+    function applyEmotionCalibration(scores) { // Re-weights a label -> score map for the selected emotion mode and returns the result of normalizeScores().
+      const mode = els.emotionMode?.value || "sensitive"; // Same fallback as emotionWeights(): "sensitive" when the select is missing or empty.
+      if (mode === "raw") return normalizeScores(scores); // Raw mode skips calibration entirely; scores only go through normalizeScores().
+      const weights = emotionWeights();
+      const out = blankScores(); // NOTE(review): presumably a fresh map with every emotion label at 0 - confirm against blankScores().
+      for (const label of emotionLabels) {
+        let v = clamp01(scores[label] || 0); // Missing or falsy scores count as 0; clamp01 is presumably a [0, 1] clamp - confirm.
+        // Make low but consistent sad/fear/disgust evidence visible instead of crushed by happy/confused.
+        if (["Sad", "Fear", "Disgust"].includes(label)) v = Math.pow(v, 0.82); // An exponent below 1 raises values that lie in (0, 1).
+        if (label === "Happy") v = Math.pow(v, 1.12); // An exponent above 1 lowers values that lie in (0, 1).
+        if (label === "Confused") v = Math.pow(v, 1.18);
+        out[label] = clamp01(v * weights[label]); // Weights above 1 can exceed 1 after multiplication, so the product goes through clamp01 again.
+      }
+      // Confused is a fallback label, not a high-confidence emotion class.
+      out.Confused = Math.min(out.Confused, mode === "balanced" ? 0.34 : 0.24); // Cap is 0.34 in balanced mode and 0.24 otherwise (raw mode already returned above).
+      return normalizeScores(out);
+    }
     function combineEmotionScores(faceScores) {
       const now = performance.now();
       const freshPro = pro.emotionScores && (now - pro.emotionAt < 6500);
       for (const label of emotionLabels) {
         const base = faceScores[label] || 0;
         const proValue = freshPro ? (pro.emotionScores[label] || 0) : 0;
+        // Trust the transformer crop classifier more than face-api expressions when it is fresh.
+        combined[label] = freshPro ? (proValue * 0.88 + base * 0.12) : base;
       }
+      const calibrated = applyEmotionCalibration(combined);
       const cfg = modes[els.modeSelect.value] || modes.accurate;
       if (!emotionEma) {
+        emotionEma = calibrated;
       } else {
         for (const label of emotionLabels) {
+          emotionEma[label] = emotionEma[label] * (1 - cfg.smoothing) + calibrated[label] * cfg.smoothing;
         }
       }
       return normalizeScores(emotionEma);
     }
     function topEmotion(scores) { // Picks the label/score pair to report from a label -> score map; returns { label, score }.
+      const nonConfused = Object.entries(scores).filter(([label]) => label !== "Confused").sort((a, b) => b[1] - a[1]); // Candidates exclude Confused, sorted by descending score.
+      let [label, score] = nonConfused[0] || ["Confused", 0]; // Start from the best non-Confused label; default to Confused/0 when there are no candidates.
+      const rare = ["Disgust", "Fear", "Sad"].map(name => [name, scores[name] || 0]).sort((a, b) => b[1] - a[1])[0]; // Highest-scoring of Disgust/Fear/Sad; missing scores count as 0.
+      const mode = els.emotionMode?.value || "sensitive"; // Falls back to "sensitive" when the select is missing or its value is empty.
+      if (mode !== "raw" && rare && rare[1] >= 0.24) { // The rescue only applies outside raw mode and once the best rare label reaches 0.24.
+        const rescueMargin = mode === "sensitive" ? 0.20 : 0.12; // How far below the current leader the rare label may sit and still replace it.
+        if (rare[1] >= score - rescueMargin) {
+          label = rare[0];
+          score = rare[1];
+        }
+      }
+      if (score < 0.16) { // Winner too weak: report Confused with a score floor of 0.16.
         label = "Confused";
+        score = Math.max(scores.Confused || 0, 0.16);
+      } else if ((scores.Confused || 0) > score && score < 0.26) { // Confused outscores a weak winner (below 0.26): report Confused instead.
         label = "Confused";
+        score = Math.min(scores.Confused || 0.22, 0.28); // Capped at 0.28. NOTE(review): the 0.22 fallback appears unreachable, since this branch requires Confused > score >= 0.16.
       }
       return { label, score: clamp01(score) }; // The final score is passed through clamp01 before being returned.
     }
       if (lastDetections.length) drawDetections(lastDetections);
     }
+    function cropFaceCanvas(det, targetSize = 256, pad = 0.32, filter = "none", mirror = false) {
       const box = det.detection.box;
       const videoW = els.video.videoWidth || els.overlay.width;
       const videoH = els.video.videoHeight || els.overlay.height;
       const cx = box.x + box.width / 2;
       const cy = box.y + box.height / 2;
       const side = Math.max(box.width, box.height) * (1 + pad * 2);
       const c = canvas.getContext("2d", { willReadFrequently: true });
       c.fillStyle = "#000";
       c.fillRect(0, 0, targetSize, targetSize);
+      c.filter = filter;
+      if (mirror) {
+        c.translate(targetSize, 0);
+        c.scale(-1, 1);
+        c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
+        c.setTransform(1, 0, 0, 1, 0, 0);
+      } else {
+        c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
+      }
+      c.filter = "none";
       return canvas;
     }
         url = await canvasToBlobUrl(crop);
         if (pro.emotionPipe) {
+          const cropVariants = [
+            cropFaceCanvas(primary, 288, 0.16, "contrast(1.10) saturate(0.96)", false),
+            cropFaceCanvas(primary, 288, 0.34, "contrast(1.18) saturate(0.92)", false),
+            cropFaceCanvas(primary, 288, 0.06, "contrast(1.22) brightness(1.03)", false),
+            cropFaceCanvas(primary, 288, 0.22, "contrast(1.14) saturate(0.92)", true),
+          ];
+          const urls = [];
+          const aggregate = blankScores();
+          let count = 0;
           try {
+            for (const variant of cropVariants) {
+              const variantUrl = await canvasToBlobUrl(variant);
+              urls.push(variantUrl);
+              let output;
+              try {
+                output = await pro.emotionPipe(variantUrl, { topK: 7 });
+              } catch (_) {
+                output = await pro.emotionPipe(variantUrl);
+              }
+              const scores = normalizeExternalEmotion(output);
+              for (const label of emotionLabels) aggregate[label] += scores[label] || 0;
+              count += 1;
+            }
+          } finally {
+            for (const variantUrl of urls) URL.revokeObjectURL(variantUrl);
           }
+          for (const label of emotionLabels) aggregate[label] = count ? aggregate[label] / count : 0;
+          pro.emotionScores = normalizeScores(aggregate);
           pro.emotionAt = performance.now();
         }