Solar-Prince committed on
Commit
d8765bb
·
verified ·
1 Parent(s): f145864

Upload 2 files

Browse files
Files changed (2) hide show
  1. README.md +4 -18
  2. index.html +102 -29
README.md CHANGED
@@ -1,29 +1,15 @@
1
  ---
2
  title: SentAI
3
- emoji: 🤖
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: static
7
  pinned: false
8
- short_description: Live face expression age and gender estimates
9
  ---
10
 
11
  # SentAI
12
 
13
- SentAI is a browser-based live camera application for face detection, expression scoring, apparent age range, and male/female presentation estimate.
14
 
15
- ## Phase 3 changes
16
-
17
- - Keeps the fast on-device face box loop.
18
- - Adds a background accuracy pack using Transformers.js-compatible ONNX models.
19
- - Uses temporal smoothing so expression scores and age range do not flicker frame-by-frame.
20
- - Reduces the old neutral-to-confused behavior so "Confused" is not overconfident just because the face is neutral.
21
- - Adds an age offset control for quick calibration during demos.
22
- - Supports mobile camera permission and front/rear camera switching when the browser exposes it.
23
-
24
- ## Notes
25
-
26
- The app estimates visible facial expression and apparent age from camera frames. It cannot know a person's true internal feeling. Lighting, pose, camera quality, glasses, occlusion, and model bias can affect results.
27
-
28
-
29
- Phase 3B fixes the accuracy-pack loader by using the Transformers.js package version that the selected ONNX emotion model was published for.
 
1
  ---
2
  title: SentAI
3
+ emoji: 🧠
4
  colorFrom: blue
5
  colorTo: purple
6
  sdk: static
7
  pinned: false
8
+ short_description: Live facial expression and age estimates
9
  ---
10
 
11
  # SentAI
12
 
13
+ Static browser-side live face analysis demo for Hugging Face Spaces.
14
 
15
+ Phase 3C adds multi-crop transformer expression scoring and calibration to reduce Happy/Confused dominance, with better sensitivity for Sad, Fear, and Disgust.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
index.html CHANGED
@@ -391,7 +391,7 @@
391
  <header class="hero" aria-label="SentAI heading">
392
  <div class="brand">
393
  <h1>SentAI</h1>
394
- <p>Higher-accuracy live face analysis with stabilized expression scores, apparent age range, and male/female presentation estimate. The fast face box runs continuously; heavier transformer models run on cropped faces in the background.</p>
395
  </div>
396
  <div class="status-stack">
397
  <div class="status-pill" aria-live="polite"><span id="coreDot" class="dot"></span><span id="coreStatus">Loading core models...</span></div>
@@ -404,6 +404,11 @@
404
  <button id="switchBtn" disabled>Switch front/rear</button>
405
  <button id="accuracyBtn" class="accent" disabled>Load accuracy pack</button>
406
  <button id="stopBtn" class="danger" disabled>Stop</button>
 
 
 
 
 
407
  <select id="modeSelect" aria-label="Performance mode">
408
  <option value="fast">Fast mode</option>
409
  <option value="balanced">Balanced mode</option>
@@ -476,7 +481,7 @@
476
  </div>
477
 
478
  <div class="note wide">
479
- The app estimates visible facial expression and apparent age from camera frames. It cannot know a person's true internal feeling. The accuracy pack improves expression and age estimates but may download larger ONNX model files the first time.
480
  </div>
481
  </aside>
482
  </section>
@@ -500,6 +505,7 @@
500
  switchBtn: document.getElementById("switchBtn"),
501
  accuracyBtn: document.getElementById("accuracyBtn"),
502
  stopBtn: document.getElementById("stopBtn"),
 
503
  modeSelect: document.getElementById("modeSelect"),
504
  cameraTag: document.getElementById("cameraTag"),
505
  video: document.getElementById("video"),
@@ -650,19 +656,50 @@
650
  for (const item of list) {
651
  const label = String(item.label || item.class || "").toLowerCase();
652
  const score = clamp01(item.score || item.probability || 0);
653
- if (label.includes("happy") || label.includes("joy")) scores.Happy = Math.max(scores.Happy, score);
654
- else if (label.includes("sad")) scores.Sad = Math.max(scores.Sad, score);
655
- else if (label.includes("fear")) scores.Fear = Math.max(scores.Fear, score);
656
  else if (label.includes("angry") || label.includes("anger")) scores.Anger = Math.max(scores.Anger, score);
657
- else if (label.includes("disgust")) scores.Disgust = Math.max(scores.Disgust, score);
658
  else if (label.includes("surprise") || label.includes("neutral")) {
659
- const scaled = label.includes("neutral") ? score * 0.10 : score * 0.55;
660
- scores.Confused = Math.max(scores.Confused, Math.min(0.48, scaled));
 
661
  }
662
  }
663
  return normalizeScores(scores);
664
  }
665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
666
  function combineEmotionScores(faceScores) {
667
  const now = performance.now();
668
  const freshPro = pro.emotionScores && (now - pro.emotionAt < 6500);
@@ -670,33 +707,40 @@
670
  for (const label of emotionLabels) {
671
  const base = faceScores[label] || 0;
672
  const proValue = freshPro ? (pro.emotionScores[label] || 0) : 0;
673
- combined[label] = freshPro ? (proValue * 0.76 + base * 0.24) : base;
 
674
  }
675
 
 
676
  const cfg = modes[els.modeSelect.value] || modes.accurate;
677
  if (!emotionEma) {
678
- emotionEma = combined;
679
  } else {
680
  for (const label of emotionLabels) {
681
- emotionEma[label] = emotionEma[label] * (1 - cfg.smoothing) + combined[label] * cfg.smoothing;
682
  }
683
  }
684
  return normalizeScores(emotionEma);
685
  }
686
 
687
  function topEmotion(scores) {
688
- const sorted = Object.entries(scores).sort((a, b) => b[1] - a[1]);
689
- const top = sorted[0] || ["Confused", 0];
690
- const second = sorted[1] || ["", 0];
691
- let label = top[0];
692
- let score = top[1];
693
- if (score < 0.18) {
 
 
 
 
 
 
694
  label = "Confused";
695
- score = Math.max(scores.Confused || 0, 0.18);
696
- } else if (score - second[1] < 0.035 && label !== "Confused") {
697
- // When the visible expression is ambiguous, mark it as confused but keep confidence modest.
698
  label = "Confused";
699
- score = Math.max(Math.min(scores.Confused || 0, 0.34), 0.22);
700
  }
701
  return { label, score: clamp01(score) };
702
  }
@@ -936,11 +980,10 @@
936
  if (lastDetections.length) drawDetections(lastDetections);
937
  }
938
 
939
- function cropFaceCanvas(det, targetSize = 256) {
940
  const box = det.detection.box;
941
  const videoW = els.video.videoWidth || els.overlay.width;
942
  const videoH = els.video.videoHeight || els.overlay.height;
943
- const pad = 0.32;
944
  const cx = box.x + box.width / 2;
945
  const cy = box.y + box.height / 2;
946
  const side = Math.max(box.width, box.height) * (1 + pad * 2);
@@ -955,7 +998,16 @@
955
  const c = canvas.getContext("2d", { willReadFrequently: true });
956
  c.fillStyle = "#000";
957
  c.fillRect(0, 0, targetSize, targetSize);
958
- c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
 
 
 
 
 
 
 
 
 
959
  return canvas;
960
  }
961
 
@@ -1117,13 +1169,34 @@
1117
  url = await canvasToBlobUrl(crop);
1118
 
1119
  if (pro.emotionPipe) {
1120
- let output;
 
 
 
 
 
 
 
 
1121
  try {
1122
- output = await pro.emotionPipe(url, { topK: 7 });
1123
- } catch (_) {
1124
- output = await pro.emotionPipe(url);
 
 
 
 
 
 
 
 
 
 
 
 
1125
  }
1126
- pro.emotionScores = normalizeExternalEmotion(output);
 
1127
  pro.emotionAt = performance.now();
1128
  }
1129
 
 
391
  <header class="hero" aria-label="SentAI heading">
392
  <div class="brand">
393
  <h1>SentAI</h1>
394
+ <p>Higher-accuracy live face analysis with multi-crop expression scoring, apparent age range, and male/female presentation estimate. Phase 3C is tuned to reduce Happy/Confused dominance.</p>
395
  </div>
396
  <div class="status-stack">
397
  <div class="status-pill" aria-live="polite"><span id="coreDot" class="dot"></span><span id="coreStatus">Loading core models...</span></div>
 
404
  <button id="switchBtn" disabled>Switch front/rear</button>
405
  <button id="accuracyBtn" class="accent" disabled>Load accuracy pack</button>
406
  <button id="stopBtn" class="danger" disabled>Stop</button>
407
+ <select id="emotionMode" aria-label="Emotion scoring mode">
408
+ <option value="sensitive" selected>Boost sad/fear/disgust</option>
409
+ <option value="balanced">Balanced emotions</option>
410
+ <option value="raw">Raw model scores</option>
411
+ </select>
412
  <select id="modeSelect" aria-label="Performance mode">
413
  <option value="fast">Fast mode</option>
414
  <option value="balanced">Balanced mode</option>
 
481
  </div>
482
 
483
  <div class="note wide">
484
+ The app estimates visible facial expression and apparent age from camera frames. Phase 3C uses multi-crop transformer averaging and sad/fear/disgust calibration to reduce Happy/Confused dominance. It still cannot know a person's true internal feeling.
485
  </div>
486
  </aside>
487
  </section>
 
505
  switchBtn: document.getElementById("switchBtn"),
506
  accuracyBtn: document.getElementById("accuracyBtn"),
507
  stopBtn: document.getElementById("stopBtn"),
508
+ emotionMode: document.getElementById("emotionMode"),
509
  modeSelect: document.getElementById("modeSelect"),
510
  cameraTag: document.getElementById("cameraTag"),
511
  video: document.getElementById("video"),
 
656
  for (const item of list) {
657
  const label = String(item.label || item.class || "").toLowerCase();
658
  const score = clamp01(item.score || item.probability || 0);
659
+ if (label.includes("happy") || label.includes("happiness") || label.includes("joy")) scores.Happy = Math.max(scores.Happy, score);
660
+ else if (label.includes("sad") || label.includes("sadness")) scores.Sad = Math.max(scores.Sad, score);
661
+ else if (label.includes("fear") || label.includes("fearful")) scores.Fear = Math.max(scores.Fear, score);
662
  else if (label.includes("angry") || label.includes("anger")) scores.Anger = Math.max(scores.Anger, score);
663
+ else if (label.includes("disgust") || label.includes("disgusted")) scores.Disgust = Math.max(scores.Disgust, score);
664
  else if (label.includes("surprise") || label.includes("neutral")) {
665
+ // Neutral/surprise should not dominate. They only become Confused when no clear emotion wins.
666
+ const scaled = label.includes("neutral") ? score * 0.045 : score * 0.30;
667
+ scores.Confused = Math.max(scores.Confused, Math.min(0.30, scaled));
668
  }
669
  }
670
  return normalizeScores(scores);
671
  }
672
 
673
/**
 * Returns the per-emotion multipliers for the selected emotion scoring mode.
 *
 * The mode is read from `els.emotionMode`; when that control or its value is
 * missing, "sensitive" is used. Any mode other than "raw" or "balanced"
 * receives the sensitive table. A new object is built on every call.
 *
 * @returns {{Happy: number, Sad: number, Fear: number, Anger: number, Confused: number, Disgust: number}}
 */
function emotionWeights() {
  const selectedMode = els.emotionMode?.value || "sensitive";
  switch (selectedMode) {
    case "raw":
      // Identity multipliers: every label keeps its incoming score.
      return { Happy: 1.00, Sad: 1.00, Fear: 1.00, Anger: 1.00, Confused: 1.00, Disgust: 1.00 };
    case "balanced":
      return { Happy: 0.88, Sad: 1.28, Fear: 1.38, Anger: 0.95, Confused: 0.58, Disgust: 1.48 };
    default:
      // Sensitive table: intended to compensate for webcam models over-predicting
      // smile/neutral/anger and under-predicting subtle negative expressions.
      return { Happy: 0.70, Sad: 1.62, Fear: 1.82, Anger: 0.86, Confused: 0.42, Disgust: 2.05 };
  }
}
684
+
685
/**
 * Re-weights emotion scores according to the selected emotion scoring mode.
 *
 * In "raw" mode the input is handed straight to `normalizeScores`. Otherwise
 * each label in `emotionLabels` is clamped, reshaped with a per-label
 * exponent, multiplied by its weight from `emotionWeights()`, and clamped
 * again; the Confused entry is then capped before the final normalization.
 * `clamp01`, `blankScores`, and `normalizeScores` are defined elsewhere in
 * the file (presumably clamp-to-[0,1], zeroed score map, and sum
 * normalization respectively; confirm against their definitions).
 *
 * @param {Object<string, number>} scores - Score per emotion label; missing
 *   or falsy entries are treated as 0.
 * @returns {Object<string, number>} Whatever `normalizeScores` returns for
 *   the calibrated map.
 */
function applyEmotionCalibration(scores) {
  const mode = els.emotionMode?.value || "sensitive";
  if (mode === "raw") return normalizeScores(scores);

  // Exponents below 1 lift small values (Sad/Fear/Disgust) so low but
  // consistent evidence stays visible; exponents above 1 push small values
  // down (Happy/Confused). Labels without an entry are left unshaped.
  const exponents = new Map([
    ["Sad", 0.82],
    ["Fear", 0.82],
    ["Disgust", 0.82],
    ["Happy", 1.12],
    ["Confused", 1.18],
  ]);
  const weights = emotionWeights();
  const out = blankScores();

  for (const label of emotionLabels) {
    const base = clamp01(scores[label] || 0);
    const exponent = exponents.get(label);
    const shaped = exponent === undefined ? base : Math.pow(base, exponent);
    // A label with no configured weight keeps its shaped value instead of
    // turning into NaN through multiplication by undefined.
    out[label] = clamp01(shaped * (weights[label] ?? 1));
  }

  // Confused is a fallback label, not a high-confidence emotion class.
  const confusedCap = mode === "balanced" ? 0.34 : 0.24;
  out.Confused = Math.min(out.Confused, confusedCap);
  return normalizeScores(out);
}
702
+
703
  function combineEmotionScores(faceScores) {
704
  const now = performance.now();
705
  const freshPro = pro.emotionScores && (now - pro.emotionAt < 6500);
 
707
  for (const label of emotionLabels) {
708
  const base = faceScores[label] || 0;
709
  const proValue = freshPro ? (pro.emotionScores[label] || 0) : 0;
710
+ // Trust the transformer crop classifier more than face-api expressions when it is fresh.
711
+ combined[label] = freshPro ? (proValue * 0.88 + base * 0.12) : base;
712
  }
713
 
714
+ const calibrated = applyEmotionCalibration(combined);
715
  const cfg = modes[els.modeSelect.value] || modes.accurate;
716
  if (!emotionEma) {
717
+ emotionEma = calibrated;
718
  } else {
719
  for (const label of emotionLabels) {
720
+ emotionEma[label] = emotionEma[label] * (1 - cfg.smoothing) + calibrated[label] * cfg.smoothing;
721
  }
722
  }
723
  return normalizeScores(emotionEma);
724
  }
725
 
726
  function topEmotion(scores) {
727
+ const nonConfused = Object.entries(scores).filter(([label]) => label !== "Confused").sort((a, b) => b[1] - a[1]);
728
+ let [label, score] = nonConfused[0] || ["Confused", 0];
729
+ const rare = ["Disgust", "Fear", "Sad"].map(name => [name, scores[name] || 0]).sort((a, b) => b[1] - a[1])[0];
730
+ const mode = els.emotionMode?.value || "sensitive";
731
+ if (mode !== "raw" && rare && rare[1] >= 0.24) {
732
+ const rescueMargin = mode === "sensitive" ? 0.20 : 0.12;
733
+ if (rare[1] >= score - rescueMargin) {
734
+ label = rare[0];
735
+ score = rare[1];
736
+ }
737
+ }
738
+ if (score < 0.16) {
739
  label = "Confused";
740
+ score = Math.max(scores.Confused || 0, 0.16);
741
+ } else if ((scores.Confused || 0) > score && score < 0.26) {
 
742
  label = "Confused";
743
+ score = Math.min(scores.Confused || 0.22, 0.28);
744
  }
745
  return { label, score: clamp01(score) };
746
  }
 
980
  if (lastDetections.length) drawDetections(lastDetections);
981
  }
982
 
983
+ function cropFaceCanvas(det, targetSize = 256, pad = 0.32, filter = "none", mirror = false) {
984
  const box = det.detection.box;
985
  const videoW = els.video.videoWidth || els.overlay.width;
986
  const videoH = els.video.videoHeight || els.overlay.height;
 
987
  const cx = box.x + box.width / 2;
988
  const cy = box.y + box.height / 2;
989
  const side = Math.max(box.width, box.height) * (1 + pad * 2);
 
998
  const c = canvas.getContext("2d", { willReadFrequently: true });
999
  c.fillStyle = "#000";
1000
  c.fillRect(0, 0, targetSize, targetSize);
1001
+ c.filter = filter;
1002
+ if (mirror) {
1003
+ c.translate(targetSize, 0);
1004
+ c.scale(-1, 1);
1005
+ c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
1006
+ c.setTransform(1, 0, 0, 1, 0, 0);
1007
+ } else {
1008
+ c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
1009
+ }
1010
+ c.filter = "none";
1011
  return canvas;
1012
  }
1013
 
 
1169
  url = await canvasToBlobUrl(crop);
1170
 
1171
  if (pro.emotionPipe) {
1172
+ const cropVariants = [
1173
+ cropFaceCanvas(primary, 288, 0.16, "contrast(1.10) saturate(0.96)", false),
1174
+ cropFaceCanvas(primary, 288, 0.34, "contrast(1.18) saturate(0.92)", false),
1175
+ cropFaceCanvas(primary, 288, 0.06, "contrast(1.22) brightness(1.03)", false),
1176
+ cropFaceCanvas(primary, 288, 0.22, "contrast(1.14) saturate(0.92)", true),
1177
+ ];
1178
+ const urls = [];
1179
+ const aggregate = blankScores();
1180
+ let count = 0;
1181
  try {
1182
+ for (const variant of cropVariants) {
1183
+ const variantUrl = await canvasToBlobUrl(variant);
1184
+ urls.push(variantUrl);
1185
+ let output;
1186
+ try {
1187
+ output = await pro.emotionPipe(variantUrl, { topK: 7 });
1188
+ } catch (_) {
1189
+ output = await pro.emotionPipe(variantUrl);
1190
+ }
1191
+ const scores = normalizeExternalEmotion(output);
1192
+ for (const label of emotionLabels) aggregate[label] += scores[label] || 0;
1193
+ count += 1;
1194
+ }
1195
+ } finally {
1196
+ for (const variantUrl of urls) URL.revokeObjectURL(variantUrl);
1197
  }
1198
+ for (const label of emotionLabels) aggregate[label] = count ? aggregate[label] / count : 0;
1199
+ pro.emotionScores = normalizeScores(aggregate);
1200
  pro.emotionAt = performance.now();
1201
  }
1202