Spaces:
Running
Running
Upload 2 files
Browse files- README.md +4 -18
- index.html +102 -29
README.md
CHANGED
|
@@ -1,29 +1,15 @@
|
|
| 1 |
---
|
| 2 |
title: SentAI
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
-
short_description: Live
|
| 9 |
---
|
| 10 |
|
| 11 |
# SentAI
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
- Keeps the fast on-device face box loop.
|
| 18 |
-
- Adds a background accuracy pack using Transformers.js-compatible ONNX models.
|
| 19 |
-
- Uses temporal smoothing so expression scores and age range do not flicker frame-by-frame.
|
| 20 |
-
- Reduces the old neutral-to-confused behavior so "Confused" is not overconfident just because the face is neutral.
|
| 21 |
-
- Adds an age offset control for quick calibration during demos.
|
| 22 |
-
- Supports mobile camera permission and front/rear camera switching when the browser exposes it.
|
| 23 |
-
|
| 24 |
-
## Notes
|
| 25 |
-
|
| 26 |
-
The app estimates visible facial expression and apparent age from camera frames. It cannot know a person's true internal feeling. Lighting, pose, camera quality, glasses, occlusion, and model bias can affect results.
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
Phase 3B fixes the accuracy-pack loader by using the Transformers.js package version that the selected ONNX emotion model was published for.
|
|
|
|
| 1 |
---
|
| 2 |
title: SentAI
|
| 3 |
+
emoji: 🧠
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: static
|
| 7 |
pinned: false
|
| 8 |
+
short_description: Live facial expression and age estimates
|
| 9 |
---
|
| 10 |
|
| 11 |
# SentAI
|
| 12 |
|
| 13 |
+
Static browser-side live face analysis demo for Hugging Face Spaces.
|
| 14 |
|
| 15 |
+
Phase 3C adds multi-crop transformer expression scoring and calibration to reduce Happy/Confused dominance, with better sensitivity for Sad, Fear, and Disgust.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
index.html
CHANGED
|
@@ -391,7 +391,7 @@
|
|
| 391 |
<header class="hero" aria-label="SentAI heading">
|
| 392 |
<div class="brand">
|
| 393 |
<h1>SentAI</h1>
|
| 394 |
-
<p>Higher-accuracy live face analysis with
|
| 395 |
</div>
|
| 396 |
<div class="status-stack">
|
| 397 |
<div class="status-pill" aria-live="polite"><span id="coreDot" class="dot"></span><span id="coreStatus">Loading core models...</span></div>
|
|
@@ -404,6 +404,11 @@
|
|
| 404 |
<button id="switchBtn" disabled>Switch front/rear</button>
|
| 405 |
<button id="accuracyBtn" class="accent" disabled>Load accuracy pack</button>
|
| 406 |
<button id="stopBtn" class="danger" disabled>Stop</button>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
<select id="modeSelect" aria-label="Performance mode">
|
| 408 |
<option value="fast">Fast mode</option>
|
| 409 |
<option value="balanced">Balanced mode</option>
|
|
@@ -476,7 +481,7 @@
|
|
| 476 |
</div>
|
| 477 |
|
| 478 |
<div class="note wide">
|
| 479 |
-
The app estimates visible facial expression and apparent age from camera frames.
|
| 480 |
</div>
|
| 481 |
</aside>
|
| 482 |
</section>
|
|
@@ -500,6 +505,7 @@
|
|
| 500 |
switchBtn: document.getElementById("switchBtn"),
|
| 501 |
accuracyBtn: document.getElementById("accuracyBtn"),
|
| 502 |
stopBtn: document.getElementById("stopBtn"),
|
|
|
|
| 503 |
modeSelect: document.getElementById("modeSelect"),
|
| 504 |
cameraTag: document.getElementById("cameraTag"),
|
| 505 |
video: document.getElementById("video"),
|
|
@@ -650,19 +656,50 @@
|
|
| 650 |
for (const item of list) {
|
| 651 |
const label = String(item.label || item.class || "").toLowerCase();
|
| 652 |
const score = clamp01(item.score || item.probability || 0);
|
| 653 |
-
if (label.includes("happy") || label.includes("joy")) scores.Happy = Math.max(scores.Happy, score);
|
| 654 |
-
else if (label.includes("sad")) scores.Sad = Math.max(scores.Sad, score);
|
| 655 |
-
else if (label.includes("fear")) scores.Fear = Math.max(scores.Fear, score);
|
| 656 |
else if (label.includes("angry") || label.includes("anger")) scores.Anger = Math.max(scores.Anger, score);
|
| 657 |
-
else if (label.includes("disgust")) scores.Disgust = Math.max(scores.Disgust, score);
|
| 658 |
else if (label.includes("surprise") || label.includes("neutral")) {
|
| 659 |
-
|
| 660 |
-
|
|
|
|
| 661 |
}
|
| 662 |
}
|
| 663 |
return normalizeScores(scores);
|
| 664 |
}
|
| 665 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 666 |
function combineEmotionScores(faceScores) {
|
| 667 |
const now = performance.now();
|
| 668 |
const freshPro = pro.emotionScores && (now - pro.emotionAt < 6500);
|
|
@@ -670,33 +707,40 @@
|
|
| 670 |
for (const label of emotionLabels) {
|
| 671 |
const base = faceScores[label] || 0;
|
| 672 |
const proValue = freshPro ? (pro.emotionScores[label] || 0) : 0;
|
| 673 |
-
|
|
|
|
| 674 |
}
|
| 675 |
|
|
|
|
| 676 |
const cfg = modes[els.modeSelect.value] || modes.accurate;
|
| 677 |
if (!emotionEma) {
|
| 678 |
-
emotionEma =
|
| 679 |
} else {
|
| 680 |
for (const label of emotionLabels) {
|
| 681 |
-
emotionEma[label] = emotionEma[label] * (1 - cfg.smoothing) +
|
| 682 |
}
|
| 683 |
}
|
| 684 |
return normalizeScores(emotionEma);
|
| 685 |
}
|
| 686 |
|
| 687 |
function topEmotion(scores) {
|
| 688 |
-
const
|
| 689 |
-
|
| 690 |
-
const
|
| 691 |
-
|
| 692 |
-
|
| 693 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 694 |
label = "Confused";
|
| 695 |
-
score = Math.max(scores.Confused || 0, 0.
|
| 696 |
-
} else if (
|
| 697 |
-
// When the visible expression is ambiguous, mark it as confused but keep confidence modest.
|
| 698 |
label = "Confused";
|
| 699 |
-
score = Math.
|
| 700 |
}
|
| 701 |
return { label, score: clamp01(score) };
|
| 702 |
}
|
|
@@ -936,11 +980,10 @@
|
|
| 936 |
if (lastDetections.length) drawDetections(lastDetections);
|
| 937 |
}
|
| 938 |
|
| 939 |
-
function cropFaceCanvas(det, targetSize = 256) {
|
| 940 |
const box = det.detection.box;
|
| 941 |
const videoW = els.video.videoWidth || els.overlay.width;
|
| 942 |
const videoH = els.video.videoHeight || els.overlay.height;
|
| 943 |
-
const pad = 0.32;
|
| 944 |
const cx = box.x + box.width / 2;
|
| 945 |
const cy = box.y + box.height / 2;
|
| 946 |
const side = Math.max(box.width, box.height) * (1 + pad * 2);
|
|
@@ -955,7 +998,16 @@
|
|
| 955 |
const c = canvas.getContext("2d", { willReadFrequently: true });
|
| 956 |
c.fillStyle = "#000";
|
| 957 |
c.fillRect(0, 0, targetSize, targetSize);
|
| 958 |
-
c.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 959 |
return canvas;
|
| 960 |
}
|
| 961 |
|
|
@@ -1117,13 +1169,34 @@
|
|
| 1117 |
url = await canvasToBlobUrl(crop);
|
| 1118 |
|
| 1119 |
if (pro.emotionPipe) {
|
| 1120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1121 |
try {
|
| 1122 |
-
|
| 1123 |
-
|
| 1124 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1125 |
}
|
| 1126 |
-
|
|
|
|
| 1127 |
pro.emotionAt = performance.now();
|
| 1128 |
}
|
| 1129 |
|
|
|
|
| 391 |
<header class="hero" aria-label="SentAI heading">
|
| 392 |
<div class="brand">
|
| 393 |
<h1>SentAI</h1>
|
| 394 |
+
<p>Higher-accuracy live face analysis with multi-crop expression scoring, apparent age range, and male/female presentation estimate. Phase 3C is tuned to reduce Happy/Confused dominance.</p>
|
| 395 |
</div>
|
| 396 |
<div class="status-stack">
|
| 397 |
<div class="status-pill" aria-live="polite"><span id="coreDot" class="dot"></span><span id="coreStatus">Loading core models...</span></div>
|
|
|
|
| 404 |
<button id="switchBtn" disabled>Switch front/rear</button>
|
| 405 |
<button id="accuracyBtn" class="accent" disabled>Load accuracy pack</button>
|
| 406 |
<button id="stopBtn" class="danger" disabled>Stop</button>
|
| 407 |
+
<select id="emotionMode" aria-label="Emotion scoring mode">
|
| 408 |
+
<option value="sensitive" selected>Boost sad/fear/disgust</option>
|
| 409 |
+
<option value="balanced">Balanced emotions</option>
|
| 410 |
+
<option value="raw">Raw model scores</option>
|
| 411 |
+
</select>
|
| 412 |
<select id="modeSelect" aria-label="Performance mode">
|
| 413 |
<option value="fast">Fast mode</option>
|
| 414 |
<option value="balanced">Balanced mode</option>
|
|
|
|
| 481 |
</div>
|
| 482 |
|
| 483 |
<div class="note wide">
|
| 484 |
+
The app estimates visible facial expression and apparent age from camera frames. Phase 3C uses multi-crop transformer averaging and sad/fear/disgust calibration to reduce Happy/Confused dominance. It still cannot know a person's true internal feeling.
|
| 485 |
</div>
|
| 486 |
</aside>
|
| 487 |
</section>
|
|
|
|
| 505 |
switchBtn: document.getElementById("switchBtn"),
|
| 506 |
accuracyBtn: document.getElementById("accuracyBtn"),
|
| 507 |
stopBtn: document.getElementById("stopBtn"),
|
| 508 |
+
emotionMode: document.getElementById("emotionMode"),
|
| 509 |
modeSelect: document.getElementById("modeSelect"),
|
| 510 |
cameraTag: document.getElementById("cameraTag"),
|
| 511 |
video: document.getElementById("video"),
|
|
|
|
| 656 |
for (const item of list) {
|
| 657 |
const label = String(item.label || item.class || "").toLowerCase();
|
| 658 |
const score = clamp01(item.score || item.probability || 0);
|
| 659 |
+
if (label.includes("happy") || label.includes("happiness") || label.includes("joy")) scores.Happy = Math.max(scores.Happy, score);
|
| 660 |
+
else if (label.includes("sad") || label.includes("sadness")) scores.Sad = Math.max(scores.Sad, score);
|
| 661 |
+
else if (label.includes("fear") || label.includes("fearful")) scores.Fear = Math.max(scores.Fear, score);
|
| 662 |
else if (label.includes("angry") || label.includes("anger")) scores.Anger = Math.max(scores.Anger, score);
|
| 663 |
+
else if (label.includes("disgust") || label.includes("disgusted")) scores.Disgust = Math.max(scores.Disgust, score);
|
| 664 |
else if (label.includes("surprise") || label.includes("neutral")) {
|
| 665 |
+
// Neutral/surprise should not dominate. They only become Confused when no clear emotion wins.
|
| 666 |
+
const scaled = label.includes("neutral") ? score * 0.045 : score * 0.30;
|
| 667 |
+
scores.Confused = Math.max(scores.Confused, Math.min(0.30, scaled));
|
| 668 |
}
|
| 669 |
}
|
| 670 |
return normalizeScores(scores);
|
| 671 |
}
|
| 672 |
|
| 673 |
+
/**
 * Returns the per-label score multipliers for the selected emotion scoring mode.
 *
 * The mode is read from the `emotionMode` control; when the control or its
 * value is missing, "sensitive" is used. A new object is returned on each call.
 *
 * @returns {{Happy: number, Sad: number, Fear: number, Anger: number, Confused: number, Disgust: number}}
 *   Multiplier for each of the six emotion labels.
 */
function emotionWeights() {
  const selectedMode = els.emotionMode?.value || "sensitive";

  switch (selectedMode) {
    case "raw":
      // Identity weights: every label keeps its score as-is.
      return { Happy: 1.00, Sad: 1.00, Fear: 1.00, Anger: 1.00, Confused: 1.00, Disgust: 1.00 };

    case "balanced":
      // Milder version of the default compensation below.
      return { Happy: 0.88, Sad: 1.28, Fear: 1.38, Anger: 0.95, Confused: 0.58, Disgust: 1.48 };

    default:
      // "sensitive" (and any unrecognized value): compensate for webcam models
      // over-predicting smile/neutral/anger and under-predicting subtle
      // negative expressions.
      return { Happy: 0.70, Sad: 1.62, Fear: 1.82, Anger: 0.86, Confused: 0.42, Disgust: 2.05 };
  }
}
|
| 684 |
+
|
| 685 |
+
/**
 * Re-weights a set of emotion scores according to the selected emotion mode.
 *
 * In "raw" mode the scores are only passed through `normalizeScores`. In every
 * other mode each label is clamped, reshaped with a per-label exponent,
 * multiplied by its weight from `emotionWeights()`, clamped again, and the
 * Confused entry is capped before the result is normalized.
 *
 * @param {Object<string, number>} scores - Scores keyed by emotion label; missing
 *   or falsy entries are treated as 0.
 * @returns {*} Whatever `normalizeScores` returns for the calibrated scores.
 */
function applyEmotionCalibration(scores) {
  const selectedMode = els.emotionMode?.value || "sensitive";
  if (selectedMode === "raw") {
    return normalizeScores(scores);
  }

  const weights = emotionWeights();
  const calibrated = blankScores();

  for (const label of emotionLabels) {
    // NOTE(review): clamp01 presumably limits the value to [0, 1] — confirm against its definition.
    const clamped = clamp01(scores[label] || 0);

    let shaped;
    switch (label) {
      case "Sad":
      case "Fear":
      case "Disgust":
        // Make low but consistent sad/fear/disgust evidence visible instead of
        // being crushed by happy/confused.
        shaped = Math.pow(clamped, 0.82);
        break;
      case "Happy":
        shaped = Math.pow(clamped, 1.12);
        break;
      case "Confused":
        shaped = Math.pow(clamped, 1.18);
        break;
      default:
        shaped = clamped;
    }

    calibrated[label] = clamp01(shaped * weights[label]);
  }

  // Confused is a fallback label, not a high-confidence emotion class, so it is capped.
  const confusedCap = selectedMode === "balanced" ? 0.34 : 0.24;
  calibrated.Confused = Math.min(calibrated.Confused, confusedCap);

  return normalizeScores(calibrated);
}
|
| 702 |
+
|
| 703 |
function combineEmotionScores(faceScores) {
|
| 704 |
const now = performance.now();
|
| 705 |
const freshPro = pro.emotionScores && (now - pro.emotionAt < 6500);
|
|
|
|
| 707 |
for (const label of emotionLabels) {
|
| 708 |
const base = faceScores[label] || 0;
|
| 709 |
const proValue = freshPro ? (pro.emotionScores[label] || 0) : 0;
|
| 710 |
+
// Trust the transformer crop classifier more than face-api expressions when it is fresh.
|
| 711 |
+
combined[label] = freshPro ? (proValue * 0.88 + base * 0.12) : base;
|
| 712 |
}
|
| 713 |
|
| 714 |
+
const calibrated = applyEmotionCalibration(combined);
|
| 715 |
const cfg = modes[els.modeSelect.value] || modes.accurate;
|
| 716 |
if (!emotionEma) {
|
| 717 |
+
emotionEma = calibrated;
|
| 718 |
} else {
|
| 719 |
for (const label of emotionLabels) {
|
| 720 |
+
emotionEma[label] = emotionEma[label] * (1 - cfg.smoothing) + calibrated[label] * cfg.smoothing;
|
| 721 |
}
|
| 722 |
}
|
| 723 |
return normalizeScores(emotionEma);
|
| 724 |
}
|
| 725 |
|
| 726 |
/**
 * Picks the label to display from a set of emotion scores.
 *
 * Steps:
 *  1. Start from the highest-scoring label other than "Confused".
 *  2. Unless the mode is "raw", let the strongest of Disgust/Fear/Sad replace it
 *     when that score is at least 0.24 and within a mode-dependent margin of
 *     the leader.
 *  3. Fall back to "Confused" when the chosen score is below 0.16, or when
 *     Confused outscores a chosen score that is below 0.26.
 *
 * The rescue margin is 0.12 for "balanced" and 0.20 for every other non-raw
 * mode, so an unrecognized mode is handled as "sensitive" — the same fallback
 * used when choosing weights and the Confused cap elsewhere in this file.
 *
 * @param {Object<string, number>} scores - Scores keyed by emotion label.
 * @returns {{label: string, score: number}} Chosen label and its score passed
 *   through `clamp01`.
 */
function topEmotion(scores) {
  const byScoreDesc = (a, b) => b[1] - a[1];

  const ranked = Object.entries(scores)
    .filter(([name]) => name !== "Confused")
    .sort(byScoreDesc);
  let [label, score] = ranked[0] || ["Confused", 0];

  // Strongest of the labels that tend to be under-reported.
  const rare = ["Disgust", "Fear", "Sad"]
    .map((name) => [name, scores[name] || 0])
    .sort(byScoreDesc)[0];

  const mode = els.emotionMode?.value || "sensitive";
  if (mode !== "raw" && rare && rare[1] >= 0.24) {
    // Only "balanced" narrows the margin; anything else behaves like "sensitive".
    const rescueMargin = mode === "balanced" ? 0.12 : 0.20;
    if (rare[1] >= score - rescueMargin) {
      [label, score] = rare;
    }
  }

  const confused = scores.Confused || 0;
  if (score < 0.16) {
    // Nothing is convincing: report Confused with a floor of 0.16.
    label = "Confused";
    score = Math.max(confused, 0.16);
  } else if (confused > score && score < 0.26) {
    // Ambiguous expression: report Confused but keep its confidence modest.
    label = "Confused";
    score = Math.min(scores.Confused || 0.22, 0.28);
  }

  return { label, score: clamp01(score) };
}
|
|
|
|
| 980 |
if (lastDetections.length) drawDetections(lastDetections);
|
| 981 |
}
|
| 982 |
|
| 983 |
+
function cropFaceCanvas(det, targetSize = 256, pad = 0.32, filter = "none", mirror = false) {
|
| 984 |
const box = det.detection.box;
|
| 985 |
const videoW = els.video.videoWidth || els.overlay.width;
|
| 986 |
const videoH = els.video.videoHeight || els.overlay.height;
|
|
|
|
| 987 |
const cx = box.x + box.width / 2;
|
| 988 |
const cy = box.y + box.height / 2;
|
| 989 |
const side = Math.max(box.width, box.height) * (1 + pad * 2);
|
|
|
|
| 998 |
const c = canvas.getContext("2d", { willReadFrequently: true });
|
| 999 |
c.fillStyle = "#000";
|
| 1000 |
c.fillRect(0, 0, targetSize, targetSize);
|
| 1001 |
+
c.filter = filter;
|
| 1002 |
+
if (mirror) {
|
| 1003 |
+
c.translate(targetSize, 0);
|
| 1004 |
+
c.scale(-1, 1);
|
| 1005 |
+
c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
|
| 1006 |
+
c.setTransform(1, 0, 0, 1, 0, 0);
|
| 1007 |
+
} else {
|
| 1008 |
+
c.drawImage(els.video, sx, sy, sw, sh, 0, 0, targetSize, targetSize);
|
| 1009 |
+
}
|
| 1010 |
+
c.filter = "none";
|
| 1011 |
return canvas;
|
| 1012 |
}
|
| 1013 |
|
|
|
|
| 1169 |
url = await canvasToBlobUrl(crop);
|
| 1170 |
|
| 1171 |
if (pro.emotionPipe) {
|
| 1172 |
+
const cropVariants = [
|
| 1173 |
+
cropFaceCanvas(primary, 288, 0.16, "contrast(1.10) saturate(0.96)", false),
|
| 1174 |
+
cropFaceCanvas(primary, 288, 0.34, "contrast(1.18) saturate(0.92)", false),
|
| 1175 |
+
cropFaceCanvas(primary, 288, 0.06, "contrast(1.22) brightness(1.03)", false),
|
| 1176 |
+
cropFaceCanvas(primary, 288, 0.22, "contrast(1.14) saturate(0.92)", true),
|
| 1177 |
+
];
|
| 1178 |
+
const urls = [];
|
| 1179 |
+
const aggregate = blankScores();
|
| 1180 |
+
let count = 0;
|
| 1181 |
try {
|
| 1182 |
+
for (const variant of cropVariants) {
|
| 1183 |
+
const variantUrl = await canvasToBlobUrl(variant);
|
| 1184 |
+
urls.push(variantUrl);
|
| 1185 |
+
let output;
|
| 1186 |
+
try {
|
| 1187 |
+
output = await pro.emotionPipe(variantUrl, { topK: 7 });
|
| 1188 |
+
} catch (_) {
|
| 1189 |
+
output = await pro.emotionPipe(variantUrl);
|
| 1190 |
+
}
|
| 1191 |
+
const scores = normalizeExternalEmotion(output);
|
| 1192 |
+
for (const label of emotionLabels) aggregate[label] += scores[label] || 0;
|
| 1193 |
+
count += 1;
|
| 1194 |
+
}
|
| 1195 |
+
} finally {
|
| 1196 |
+
for (const variantUrl of urls) URL.revokeObjectURL(variantUrl);
|
| 1197 |
}
|
| 1198 |
+
for (const label of emotionLabels) aggregate[label] = count ? aggregate[label] / count : 0;
|
| 1199 |
+
pro.emotionScores = normalizeScores(aggregate);
|
| 1200 |
pro.emotionAt = performance.now();
|
| 1201 |
}
|
| 1202 |
|