HawkEyesAI
/

Voice-AI-Agent

Model card Files Files and versions

Voice-AI-Agent / frontend /script.js

rakib72642's picture

checkpoint 3 stable

496a69a about 10 hours ago

history blame contribute delete

47.4 kB

	'use strict';

	// ─── DOM refs ─────────────────────────────────────────────────────────────────
	// Element handles are looked up once, at script load, and shared by every
	// function in this file. `micBtn` and `voiceViz` are dereferenced immediately
	// below (querySelector / querySelectorAll), so the script throws at load time
	// if either element is missing from the page.
	const chatBox = document.getElementById('chat-box');
	const sendBtn = document.getElementById('send-btn');
	const textInput = document.getElementById('text-input');
	const micBtn = document.getElementById('mic-btn');
	const micLabel = micBtn.querySelector('.mic-label');
	const stopBtn = document.getElementById('stop-btn');
	const stateLabel = document.getElementById('state-label');
	const stateDot = document.getElementById('state-dot');
	const clearBtn = document.getElementById('clear-btn');
	const brainBtn = document.getElementById('brain-mode-btn');
	const voiceCaption = document.getElementById('voice-caption');
	const brainStage = document.getElementById('brain-stage');
	const brainBubbleStt = document.getElementById('brain-bubble-stt');
	const brainBubbleTts = document.getElementById('brain-bubble-tts');
	const brainBubbleSttText = document.getElementById('brain-bubble-stt-text');
	const brainBubbleTtsText = document.getElementById('brain-bubble-tts-text');
	const voiceViz = document.getElementById('voice-viz');
	// Bars animated from the microphone analyser while listening.
	const vizBars = Array.from(voiceViz.querySelectorAll('.viz-bar'));
	// Bars that mirror the number of audio chunks currently in flight.
	const queueBars = Array.from(document.querySelectorAll('.queue-bar'));
	const chunksCount = document.getElementById('chunks-count');
	const sidebarEl = document.getElementById('sidebar');
	const sidebarToggle = document.getElementById('sidebar-toggle');
	const mobileMenuBtn = document.getElementById('mobile-menu-btn');
	const appEl = document.getElementById('app');

	// Settings controls. NOTE(review): presumably bound to SILENCE_DB, SILENCE_MS
	// and the TTS voice; the binding code is not in this part of the file.
	const sThreshold = document.getElementById('s-threshold');
	const sThresholdVal = document.getElementById('s-threshold-val');
	const sTimeout = document.getElementById('s-timeout');
	const sTimeoutVal = document.getElementById('s-timeout-val');
	const sVoice = document.getElementById('s-voice');

	// Latency read-outs; the message handlers write "<n> ms" strings into these.
	const mStt = document.getElementById('m-stt');
	const mLlm = document.getElementById('m-llm');
	const mTts = document.getElementById('m-tts');
	const mTotal = document.getElementById('m-total');
	// Connection badge; shows "Ready" or "Reconnecting".
	const sysStat = document.getElementById('sys-status');

	// ─── Ephemeral user identity ──────────────────────────────────────────────────
	// A fresh identifier is minted on every page load, so a reload counts as a new
	// user. Prefers crypto.randomUUID(); falls back to timestamp + Math.random().
	const USER_ID = (() => {
	  const { crypto: webCrypto } = window;
	  const hasUuid =
	    Boolean(webCrypto) && typeof webCrypto.randomUUID === 'function';

	  if (!hasUuid) {
	    const stamp = Date.now().toString(36);
	    const noise = Math.random().toString(36).slice(2, 10);
	    return `u_${stamp}_${noise}`;
	  }

	  // Strip the dashes and keep the first 16 hex characters.
	  const hex = webCrypto.randomUUID().replace(/-/g, '');
	  return `u_${hex.slice(0, 16)}`;
	})();

	// ─── WebSocket base URL ────────────────────────────────────────────────────────
	// Ordered, de-duplicated list of candidate WebSocket origins: the page's own
	// host first (when it has one), then the local development ports.
	const WS_BASES = (() => {
	  const scheme = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
	  const pageHost = window.location.host;
	  const candidates = [
	    pageHost && pageHost !== 'null' ? `${scheme}//${pageHost}` : '',
	    `${scheme}//127.0.0.1:8000`,
	    `${scheme}//127.0.0.1:8679`,
	    `${scheme}//localhost:8000`,
	    `${scheme}//localhost:8679`,
	  ];
	  // Drop empty entries and keep only the first occurrence of each base.
	  return candidates.filter(
	    (base, pos) => Boolean(base) && candidates.indexOf(base) === pos,
	  );
	})();
	// Index into WS_BASES of the base currently in use.
	let _wsBaseIndex = 0;
	console.log('[Boot] WS bases:', WS_BASES.join(', '));

	// ─── WS handles ───────────────────────────────────────────────────────────────
	// Live sockets plus per-socket reconnect bookkeeping. The retry counters are
	// reset to 0 when a socket opens and incremented each time a reconnect fires.
	let chatWS = null;
	let voiceWS = null;
	let _chatRetry = 0;
	let _voiceRetry = 0;
	let _chatRetryTimer = null;
	let _voiceRetryTimer = null;

	// ─── VAD / recording settings ─────────────────────────────────────────────────
	let SILENCE_MS = 800; // silence needed to end an utterance; default, user-adjustable in UI
	let SILENCE_DB = -30; // RMS level (dB) above which a frame counts as speech
	const VAD_MS = 60; // interval between VAD ticks
	const MIN_SPEECH_MS = 320; // discard noise bursts shorter than this

	// ─── Playback state ───────────────────────────────────────────────────────────
	let _ctx = null; // playback AudioContext, created on demand
	let _schedEnd = 0; // context time at which the last scheduled chunk ends
	let _endTimer = null; // timer that fires _done() once playback has drained
	let _cancelled = false; // set by stopAllAudio(); blocks further scheduling
	let _inFlight = 0; // chunks being decoded or played (drives the queue bars)
	let _ttsPlaying = false;
	let _activeSources = []; // buffer sources that have been started and not yet ended
	let _bargeInArmedAt = 0; // epoch ms when loud speech was first seen; 0 = not armed
	let _bargeInFiredAt = 0; // epoch ms of the last barge-in, used for debouncing
	let _dropAudioUntil = 0; // binary packets arriving before this epoch ms are dropped
	let _audioChain = Promise.resolve(); // serialises enqueueAudio() calls in order
	let _playbackGen = 0; // bumped by stopAllAudio(); queued work from older gens is skipped
	let _expectedSeq = 0; // next sequence number to play within the current turn
	let _pendingAudio = new Map(); // seq -> payload, for packets that arrived early
	let _currentTurn = 0; // turn id; packets carrying a different turn are ignored

	// Client-side playback speed multiplier.
	// This makes speech faster immediately even if the TTS provider speed setting
	// is limited/ignored. 1.0 = normal, >1.0 = faster.
	// enqueueAudio() clamps the applied value to the range 0.85–2.0.
	let TTS_PLAYBACK_RATE = 1.0;
	let brainMode = false;
	let brainVoiceActive = false;
	let brainRestartTimer = null;
	let brainAutoRestartTimer = null;
	let brainPendingAudio = null;
	let voicePendingPackets = []; // utterances recorded while the voice socket was down
	let brainLastResponse = '';
	// NOTE(review): presumably tracks whether the welcome line was already played;
	// it is not read in this part of the file — confirm against the brain-mode code.
	let _brainWelcomed = false;

	// Maps Bengali (০-৯) and Arabic-Indic (٠-٩) digits to their ASCII equivalents.
	const VISIBLE_DIGIT_MAP = {
	  '০': '0',
	  '১': '1',
	  '২': '2',
	  '৩': '3',
	  '৪': '4',
	  '৫': '5',
	  '৬': '6',
	  '৭': '7',
	  '৮': '8',
	  '৯': '9',
	  '٠': '0',
	  '١': '1',
	  '٢': '2',
	  '٣': '3',
	  '٤': '4',
	  '٥': '5',
	  '٦': '6',
	  '٧': '7',
	  '٨': '8',
	  '٩': '9',
	};

	/**
	 * Replaces every Bengali or Arabic-Indic digit in `text` with its ASCII digit.
	 *
	 * @param {*} text - Value to convert; any falsy value is treated as ''.
	 * @returns {string} The text with only ASCII digits.
	 */
	function _toAsciiDigits(text) {
	  // The logical-OR operators here were previously backslash-escaped, which is
	  // a syntax error in JavaScript.
	  return String(text || '').replace(
	    /[০-৯٠-٩]/g,
	    (ch) => VISIBLE_DIGIT_MAP[ch] || ch,
	  );
	}

	/**
	 * Prepares AI text for display: applies two fixed word substitutions, then
	 * converts non-ASCII digits through _toAsciiDigits().
	 *
	 * @param {*} text - Raw text; any falsy value yields ''.
	 * @returns {string} Display-ready text.
	 */
	function _normalizeVisibleAiText(text) {
	  if (!text) {
	    return '';
	  }
	  const wordSwaps = [
	    ['উপলব্ধ', 'এভেলেবেল'],
	    ['জ্বি', 'আচ্ছা'],
	  ];
	  let visible = String(text);
	  for (const [from, to] of wordSwaps) {
	    visible = visible.replaceAll(from, to);
	  }
	  return _toAsciiDigits(visible);
	}

	// Greeting text for brain mode. The leading "[calm]" tag is part of the string.
	// NOTE(review): presumably a style hint for the TTS backend — confirm.
	const BRAIN_WELCOME_TEXT =
	'[calm] হ্যালো, আমি আয়েশা! হাসপাতাল রিসেপশন থেকে বলছি। আপনি কি কোনো অ্যাপয়েন্টমেন্ট বুক করতে চান?';

	// ─── Recording state ──────────────────────────────────────────────────────────
	let micStream = null; // MediaStream returned by getUserMedia
	let analyserCtx = null; // AudioContext that hosts the VAD analyser
	let analyser = null; // AnalyserNode read by vadTick() and vizTick()
	let mediaRecorder = null;
	let audioChunks = []; // Blob chunks collected for the current utterance
	let isListening = false; // mic is open and VAD is running
	let isSpeaking = false; // VAD currently considers the user to be speaking
	let isProcessing = false; // a request is in flight
	let isRecordingLocked = false; // blocks new recordings until _done() runs
	let silenceTimer = null; // pending end-of-utterance timeout
	let vadInt = null; // interval handle for vadTick
	let vizInt = null; // interval handle for vizTick
	let _speechStartMs = 0; // epoch ms at which the current utterance began
	let _recorderMime = 'audio/webm';

	// ─── AI streaming bubble state ────────────────────────────────────────────────
	let aiEl = null; // chat bubble currently being streamed into
	let aiTxt = ''; // text accumulated for that bubble
	let thinkingEl = null; // "thinking" placeholder bubble, when shown
	let _captionRaf = 0; // pending requestAnimationFrame id for caption updates
	let _captionText = '';

	// ─── Latency timestamps ───────────────────────────────────────────────────────
	// Epoch-ms marks for the current turn; 0 means "not reached yet".
	let tSend = 0,
	tStt = 0,
	tLlm = 0,
	tTts = 0;

	/**
	 * Application entry point: opens both WebSockets, reveals the app container
	 * by adding the 'visible' class, and puts the UI into the 'ready' state.
	 */
	function boot() {
	initWebSockets();
	appEl.classList.add('visible');
	setState('ready');
	}

	// ═══════════════════════════════════════════════════════════════════════════════
	// WEBSOCKETS
	// ═══════════════════════════════════════════════════════════════════════════════

	/**
	 * Exponential reconnect delay: 1 s doubled per retry, capped at 16 s.
	 *
	 * @param {number} r - Retry count, starting at 0.
	 * @returns {number} Delay in milliseconds.
	 */
	function _backoff(r) {
	  const baseMs = 1000;
	  const capMs = 16000;
	  return Math.min(baseMs * 2 ** r, capMs);
	}

	/**
	 * Returns the WebSocket base URL currently selected by `_wsBaseIndex`.
	 * The index is clamped to the last entry; if the lookup is still empty the
	 * first base is returned instead.
	 *
	 * @returns {string|undefined} Base such as "ws://host:port"; undefined only
	 *   when WS_BASES is empty.
	 */
	function _wsBase() {
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  const clamped = Math.min(_wsBaseIndex, WS_BASES.length - 1);
	  return WS_BASES[clamped] || WS_BASES[0];
	}

	/**
	 * Rotates to the next candidate WebSocket base, wrapping around at the end.
	 * With zero or one candidate there is nothing to rotate to, so the current
	 * base is returned unchanged.
	 *
	 * @returns {string} The base now in effect.
	 */
	function _advanceWsBase() {
	  const total = WS_BASES.length;
	  if (total > 1) {
	    _wsBaseIndex = (_wsBaseIndex + 1) % total;
	    console.log('[WS] Switching base to:', _wsBase());
	  }
	  return _wsBase();
	}

	/**
	 * Updates the connection badge text and colour class.
	 * Does nothing when the badge element is absent.
	 *
	 * @param {boolean} online - Truthy shows "Ready" (green); falsy shows
	 *   "Reconnecting" (yellow).
	 */
	function _setSysStatus(online) {
	  if (!sysStat) return;
	  const [text, badge] = online
	    ? ['Ready', 'badge-green']
	    : ['Reconnecting', 'badge-yellow'];
	  sysStat.textContent = text;
	  sysStat.className = `status-badge ${badge}`;
	}

	/**
	 * Opens the chat WebSocket at `${_wsBase()}/ws/chat` unless one is already
	 * connecting or open, and wires its handlers.
	 *
	 * On open: resets the retry counter and sends an `init` message carrying
	 * USER_ID. On close: rotates to the next base URL and schedules a reconnect
	 * after `_backoff(_chatRetry)` ms; the retry counter is incremented when that
	 * timer fires. Incoming messages are dispatched to onChatMsg.
	 */
	function _connectChat() {
	// readyState CONNECTING (0) and OPEN (1) both mean a socket is already usable.
	if (chatWS && chatWS.readyState <= WebSocket.OPEN) return;
	chatWS = new WebSocket(`${_wsBase()}/ws/chat`);
	chatWS.onopen = () => {
	_chatRetry = 0;
	console.log('[Chat WS] connected');
	chatWS.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
	};
	chatWS.onerror = (e) => console.error('[Chat WS] error:', e);
	chatWS.onclose = (ev) => {
	console.log(`[Chat WS] closed (${ev.code})`);
	// _wsBaseIndex is shared with the voice socket, so this rotation also
	// changes the base the voice socket will use on its next reconnect.
	_advanceWsBase();
	// Replace any reconnect that is already pending so only one is scheduled.
	clearTimeout(_chatRetryTimer);
	_chatRetryTimer = setTimeout(() => {
	_chatRetry++;
	_connectChat();
	}, _backoff(_chatRetry));
	};
	chatWS.onmessage = onChatMsg;
	}

	/**
	 * Opens the voice WebSocket at `${_wsBase()}/ws/voice` (binary frames are
	 * delivered as ArrayBuffer) unless one is already connecting or open.
	 *
	 * On open: resets the retry counter, sends `init` with USER_ID, marks the
	 * system online, and flushes anything queued while the socket was down.
	 * On close: marks the system offline, aborts any active voice turn, rotates
	 * to the next base URL and schedules a reconnect with exponential backoff.
	 * In active brain mode it also calls _queueBrainReconnect().
	 */
	function _connectVoice() {
	  // readyState CONNECTING (0) and OPEN (1) both mean a socket is already usable.
	  if (voiceWS && voiceWS.readyState <= WebSocket.OPEN) return;
	  voiceWS = new WebSocket(`${_wsBase()}/ws/voice`);
	  voiceWS.binaryType = 'arraybuffer';

	  voiceWS.onopen = () => {
	    _voiceRetry = 0;
	    console.log('[Voice WS] connected, uid:', USER_ID);
	    voiceWS.send(JSON.stringify({ type: 'init', user_id: USER_ID }));
	    _setSysStatus(true);
	    _flushVoicePendingPackets();
	    _flushBrainPendingAudio();
	  };
	  voiceWS.onerror = (e) => console.error('[Voice WS] error:', e);
	  voiceWS.onclose = (ev) => {
	    console.log(`[Voice WS] closed (${ev.code})`);
	    _setSysStatus(false);
	    // A dropped socket can never deliver the 'end' that unlocks the mic, so
	    // release everything here. (The logical-OR operators on this condition
	    // were previously backslash-escaped, which is a syntax error.)
	    if (isListening || isSpeaking || isProcessing) {
	      _teardownMicHardware();
	      _resetVoiceState();
	      setState('ready');
	      setMic('off');
	      micBtn.disabled = false;
	    }
	    // Replace any reconnect that is already pending so only one is scheduled.
	    clearTimeout(_voiceRetryTimer);
	    _advanceWsBase();
	    _voiceRetryTimer = setTimeout(() => {
	      _voiceRetry++;
	      _connectVoice();
	    }, _backoff(_voiceRetry));
	    if (brainMode && brainVoiceActive) {
	      _queueBrainReconnect();
	    }
	  };
	  voiceWS.onmessage = onVoiceMsg;
	}

	/** Opens the chat socket first, then the voice socket. */
	function initWebSockets() {
	_connectChat();
	_connectVoice();
	}

	// ── Chat WS handler ───────────────────────────────────────────────────────────
	/**
	 * Handles one JSON message from the chat WebSocket.
	 *
	 * - 'llm_token': appends a streamed token to the current AI bubble.
	 * - 'chat':      replaces the bubble text with the complete reply.
	 * - 'end':       finalises the turn, records total latency, returns to ready.
	 * - 'error':     shows the error as a system message and returns to ready.
	 * Payloads that are not valid JSON are ignored, as are unknown message types.
	 *
	 * @param {MessageEvent} ev - WebSocket message event; `ev.data` is JSON text.
	 */
	function onChatMsg(ev) {
	  let msg;
	  try {
	    msg = JSON.parse(ev.data);
	  } catch {
	    return;
	  }
	  console.log('[Chat WS]', msg.type);

	  // Creates the streaming AI bubble the first time it is needed.
	  const ensureBubble = () => {
	    if (aiEl) return;
	    aiEl = document.createElement('div');
	    aiEl.className = 'message ai';
	    chatBox.appendChild(aiEl);
	  };

	  const kind = msg.type;

	  if (kind === 'llm_token') {
	    if (!msg.token) return;
	    // The first token of a turn marks the LLM latency.
	    if (tLlm === 0) {
	      tLlm = Date.now();
	      if (tSend > 0) mLlm.textContent = `${tLlm - tSend} ms`;
	    }
	    _removeThinking();
	    ensureBubble();
	    aiTxt += msg.token;
	    _renderAiText();
	    return;
	  }

	  if (kind === 'chat') {
	    if (!msg.text) return;
	    _removeThinking();
	    ensureBubble();
	    aiTxt = msg.text;
	    _renderAiText();
	    return;
	  }

	  if (kind === 'end') {
	    _removeThinking();
	    _renderAiText(true);
	    aiEl = null;
	    aiTxt = '';
	    _setCaption('');
	    if (tSend > 0) mTotal.textContent = `${Date.now() - tSend} ms`;
	    tSend = tStt = tLlm = tTts = 0;
	    isProcessing = false;
	    setState('ready');
	    return;
	  }

	  if (kind === 'error') {
	    _removeThinking();
	    appendMsg('⚠️ ' + msg.text, 'system');
	    aiEl = null;
	    aiTxt = '';
	    _setCaption('');
	    isProcessing = false;
	    setState('ready');
	  }
	}

	// ── Voice WS handler ──────────────────────────────────────────────────────────
	/**
	 * Handles one message from the voice WebSocket.
	 *
	 * Binary messages are framed TTS audio: a 4-byte big-endian turn id, a 4-byte
	 * big-endian sequence id, then the encoded audio. Packets are re-ordered by
	 * sequence number within the current turn and queued for playback in order;
	 * packets from other turns, and packets arriving inside the post-cancel drop
	 * window, are discarded.
	 *
	 * Text messages are JSON events: 'init_ack', 'stt', 'llm_token', 'llm_full',
	 * 'end', 'error', 'pong'. Payloads that are not valid JSON are ignored.
	 *
	 * @param {MessageEvent} ev - WebSocket message event (ArrayBuffer or JSON text).
	 */
	function onVoiceMsg(ev) {
	  if (ev.data instanceof ArrayBuffer) {
	    if (Date.now() < _dropAudioUntil) return; // drop late packets after cancel
	    if (ev.data.byteLength <= 8) return; // header only, no audio payload

	    // DataView reads big-endian by default and returns unsigned values.
	    const header = new DataView(ev.data, 0, 8);
	    const turn = header.getUint32(0);
	    const seq = header.getUint32(4);
	    if (turn !== _currentTurn >>> 0) return; // late packet from an older turn

	    // Flag TTS as active only once the packet is known to belong to this
	    // turn. Setting it before validation let a stray late packet leave the
	    // flag stuck on after _done() had already cleared it.
	    _ttsPlaying = true;
	    _pendingAudio.set(seq, ev.data.slice(8));

	    // Drain every packet that is now contiguous with what was already queued.
	    const gen = _playbackGen;
	    while (_pendingAudio.has(_expectedSeq)) {
	      const playBuf = _pendingAudio.get(_expectedSeq);
	      _pendingAudio.delete(_expectedSeq);
	      _audioChain = _audioChain
	        .catch(() => {})
	        .then(() => {
	          // Skip work that was queued before a cancel or barge-in.
	          if (gen !== _playbackGen || _cancelled) return undefined;
	          return enqueueAudio(playBuf);
	        });
	      _expectedSeq++;
	    }
	    return;
	  }

	  let msg;
	  try {
	    msg = JSON.parse(ev.data);
	  } catch {
	    return;
	  }
	  console.log('[Voice WS]', msg.type);

	  switch (msg.type) {
	    case 'init_ack':
	      console.log('[Voice WS] ack uid:', msg.user_id);
	      break;

	    case 'stt':
	      // New turn: reset audio ordering/buffers.
	      if (typeof msg.turn === 'number') _currentTurn = msg.turn >>> 0;
	      _expectedSeq = 0;
	      _pendingAudio.clear();
	      tStt = Date.now();
	      if (tSend > 0) mStt.textContent = tStt - tSend + ' ms';
	      _removeThinking();
	      if (!brainMode) appendMsg('🎤 ' + msg.text, 'user');
	      aiEl = null;
	      aiTxt = '';
	      _setCaption('');
	      _brainSetSttBubble(msg.text);
	      if (brainMode) _brainSetTtsBubble(brainLastResponse || '', false);
	      _brainModeSetSearch(true);
	      appendThinking();
	      setState('processing');
	      break;

	    case 'llm_token': {
	      if (!msg.token) break;
	      const tokenText = _normalizeVisibleAiText(msg.token);
	      // The first token of a turn marks the LLM latency (measured from STT).
	      if (tLlm === 0) {
	        tLlm = Date.now();
	        if (tStt > 0) mLlm.textContent = tLlm - tStt + ' ms';
	      }
	      _removeThinking();
	      // Re-normalise the joined text so substitutions that straddle a token
	      // boundary are still applied.
	      aiTxt = _normalizeVisibleAiText(aiTxt + tokenText);
	      _setCaption(aiTxt);
	      brainLastResponse = aiTxt;
	      _brainSetTtsBubble(brainLastResponse);
	      _brainModeSetSearch(true);
	      if (!brainMode) {
	        if (!aiEl) {
	          aiEl = document.createElement('div');
	          aiEl.className = 'message ai';
	          chatBox.appendChild(aiEl);
	        }
	        _renderAiText();
	      }
	      break;
	    }

	    case 'llm_full':
	      if (!msg.text) break;
	      // Best-effort recovery path: if any streamed tokens were dropped, the
	      // server sends the final full text once at turn end.
	      if (typeof msg.turn === 'number') {
	        _currentTurn = msg.turn >>> 0;
	        _expectedSeq = 0;
	        _pendingAudio.clear();
	      }
	      brainLastResponse = _normalizeVisibleAiText(msg.text);
	      aiTxt = brainLastResponse;
	      _brainSetTtsBubble(brainLastResponse);
	      if (!brainMode) {
	        if (!aiEl) {
	          aiEl = document.createElement('div');
	          aiEl.className = 'message ai';
	          chatBox.appendChild(aiEl);
	        }
	        _renderAiText();
	      }
	      break;

	    case 'end':
	      // In brain mode we don't stream tokens into chat UI, so append a final
	      // transcript line at turn end.
	      if (brainMode && aiTxt) appendMsg(aiTxt, 'ai');
	      _renderAiText(true);
	      _removeThinking();
	      if (brainMode) brainLastResponse = aiTxt || brainLastResponse;
	      aiEl = null;
	      aiTxt = '';
	      _setCaption('');
	      _expectedSeq = 0;
	      _pendingAudio.clear();
	      if (tSend > 0) mTotal.textContent = Date.now() - tSend + ' ms';
	      tSend = tStt = tLlm = tTts = 0;
	      isProcessing = false;
	      // BUG-FIX-C: schedule _done() to fire after TTS audio drains.
	      // If no TTS audio arrived (_schedEnd == 0), _done fires in ~300 ms.
	      _scheduleEnd();
	      break;

	    case 'error':
	      _removeThinking();
	      appendMsg('⚠️ ' + msg.text, 'system');
	      aiEl = null;
	      aiTxt = '';
	      _setCaption('');
	      _expectedSeq = 0;
	      _pendingAudio.clear();
	      _brainSetTtsBubble('', false);
	      _brainModeSetSearch(false);
	      isProcessing = false;
	      // BUG-FIX-C: unconditionally unlock on error
	      _done();
	      break;

	    case 'pong':
	      break;

	    default:
	      console.log('[Voice WS] unknown:', msg.type);
	  }
	}

	// ─── Thinking bubble ──────────────────────────────────────────────────────────
	/**
	 * Shows the three-dot "thinking" placeholder at the bottom of the chat.
	 * Does nothing in brain mode or when a placeholder is already showing.
	 */
	function appendThinking() {
	  if (brainMode || thinkingEl) return;

	  const bubble = document.createElement('div');
	  thinkingEl = bubble;
	  bubble.className = 'message ai thinking';
	  bubble.innerHTML =
	    '<span class="dot"></span><span class="dot"></span><span class="dot"></span>';
	  chatBox.appendChild(bubble);
	  chatBox.scrollTop = chatBox.scrollHeight;
	}
	/** Removes the "thinking" placeholder from the chat, if one is showing. */
	function _removeThinking() {
	  if (!thinkingEl) return;
	  thinkingEl.remove();
	  thinkingEl = null;
	}

	/**
	 * Renders the accumulated AI text (`aiTxt`) into the current bubble (`aiEl`)
	 * and scrolls the chat to the bottom.
	 *
	 * When the `marked` library is loaded the text is rendered as Markdown.
	 * Otherwise it is shown as plain text with newlines turned into <br>.
	 *
	 * @param {boolean} [force=false] - When true and there is no text to show,
	 *   clear the bubble instead of leaving its previous content.
	 */
	function _renderAiText(force = false) {
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  if (!aiEl || !aiTxt) {
	    if (force && aiEl) aiEl.innerHTML = '';
	    return;
	  }

	  if (typeof marked !== 'undefined') {
	    // NOTE(security): marked does not sanitise its output. The model's reply
	    // is inserted as HTML here; pass it through an HTML sanitiser if the
	    // model output cannot be fully trusted.
	    aiEl.innerHTML = marked.parse(aiTxt);
	  } else {
	    // Plain-text fallback: escape HTML metacharacters first so the reply
	    // cannot inject markup, then convert newlines to line breaks.
	    const escaped = aiTxt
	      .replace(/&/g, '&amp;')
	      .replace(/</g, '&lt;')
	      .replace(/>/g, '&gt;')
	      .replace(/"/g, '&quot;')
	      .replace(/'/g, '&#39;');
	    aiEl.innerHTML = escaped.replace(/\n/g, '<br>');
	  }
	  chatBox.scrollTop = chatBox.scrollHeight;
	}

	/**
	 * Sets the live voice caption. Updates are coalesced to one DOM write per
	 * animation frame; the frame always paints the most recent text. In brain
	 * mode the caption is blanked instead.
	 *
	 * @param {string} text - Caption text; normalised before display.
	 */
	function _setCaption(text) {
	  _captionText = _normalizeVisibleAiText(text);
	  if (!_captionRaf) {
	    _captionRaf = requestAnimationFrame(function paintCaption() {
	      _captionRaf = 0;
	      if (voiceCaption) {
	        voiceCaption.textContent = brainMode ? '' : _captionText;
	      }
	    });
	  }
	}

	// ═══════════════════════════════════════════════════════════════════════════════
	// AUDIO PLAYBACK
	// ═══════════════════════════════════════════════════════════════════════════════

	/**
	 * Returns the playback AudioContext, creating a new one (and resetting the
	 * scheduling cursor) when none exists or the previous one was closed. A
	 * suspended context is asked to resume.
	 *
	 * @returns {AudioContext} The live playback context.
	 */
	function _ctxEnsure() {
	  // The logical-OR operators here were previously backslash-escaped, which
	  // is a syntax error.
	  if (!_ctx || _ctx.state === 'closed') {
	    const AudioCtor = window.AudioContext || window.webkitAudioContext;
	    _ctx = new AudioCtor();
	    _schedEnd = 0;
	  }
	  if (_ctx.state === 'suspended') {
	    // resume() returns a promise that rejects when the browser blocks audio
	    // (for example before any user gesture). Handle it so the failure is
	    // logged rather than surfacing as an unhandled rejection.
	    const resuming = _ctx.resume();
	    if (resuming && typeof resuming.catch === 'function') {
	      resuming.catch((err) => console.warn('[Audio] resume failed:', err));
	    }
	  }
	  return _ctx;
	}

	/**
	 * Stops and disconnects every active buffer source and empties
	 * `_activeSources` in place. Each source's `onended` handler is removed first
	 * so stopping it does not trigger the normal end-of-chunk bookkeeping.
	 * Failures from stop() or disconnect() are ignored.
	 */
	function _stopAllSources() {
	  const drained = _activeSources.splice(0, _activeSources.length);
	  drained.forEach((node) => {
	    try {
	      node.onended = null;
	      node.stop(0);
	    } catch {}
	    try {
	      node.disconnect();
	    } catch {}
	  });
	}

	/**
	 * Decodes one encoded audio chunk and schedules it to play immediately after
	 * the previously scheduled chunk (gapless playback).
	 *
	 * The chunk is dropped silently when playback has been cancelled, when it
	 * belongs to an older playback generation, when decoding fails, or when the
	 * decoded audio is effectively empty.
	 *
	 * @param {ArrayBuffer} buf - Encoded audio bytes. The buffer is copied before
	 *   decoding, so the caller's buffer is left intact.
	 * @returns {Promise<void>} Resolves once the chunk is scheduled or dropped.
	 */
	async function enqueueAudio(buf) {
	  if (_cancelled) return;
	  // Remember which playback generation this chunk belongs to, so a cancel or
	  // barge-in that happens during decoding can be detected afterwards.
	  const gen = _playbackGen;
	  _inFlight++;
	  _vizQ();

	  // Gives back this chunk's slot in the in-flight counter. After a cancel the
	  // counter has already been reset for the new generation, so a stale chunk
	  // must not decrement it again.
	  const release = () => {
	    if (gen !== _playbackGen) return;
	    _inFlight = Math.max(0, _inFlight - 1);
	    _vizQ();
	  };

	  const ctx = _ctxEnsure();
	  let decoded;
	  try {
	    decoded = await ctx.decodeAudioData(buf.slice(0));
	  } catch (e) {
	    console.warn('[Audio] decode error:', e?.message ?? e);
	    release();
	    return;
	  }

	  // Re-validate after the await. Checking `_cancelled` alone is not enough:
	  // a barge-in cancels playback and then immediately clears that flag, which
	  // previously let a chunk from the interrupted reply be scheduled anyway.
	  const stale =
	    _cancelled || gen !== _playbackGen || ctx.state === 'closed';
	  if (stale || !decoded || decoded.duration < 0.001) {
	    release();
	    return;
	  }

	  // The first audible chunk of a turn marks the TTS latency.
	  if (tTts === 0 && tLlm > 0) {
	    tTts = Date.now();
	    mTts.textContent = tTts - tLlm + ' ms';
	  }

	  const src = ctx.createBufferSource();
	  src.buffer = decoded;
	  let rate = 1.0;
	  try {
	    src.playbackRate.value = Math.max(0.85, Math.min(2.0, TTS_PLAYBACK_RATE));
	    rate = src.playbackRate.value || 1.0;
	  } catch {
	    // Best effort: if the rate cannot be applied, play at normal speed.
	    rate = 1.0;
	  }
	  src.connect(ctx.destination);

	  // Start just after the previous chunk ends, but never in the past.
	  const GAP_S = 0.001;
	  const start = Math.max(ctx.currentTime + 0.01, _schedEnd + GAP_S);
	  _activeSources.push(src);
	  src.start(start);
	  // A faster playback rate shortens the chunk's wall-clock duration.
	  _schedEnd = start + decoded.duration / Math.max(0.01, rate);

	  src.onended = () => {
	    release();
	    const idx = _activeSources.indexOf(src);
	    if (idx >= 0) _activeSources.splice(idx, 1);
	  };

	  setState('speaking');
	}

	/**
	 * Refreshes the playback-queue read-out: writes the in-flight chunk count and
	 * lights one bar per in-flight chunk. Lit bars get a random height between
	 * 12 and 42 px; the rest are set to 4 px.
	 */
	function _vizQ() {
	  if (chunksCount) {
	    chunksCount.textContent = _inFlight;
	  }
	  for (const [pos, bar] of queueBars.entries()) {
	    const lit = pos < _inFlight;
	    bar.classList.toggle('active', lit);
	    bar.style.height = `${lit ? 12 + Math.random() * 30 : 4}px`;
	  }
	}

	/**
	 * Schedules _done() to run once all scheduled TTS audio has finished playing,
	 * plus a 300 ms margin. When there is no usable audio context, or nothing is
	 * left to play, _done() fires after just the 300 ms margin.
	 *
	 * Any previously scheduled end timer is replaced.
	 */
	function _scheduleEnd() {
	  clearTimeout(_endTimer);
	  const ctx = _ctx;
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  if (!ctx || ctx.state === 'closed') {
	    // No audio context, so there is nothing to wait for. Keep the handle in
	    // _endTimer: previously this timer was not stored, so neither a later
	    // call nor stopAllAudio() could cancel it and _done() could fire twice.
	    _endTimer = setTimeout(_done, 300);
	    return;
	  }
	  const remainingMs = Math.max(0, (_schedEnd - ctx.currentTime) * 1000);
	  // BUG-FIX-C: always call _done regardless of _cancelled — we must
	  // release the lock. Use a minimal delay when no audio was scheduled.
	  _endTimer = setTimeout(_done, remainingMs + 300);
	}

	/**
	 * _done — returns the system to a fully idle state at the end of a turn.
	 *
	 * Always clears the processing/recording locks, re-enables the mic button and
	 * resets the queue display. Outside brain mode recording is never restarted
	 * automatically. In active brain mode, listening is resumed on the next
	 * timer tick, provided nothing else has started in the meantime.
	 */
	function _done() {
	  _ttsPlaying = false;
	  isProcessing = false;
	  isRecordingLocked = false;
	  _brainModeSetSearch(false);
	  // The logical-OR operators in this function were previously
	  // backslash-escaped, which is a syntax error.
	  _brainSetTtsBubble(brainLastResponse || '', false);
	  _inFlight = 0;
	  _vizQ();
	  micBtn.disabled = false;
	  setState('ready');
	  setMic('off');

	  if (brainMode && brainVoiceActive) {
	    clearTimeout(brainAutoRestartTimer);
	    brainAutoRestartTimer = setTimeout(() => {
	      // Re-check when the timer fires: the user may have left brain mode, or
	      // a new turn may already be under way.
	      const blocked =
	        !brainMode ||
	        !brainVoiceActive ||
	        isListening ||
	        isProcessing ||
	        isRecordingLocked;
	      if (blocked) return;
	      _brainResumeListening();
	    }, 0);
	  }
	  console.log('[Voice] Idle — ready for next manual press');
	}

	/**
	 * Cancels all TTS playback immediately: stops active sources, discards
	 * buffered and queued audio, closes the playback AudioContext, and tells the
	 * server to cancel the current turn.
	 *
	 * Leaves `_cancelled` set to true; callers that want playback to be possible
	 * again must clear it themselves.
	 */
	function stopAllAudio() {
	_cancelled = true;
	_ttsPlaying = false;
	// With turn-id framed audio, we can shorten the drop window; late packets
	// are ignored by turn mismatch.
	_dropAudioUntil = Date.now() + 120;
	// Invalidate playback work that was queued on _audioChain before this
	// cancel, then start a fresh chain.
	_playbackGen++;
	_audioChain = Promise.resolve();
	_expectedSeq = 0;
	_pendingAudio.clear();
	_stopAllSources();
	clearTimeout(_endTimer);
	_endTimer = null;
	_schedEnd = 0;
	_inFlight = 0;
	_vizQ();
	if (_ctx && _ctx.state !== 'closed') {
	// Close releases scheduled audio immediately; a new ctx is created on demand.
	_ctx.close().catch(() => {});
	}
	_ctx = null;
	// Ask the server to stop the current turn, when the socket is open.
	if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
	voiceWS.send(JSON.stringify({ type: 'cancel' }));
	}
	}

	/**
	 * Interrupts the AI mid-turn ("barge-in"): stops playback, releases the
	 * processing and recording locks, clears the in-progress AI bubble, and gets
	 * the mic listening again. Calls within 500 ms of the previous barge-in are
	 * ignored.
	 *
	 * @param {string} [reason='speech'] - Label written to the console log.
	 */
	function _bargeInNow(reason = 'speech') {
	const now = Date.now();
	if (now - _bargeInFiredAt < 500) return; // debounce
	_bargeInFiredAt = now;

	console.log('[BargeIn] interrupt:', reason);
	stopAllAudio();

	// Unlock immediately so the user can speak right away.
	isProcessing = false;
	isRecordingLocked = false;
	// stopAllAudio() left this set; clear it so the next reply can play.
	_cancelled = false;
	aiEl = null;
	aiTxt = '';
	_setCaption('');
	_removeThinking();
	micBtn.disabled = false;

	// If mic is already warm (brain continuous mode), just re-arm VAD.
	if (brainMode && brainVoiceActive) {
	_brainModeSetSearch(false);
	// If analyser/mic are already active, VAD tick will immediately
	// transition into recording on the next speech sample.
	_brainResumeListening();
	return;
	}
	// Otherwise, start listening fresh (user initiated by speaking).
	// A rejection from startListening() is deliberately ignored here.
	startListening().catch(() => {});
	}

	// ═══════════════════════════════════════════════════════════════════════════════
	// TEXT CHAT
	// ═══════════════════════════════════════════════════════════════════════════════

	// Send on button click, or on Enter; Shift+Enter is left to the input.
	sendBtn.onclick = sendText;
	textInput.addEventListener('keydown', (e) => {
	// NOTE(review): this does not call preventDefault() and does not check
	// e.isComposing — confirm Enter behaves as intended with a multi-line input
	// or while an IME composition is in progress.
	if (e.key === 'Enter' && !e.shiftKey) sendText();
	});

	/**
	 * Sends the trimmed contents of the text box as a chat message.
	 * Ignored when the box is blank or a request is already in flight.
	 * Shows the user's message, clears the box, resets latency tracking, and
	 * puts the UI into the 'processing' state before sending.
	 */
	function sendText() {
	  const text = textInput.value.trim();
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  if (!text || isProcessing) return;

	  appendMsg(text, 'user');
	  textInput.value = '';
	  _cancelled = false;
	  isProcessing = true;
	  tSend = Date.now();
	  tLlm = tTts = 0;
	  aiEl = null;
	  aiTxt = '';
	  setState('processing');
	  appendThinking();
	  _sendViaChat(text);
	}

	/**
	 * Sends a user query over the chat WebSocket. If the socket is not open yet,
	 * polls every 300 ms until it is, then sends.
	 *
	 * @param {string} text - The user's query.
	 */
	function _sendViaChat(text) {
	  const payload = JSON.stringify({ user_id: USER_ID, user_query: text });

	  // One attempt: send if the socket is open, otherwise try again shortly.
	  const attempt = () => {
	    const isOpen = Boolean(chatWS) && chatWS.readyState === WebSocket.OPEN;
	    if (!isOpen) {
	      setTimeout(attempt, 300);
	      return;
	    }
	    chatWS.send(payload);
	  };

	  attempt();
	}

	// ═══════════════════════════════════════════════════════════════════════════════
	// MICROPHONE / VAD
	// ═══════════════════════════════════════════════════════════════════════════════

	// Mic button: toggles listening on and off.
	// - Ignored while a turn is being processed or recording is locked.
	// - While listening in continuous brain mode the button does nothing; the
	//   Stop button is the way out.
	// - Otherwise a press while listening tears the mic down, and a press while
	//   idle starts listening.
	micBtn.onclick = async () => {
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  if (isRecordingLocked || isProcessing) {
	    console.log('[Mic] Ignored — system busy');
	    return;
	  }

	  if (!isListening) {
	    await startListening();
	    return;
	  }

	  if (brainMode && brainVoiceActive) {
	    console.log('[Brain] Continuous mode active — use Stop to exit');
	    return;
	  }

	  _teardownMicHardware();
	  _resetVoiceState();
	  setState('ready');
	  setMic('off');
	};

	// Stop button: ends continuous brain voice mode, cancels any playback and
	// pending restart timers, releases the microphone if it is in use, and
	// returns the UI to idle.
	stopBtn.onclick = () => {
	  brainVoiceActive = false;
	  clearTimeout(brainAutoRestartTimer);
	  clearTimeout(brainRestartTimer);
	  brainPendingAudio = null;
	  stopAllAudio();
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  if (isListening || isSpeaking) _teardownMicHardware();
	  _resetVoiceState();
	  setState('ready');
	  setMic('off');
	  micBtn.disabled = false;
	};

	// ── startListening ────────────────────────────────────────────────────────────
	/**
	 * Opens the microphone, attaches an analyser for voice-activity detection,
	 * and starts the VAD and visualiser timers.
	 *
	 * Does nothing when already listening, processing, or locked. If microphone
	 * access fails, a system message is shown and the function returns without
	 * changing state.
	 *
	 * @returns {Promise<void>}
	 */
	async function startListening() {
	  // The logical-OR operators here were previously backslash-escaped, which
	  // is a syntax error.
	  if (isListening || isProcessing || isRecordingLocked) return;

	  // Create/resume the playback context now, while this call may still be
	  // running inside a user gesture.
	  _ctxEnsure();

	  let stream;
	  try {
	    stream = await navigator.mediaDevices.getUserMedia({
	      audio: {
	        echoCancellation: true,
	        noiseSuppression: true,
	        autoGainControl: true,
	        channelCount: 1,
	        sampleRate: 16000,
	      },
	    });
	  } catch (err) {
	    console.error('[Mic] getUserMedia failed:', err);
	    appendMsg('⚠️ মাইক্রোফোন অ্যাক্সেস দেওয়া হয়নি।', 'system');
	    return;
	  }

	  // State can change while the permission prompt is open (for example a
	  // second click). Re-check, and release the stream we just obtained rather
	  // than overwriting — and leaking — the one already in use.
	  if (isListening || isProcessing || isRecordingLocked) {
	    stream.getTracks().forEach((t) => t.stop());
	    return;
	  }

	  let ctx = null;
	  let node = null;
	  try {
	    const AudioCtor = window.AudioContext || window.webkitAudioContext;
	    ctx = new AudioCtor({ sampleRate: 16000 });
	    const src = ctx.createMediaStreamSource(stream);
	    node = ctx.createAnalyser();
	    node.fftSize = 512;
	    node.smoothingTimeConstant = 0.6;
	    src.connect(node);
	  } catch (err) {
	    // Analyser setup failed: release the microphone instead of leaving it
	    // open with nothing reading from it.
	    console.error('[Mic] Analyser setup failed:', err);
	    stream.getTracks().forEach((t) => t.stop());
	    if (ctx && ctx.state !== 'closed') ctx.close().catch(() => {});
	    return;
	  }

	  micStream = stream;
	  analyserCtx = ctx;
	  analyser = node;

	  isListening = true;
	  audioChunks = [];

	  setMic('listening');
	  setState('listening');
	  voiceViz.classList.add('active');

	  vadInt = setInterval(vadTick, VAD_MS);
	  vizInt = setInterval(vizTick, 60);

	  console.log('[Mic] Listening started');
	}

	// ── _teardownMicHardware ──────────────────────────────────────────────────────
	// Stops hardware: intervals, recorder (silenced), mic tracks, AudioContext.
	// IMPORTANT: does NOT clear audioChunks — caller's onstop captures them first.
	// Also does not touch the isListening/isSpeaking/isProcessing flags; callers
	// reset those separately.
	function _teardownMicHardware() {
	clearInterval(vadInt);
	clearInterval(vizInt);
	clearTimeout(silenceTimer);
	vadInt = vizInt = silenceTimer = null;

	// Silence callbacks so no onstop logic fires after forced teardown
	if (mediaRecorder && mediaRecorder.state !== 'inactive') {
	mediaRecorder.ondataavailable = () => {};
	mediaRecorder.onstop = () => {};
	mediaRecorder.stop();
	}
	mediaRecorder = null;

	// Stop every track so the microphone is released.
	micStream?.getTracks().forEach((t) => t.stop());
	micStream = null;

	if (analyserCtx && analyserCtx.state !== 'closed') {
	analyserCtx.close().catch(() => {});
	}
	analyserCtx = null;
	analyser = null;

	// Reset the visualiser to its resting appearance.
	voiceViz.classList.remove('active');
	vizBars.forEach((b) => (b.style.height = '4px'));

	console.log('[Mic] Hardware torn down');
	}

	// ── _resetVoiceState ──────────────────────────────────────────────────────────
	// Clears every voice-turn flag and drops any buffered audio chunks.
	// Touches state only: it does not stop hardware or update the UI.
	function _resetVoiceState() {
	isListening = false;
	isSpeaking = false;
	isProcessing = false;
	isRecordingLocked = false;
	_ttsPlaying = false;
	_speechStartMs = 0;
	audioChunks = [];
	}

	// ── VAD tick ──────────────────────────────────────────────────────────────────
	/**
	 * One voice-activity-detection step, run every VAD_MS.
	 *
	 * Measures the RMS level of the current microphone frame and compares it with
	 * SILENCE_DB:
	 * - Speech while idle starts a recording.
	 * - Silence while recording starts the end-of-utterance timer (SILENCE_MS);
	 *   speech before it fires cancels it.
	 * - In active brain mode, sustained loud speech while the AI is thinking or
	 *   speaking triggers a barge-in and then starts recording in the same tick.
	 */
	function vadTick() {
	  if (!analyser) return;
	  // In brain mode we allow "barge-in": user speech interrupts TTS playback.
	  // In non-brain mode we still keep the hard lock to prevent overlapping turns.
	  // (The logical-OR operators in this function were previously
	  // backslash-escaped, which is a syntax error.)
	  if (!brainMode && (isProcessing || isRecordingLocked)) return;

	  // RMS level of the current frame, in dB. The fallback avoids log10(0).
	  const buf = new Float32Array(analyser.frequencyBinCount);
	  analyser.getFloatTimeDomainData(buf);
	  let sum = 0;
	  for (let i = 0; i < buf.length; i++) sum += buf[i] * buf[i];
	  const db = 20 * Math.log10(Math.sqrt(sum / buf.length) || 1e-10);
	  const speech = db > SILENCE_DB;

	  if (!speech) {
	    _bargeInArmedAt = 0;
	    if (isSpeaking && !silenceTimer) {
	      silenceTimer = setTimeout(_onSilenceTimeout, SILENCE_MS);
	    }
	    return;
	  }

	  // ── Barge-in detector ────────────────────────────────────────────────
	  const aiBusy = _ttsPlaying || isProcessing || isRecordingLocked;
	  if (brainMode && brainVoiceActive && aiBusy) {
	    // Stricter threshold reduces false triggers from echo + noise.
	    const loud = db > SILENCE_DB + 4;
	    if (!loud) {
	      _bargeInArmedAt = 0;
	      return;
	    }
	    if (!_bargeInArmedAt) _bargeInArmedAt = Date.now();
	    // Don't start recording until we confirm it's real barge-in speech:
	    // the loud level must have been sustained for at least 90 ms.
	    if (Date.now() - _bargeInArmedAt < 90) return;
	    _bargeInArmedAt = 0;
	    _bargeInNow(_ttsPlaying ? 'vad_tts' : 'vad_thinking');
	    // After barge-in unlock, continue into the normal recording start
	    // path in this same tick.
	  }

	  // Speech is present, so cancel any pending end-of-utterance timeout.
	  clearTimeout(silenceTimer);
	  silenceTimer = null;

	  if (isSpeaking) return;
	  if (mediaRecorder && mediaRecorder.state !== 'inactive') return; // duplicate guard

	  isSpeaking = true;
	  _speechStartMs = Date.now();
	  _cancelled = false;
	  _ctxEnsure();
	  startRecorder();
	  setMic('recording');
	  setState('recording');
	  console.log('[VAD] Speech detected — recording');
	}

	// ── _onSilenceTimeout ─────────────────────────────────────────────────────────
	// Runs when SILENCE_MS of silence has followed speech. An utterance shorter
	// than MIN_SPEECH_MS is discarded and listening continues. Otherwise the
	// turn is locked and the recorder is stopped; its onstop handler takes over
	// from there.
	function _onSilenceTimeout() {
	silenceTimer = null;

	// Measured from speech start to now, so this includes the trailing silence.
	const speechDuration = Date.now() - _speechStartMs;
	if (speechDuration < MIN_SPEECH_MS) {
	console.log(
	`[VAD] Too short (${speechDuration} ms) — discard & resume listening`,
	);
	isSpeaking = false;
	discardRecorder();
	// BUG-FIX-D: restart intervals so listening continues
	if (isListening && !vadInt) {
	vadInt = setInterval(vadTick, VAD_MS);
	vizInt = setInterval(vizTick, 60);
	}
	setMic('listening');
	setState('listening');
	return;
	}

	console.log(
	`[VAD] Silence after ${speechDuration} ms — finalising utterance`,
	);

	const keepBrainMicWarm = brainMode && brainVoiceActive;

	// In brain mode we keep VAD running so we can detect barge-in while the AI is
	// thinking/speaking. Outside brain mode we stop VAD during processing.
	if (!keepBrainMicWarm) {
	clearInterval(vadInt);
	clearInterval(vizInt);
	vadInt = vizInt = null;
	}

	// Lock state BEFORE stopRecorder (onstop may fire almost immediately)
	isSpeaking = false;
	isListening = keepBrainMicWarm; // mic stays "hot" in brain mode
	isProcessing = true;
	isRecordingLocked = true;
	_cancelled = false;

	// Start the latency clock for this turn.
	tSend = Date.now();
	tLlm = 0;
	tTts = 0;

	// Outside brain mode the mic button stays disabled until the turn ends.
	micBtn.disabled = !keepBrainMicWarm;
	setMic(keepBrainMicWarm ? 'listening' : 'processing');
	setState(keepBrainMicWarm ? 'listening' : 'processing');

	stopRecorder(); // → triggers onstop asynchronously
	}

	// ── Viz tick ──────────────────────────────────────────────────────────────────
	/**
	 * Animates the microphone visualiser bars from the analyser's frequency data.
	 * The frequency bins are sampled at an even stride, one per bar. Bars grow
	 * up to 48 px while the user is speaking and up to 18 px otherwise, with a
	 * 4 px minimum.
	 */
	function vizTick() {
	  if (!analyser) return;

	  const bins = new Uint8Array(analyser.frequencyBinCount);
	  analyser.getByteFrequencyData(bins);

	  const stride = Math.floor(bins.length / vizBars.length);
	  const peakPx = isSpeaking ? 48 : 18;

	  for (const [pos, bar] of vizBars.entries()) {
	    const level = bins[pos * stride] / 255;
	    bar.style.height = `${Math.max(4, level * peakPx)}px`;
	  }
	}

	// ── MediaRecorder ─────────────────────────────────────────────────────────────
	/**
	 * Starts a MediaRecorder on the open microphone stream and installs the
	 * handlers that collect audio chunks and, when recording stops, send the
	 * utterance to the backend over the voice WebSocket.
	 *
	 * Does nothing when no microphone stream is open or a recorder is already
	 * running. If the recorder cannot be created, speaking state is cleared and
	 * the UI returns to 'listening'.
	 */
	function startRecorder() {
	if (!micStream) return;
	if (mediaRecorder && mediaRecorder.state !== 'inactive') {
	console.warn('[Recorder] Duplicate startRecorder() — ignored');
	return;
	}

	audioChunks = [];
	// Prefer Opus in WebM when the browser supports it.
	_recorderMime = MediaRecorder.isTypeSupported('audio/webm;codecs=opus')
	? 'audio/webm;codecs=opus'
	: 'audio/webm';

	try {
	mediaRecorder = new MediaRecorder(micStream, { mimeType: _recorderMime });
	} catch (err) {
	console.error('[Recorder] Creation failed:', err);
	isSpeaking = false;
	setMic('listening');
	setState('listening');
	return;
	}

	mediaRecorder.ondataavailable = (e) => {
	if (e.data && e.data.size > 0) audioChunks.push(e.data);
	};

	/**
	* onstop handler
	*
	* BUG-FIX-A: Capture audioChunks into a LOCAL variable as the very
	* first action, before any teardown or async work. Then clear the
	* module-level audioChunks. _teardownMicHardware() does NOT touch
	* audioChunks, so the local copy is safe.
	*
	* Old (broken) order:
	* 1. _fullMicTeardown() ← set audioChunks = [] HERE
	* 2. new Blob(audioChunks) ← always empty!
	*
	* New (correct) order:
	* 1. const captured = audioChunks.slice() ← copy before anything
	* 2. audioChunks = [] ← clear module ref
	* 3. _teardownMicHardware() ← safe, chunks are local
	* 4. new Blob(captured) ← has actual audio data
	*/
	mediaRecorder.onstop = async () => {
	// ── 1. Capture chunks locally (MUST be first) ──────────────────────────
	const captured = audioChunks.slice();
	audioChunks = [];

	const keepBrainMicWarm = brainMode && brainVoiceActive;

	// ── 2. Tear down mic hardware unless brain mode wants a live loop ─────
	if (keepBrainMicWarm) {
	// Keep the stream, analyser and timers alive; only drop the recorder.
	mediaRecorder = null;
	setMic('off');
	} else {
	_teardownMicHardware();
	setMic('off');
	}

	console.log(
	`[Recorder] onstop: ${captured.length} chunk(s), ${captured
	.reduce((s, c) => s + c.size, 0)
	.toLocaleString()} bytes total`,
	);

	// ── 3. Validate ────────────────────────────────────────────────────────
	if (!captured.length) {
	console.warn('[Recorder] No audio chunks — possible threshold issue');
	appendMsg(
	'⚠️ কোনো অডিও রেকর্ড হয়নি। Silence threshold কমিয়ে দেখুন।',
	'system',
	);
	// Nothing to send: release the locks and go back to idle/listening.
	_resetVoiceState();
	setState(keepBrainMicWarm ? 'listening' : 'ready');
	micBtn.disabled = false;
	if (keepBrainMicWarm) _brainResumeListening();
	return;
	}

	// ── 4. Build ArrayBuffer ───────────────────────────────────────────────
	const blob = new Blob(captured, { type: _recorderMime });
	let buf;
	try {
	buf = await blob.arrayBuffer();
	} catch (err) {
	console.error('[Recorder] arrayBuffer() error:', err);
	_resetVoiceState();
	setState(keepBrainMicWarm ? 'listening' : 'ready');
	setMic('off');
	micBtn.disabled = false;
	if (keepBrainMicWarm) _brainResumeListening();
	return;
	}

	console.log(`[VAD] → voice WS: ${buf.byteLength.toLocaleString()} bytes`);

	// ── 5. Send to backend ─────────────────────────────────────────────────
	if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
	appendThinking();
	voiceWS.send(buf);
	// isProcessing + isRecordingLocked stay true until _done() fires
	} else {
	// Socket is down: keep the utterance for later, start reconnecting, and
	// unlock the UI rather than waiting for a reply that cannot arrive yet.
	console.warn('[VAD] Voice WS not open — queueing utterance');
	voicePendingPackets.push(buf);
	_connectVoice();
	_resetVoiceState();
	setState(keepBrainMicWarm ? 'listening' : 'ready');
	setMic('off');
	micBtn.disabled = false;
	if (keepBrainMicWarm) _brainResumeListening();
	}
	};

	mediaRecorder.start();
	console.log('[Recorder] Started, mime:', _recorderMime);
	}

	/**
	 * Stops the active recorder, if there is one. Stopping triggers the
	 * recorder's onstop handler asynchronously.
	 */
	function stopRecorder() {
	  const isLive = Boolean(mediaRecorder) && mediaRecorder.state !== 'inactive';
	  if (!isLive) return;
	  mediaRecorder.stop();
	}

	/**
	 * Abandons the current recording: stops the recorder without running the
	 * normal send-to-backend logic and throws away any collected audio.
	 * The microphone stream and analyser are left running.
	 */
	function discardRecorder() {
	  // The logical-OR here was previously backslash-escaped (a syntax error).
	  if (!mediaRecorder || mediaRecorder.state === 'inactive') {
	    audioChunks = [];
	    return;
	  }
	  // Replace the handlers first, so that stopping neither collects the final
	  // chunk nor sends the utterance.
	  mediaRecorder.ondataavailable = () => {};
	  mediaRecorder.onstop = () => {
	    audioChunks = [];
	  };
	  mediaRecorder.stop();
	  mediaRecorder = null;
	  audioChunks = [];
	}

	// ═══════════════════════════════════════════════════════════════════════════════
	// UI HELPERS
	// ═══════════════════════════════════════════════════════════════════════════════

	// Maps each state key to the status label text and the CSS modifier class
	// applied to the state dot.
	const STATE_MAP = {
	  ready: { label: 'প্রস্তুত', cls: '' },
	  listening: { label: 'শুনছি…', cls: 'listening' },
	  recording: { label: 'রেকর্ড হচ্ছে…', cls: 'recording' },
	  processing: { label: 'প্রক্রিয়া করছে…', cls: 'processing' },
	  speaking: { label: 'AI বলছে…', cls: 'speaking' },
	};

	/**
	 * Updates the status label, the state dot class and, when the brain stage
	 * element exists, its `data-state` attribute.
	 *
	 * @param {string} s - State key from STATE_MAP. Any other value is rendered
	 *   with the `ready` entry; `data-state` still receives `s` as passed.
	 */
	function setState(s) {
	  // Own-property check so inherited keys such as 'constructor' fall back to
	  // `ready` instead of resolving to Object.prototype members.
	  const isKnown = Object.prototype.hasOwnProperty.call(STATE_MAP, s);
	  const cfg = isKnown ? STATE_MAP[s] : STATE_MAP.ready;
	  stateLabel.textContent = cfg.label;
	  stateDot.className = cfg.cls ? `state-dot ${cfg.cls}` : 'state-dot';
	  if (brainStage) brainStage.dataset.state = s;
	}

	// Maps each mic state key to the button CSS class, label text and icon.
	const MIC_MAP = {
	  off: { cls: 'mic-off', label: 'Press to Start talking', icon: '🎤' },
	  listening: {
	    cls: 'mic-listening',
	    label: 'Listening...',
	    icon: '🟢',
	  },
	  recording: { cls: 'mic-recording', label: 'Listening..', icon: '🔴' },
	  processing: { cls: 'mic-processing', label: 'Please wait !!!', icon: '⏳' },
	};

	/**
	 * Applies a mic state to the mic button: CSS class, label text and icon.
	 *
	 * @param {string} s - State key from MIC_MAP. Any other value is rendered
	 *   with the `off` entry.
	 */
	function setMic(s) {
	  // Own-property check so inherited keys such as 'toString' fall back to
	  // `off` instead of resolving to Object.prototype members.
	  const isKnown = Object.prototype.hasOwnProperty.call(MIC_MAP, s);
	  const cfg = isKnown ? MIC_MAP[s] : MIC_MAP.off;
	  micBtn.className = `mic-btn ${cfg.cls}`;
	  micLabel.textContent = cfg.label;
	  micBtn.querySelector('.mic-icon').textContent = cfg.icon;
	}

	/**
	 * Appends a message element to the chat box and scrolls it into view.
	 *
	 * In brain mode, user messages are not rendered (the brain panel acts as
	 * the UI); AI messages are still shown as a readable transcript.
	 *
	 * @param {string} text - Message text; passed through
	 *   `_normalizeVisibleAiText` before display.
	 * @param {string} who - Sender key, appended to the `message` CSS class
	 *   (values seen in this file: 'user', 'ai', 'system').
	 * @returns {HTMLElement|null} The appended element, or null when the
	 *   message was suppressed.
	 */
	function appendMsg(text, who) {
	  if (brainMode && who === 'user') return null;

	  const el = document.createElement('div');
	  el.className = `message ${who}`;
	  const visibleText = _normalizeVisibleAiText(text);

	  if (who === 'ai' && typeof marked !== 'undefined') {
	    // NOTE(review): the rendered markdown is assigned to innerHTML without
	    // sanitization here — confirm the AI text is trusted or sanitize it.
	    el.innerHTML = marked.parse(visibleText || '');
	  } else {
	    el.textContent = visibleText;
	  }

	  chatBox.appendChild(el);
	  chatBox.scrollTop = chatBox.scrollHeight;
	  return el;
	}

	// Clear button: empty the chat box, drop the thinking-indicator reference
	// and, outside brain mode, post a system notice.
	clearBtn.onclick = () => {
	  chatBox.innerHTML = '';
	  thinkingEl = null;
	  if (brainMode) return;
	  appendMsg('চ্যাট পরিষ্কার করা হয়েছে।', 'system');
	};

	// Brain button: flip brain mode.
	brainBtn.onclick = () => {
	  const nextMode = !brainMode;
	  setBrainMode(nextMode);
	};

	// Sidebar toggle: collapse/expand and point the arrow accordingly.
	sidebarToggle.onclick = () => {
	  const isCollapsedNow = sidebarEl.classList.toggle('collapsed');
	  sidebarToggle.textContent = isCollapsedNow ? '›' : '‹';
	};

	// Mobile menu button: show/hide the sidebar overlay.
	mobileMenuBtn.onclick = () => sidebarEl.classList.toggle('mobile-open');

	/**
	 * Switches brain mode on or off and synchronizes the UI and the backend.
	 *
	 * Always: stores the flag, toggles the body/button classes and ARIA
	 * attributes, clears the voice caption and sends a `brain_mode` control
	 * packet.
	 *
	 * Turning off: deactivates brain voice, cancels both brain timers, drops
	 * any pending brain audio, expands the sidebar and resets the bubbles.
	 *
	 * Turning on: seeds the bubbles, collapses the sidebar, blurs the text
	 * input, schedules the one-time welcome (once per page load) and, when
	 * nothing is listening/processing/locked, schedules a mic resume.
	 *
	 * @param {*} on - Truthy to enable brain mode, falsy to disable it.
	 */
	function setBrainMode(on) {
	  brainMode = !!on;
	  document.body.classList.toggle('brain-mode', brainMode);
	  brainBtn.classList.toggle('active', brainMode);
	  brainBtn.setAttribute('aria-pressed', String(brainMode));
	  if (brainStage) brainStage.setAttribute('aria-hidden', String(!brainMode));
	  if (voiceCaption) voiceCaption.textContent = '';
	  _sendVoiceControl({ type: 'brain_mode', enabled: brainMode });

	  if (!brainMode) {
	    brainVoiceActive = false;
	    clearTimeout(brainAutoRestartTimer);
	    clearTimeout(brainRestartTimer);
	    brainPendingAudio = null;
	    sidebarEl.classList.remove('collapsed');
	    sidebarToggle.textContent = '‹';
	    _brainModeSetSearch(false);
	    _brainSetSttBubble('');
	    _brainSetTtsBubble('', false);
	    return;
	  }

	  // The bubble elements are treated as optional elsewhere in this file, so
	  // guard them here as well instead of throwing when they are missing.
	  if (brainBubbleSttText) brainBubbleSttText.textContent = 'Listening…';
	  if (brainBubbleTtsText) {
	    brainBubbleTtsText.textContent =
	      _normalizeVisibleAiText(brainLastResponse) || 'Waiting…';
	  }
	  brainVoiceActive = true;
	  sidebarEl.classList.add('collapsed');
	  sidebarToggle.textContent = '›';
	  chatBox.scrollTop = chatBox.scrollHeight;
	  textInput.blur();
	  _brainModeSetSearch(
	    isProcessing || isListening || isSpeaking || _ttsPlaying,
	  );

	  // One-time welcome when entering brain mode (per page load).
	  if (!_brainWelcomed) {
	    _brainWelcomed = true;
	    setTimeout(() => {
	      // Re-check on the next tick: the mode may have been left or the
	      // agent may have become busy in the meantime.
	      if (!brainMode || !brainVoiceActive) return;
	      if (isProcessing || isSpeaking || _ttsPlaying) return;
	      _brainSendWelcome();
	    }, 0);
	  }

	  if (!isListening && !isProcessing && !isRecordingLocked) {
	    setTimeout(() => {
	      // Same conditions re-evaluated when the timer fires.
	      if (
	        brainMode &&
	        brainVoiceActive &&
	        !isListening &&
	        !isProcessing &&
	        !isRecordingLocked
	      ) {
	        _brainResumeListening();
	      }
	    }, 0);
	  }
	}

	/**
	 * Sends a JSON control message over the voice WebSocket.
	 *
	 * When the socket is not open, or the send throws, the serialized packet
	 * is appended to `voicePendingPackets` and `_connectVoice()` is called.
	 *
	 * @param {object} payload - Value serialized with JSON.stringify.
	 */
	function _sendVoiceControl(payload) {
	  const packet = JSON.stringify(payload);
	  if (voiceWS && voiceWS.readyState === WebSocket.OPEN) {
	    try {
	      voiceWS.send(packet);
	      return;
	    } catch (err) {
	      // Fall through to the queue, but leave a trace of the failure.
	      console.warn('[Voice] control send failed — queueing packet:', err);
	    }
	  }
	  voicePendingPackets.push(packet);
	  _connectVoice();
	}

	/**
	 * Asks the backend to speak the brain-mode welcome text.
	 *
	 * Sends a `{ type: 'speak', text: BRAIN_WELCOME_TEXT }` packet on the voice
	 * WebSocket. If the socket is not open or the send throws, the packet is
	 * queued in `voicePendingPackets` and `_connectVoice()` is called.
	 */
	function _brainSendWelcome() {
	  const payload = JSON.stringify({ type: 'speak', text: BRAIN_WELCOME_TEXT });

	  // Shared fallback for both the "socket not open" and "send failed" paths.
	  const queueForLater = () => {
	    voicePendingPackets.push(payload);
	    _connectVoice();
	  };

	  if (!voiceWS || voiceWS.readyState !== WebSocket.OPEN) {
	    // If the socket is reconnecting, queue for later.
	    queueForLater();
	    return;
	  }

	  try {
	    appendThinking();
	    voiceWS.send(payload);
	    console.log('[Brain] welcome sent');
	  } catch (err) {
	    console.error('[Brain] welcome send failed:', err);
	    queueForLater();
	  }
	}

	/**
	 * Adds or removes the `searching` class on the brain stage element.
	 * No-op when the brain stage element is missing.
	 *
	 * @param {*} active - Truthy to add the class, falsy to remove it.
	 */
	function _brainModeSetSearch(active) {
	  if (brainStage) {
	    brainStage.classList.toggle('searching', Boolean(active));
	  }
	}

	/**
	 * Shows recognized text in the brain-mode STT bubble.
	 *
	 * The text is normalized with `_normalizeVisibleAiText` and trimmed. An
	 * empty result shows the 'Listening…' placeholder and removes the
	 * `active` class; otherwise the class is added. No-op when either bubble
	 * element is missing.
	 *
	 * @param {string} text - Text to display.
	 */
	function _brainSetSttBubble(text) {
	  if (!brainBubbleStt || !brainBubbleSttText) return;
	  const value = _normalizeVisibleAiText(text).trim();
	  brainBubbleSttText.textContent = value || 'Listening…';
	  brainBubbleStt.classList.toggle('active', Boolean(value));
	}

	/**
	 * Shows AI response text in the brain-mode TTS bubble.
	 *
	 * The text is normalized with `_normalizeVisibleAiText` and trimmed; an
	 * empty result shows the 'Waiting…' placeholder. The `active` class is on
	 * when there is text or `active` is truthy; the `speaking` class follows
	 * `active` alone. No-op when either bubble element is missing.
	 *
	 * @param {string} text - Text to display.
	 * @param {*} [active=true] - Whether the bubble is in its speaking state.
	 */
	function _brainSetTtsBubble(text, active = true) {
	  if (!brainBubbleTts || !brainBubbleTtsText) return;
	  const value = _normalizeVisibleAiText(text).trim();
	  const isSpeaking = Boolean(active);
	  brainBubbleTtsText.textContent = value || 'Waiting…';
	  brainBubbleTts.classList.toggle('active', Boolean(value) || isSpeaking);
	  brainBubbleTts.classList.toggle('speaking', isSpeaking);
	}

	/**
	 * Re-arms the microphone in brain mode.
	 *
	 * Does nothing unless brain mode and brain voice are active and the app is
	 * not already listening, processing or recording-locked. When the mic
	 * stream and analyser objects are still present they are reused: the
	 * listening flag/UI are set and the VAD and visualizer timers restarted.
	 * Otherwise `startListening()` is called.
	 */
	function _brainResumeListening() {
	  const canResume =
	    brainMode &&
	    brainVoiceActive &&
	    !isListening &&
	    !isProcessing &&
	    !isRecordingLocked;
	  if (!canResume) return;

	  if (micStream && analyserCtx && analyser) {
	    // Clear any previous timers first so re-arming can never leave two
	    // VAD or visualizer intervals running at once.
	    clearInterval(vadInt);
	    clearInterval(vizInt);
	    isListening = true;
	    setMic('listening');
	    setState('listening');
	    voiceViz.classList.add('active');
	    vadInt = setInterval(vadTick, VAD_MS);
	    vizInt = setInterval(vizTick, 60);
	    _brainModeSetSearch(false);
	    console.log('[Brain] Mic re-armed');
	    return;
	  }

	  startListening().catch((err) => {
	    console.error('[Brain] resume failed:', err);
	  });
	}

	/**
	 * Schedules a retry of `_flushBrainPendingAudio()` in 700 ms.
	 *
	 * Only acts while brain mode and brain voice are active. Any previously
	 * scheduled retry is cancelled first, and the conditions are checked again
	 * when the timer fires.
	 */
	function _queueBrainReconnect() {
	  if (!brainMode || !brainVoiceActive) return;
	  clearTimeout(brainRestartTimer);
	  brainRestartTimer = setTimeout(() => {
	    // Brain mode may have been switched off while the timer was pending.
	    if (!brainMode || !brainVoiceActive) return;
	    _flushBrainPendingAudio();
	  }, 700);
	}

	/**
	 * Sends every queued packet in `voicePendingPackets` over the voice socket.
	 *
	 * Does nothing when the socket is not open or the queue is empty. Packets
	 * are sent in order; `appendThinking()` is called after each successful
	 * send. On the first failure the failed packet AND all packets after it
	 * are put back at the front of the queue in their original order, and
	 * `_connectVoice()` is called.
	 */
	function _flushVoicePendingPackets() {
	  if (
	    !voiceWS ||
	    voiceWS.readyState !== WebSocket.OPEN ||
	    !voicePendingPackets.length
	  ) {
	    return;
	  }
	  // Take the whole queue; anything not sent is restored below.
	  const packets = voicePendingPackets.splice(0);
	  for (let i = 0; i < packets.length; i += 1) {
	    try {
	      voiceWS.send(packets[i]);
	      appendThinking();
	      console.log('[Voice] queued packet flushed');
	    } catch (err) {
	      console.error('[Voice] flush failed:', err);
	      // Re-queue the failed packet together with the unsent remainder so
	      // nothing is dropped when the loop stops.
	      voicePendingPackets.unshift(...packets.slice(i));
	      _connectVoice();
	      break;
	    }
	  }
	}

	/**
	 * Sends the utterance held in `brainPendingAudio`, if any.
	 *
	 * When the voice socket is not open the buffer is kept and a retry is
	 * scheduled via `_queueBrainReconnect()`. Otherwise the buffer is cleared
	 * and sent; if sending throws, the buffer is restored and a retry is
	 * scheduled.
	 */
	function _flushBrainPendingAudio() {
	  if (!brainPendingAudio) return;
	  if (!voiceWS || voiceWS.readyState !== WebSocket.OPEN) {
	    _queueBrainReconnect();
	    return;
	  }
	  // Clear the slot before sending; it is restored below if the send fails.
	  const buf = brainPendingAudio;
	  brainPendingAudio = null;
	  try {
	    appendThinking();
	    voiceWS.send(buf);
	    console.log('[Brain] queued utterance flushed');
	  } catch (err) {
	    console.error('[Brain] flush failed:', err);
	    brainPendingAudio = buf;
	    _queueBrainReconnect();
	  }
	}

	// Silence-threshold slider: seed it from SILENCE_DB and keep the variable
	// and the readout in sync with user input.
	sThreshold.value = SILENCE_DB;
	sThresholdVal.textContent = `${SILENCE_DB} dB`;
	sThreshold.oninput = () => {
	  SILENCE_DB = Number(sThreshold.value);
	  sThresholdVal.textContent = `${SILENCE_DB} dB`;
	};

	// Silence-timeout slider: same wiring for SILENCE_MS.
	sTimeout.value = SILENCE_MS;
	sTimeoutVal.textContent = `${SILENCE_MS} ms`;
	sTimeout.oninput = () => {
	  SILENCE_MS = Number(sTimeout.value);
	  sTimeoutVal.textContent = `${SILENCE_MS} ms`;
	};

	// Announce TTS voice changes in the chat as a system message.
	sVoice.onchange = () => appendMsg(`🔊 TTS voice: ${sVoice.value}`, 'system');

	// Every 140 ms, call _vizQ() while _inFlight is above zero.
	setInterval(() => {
	  if (!(_inFlight > 0)) return;
	  _vizQ();
	}, 140);

	// ═══════════════════════════════════════════════════════════════════════════════
	// BOOT
	// ═══════════════════════════════════════════════════════════════════════════════
	boot();