File size: 6,530 Bytes
f75c5b2
 
 
 
 
a3fc1ff
 
f75c5b2
 
 
 
 
 
 
 
 
a3fc1ff
f75c5b2
 
 
 
a3fc1ff
f75c5b2
 
 
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
 
 
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
 
 
a3fc1ff
 
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
 
 
f75c5b2
a3fc1ff
 
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
 
 
 
 
 
f75c5b2
a3fc1ff
f75c5b2
 
a3fc1ff
f75c5b2
 
 
a3fc1ff
f75c5b2
 
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
 
 
 
a3fc1ff
 
f75c5b2
 
a3fc1ff
 
f75c5b2
 
 
 
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
 
 
a3fc1ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f75c5b2
 
 
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
a3fc1ff
f75c5b2
 
a3fc1ff
f75c5b2
 
a3fc1ff
 
f75c5b2
 
a3fc1ff
f75c5b2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
"""
test_hindi_stt.py — Test Hindi speech-to-text support
"""
import sys
from pathlib import Path
SCRIPT_DIR = Path(__file__).parent
sys.path.insert(0, str(SCRIPT_DIR))

from utils.logger import logger
import inspect

def _missing_params(func, required):
    """Return the names in *required* that *func*'s signature does not declare."""
    declared = inspect.signature(func).parameters
    return [name for name in required if name not in declared]


def _report_checks(checks):
    """Log one line per named check and report whether every check passed.

    Args:
        checks: Mapping of human-readable check name to a boolean result.

    Returns:
        True when every value in *checks* is truthy, False otherwise.
    """
    all_passed = True
    for check_name, passed in checks.items():
        if passed:
            logger.info(f"   [PASS] {check_name}")
        else:
            # Failures go to the error level, like every other failure
            # reported by this script, so level-based filtering keeps them.
            logger.error(f"   [FAIL] {check_name}")
            all_passed = False
    return all_passed


def test_hindi_stt_fixed():
    """Verify that the project is configured for Hindi speech-to-text.

    Runs five checks in order and stops at the first one that fails:

    1. ``SpeechTranscriber.transcribe`` and ``WhisperTranscriber.transcribe``
       (from ``speech_module.transcriber1``) both declare ``language`` and
       ``task`` parameters.
    2. ``speech_module/__init__.py`` mentions ``transcriber1``.
    3. ``app.py`` contains the expected Hindi-related source snippets.
    4. ``SpeechTranscriber`` can be imported from ``speech_module``,
       instantiated, and its ``transcribe`` method declares ``language``.
    5. ``speech_module/transcriber1.py`` contains the expected decode-option
       snippets.

    Checks 2, 3 and 5 are plain substring searches over the source text, so
    they are sensitive to formatting (quoting style, spacing) of those files.

    Returns:
        True if every check passed, False otherwise. Progress and failures
        are reported through ``logger``; exceptions raised inside a check are
        logged and turned into a False result rather than propagated.

    Note:
        Because the outcome is returned rather than asserted, a False result
        does not by itself fail the test when this function is collected by
        pytest; the ``__main__`` entry point is what converts it to an exit
        code.
    """
    logger.info("=" * 70)
    logger.info("TESTING: Hindi Speech-to-Text Support")
    logger.info("=" * 70)

    required = ('language', 'task')

    # Test 1: Check transcriber1.py has Hindi support
    logger.info("\n1. Checking transcriber1.py for Hindi support parameters...")
    try:
        from speech_module.transcriber1 import SpeechTranscriber, WhisperTranscriber

        # Check SpeechTranscriber.transcribe signature
        params = list(inspect.signature(SpeechTranscriber.transcribe).parameters)
        logger.info(f"   SpeechTranscriber.transcribe() parameters: {params}")
        if _missing_params(SpeechTranscriber.transcribe, required):
            logger.error("   [FAIL] FAILED: language or task parameters missing")
            return False
        logger.info("   [PASS] FIXED: language and task parameters present")

        # Check WhisperTranscriber.transcribe signature
        params_whisper = list(inspect.signature(WhisperTranscriber.transcribe).parameters)
        logger.info(f"   WhisperTranscriber.transcribe() parameters: {params_whisper}")
        if _missing_params(WhisperTranscriber.transcribe, required):
            logger.error("   [FAIL] FAILED: WhisperTranscriber missing parameters")
            return False
        logger.info("   [PASS] FIXED: WhisperTranscriber has Hindi support")

    except Exception as e:
        logger.error(f"   [FAIL] FAILED: {e}")
        return False

    # Test 2: Check __init__.py imports from transcriber1
    logger.info("\n2. Checking speech_module/__init__.py imports...")
    try:
        init_path = SCRIPT_DIR / "speech_module" / "__init__.py"
        # Explicit UTF-8 so the read does not depend on the platform's
        # default locale encoding (matches the other file reads below).
        init_content = init_path.read_text(encoding="utf-8")

        if "transcriber1" not in init_content:
            logger.error("   [FAIL] __init__.py does not import from transcriber1.py")
            return False
        logger.info("   [PASS] __init__.py imports from transcriber1.py")

    except Exception as e:
        logger.error(f"   [FAIL] FAILED: {e}")
        return False

    # Test 3: Check app.py has language selection (Hindi audio tab)
    logger.info("\n3. Checking app.py for Hindi language support...")
    try:
        app_path = SCRIPT_DIR / "app.py"
        app_content = app_path.read_text(encoding="utf-8")

        checks = {
            "transcribe_audio function has language parameter": 'language: str = None' in app_content,
            "analyze_hindi_audio function exists": 'def analyze_hindi_audio(audio_path):' in app_content,
            "Hindi audio tab exists": 'Hindi audio' in app_content,
            "task=translate for Hindi": 'task="translate"' in app_content,
            "language=hi for Hindi": 'language="hi"' in app_content,
            "analyze_english_audio function exists": 'def analyze_english_audio(audio_path):' in app_content,
        }

        if not _report_checks(checks):
            return False

    except Exception as e:
        logger.error(f"   [FAIL] FAILED: {e}")
        return False

    # Test 4: Import and verify the updated modules work
    logger.info("\n4. Testing import and initialization...")
    try:
        from speech_module import SpeechTranscriber
        logger.info("   [PASS] SpeechTranscriber imported successfully")

        # Try to instantiate
        transcriber = SpeechTranscriber()
        logger.info("   [PASS] SpeechTranscriber instantiated successfully")

        # Check the bound method exists and has the right signature
        if _missing_params(transcriber.transcribe, ('language',)):
            logger.error("   [FAIL] transcribe method missing language parameter")
            return False
        logger.info("   [PASS] transcribe method accepts language parameter")

    except Exception as e:
        logger.error(f"   [FAIL] FAILED: {e}")
        # Instantiation failures are the hardest to diagnose from the message
        # alone, so this step also prints the full traceback.
        import traceback
        traceback.print_exc()
        return False

    # Test 5: Verify anti-hallucination parameters in transcriber1.py
    logger.info("\n5. Checking anti-hallucination decode parameters...")
    try:
        t1_path = SCRIPT_DIR / "speech_module" / "transcriber1.py"
        t1_content = t1_path.read_text(encoding="utf-8")

        checks = {
            "temperature=0.0 set": '"temperature": 0.0' in t1_content,
            "condition_on_previous_text=False": '"condition_on_previous_text": False' in t1_content,
            "initial_prompt set": '"initial_prompt"' in t1_content,
            "without_timestamps=True": '"without_timestamps": True' in t1_content,
            "suppress_tokens set": '"suppress_tokens"' in t1_content,
        }

        if not _report_checks(checks):
            return False

    except Exception as e:
        logger.error(f"   [FAIL] FAILED: {e}")
        return False

    return True

if __name__ == "__main__":
    # Script entry point: run the configuration checks, summarise the result,
    # and exit with status 1 when any check failed.
    banner = "=" * 70

    logger.info("\n[HINDI STT CONFIGURATION TEST]")
    logger.info("This test verifies that Hindi speech-to-text support is properly configured.")

    passed = test_hindi_stt_fixed()

    logger.info("\n" + banner)
    if not passed:
        logger.error("[FAIL] SOME TESTS FAILED - Please review the errors above")
        # Exits here, so the closing banner below is only logged on success.
        sys.exit(1)

    success_lines = (
        "[PASS] ALL TESTS PASSED - Hindi STT support is now fully configured!",
        "\nYou can now:",
        "  1. Upload/record Hindi audio",
        "  2. Select 'Hindi audio' tab",
        "  3. Click 'Transcribe Hindi & analyze'",
        "  4. Whisper will transcribe and translate to English automatically",
    )
    for line in success_lines:
        logger.info(line)
    logger.info(banner)