File size: 3,532 Bytes
76db545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d351193
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76db545
 
 
d351193
 
76db545
 
 
d351193
 
 
76db545
 
d351193
 
 
 
76db545
 
d351193
 
 
 
76db545
 
 
 
d351193
 
 
76db545
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
"""
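Maps transcribed Bambara/Fula text (French and English keywords are also
provided) to structured intents: IoT sensor queries plus basic conversational
intents such as greetings, thanks and farewells.
Uses keyword matching (no ML required for v1).
Confidence = fraction of an intent's keywords, for the requested language,
that are present in the transcription.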
"""
from __future__ import annotations

import unicodedata
from dataclasses import dataclass, field


@dataclass
class Intent:
    """Structured result of interpreting a transcription.

    Attributes:
        action: Name of the matched intent, e.g. "check_soil" or
            "check_weather".
        entity: Subject the action refers to, e.g. "soil" or "weather".
        parameters: Extra details attached to the intent. Each instance
            gets its own empty dict by default.
        confidence: Score attached to the match; defaults to 0.0.
    """

    action: str
    entity: str
    parameters: dict = field(default_factory=dict)
    confidence: float = 0.0


# Intent keyword taxonomy for Bambara (bam) and Fula (ful); French (fr) and
# English (en) keyword lists are provided for every intent as well.
#
# Structure: intent action name -> language code -> list of keyword phrases.
# All keywords are written in lowercase; IntentParser.parse lowercases the
# transcription before comparing against them.
#
# NOTE: some keywords are listed under more than one intent for the same
# language (e.g. "sanji" under both check_weather and irrigation_status for
# "bam", "ndiyam" under both for "ful"), so a single word can contribute to
# the score of several intents.
INTENT_KEYWORDS: dict[str, dict[str, list[str]]] = {
    # Social / conversational intents
    "greeting": {
        "bam": ["i ni ce", "i ni sogoma", "i ni wula", "ani sogoma", "an ka"],
        "ful": ["jam waali", "jam hiiri", "jam na", "no mbadda"],
        "fr":  ["bonjour", "bonsoir", "salut", "bonne nuit"],
        "en":  ["hello", "hi", "good morning", "good evening", "hey"],
    },
    "thanks": {
        "bam": ["aw ni ce", "i ni ce barika", "a ni barika"],
        "ful": ["jaraama", "barakallahu"],
        "fr":  ["merci", "je vous remercie"],
        "en":  ["thank", "thanks"],
    },
    "farewell": {
        "bam": ["kana tɛmɛ", "i ka taa", "sini"],
        "ful": ["yahdu jam", "o yahdu", "jemma"],
        "fr":  ["au revoir", "à bientôt", "bonne journée"],
        "en":  ["goodbye", "bye", "see you"],
    },
    # Agricultural intents
    "check_soil": {
        "bam": ["bunding", "nɔgɔ", "dugu", "foro", "sani"],
        "ful": ["leydi", "ngesa", "ladde"],
        "fr":  ["sol", "terre", "humidité"],
        "en":  ["soil", "ground", "moisture", "dirt"],
    },
    "check_weather": {
        "bam": ["teliman", "sanji", "dibi", "sira"],
        "ful": ["yeeso", "fuɗorde", "ndiyam"],
        "fr":  ["météo", "temps", "pluie", "chaleur"],
        "en":  ["weather", "rain", "temperature", "hot"],
    },
    "irrigation_status": {
        "bam": ["ji", "sanji", "foro ji"],
        "ful": ["ndiyam", "ngesa ndiyam"],
        "fr":  ["irrigation", "arrosage", "eau"],
        "en":  ["irrigation", "water", "watering"],
    },
    "pest_alert": {
        "bam": ["kungoloni", "suruku", "dɔgɔw"],
        "ful": ["biñ-biñ", "kuuje"],
        "fr":  ["insecte", "nuisible", "ravageur"],
        "en":  ["pest", "insect", "bug"],
    },
}

# Maps each intent action name (the keys of INTENT_KEYWORDS) to the entity
# string stored in Intent.entity. IntentParser.parse looks actions up here
# and falls back to "unknown" for any action that has no entry.
INTENT_ENTITIES = {
    "greeting": "social",
    "thanks": "social",
    "farewell": "social",
    "check_soil": "soil",
    "check_weather": "weather",
    "irrigation_status": "irrigation",
    "pest_alert": "pest",
}


class IntentParser:
    """Parses a transcription string into a structured Intent.

    Matching is purely keyword based: for every intent in INTENT_KEYWORDS the
    parser counts how many of that intent's keywords (for the requested
    language) occur in the transcription, and scores the intent as
    ``matches / number_of_keywords``.
    """

    def parse(self, text: str, language: str) -> Intent:
        """Return the best-matching intent for *text*.

        Args:
            text: Transcribed utterance. ``None`` or an empty string yields
                the "unknown" intent with confidence 0.0.
            language: Language code selecting which keyword lists to use
                (a key of the per-intent dicts in INTENT_KEYWORDS, e.g.
                "bam"). Intents with no keywords for this code are skipped.

        Returns:
            An Intent whose action is the highest-scoring intent, or
            "unknown" when no keyword matched. On ties the intent that
            appears first in INTENT_KEYWORDS wins. Confidence is rounded
            to three decimals.
        """
        # Normalize once up front; keywords get the same treatment below so
        # that composed and decomposed accents compare equal.
        haystack = self._normalize(text or "")
        best_action = "unknown"
        best_confidence = 0.0

        for action, lang_keywords in INTENT_KEYWORDS.items():
            keywords = lang_keywords.get(language, [])
            if not keywords:
                continue

            matches = sum(
                1
                for kw in keywords
                if self._starts_word(haystack, self._normalize(kw))
            )
            confidence = matches / len(keywords)

            # Strict ">" keeps the earliest intent on ties.
            if confidence > best_confidence:
                best_confidence = confidence
                best_action = action

        return Intent(
            action=best_action,
            entity=INTENT_ENTITIES.get(best_action, "unknown"),
            confidence=round(best_confidence, 3),
        )

    @staticmethod
    def _normalize(value: str) -> str:
        """Lowercase *value* and convert it to Unicode NFC form."""
        return unicodedata.normalize("NFC", value.lower())

    @staticmethod
    def _starts_word(haystack: str, keyword: str) -> bool:
        """Return True if *keyword* occurs in *haystack* at the start of a word.

        An occurrence counts only when it sits at the beginning of the text
        or directly after a non-alphanumeric character. This rejects hits
        buried inside unrelated words ("hi" in "this") while still accepting
        suffixed forms ("insect" in "insects") and elided forms ("eau" in
        "l'eau"). An empty keyword never matches.
        """
        if not keyword:
            return False
        start = haystack.find(keyword)
        while start != -1:
            if start == 0 or not haystack[start - 1].isalnum():
                return True
            start = haystack.find(keyword, start + 1)
        return False