ground-zero / src /iot /intent_parser.py
jefffffff9
Improve Bambara/Fula quality + add full correction UI
d351193
"""
Maps transcribed Bambara/Fula text to structured intents for IoT sensor queries.
Uses keyword matching (no ML required for v1).
Confidence = fraction of intent keywords present in the transcription.
"""
from __future__ import annotations

import unicodedata
from dataclasses import dataclass, field
@dataclass
class Intent:
    """Structured result produced from one transcription.

    Attributes:
        action: Intent identifier, e.g. "check_soil" or "check_weather".
        entity: Subject the action is about, e.g. "soil" or "weather".
        parameters: Additional details attached to the intent; every
            instance gets its own empty dict unless one is supplied.
        confidence: Score of the match; 0.0 unless one is supplied.
    """

    action: str
    entity: str
    parameters: dict = field(default_factory=dict)
    confidence: float = 0.0
# Keyword table used by IntentParser.parse.
# Layout: intent action -> language code -> list of lower-case keyword phrases.
# Language codes present: "bam" (Bambara), "ful" (Fula), "fr", "en".
# Insertion order of the actions is significant: on equal confidence the
# parser keeps the action that appears first here.
INTENT_KEYWORDS: dict[str, dict[str, list[str]]] = {
    # ---- Social / conversational intents ----
    "greeting": {
        "bam": ["i ni ce", "i ni sogoma", "i ni wula", "ani sogoma", "an ka"],
        "ful": ["jam waali", "jam hiiri", "jam na", "no mbadda"],
        "fr": ["bonjour", "bonsoir", "salut", "bonne nuit"],
        "en": ["hello", "hi", "good morning", "good evening", "hey"],
    },
    "thanks": {
        "bam": ["aw ni ce", "i ni ce barika", "a ni barika"],
        "ful": ["jaraama", "barakallahu"],
        "fr": ["merci", "je vous remercie"],
        "en": ["thank", "thanks"],
    },
    "farewell": {
        "bam": ["kana tɛmɛ", "i ka taa", "sini"],
        "ful": ["yahdu jam", "o yahdu", "jemma"],
        "fr": ["au revoir", "à bientôt", "bonne journée"],
        "en": ["goodbye", "bye", "see you"],
    },
    # ---- Agricultural intents ----
    "check_soil": {
        "bam": ["bunding", "nɔgɔ", "dugu", "foro", "sani"],
        "ful": ["leydi", "ngesa", "ladde"],
        "fr": ["sol", "terre", "humidité"],
        "en": ["soil", "ground", "moisture", "dirt"],
    },
    "check_weather": {
        "bam": ["teliman", "sanji", "dibi", "sira"],
        "ful": ["yeeso", "fuɗorde", "ndiyam"],
        "fr": ["météo", "temps", "pluie", "chaleur"],
        "en": ["weather", "rain", "temperature", "hot"],
    },
    "irrigation_status": {
        "bam": ["ji", "sanji", "foro ji"],
        "ful": ["ndiyam", "ngesa ndiyam"],
        "fr": ["irrigation", "arrosage", "eau"],
        "en": ["irrigation", "water", "watering"],
    },
    "pest_alert": {
        "bam": ["kungoloni", "suruku", "dɔgɔw"],
        "ful": ["biñ-biñ", "kuuje"],
        "fr": ["insecte", "nuisible", "ravageur"],
        "en": ["pest", "insect", "bug"],
    },
}
# Entity label reported for each intent action. Actions missing from this
# table are reported with the entity "unknown" by IntentParser.parse.
INTENT_ENTITIES: dict[str, str] = {
    # Conversational intents all share one entity.
    "greeting": "social",
    "thanks": "social",
    "farewell": "social",
    # Agricultural intents each have their own entity.
    "check_soil": "soil",
    "check_weather": "weather",
    "irrigation_status": "irrigation",
    "pest_alert": "pest",
}
class IntentParser:
    """Parses a transcription string into a structured Intent.

    Each intent in ``INTENT_KEYWORDS`` is scored by the fraction of its
    keyword phrases (for the requested language) that occur as substrings
    of the normalised transcription; the best-scoring intent is returned.
    """

    @staticmethod
    def _normalize(value: str) -> str:
        """Return *value* lower-cased, NFC-composed, with whitespace collapsed."""
        # NFC makes "e" + combining accent compare equal to the precomposed
        # "é", so accented keywords match regardless of how the transcriber
        # encoded them.
        composed = unicodedata.normalize("NFC", value.lower())
        # Collapsing runs of whitespace lets multi-word keywords match text
        # containing doubled spaces, tabs or line breaks.
        return " ".join(composed.split())

    def parse(self, text: str, language: str) -> Intent:
        """
        Find the best matching intent by counting keyword overlaps.

        Args:
            text: Transcription to analyse. ``None`` or an empty string
                yields the "unknown" intent.
            language: Language code selecting the keyword lists (the codes
                used in ``INTENT_KEYWORDS``, e.g. "bam", "ful", "fr", "en").
                Surrounding whitespace and letter case are ignored.

        Returns:
            The highest-confidence Intent. Confidence is the fraction of the
            intent's keywords found in the text, rounded to 3 decimals. When
            nothing matches, or the language has no keywords, the action and
            entity are "unknown" and the confidence is 0.0.
        """
        haystack = self._normalize(text or "")
        lang_code = (language or "").strip().lower()

        best_action = "unknown"
        best_confidence = 0.0
        for action, keywords_by_language in INTENT_KEYWORDS.items():
            keywords = keywords_by_language.get(lang_code)
            if not keywords:
                continue
            # NOTE(review): this is substring matching, so a short keyword
            # such as "hi" also hits inside longer words ("this"). Kept as-is
            # so existing confidence values do not shift.
            hits = sum(1 for kw in keywords if self._normalize(kw) in haystack)
            confidence = hits / len(keywords)
            # Strict ">" keeps the earliest action in INTENT_KEYWORDS on ties.
            if confidence > best_confidence:
                best_confidence = confidence
                best_action = action

        return Intent(
            action=best_action,
            entity=INTENT_ENTITIES.get(best_action, "unknown"),
            confidence=round(best_confidence, 3),
        )