Spaces:
Running
Running
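"""
Maps transcribed Bambara/Fula text to structured intents: conversational ones
(greeting, thanks, farewell) and IoT sensor queries (soil, weather, irrigation,
pests). Keyword lists are also provided for French and English.
Uses keyword matching (no ML required for v1).
Confidence = fraction of an intent's keywords, for the requested language,
that are present in the transcription.
"""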
from __future__ import annotations

import re
import unicodedata
from dataclasses import dataclass, field
@dataclass
class Intent:
    """Structured result of parsing one transcription.

    The ``@dataclass`` decorator is required: it generates the keyword
    ``__init__`` used when building an ``Intent`` and turns the
    ``field(default_factory=dict)`` declaration into a fresh dict per
    instance instead of a shared class-level ``Field`` object.
    """

    # Name of the matched intent, e.g., "check_soil", "check_weather".
    action: str
    # Domain the intent refers to, e.g., "soil", "weather".
    entity: str
    # Optional extra details for the intent; empty unless the caller fills it.
    parameters: dict = field(default_factory=dict)
    # Match score for the intent; defaults to 0.0 (no evidence).
    confidence: float = 0.0
# Keyword taxonomy: intent name -> language code -> trigger phrases.
# Language codes: Bambara ("bam"), Fula ("ful"), French ("fr"), English ("en").
# Insertion order matters to consumers that break ties by first match.
# NOTE: some phrases are shared between intents ("sanji" appears under both
# check_weather and irrigation_status for "bam"; "ndiyam" under both for "ful").
INTENT_KEYWORDS: dict[str, dict[str, list[str]]] = {
    # --- Social / conversational -------------------------------------------
    "greeting": {
        "bam": [
            "i ni ce",
            "i ni sogoma",
            "i ni wula",
            "ani sogoma",
            "an ka",
        ],
        "ful": ["jam waali", "jam hiiri", "jam na", "no mbadda"],
        "fr": ["bonjour", "bonsoir", "salut", "bonne nuit"],
        "en": ["hello", "hi", "good morning", "good evening", "hey"],
    },
    "thanks": {
        "bam": ["aw ni ce", "i ni ce barika", "a ni barika"],
        "ful": ["jaraama", "barakallahu"],
        "fr": ["merci", "je vous remercie"],
        "en": ["thank", "thanks"],
    },
    "farewell": {
        "bam": ["kana tɛmɛ", "i ka taa", "sini"],
        "ful": ["yahdu jam", "o yahdu", "jemma"],
        "fr": ["au revoir", "à bientôt", "bonne journée"],
        "en": ["goodbye", "bye", "see you"],
    },
    # --- Agricultural ------------------------------------------------------
    "check_soil": {
        "bam": ["bunding", "nɔgɔ", "dugu", "foro", "sani"],
        "ful": ["leydi", "ngesa", "ladde"],
        "fr": ["sol", "terre", "humidité"],
        "en": ["soil", "ground", "moisture", "dirt"],
    },
    "check_weather": {
        "bam": ["teliman", "sanji", "dibi", "sira"],
        "ful": ["yeeso", "fuɗorde", "ndiyam"],
        "fr": ["météo", "temps", "pluie", "chaleur"],
        "en": ["weather", "rain", "temperature", "hot"],
    },
    "irrigation_status": {
        "bam": ["ji", "sanji", "foro ji"],
        "ful": ["ndiyam", "ngesa ndiyam"],
        "fr": ["irrigation", "arrosage", "eau"],
        "en": ["irrigation", "water", "watering"],
    },
    "pest_alert": {
        "bam": ["kungoloni", "suruku", "dɔgɔw"],
        "ful": ["biñ-biñ", "kuuje"],
        "fr": ["insecte", "nuisible", "ravageur"],
        "en": ["pest", "insect", "bug"],
    },
}
# Entity label attached to each intent name. The three conversational
# intents all share the "social" entity; each agricultural intent has its own.
INTENT_ENTITIES = {
    **dict.fromkeys(("greeting", "thanks", "farewell"), "social"),
    "check_soil": "soil",
    "check_weather": "weather",
    "irrigation_status": "irrigation",
    "pest_alert": "pest",
}
class IntentParser:
    """Parses a transcription string into a structured Intent.

    Matching is purely keyword based: an intent's confidence is the fraction
    of its keywords (for the requested language) found in the transcription.
    """

    @staticmethod
    def _normalize(text: str) -> str:
        """Return *text* lower-cased and NFC-normalized for comparison.

        NFC composes sequences such as "e" + combining acute into "é", so a
        transcription that arrives decomposed still matches the keyword table.
        """
        return unicodedata.normalize("NFC", text.lower())

    @staticmethod
    def _contains_keyword(haystack: str, keyword: str) -> bool:
        """Return True if *keyword* occurs in *haystack* at the start of a word.

        A plain substring test would let "hi" match inside "this" or "eau"
        inside "beau". Requiring a non-word character (or the start of the
        text) before the keyword removes those hits while still accepting
        inflected forms such as "insects" for "insect".
        """
        needle = IntentParser._normalize(keyword)
        if not needle:
            return False
        # ``\w`` is Unicode-aware for str patterns, so letters such as
        # "ɛ", "ɔ", "ɗ" and "ñ" count as word characters.
        return re.search(r"(?<!\w)" + re.escape(needle), haystack) is not None

    def parse(self, text: str, language: str) -> Intent:
        """
        Find the best matching intent by counting keyword overlaps.

        Args:
            text: Transcribed utterance. ``None`` or an empty string yields
                the "unknown" intent instead of raising.
            language: Key into the per-intent keyword tables (e.g. "bam",
                "ful", "fr", "en"). Case and surrounding whitespace are
                ignored; an unrecognized code yields the "unknown" intent.

        Returns:
            The highest-confidence intent, with confidence rounded to three
            decimals. On a tie the intent listed first in INTENT_KEYWORDS
            wins. When nothing matches, action and entity are "unknown" and
            confidence is 0.0.
        """
        haystack = IntentParser._normalize(text or "")
        lang_code = (language or "").strip().lower()

        best_action = "unknown"
        best_confidence = 0.0
        for action, lang_keywords in INTENT_KEYWORDS.items():
            keywords = lang_keywords.get(lang_code, [])
            if not keywords:
                continue
            matches = sum(
                1
                for kw in keywords
                if IntentParser._contains_keyword(haystack, kw)
            )
            confidence = matches / len(keywords)
            # Strict ">" keeps the earliest intent on ties.
            if confidence > best_confidence:
                best_confidence = confidence
                best_action = action

        return Intent(
            action=best_action,
            entity=INTENT_ENTITIES.get(best_action, "unknown"),
            confidence=round(best_confidence, 3),
        )