Commit ·
32a135f
1
Parent(s): ebce6ef
Fix FIX-42b: Allow first-letter changes when root lengths differ
Browse files
FIX-42b was blocking الولاد→الأولاد because after stripping ال,
roots ولاد(4)→أولاد(5) have different first letters. But this is
a valid hamza-addition correction, not a consonant swap.
Fix: only block when roots have SAME length (true consonant swap).
Different-length roots indicate character addition/removal which is OK.
Blocked: افهمه(5)→تفهمة(5) ✓, واحتاج→وتحتاج (roots after stripping the و prefix: احتاج(5)→تحتاج(5)) ✓
Allowed: ولاد(4)→أولاد(5) ✓
Tests: 39 passing.
- src/app.py +3 -0
src/app.py
CHANGED
|
@@ -864,8 +864,11 @@ def _is_small_spelling_change(orig_word, corr_word, vocab_manager=None):
|
|
| 864 |
_c_root = _c_root[len(_pfx):]
|
| 865 |
break
|
| 866 |
# If roots start with different letters AND this isn't an orthographic pair
|
|
|
|
|
|
|
| 867 |
_HAMZA_CHARS = set('أإآاء')
|
| 868 |
if (_o_root and _c_root and _o_root[0] != _c_root[0]
|
|
|
|
| 869 |
and not (_o_root[0] in _HAMZA_CHARS and _c_root[0] in _HAMZA_CHARS)):
|
| 870 |
logger.info(
|
| 871 |
f"[SPELLING] Blocked first-letter change: '{orig_word}'→'{corr_word}' "
|
|
|
|
| 864 |
_c_root = _c_root[len(_pfx):]
|
| 865 |
break
|
| 866 |
# If roots start with different letters AND this isn't an orthographic pair
|
| 867 |
+
# AND roots have same length (true consonant swap, not a character addition)
|
| 868 |
+
# Exception: الولاد→الأولاد has roots ولاد(4)→أولاد(5) — different length = allow
|
| 869 |
_HAMZA_CHARS = set('أإآاء')
|
| 870 |
if (_o_root and _c_root and _o_root[0] != _c_root[0]
|
| 871 |
+
and len(_o_root) == len(_c_root) # same-length roots only
|
| 872 |
and not (_o_root[0] in _HAMZA_CHARS and _c_root[0] in _HAMZA_CHARS)):
|
| 873 |
logger.info(
|
| 874 |
f"[SPELLING] Blocked first-letter change: '{orig_word}'→'{corr_word}' "
|