Commit ·
e653046
1
Parent(s): d85261e
fix: Relax analyze post-filters to show AraSpell corrections in editor
- Allow 3 character edits (was 2) and a 50% edit ratio (was 40%)
- Lower word-split threshold from 12 to 5 chars
Browse files- src/app.py +5 -4
src/app.py
CHANGED
|
@@ -659,8 +659,9 @@ def _is_small_spelling_change(orig_word, corr_word):
|
|
| 659 |
dist = _levenshtein(orig_word, corr_word)
|
| 660 |
max_len = max(len(orig_word), len(corr_word))
|
| 661 |
|
| 662 |
-
# Allow at most 2 character edits and at most 40% of the word
|
| 663 |
-
return dist <= 2 and (dist / max_len) <= 0.4
|
|
|
|
| 664 |
|
| 665 |
|
| 666 |
@app.route('/api/analyze', methods=['POST'])
|
|
@@ -740,9 +741,9 @@ def analyze_text():
|
|
| 740 |
else:
|
| 741 |
new_words.append(current_text[start_idx:end_idx])
|
| 742 |
elif len(o_segment) == 1 and len(c_segment) > 1:
|
| 743 |
-
# 1-word → N words: accept
|
| 744 |
o_word = o_segment[0]
|
| 745 |
-
if len(o_word) >= 12 and ' ' not in o_word:
|
| 746 |
corr_str = " ".join(c_segment)
|
| 747 |
new_words.append(corr_str)
|
| 748 |
suggestions.append({
|
|
|
|
| 659 |
dist = _levenshtein(orig_word, corr_word)
|
| 660 |
max_len = max(len(orig_word), len(corr_word))
|
| 661 |
|
| 662 |
+
# Allow at most 3 character edits and at most 50% of the word
|
| 663 |
+
# AraSpell has its own validation pipeline, so we can be more permissive here
|
| 664 |
+
return dist <= 3 and (dist / max_len) <= 0.5
|
| 665 |
|
| 666 |
|
| 667 |
@app.route('/api/analyze', methods=['POST'])
|
|
|
|
| 741 |
else:
|
| 742 |
new_words.append(current_text[start_idx:end_idx])
|
| 743 |
elif len(o_segment) == 1 and len(c_segment) > 1:
|
| 744 |
+
# 1-word → N words: accept word splits (e.g. فيالمدرسة → في المدرسة)
|
| 745 |
o_word = o_segment[0]
|
| 746 |
+
if len(o_word) >= 5 and ' ' not in o_word:
|
| 747 |
corr_str = " ".join(c_segment)
|
| 748 |
new_words.append(corr_str)
|
| 749 |
suggestions.append({
|