youssefreda9 committed on
Commit
e653046
·
1 Parent(s): d85261e

fix: Relax analyze post-filters to show AraSpell corrections in editor - Allow 3 char edits (was 2), 50% ratio (was 40%) - Lower word-split threshold from 12 to 5 chars

Browse files
Files changed (1) hide show
  1. src/app.py +5 -4
src/app.py CHANGED
@@ -659,8 +659,9 @@ def _is_small_spelling_change(orig_word, corr_word):
659
  dist = _levenshtein(orig_word, corr_word)
660
  max_len = max(len(orig_word), len(corr_word))
661
 
662
- # Allow at most 2 character edits and at most 40% of the word
663
- return dist <= 2 and (dist / max_len) <= 0.4
 
664
 
665
 
666
  @app.route('/api/analyze', methods=['POST'])
@@ -740,9 +741,9 @@ def analyze_text():
740
  else:
741
  new_words.append(current_text[start_idx:end_idx])
742
  elif len(o_segment) == 1 and len(c_segment) > 1:
743
- # 1-word → N words: accept when original is long (likely concatenated)
744
  o_word = o_segment[0]
745
- if len(o_word) >= 12 and ' ' not in o_word:
746
  corr_str = " ".join(c_segment)
747
  new_words.append(corr_str)
748
  suggestions.append({
 
659
  dist = _levenshtein(orig_word, corr_word)
660
  max_len = max(len(orig_word), len(corr_word))
661
 
662
+ # Allow at most 3 character edits and at most 50% of the word
663
+ # AraSpell has its own validation pipeline, so we can be more permissive here
664
+ return dist <= 3 and (dist / max_len) <= 0.5
665
 
666
 
667
  @app.route('/api/analyze', methods=['POST'])
 
741
  else:
742
  new_words.append(current_text[start_idx:end_idx])
743
  elif len(o_segment) == 1 and len(c_segment) > 1:
744
+ # 1-word → N words: accept word splits (e.g. فيالمدرسة → في المدرسة)
745
  o_word = o_segment[0]
746
+ if len(o_word) >= 5 and ' ' not in o_word:
747
  corr_str = " ".join(c_segment)
748
  new_words.append(corr_str)
749
  suggestions.append({