Commit ·
c8cc451
1
Parent(s): 7954757
FIX-25: Block grammar punct spacing (حالك؟→حالك ؟)
Browse files
The grammar model inserts spaces around punctuation marks, which is wrong
in Arabic. This affects P012-P015, H026, SC015-SC017, and more.
Traced from the full failure analysis: 10+ failures share this pattern.
Added a normalize-then-compare filter to catch spacing-only diffs.
- src/app.py +21 -0
- tests/phase10/reports/phase10_results.json +0 -0
src/app.py
CHANGED
|
@@ -1672,6 +1672,27 @@ def analyze_text():
|
|
| 1672 |
)
|
| 1673 |
continue
|
| 1674 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1675 |
# ── FIX-06: Directional block protection for grammar ──
|
| 1676 |
# Prevents meaning-changing substitutions (كان→كأن etc.)
|
| 1677 |
# especially critical when spelling is skipped (>1000 chars).
|
|
|
|
| 1672 |
)
|
| 1673 |
continue
|
| 1674 |
|
| 1675 |
+
# ── FIX-25: Grammar punctuation spacing blocker ──
|
| 1676 |
+
# The grammar model inserts spaces around punctuation:
|
| 1677 |
+
# e.g., 'حالك؟' → 'حالك ؟', 'المكتبة،' → 'المكتبة ،'
|
| 1678 |
+
# Block diffs where the only change is spacing around punct.
|
| 1679 |
+
if orig_text and corr_text:
|
| 1680 |
+
import re as _re_psp
|
| 1681 |
+
# Normalize: collapse spaces around common punct marks
|
| 1682 |
+
def _norm_punct_spacing(t):
|
| 1683 |
+
# Remove spaces before/after common punct
|
| 1684 |
+
t = _re_psp.sub(r'\s+([.,:;!?\u060C\u061B\u061F\u0021%$)}\]>])', r'\1', t)
|
| 1685 |
+
t = _re_psp.sub(r'([({\[<])\s+', r'\1', t)
|
| 1686 |
+
return t
|
| 1687 |
+
_orig_normed = _norm_punct_spacing(orig_text)
|
| 1688 |
+
_corr_normed = _norm_punct_spacing(corr_text)
|
| 1689 |
+
if _orig_normed == _corr_normed and orig_text != corr_text:
|
| 1690 |
+
logger.info(
|
| 1691 |
+
f"[GRAMMAR] Blocked punct spacing: "
|
| 1692 |
+
f"'{orig_text}'\u2192'{corr_text}'"
|
| 1693 |
+
)
|
| 1694 |
+
continue
|
| 1695 |
+
|
| 1696 |
# ── FIX-06: Directional block protection for grammar ──
|
| 1697 |
# Prevents meaning-changing substitutions (كان→كأن etc.)
|
| 1698 |
# especially critical when spelling is skipped (>1000 chars).
|
tests/phase10/reports/phase10_results.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|