Spaces:
Sleeping
Sleeping
| """HTML diff highlighting utilities.""" | |
| import difflib | |
| import html | |
| # Visible markers for whitespace differences so they don't become invisible | |
| # inside colored diff spans. | |
| # ␣ (U+2423 OPEN BOX) for regular space | |
| # ↵ (U+21B5 DOWNWARDS ARROW WITH CORNER LEFTWARDS) for newline | |
| # → (U+2192 RIGHTWARDS ARROW) for tab | |
| _WHITESPACE_MARKERS = { | |
| " ": "␣", | |
| "\t": "→", | |
| "\u00a0": "␣", # non-breaking space | |
| } | |
| def _visualize_whitespace(escaped: str) -> str: | |
| """Replace whitespace chars with visible markers inside a diff span. | |
| Operates on already HTML-escaped text. Newlines are handled separately | |
| by the final `\n` -> `<br>` pass, but we mark them inline too so the | |
| reader sees *where* a newline was inserted/removed before the <br>. | |
| """ | |
| parts: list[str] = [] | |
| for ch in escaped: | |
| if ch == "\n": | |
| parts.append( | |
| '<span style="opacity:0.6;">↵</span>\n' | |
| ) # marker + real newline (for <br>) | |
| elif ch in _WHITESPACE_MARKERS: | |
| parts.append(f'<span style="opacity:0.6;">{_WHITESPACE_MARKERS[ch]}</span>') | |
| else: | |
| parts.append(ch) | |
| return "".join(parts) | |
| def _render_segment(escaped: str) -> str: | |
| """Render a diff segment, adding whitespace markers only when the whole | |
| segment is whitespace. | |
| Rationale: a pure-whitespace diff (e.g. a single inserted space) is | |
| otherwise invisible, so we show ␣/↵/→. But when the segment already | |
| contains visible text, the colored background is enough — marking the | |
| incidental spaces would just add noise to word-level edits. | |
| """ | |
| if escaped and escaped.strip() == "": | |
| return _visualize_whitespace(escaped) | |
| return escaped | |
def highlight_diff(original: str, corrected: str) -> str:
    """Generate HTML highlighting differences between original and corrected text.

    Uses a character-level ``difflib.SequenceMatcher`` to produce inline HTML:
    deletions get a red background with strikethrough, insertions get a green
    background, and unchanged text is wrapped in a plain ``<span>``.
    Whitespace-only changes are rendered with visible markers (␣ for space,
    ↵ for newline, → for tab) so spacing-only edits are perceivable.

    The diff is computed on the raw text and every segment is HTML-escaped
    when it is rendered. Escaping first and diffing afterwards would let the
    matcher split an entity across spans (``&`` -> ``<`` turns into
    ``&amp;`` vs ``&lt;``, whose shared ``&`` and ``;`` are matched as
    "equal"), which the browser then displays as literal garbage such as
    ``&amplt;``.

    Args:
        original: Original text.
        corrected: Corrected text.

    Returns:
        HTML string with diff highlights. All text taken from the inputs is
        escaped with ``html.escape`` before being emitted, so the result is
        safe against XSS. Newlines are converted to ``<br>``.
    """
    del_style = "background:#ffecec;text-decoration:line-through;"
    ins_style = "background:#e6ffec;"

    # Diff the RAW strings so opcode boundaries always fall between real
    # characters, never in the middle of an HTML entity.
    matcher = difflib.SequenceMatcher(None, original, corrected)

    result_parts: list[str] = []
    for tag, i1, i2, j1, j2 in matcher.get_opcodes():
        if tag == "equal":
            result_parts.append(f"<span>{html.escape(original[i1:i2])}</span>")
            continue
        # "replace" emits both a deletion and an insertion; "delete" and
        # "insert" emit only their own half.
        if tag in ("replace", "delete"):
            removed = _render_segment(html.escape(original[i1:i2]))
            result_parts.append(f'<span style="{del_style}">{removed}</span>')
        if tag in ("replace", "insert"):
            added = _render_segment(html.escape(corrected[j1:j2]))
            result_parts.append(f'<span style="{ins_style}">{added}</span>')

    # Convert real newlines (in equal spans, or left behind next to a ↵
    # marker inside diff spans) to <br>.
    return "".join(result_parts).replace("\n", "<br>")