"""
Compare two HTML files with a unified diff.
"""
import argparse
import difflib
from pathlib import Path
def normalize(text: str, ignore_whitespace: bool) -> str:
    """Return *text* with Unix line endings, optionally whitespace-collapsed.

    Args:
        text: Raw file contents.
        ignore_whitespace: When true, every run of whitespace (newlines
            included) is collapsed to one space and leading/trailing
            whitespace is dropped, so the result is a single line.

    Returns:
        The normalized text.
    """
    # CRLF must be converted before lone CR, otherwise "\r\n" would turn
    # into two newlines.
    unix_text = text.replace("\r\n", "\n")
    unix_text = unix_text.replace("\r", "\n")
    if not ignore_whitespace:
        return unix_text
    words = unix_text.split()
    return " ".join(words)
def main() -> int:
    """Diff two HTML files and print a (possibly truncated) unified diff.

    Command-line options:
        --baseline / --candidate: paths of the files to compare (required).
        --ignore-whitespace: normalize whitespace before diffing.
        --max-lines: maximum number of diff lines to print (default 200).

    Returns:
        0 when the normalized files are identical, 1 when they differ.
        Usage problems (negative ``--max-lines``, an input file that cannot
        be read or decoded as UTF-8) are reported through ``parser.error``,
        which exits the process with status 2.
    """
    parser = argparse.ArgumentParser(description="Compare HTML files.")
    parser.add_argument("--baseline", type=Path, required=True, help="Baseline HTML path")
    parser.add_argument("--candidate", type=Path, required=True, help="Candidate HTML path")
    parser.add_argument("--ignore-whitespace", action="store_true", help="Normalize whitespace before diff")
    parser.add_argument("--max-lines", type=int, default=200, help="Max diff lines to print")
    args = parser.parse_args()

    # A negative limit would slice from the end of the diff and always
    # claim the output was truncated; reject it up front.
    if args.max_lines < 0:
        parser.error("--max-lines must be non-negative")

    texts = []
    for path in (args.baseline, args.candidate):
        try:
            raw = path.read_text(encoding="utf-8")
        except (OSError, UnicodeDecodeError) as exc:
            # parser.error prints the message to stderr and exits with 2.
            parser.error(f"cannot read {path}: {exc}")
        texts.append(normalize(raw, args.ignore_whitespace))
    base_text, cand_text = texts

    base_lines = base_text.splitlines(keepends=True)
    cand_lines = cand_text.splitlines(keepends=True)
    diff = list(
        difflib.unified_diff(
            base_lines,
            cand_lines,
            fromfile=str(args.baseline),
            tofile=str(args.candidate),
        )
    )
    if not diff:
        print("No differences found.")
        return 0

    print("Differences found:")
    for line in diff[: args.max_lines]:
        if line.endswith("\n"):
            print(line, end="")
        else:
            # The final line of an input without a trailing newline (always
            # the case with --ignore-whitespace) reaches us unterminated.
            # Terminate it so the next diff line is not glued onto it, and
            # flag it the way diff tools conventionally do.
            print(line)
            print("\\ No newline at end of file")
    if len(diff) > args.max_lines:
        print(f"\n... truncated ({len(diff)} total diff lines).")
    return 1
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)