from __future__ import annotations import re from collections import defaultdict from collections.abc import Mapping, Sequence from dataclasses import dataclass from typing import Any HTML_COMMENT_PATTERN = re.compile(r"", re.DOTALL) DEFAULT_TEMPLATE_CLEANUP_MODE = "merge_defaults" DEFAULT_STRIP_HTML_COMMENTS = True DEFAULT_TRIM_CLOSING_REFERENCE_PREFIX = True DEFAULT_TEMPLATE_SECTION_PATTERNS = ( r"^#{1,6}\s*code agent policy\s*$", r"^#{1,6}\s*before submitting\s*$", r"^#{1,6}\s*who can review\?\s*$", ) DEFAULT_TEMPLATE_LINE_PATTERNS = ( r"^#{1,6}\s*what does this pr do\?\s*$", r"^(?:fix(?:e[sd])?|close[sd]?|resolve[sd]?)\s*#\s*\(?issue\)?\s*$", ) PR_TEMPLATE_CLOSING_REFERENCE_PREFIX_PATTERN = re.compile( r""" ^ (?P\s*(?:fix(?:e[sd])?|close[sd]?|resolve[sd]?)\s+) (?: (?:[a-z0-9_.-]+/[a-z0-9_.-]+)?\#\s*\d+ (?:\s*(?:,|and)\s*(?:[a-z0-9_.-]+/[a-z0-9_.-]+)?\#\s*\d+)* ) \s*(?:[:\-\u2013\u2014]\s*)? (?P.*) $ """, re.IGNORECASE | re.VERBOSE, ) def compile_casefold_patterns(patterns: Sequence[str]) -> tuple[re.Pattern[str], ...]: return tuple(re.compile(pattern, re.IGNORECASE) for pattern in patterns if pattern.strip()) @dataclass(slots=True, frozen=True) class TemplateCleanupSettings: strip_html_comments: bool trim_closing_reference_prefix: bool section_patterns: tuple[re.Pattern[str], ...] line_patterns: tuple[re.Pattern[str], ...] 
def build_template_cleanup_settings(
    *,
    mode: str = DEFAULT_TEMPLATE_CLEANUP_MODE,
    strip_html_comments: bool = DEFAULT_STRIP_HTML_COMMENTS,
    trim_closing_reference_prefix: bool = DEFAULT_TRIM_CLOSING_REFERENCE_PREFIX,
    section_patterns: Sequence[str] = (),
    line_patterns: Sequence[str] = (),
) -> TemplateCleanupSettings:
    """Build compiled cleanup settings for the given mode.

    Args:
        mode: ``"off"`` disables every cleanup step and ignores the other
            arguments; ``"merge_defaults"`` appends the supplied patterns to
            the module defaults; ``"replace_defaults"`` uses only the supplied
            patterns.
        strip_html_comments: Whether HTML comments are removed.
        trim_closing_reference_prefix: Whether leading closing references are
            trimmed from lines.
        section_patterns: Extra (or replacement) section-heading regexes.
        line_patterns: Extra (or replacement) single-line regexes.

    Returns:
        The compiled settings.

    Raises:
        ValueError: If *mode* is not one of the three known modes.
    """
    if mode == "off":
        return TemplateCleanupSettings(
            strip_html_comments=False,
            trim_closing_reference_prefix=False,
            section_patterns=(),
            line_patterns=(),
        )
    if mode == "merge_defaults":
        section_sources = (*DEFAULT_TEMPLATE_SECTION_PATTERNS, *section_patterns)
        line_sources = (*DEFAULT_TEMPLATE_LINE_PATTERNS, *line_patterns)
    elif mode == "replace_defaults":
        section_sources = tuple(section_patterns)
        line_sources = tuple(line_patterns)
    else:
        raise ValueError(f"Unknown PR template cleanup mode: {mode}")
    return TemplateCleanupSettings(
        strip_html_comments=strip_html_comments,
        trim_closing_reference_prefix=trim_closing_reference_prefix,
        section_patterns=compile_casefold_patterns(section_sources),
        line_patterns=compile_casefold_patterns(line_sources),
    )


def strip_pull_request_template(
    body: str | None,
    *,
    settings: TemplateCleanupSettings | None = None,
) -> str:
    """Remove template boilerplate from a pull-request body.

    Line endings are normalised to ``\\n`` first. Depending on *settings*,
    HTML comments are replaced by a newline, lines matching a line pattern
    are dropped, and a line matching a section pattern starts a skipped
    section. Skipping lasts until the next line that starts with ``#`` and is
    not itself a section or line pattern. A leading closing reference
    ("Fixes #12: ...") is trimmed down to the text that follows it; a line
    holding nothing but the reference is dropped.

    Args:
        body: Raw pull-request body; ``None`` is treated as empty.
        settings: Cleanup options; defaults to
            ``build_template_cleanup_settings()`` when omitted.

    Returns:
        The cleaned body with blank-line runs collapsed (see
        ``collapse_blank_lines``), or ``""`` for an empty body.
    """
    text = (body or "").replace("\r\n", "\n").replace("\r", "\n")
    if not text:
        return ""
    cleanup = settings or build_template_cleanup_settings()
    if cleanup.strip_html_comments:
        # Substitute a newline (not "") so text on either side of an inline
        # comment does not get glued together.
        text = HTML_COMMENT_PATTERN.sub("\n", text)

    cleaned_lines: list[str] = []
    skip_section = False
    for raw_line in text.splitlines():
        line = raw_line.rstrip()
        normalized = line.strip()
        if any(pattern.match(normalized) for pattern in cleanup.line_patterns):
            continue
        if any(pattern.match(normalized) for pattern in cleanup.section_patterns):
            skip_section = True
            continue
        if skip_section:
            if not normalized.startswith("#"):
                continue
            # A line starting with "#" ends the skipped section and is then
            # processed like any other line.
            skip_section = False
        if cleanup.trim_closing_reference_prefix:
            trimmed_reference = _trim_closing_reference_prefix(normalized)
            if trimmed_reference == "":
                # The line was only a closing reference; drop it.
                continue
            if trimmed_reference is not None:
                cleaned_lines.append(trimmed_reference)
                continue
        cleaned_lines.append(line)
    return collapse_blank_lines(cleaned_lines)


def collapse_blank_lines(lines: list[str]) -> str:
    """Join *lines*, stripping each one and collapsing blank-line runs.

    Leading and trailing blank lines are removed, consecutive blank lines are
    reduced to one, and every kept line has its surrounding whitespace
    stripped.

    Args:
        lines: Lines to join.

    Returns:
        The lines joined with ``\\n``.
    """
    collapsed: list[str] = []
    # Starting as "previous was blank" suppresses leading blank lines.
    previous_blank = True
    for line in lines:
        stripped = line.strip()
        if not stripped:
            if previous_blank:
                continue
            collapsed.append("")
            previous_blank = True
            continue
        collapsed.append(stripped)
        previous_blank = False
    while collapsed and not collapsed[-1]:
        collapsed.pop()
    return "\n".join(collapsed)


def _trim_closing_reference_prefix(line: str) -> str | None:
    """Return the text after a leading closing reference in *line*.

    Returns ``None`` when the line does not start with a closing reference,
    and ``""`` when nothing follows the reference.
    """
    match = PR_TEMPLATE_CLOSING_REFERENCE_PREFIX_PATTERN.match(line)
    if match is None:
        return None
    return match.group("rest").strip()


@dataclass(slots=True, frozen=True)
class ClusterSuppressionRule:
    """A named rule built from title, body and path regex groups."""

    # Identifier reported when the rule matches.
    id: str
    # Pattern groups; an empty group places no constraint.
    title_patterns: tuple[re.Pattern[str], ...] = ()
    body_patterns: tuple[re.Pattern[str], ...] = ()
    path_patterns: tuple[re.Pattern[str], ...] = ()

    def matches(self, *, title: str, body: str, paths: Sequence[str]) -> bool:
        """Return whether the rule applies to the given title, body and paths.

        A rule with no patterns at all never matches. Otherwise every
        non-empty group must be satisfied: at least one title pattern found
        in *title*, at least one body pattern found in *body*, and at least
        one path pattern found in at least one of *paths* (so a rule with
        path patterns cannot match when *paths* is empty).
        """
        if not (self.title_patterns or self.body_patterns or self.path_patterns):
            return False
        if self.title_patterns and not any(
            pattern.search(title) for pattern in self.title_patterns
        ):
            return False
        if self.body_patterns and not any(
            pattern.search(body) for pattern in self.body_patterns
        ):
            return False
        return not self.path_patterns or any(
            pattern.search(path) for pattern in self.path_patterns for path in paths
        )


def _first_nonblank_label(*candidates: Any) -> str:
    """Return the first truthy candidate that is non-empty once stripped, else ``""``."""
    for candidate in candidates:
        if not candidate:
            continue
        label = str(candidate).strip()
        if label:
            return label
    return ""


def compile_cluster_suppression_rules(
    payload: Sequence[Mapping[str, Any]],
) -> tuple[ClusterSuppressionRule, ...]:
    """Compile raw rule mappings into ``ClusterSuppressionRule`` objects.

    Each mapping may provide ``id`` (or ``name``) plus ``title_patterns``,
    ``body_patterns`` and ``path_patterns`` lists. The rule id is the first of
    ``id``/``name`` that is non-blank after stripping; when neither is usable
    it falls back to ``rule-<position>`` with positions counted from 1.

    Args:
        payload: Raw rule mappings.

    Returns:
        The compiled rules, in input order.

    Raises:
        re.error: If a pattern is not a valid regular expression.
    """
    rules: list[ClusterSuppressionRule] = []
    for index, raw_rule in enumerate(payload, start=1):
        # Strip each candidate before choosing, so a whitespace-only "id"
        # does not hide a usable "name".
        rule_id = (
            _first_nonblank_label(raw_rule.get("id"), raw_rule.get("name"))
            or f"rule-{index}"
        )
        rules.append(
            ClusterSuppressionRule(
                id=rule_id,
                title_patterns=compile_casefold_patterns(
                    _string_list(raw_rule.get("title_patterns"))
                ),
                body_patterns=compile_casefold_patterns(
                    _string_list(raw_rule.get("body_patterns"))
                ),
                path_patterns=compile_casefold_patterns(
                    _string_list(raw_rule.get("path_patterns"))
                ),
            )
        )
    return tuple(rules)


def suppressed_pull_request_reasons(
    pull_requests: Sequence[Mapping[str, Any]],
    pr_files: Sequence[Mapping[str, Any]],
    rules: Sequence[ClusterSuppressionRule],
) -> dict[int, list[str]]:
    """Map pull-request numbers to the ids of the rules that match them.

    Args:
        pull_requests: Rows read through the ``number``, ``title`` and
            ``body`` keys; rows without a ``number`` are skipped.
        pr_files: Rows read through the ``pull_request_number`` and
            ``filename`` keys; rows missing either are skipped.
        rules: Rules to evaluate.

    Returns:
        A dict holding only the pull requests matched by at least one rule;
        each value lists the matching rule ids in rule order. Empty when
        *rules* is empty.
    """
    if not rules:
        return {}

    paths_by_pr: defaultdict[int, list[str]] = defaultdict(list)
    for row in pr_files:
        pr_number = row.get("pull_request_number")
        filename = str(row.get("filename") or "").strip()
        if pr_number is None or not filename:
            continue
        paths_by_pr[int(pr_number)].append(filename)

    suppressed: dict[int, list[str]] = {}
    for row in pull_requests:
        number = row.get("number")
        if number is None:
            continue
        pr_number = int(number)
        title = str(row.get("title") or "")
        body = str(row.get("body") or "")
        # Look the paths up once per pull request; .get() avoids inserting
        # empty entries into the defaultdict.
        paths = paths_by_pr.get(pr_number, [])
        matched = [
            rule.id
            for rule in rules
            if rule.matches(title=title, body=body, paths=paths)
        ]
        if matched:
            suppressed[pr_number] = matched
    return suppressed


def _string_list(value: Any) -> tuple[str, ...]:
    """Coerce a raw list into a tuple of non-blank strings.

    Anything that is not a ``list`` yields ``()``. ``None`` entries are
    skipped rather than stringified, so a null item cannot turn into the
    literal pattern ``"None"``; entries that are blank after ``str()`` are
    skipped as well.
    """
    if not isinstance(value, list):
        return ()
    texts = (str(item) for item in value if item is not None)
    return tuple(text for text in texts if text.strip())