Wendy-Fly
/

Sound

Model card Files Files and versions

xet

Community

Wendy-Fly committed 14 days ago

Commit

91c518d

verified ·

1 Parent(s): b8df358

Upload pairwise_comparison.py with huggingface_hub

Browse files

Files changed (1) hide show

pairwise_comparison.py +1060 -0

pairwise_comparison.py ADDED Viewed

	@@ -0,0 +1,1060 @@

+"""
+Pairwise comparison handler for Bradley-Terry ranking model.
+"""
+import logging
+import time
+import random
+import re
+import os
+import json
+import base64
+from pathlib import Path
+from typing import Dict, List, Tuple, Union, Callable, Any
+from threading import Lock
+from concurrent.futures import ThreadPoolExecutor
+from src.llm.base import BaseLLM
+from lxml import etree
+from tqdm import tqdm
+from collections import defaultdict
+import bytedes
+import html
+from functools import lru_cache
+logger = logging.getLogger(__name__)
+def format_comments_as_xml(comments: List[Dict]) -> str:
+    """Serialize comments into a pretty-printed ``<comments>`` XML document.
+    Every comment becomes a ``<comment>`` element with a ``<comment_text>``
+    child. Non-empty video fields (``video_title``, ``video_tag``,
+    ``video_description``, ``text_in_video``) are nested under an optional
+    ``<video_context>`` child, in the order they appear in the comment dict.
+    Args:
+        comments: Comment dictionaries; each must contain a ``text`` key.
+    Returns:
+        str: The XML document, serialized as UTF-8 and decoded back to ``str``.
+    """
+    context_fields = ('video_title', 'video_tag', 'video_description', 'text_in_video')
+    logger.debug(f"Building XML input for {len(comments)} comments")
+    root = etree.Element("comments")
+    for item in comments:
+        node = etree.SubElement(root, "comment")
+        etree.SubElement(node, "comment_text").text = item["text"]
+        # Walk the comment's own items so context children keep its key order.
+        present = [
+            (name, val) for name, val in item.items()
+            if name in context_fields and val is not None and val != ''
+        ]
+        if not present:
+            continue
+        context_node = etree.SubElement(node, "video_context")
+        for name, val in present:
+            etree.SubElement(context_node, name).text = str(val)
+    serialized = etree.tostring(root, pretty_print=True, encoding="utf-8").decode("utf-8")
+    logger.debug(f"Generated XML: \n{serialized}")
+    return serialized
+def format_convs_as_xml(convs: List[Dict]) -> str:
+    """Serialize conversations into a pretty-printed ``<convs>`` XML document.
+    Each conversation yields a ``<conv>`` element whose children are, in order,
+    ``<conversation_id>``, ``<conv_text>``, ``<conv_ageinfo>``, ``<region>`` and,
+    only when ``lang_fasttext`` is present and not ``None``, ``<language>``.
+    Args:
+        convs: Conversation dictionaries. Each must provide ``alias2age_map``,
+            ``conversation_id``, ``conv_text`` and ``store_region``.
+    Returns:
+        str: The XML document, serialized as UTF-8 and decoded back to ``str``.
+    """
+    logger.debug(f"Building XML input for {len(convs)} convs")
+    root = etree.Element("convs")
+    for conv in convs:
+        # Render the alias -> age mapping as "user_<alias> is <age>, ...".
+        age_summary = ", ".join(
+            f'user_{alias} is {age}' for alias, age in conv['alias2age_map'].items()
+        )
+        node = etree.SubElement(root, "conv")
+        children = (
+            ("conversation_id", str(conv["conversation_id"])),
+            ("conv_text", conv["conv_text"]),
+            ("conv_ageinfo", age_summary),
+            ("region", conv["store_region"]),
+        )
+        for tag, value in children:
+            etree.SubElement(node, tag).text = value
+        language = conv.get("lang_fasttext")
+        if language is not None:
+            etree.SubElement(node, "language").text = language
+    serialized = etree.tostring(root, pretty_print=True, encoding="utf-8").decode("utf-8")
+    logger.debug(f"Generated XML: \n{serialized}")
+    return serialized
+@lru_cache(maxsize=int(os.environ.get('IMAGE_CACHE_SIZE', '512')))
+def image_to_base64(image_path: str) -> str:
+    """Return the contents of ``image_path`` as base64 text.
+    Results are memoized per path in an LRU cache whose size is read once, at
+    import time, from the ``IMAGE_CACHE_SIZE`` environment variable (default
+    512), so a file that changes on disk keeps returning its first encoding
+    while the entry stays cached.
+    """
+    with open(image_path, "rb") as image_file:
+        raw_bytes = image_file.read()
+    encoded = base64.b64encode(raw_bytes)
+    return encoded.decode('utf-8')
+def text_with_placeholders_to_content(
+    text: str,
+    image_paths: List[Union[str, None]],
+    placeholder_pattern: str = r'\[IMG(\d+)\]'
+) -> List[Dict]:
+    """Split text on image placeholders into an OpenAI-style content list.
+    Text between placeholders becomes ``{"type": "text"}`` parts (whitespace-only
+    fragments are skipped) and each resolvable placeholder becomes an
+    ``{"type": "image_url"}`` part carrying a base64 data URL. A placeholder
+    whose index is out of range, or whose path is ``None``, is removed from the
+    output without emitting an image.
+    Args:
+        text: Source text containing placeholders such as ``[IMG1]``, ``[IMG2]``.
+        image_paths: Image paths ordered by placeholder number; placeholder
+            numbering is 1-based, so ``[IMG1]`` maps to ``image_paths[0]``.
+        placeholder_pattern: Regex whose first group captures the number.
+    Returns:
+        List[Dict]: Interleaved text and image parts in document order.
+    """
+    import mimetypes
+    content: List[Dict] = []
+    def _append_text(fragment: str) -> None:
+        # Whitespace-only fragments carry no information for the model.
+        if fragment.strip():
+            content.append({"type": "text", "text": fragment})
+    cursor = 0
+    for match in re.finditer(placeholder_pattern, text):
+        start, end = match.span()
+        _append_text(text[cursor:start])
+        img_idx = int(match.group(1)) - 1  # placeholders are numbered from 1
+        path = image_paths[img_idx] if 0 <= img_idx < len(image_paths) else None
+        if path is not None:
+            # Label the data URL with the real media type instead of always
+            # claiming JPEG; fall back to JPEG when the extension is unknown.
+            mime_type = mimetypes.guess_type(path)[0]
+            if not mime_type or not mime_type.startswith("image/"):
+                mime_type = "image/jpeg"
+            content.append({
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:{mime_type};base64,{image_to_base64(path)}"
+                }
+            })
+        cursor = end
+    # Trailing text after the last placeholder.
+    _append_text(text[cursor:])
+    return content
+def format_convs_as_xml_image(convs: List[Dict]) -> List[Dict]:
+    """Build the conversations XML and split it into multimodal content parts.
+    Images are marked in the XML with ``[IMGn]`` placeholders and the result is
+    an OpenAI-compatible content list (text and ``image_url`` parts interleaved).
+    Image sources read from each conv:
+    - single image: ``conv['img_path']``, falling back to ``conv['img']`` when
+      the ``img_path`` key is absent;
+    - several images: ``conv['image_paths']`` / ``conv['img_paths']`` /
+      ``conv['imgs']`` / ``conv['images']`` (lists of path strings).
+    Modes (chosen automatically per conv):
+    - Embedded: when ``conv['conv_text']`` contains ``[IMGk]``, ``k`` is a 1-based
+      index into that conv's own images. Each placeholder is rewritten to a
+      global ``[IMGn]`` and the image is inserted at that position. A
+      placeholder that does not match any image of its conv is dropped, so it
+      can never be re-read as a global index pointing at another conv's image.
+    - Non-embedded: when ``conv_text`` has no placeholder, the text is kept as is
+      and the conv's images are NOT attached (appending ``<image>`` nodes is
+      currently disabled).
+    Args:
+        convs: Conversation dictionaries. Each must provide ``alias2age_map``,
+            ``conversation_id``, ``conv_text`` and ``store_region``.
+    Returns:
+        List[Dict]: Content parts produced by ``text_with_placeholders_to_content``.
+    """
+    logger.debug(f"Building multimodal XML input for {len(convs)} convs")
+    placeholder_pattern = r'\[IMG(\d+)\]'
+    xml_convs = etree.Element("convs")
+    # Global image list: "[IMGn]" in the XML refers to image_paths_flat[n - 1].
+    image_paths_flat: List[Union[str, None]] = []
+    def _extract_image_paths(conv: Dict) -> List[str]:
+        # Collect the single-image field first, then every list-valued field.
+        paths: List[str] = []
+        single = conv.get('img_path', conv.get('img', None))
+        if isinstance(single, str) and single:
+            paths.append(single)
+        for k in ("image_paths", "img_paths", "imgs", "images"):
+            v = conv.get(k, None)
+            if isinstance(v, list):
+                paths.extend(p for p in v if isinstance(p, str) and p)
+        return paths
+    def _globalize_placeholders(conv_text: str, img_paths: List[str], conversation_id: Any) -> str:
+        # Rewrite conv-local [IMGk] markers to global [IMGn] markers.
+        local_to_global: Dict[int, int] = {}
+        def _replace(m: re.Match) -> str:
+            local_idx = int(m.group(1)) - 1  # local placeholders are 1-based
+            if not (0 <= local_idx < len(img_paths)):
+                # Leaving the marker in place would let the final split treat
+                # it as a global index and attach an unrelated image.
+                logger.warning(
+                    f"Dropping placeholder {m.group(0)} in conversation {conversation_id}: "
+                    f"no matching image"
+                )
+                return ''
+            if local_idx not in local_to_global:
+                image_paths_flat.append(img_paths[local_idx])
+                local_to_global[local_idx] = len(image_paths_flat)
+            return f'[IMG{local_to_global[local_idx]}]'
+        return re.sub(placeholder_pattern, _replace, conv_text)
+    for conv in convs:
+        alias2age_map = conv['alias2age_map']
+        alias2age_text = ", ".join([f'user_{k} is {v}' for k, v in alias2age_map.items()])
+        xml_conv = etree.SubElement(xml_convs, "conv")
+        xml_conv_id = etree.SubElement(xml_conv, "conversation_id")
+        xml_conv_id.text = str(conv["conversation_id"])
+        xml_conv_text = etree.SubElement(xml_conv, "conv_text")
+        conv_text = conv["conv_text"]
+        xml_conv_ageinfo = etree.SubElement(xml_conv, "conv_ageinfo")
+        xml_conv_ageinfo.text = alias2age_text
+        xml_conv_region = etree.SubElement(xml_conv, "region")
+        xml_conv_region.text = conv["store_region"]
+        if conv.get("lang_fasttext", None) is not None:
+            xml_conv_language = etree.SubElement(xml_conv, "language")
+            xml_conv_language.text = conv["lang_fasttext"]
+        if isinstance(conv_text, str) and re.search(placeholder_pattern, conv_text):
+            img_paths = _extract_image_paths(conv)
+            xml_conv_text.text = _globalize_placeholders(
+                conv_text, img_paths, conv["conversation_id"]
+            )
+        else:
+            # Non-embedded: keep the text unchanged. NOTE: images of such convs
+            # are not attached; the former "<image>[IMGn]</image>" append step
+            # is disabled.
+            xml_conv_text.text = conv_text
+    xml_str = etree.tostring(xml_convs, pretty_print=True, encoding="utf-8").decode("utf-8")
+    logger.debug(f"Generated multimodal XML: \n{xml_str}")
+    return text_with_placeholders_to_content(xml_str, image_paths_flat)
+def parse_unit_judgment(text_info: dict) -> Tuple[int, Dict[str, List[str]]]:
+    """Parse the pairwise judgment and flagged messages from an LLM response.
+    The response must contain ``<result>...<judgment>...</judgment>...</result>``
+    where the judgment reads like ``<unit> A > <unit> B`` (case-insensitive,
+    full-width operators accepted). An optional ``<violative_messages>`` block
+    holds one single-letter tag per item, e.g. ``<A>m1,m2</A>``.
+    Args:
+        text_info: dict
+         - text: Response text from LLM
+         - unit: word naming the compared items in the judgment
+           (defaults to "comment")
+    Returns:
+        Tuple[int, Dict[str, List[str]]]: ``(label, violative_msg_dict)``.
+        ``label`` is 1 if Unit A > Unit B, -1 if Unit A < Unit B, 0 if equal.
+        ``violative_msg_dict`` maps each tag letter to its list of message ids
+        (empty when the block is absent).
+    Raises:
+        ValueError: If the result block, the judgment, or a supported
+            comparison cannot be parsed (a missing/``None`` text counts as
+            "No result block found").
+    """
+    # A missing or None text is reported like any other unparsable response.
+    text = text_info.get("text") or ""
+    unit = text_info.get("unit", "comment")
+    m_res = re.search(r'<\s*result\s*>(.*?)<\s*/\s*result\s*>',
+                      text, re.IGNORECASE | re.DOTALL)
+    if not m_res:
+        logger.error("No result block found in LLM response")
+        raise ValueError("No result block found")
+    m_j = re.search(r'<\s*judgment\s*>(.*?)<\s*/\s*judgment\s*>',
+                    m_res.group(1), re.IGNORECASE | re.DOTALL)
+    if not m_j:
+        logger.error("No judgment found in result block")
+        raise ValueError("No judgment found")
+    judgment_text = html.unescape(m_j.group(1).strip())
+    judgment_text = re.sub(r'\bFinal\s+Judgment\b', '',
+                           judgment_text, flags=re.IGNORECASE).strip()
+    # Normalize full-width punctuation to ASCII before matching.
+    judgment_text = (judgment_text
+                     .replace("：", ":")
+                     .replace("＞", ">")
+                     .replace("＜", "<")
+                     .replace("＝", "="))
+    unit_pattern = re.escape(unit)
+    pattern = rf'{unit_pattern}\s*([ab])\s*([<>]=?|==?)\s*{unit_pattern}\s*([ab])'
+    m_cmp = re.search(pattern, judgment_text, re.IGNORECASE)
+    if not m_cmp:
+        logger.error(f"Invalid judgment format: {judgment_text!r}")
+        raise ValueError(f"Invalid judgment format: {judgment_text!r}")
+    left, op, _right = m_cmp.groups()
+    left = left.upper()
+    # Extract the violative messages; tags may span several lines.
+    violative_msg_dict: Dict[str, List[str]] = {}
+    violative_content = re.search(r'<violative_messages>(.*?)</violative_messages>', text, re.DOTALL)
+    if violative_content:
+        content = violative_content.group(1).strip()
+        # Each tag looks like <A>m1,m2</A>; ids are trimmed and blanks dropped.
+        for tag, val in re.findall(r'<([A-Z])>(.*?)</\1>', content, re.DOTALL):
+            pieces = val.strip().strip("<>[]\"").split(",")
+            cleaned = (piece.strip().strip("<>[]\"").strip() for piece in pieces)
+            violative_msg_dict[tag] = [piece for piece in cleaned if piece]
+    else:
+        logger.warning("No violative_messages found.")
+    if op in ('=', '=='):
+        label = 0
+    elif op in ('>', '<'):
+        # The left-hand item wins on '>' and loses on '<'.
+        left_wins = op == '>'
+        label = 1 if (left == 'A') == left_wins else -1
+    else:
+        # '>=' and '<=' match the pattern but carry no usable ordering.
+        logger.error(f"Unsupported operator in judgment: {op!r}")
+        raise ValueError(f"Unsupported operator: {op!r}")
+    return label, violative_msg_dict
+class PairwiseComparison:
+    """
+    Handles pairwise comparisons between items using LLM.
+    Manages comparison results, counts, and statistics.
+    """
+    def __init__(self,
+                 llm: BaseLLM,
+                 prompt_templates: Dict[str, str],
+                 format_items: Callable[[List[Dict]], Any],
+                 parse_judgment: Callable[[Dict[str, Any]], Tuple[int, Dict[str, List[str]]]],
+                 data_dir: str,
+                 es_index: str,
+                 es_psm: str = "byte.es.ranking_moderation_cmt.service.my",
+                 business: str = "comment",
+                 max_comparisons_per_pair: int = 3,
+                 max_workers: int = 4,
+                 max_retries: int = 8,
+                 initial_retry_delay: float = 2.0,
+                 max_backups: int = 3,
+                 detect_msg_violations: bool = True,
+                 log_gpt_io: bool = False,
+                 log_sample_rate: float = 0.01,
+                 local_cache_path: Union[str, Path, None] = None,
+                 local_cache_enabled: bool = False):
+        """Initialize pairwise comparison handler.
+        Args:
+            llm: Language model for pairwise comparisons
+            prompt_templates: Dictionary of prompt templates
+            format_items: Function to format items into LLM input. Can return either str or List[Dict]
+            parse_judgment: Function that receives ``{"text": ..., "unit": ...}``
+                and returns ``(judgment, violative_msg_dict)``
+            data_dir: Directory to store state files (created if missing)
+            es_index: Elasticsearch index name; ``None`` or the string
+                ``'None'`` disables Elasticsearch
+            es_psm: Service name passed to ``bytedes.make_client``
+            business: ``"comment"`` (unit "comment") or ``"dm"`` / ``"dm_mm"``
+                (unit "conversation"), case-insensitive
+            max_comparisons_per_pair: Maximum number of comparisons for each pair
+            max_workers: Maximum number of workers (stored on the instance)
+            max_retries: Maximum number of retry attempts for failed comparisons
+            initial_retry_delay: Initial delay in seconds before first retry
+            max_backups: Maximum number of backup files to keep (default: 3)
+            detect_msg_violations: Whether results carry ``violative_msg_map_str``
+            log_gpt_io: Whether sampled LLM inputs/outputs may be logged
+            log_sample_rate: Sampling probability used with ``log_gpt_io``
+            local_cache_path: JSONL file for the local result cache; the string
+                ``'None'`` is treated as ``None``. Giving a path enables the cache.
+            local_cache_enabled: Enable the local cache; without a path it
+                defaults to ``<data_dir>/pairwise_comparisons.jsonl``
+        Raises:
+            NotImplementedError: If ``business`` is not supported.
+        """
+        # Validate the business first so a bad value fails before any
+        # directory is created or Elasticsearch client is opened.
+        business_key = business.lower()
+        if business_key == "comment":
+            self.process_unit = "comment"
+        elif business_key in ["dm", "dm_mm"]:
+            self.process_unit = "conversation"
+        else:
+            raise NotImplementedError(f"Ranking moderation for business {business} is not implemented!")
+        self.llm = llm
+        self.prompt_templates = prompt_templates
+        self.format_items = format_items
+        self.parse_judgment = parse_judgment
+        self.max_comparisons_per_pair = max_comparisons_per_pair
+        self.max_workers = max_workers
+        self.max_retries = max_retries
+        self.initial_retry_delay = initial_retry_delay
+        self.max_backups = max_backups
+        self.detect_msg_violations = detect_msg_violations
+        self.log_gpt_io = log_gpt_io
+        self.log_sample_rate = log_sample_rate
+        self.data_dir = Path(data_dir)
+        # Configuration layers may hand over the literal string 'None'.
+        self.es_index = None if es_index == 'None' else es_index
+        if local_cache_path == 'None':
+            local_cache_path = None
+        self.data_dir.mkdir(parents=True, exist_ok=True)
+        self.local_cache_enabled = bool(local_cache_enabled or local_cache_path)
+        self.local_cache_path = None
+        if self.local_cache_enabled:
+            if local_cache_path is None or local_cache_path is True:
+                self.local_cache_path = self.data_dir / "pairwise_comparisons.jsonl"
+            else:
+                self.local_cache_path = Path(local_cache_path)
+            self.local_cache_path.parent.mkdir(parents=True, exist_ok=True)
+        self._local_cache_loaded = False
+        if self.es_index is not None:
+            self.client = bytedes.make_client(psm=es_psm, cluster="data",scheme="https",
+                             verify_certs=False, use_ssl=True, ssl_show_warn=False, maxsize=50)
+        else:
+            self.client = None
+        # In-memory result cache keyed by pair_key, guarded by _cache_lock.
+        self._cache_lock = Lock()
+        self.comparison_results = defaultdict(list)  # Store pairwise comparison results
+    def get_state_file_path(self, filename: str) -> Path:
+        """Resolve a state file name against the handler's data directory.
+        Args:
+            filename: Name of the state file
+        Returns:
+            Path: ``self.data_dir`` joined with ``filename``
+        """
+        state_path = self.data_dir.joinpath(filename)
+        return state_path
+    def get_pair_key(self, item_id1: str, item_id2: str) -> str:
+        """Build the order-independent key identifying a pair of items.
+        Both ids are converted to strings and compared lexicographically, so
+        the same key results whichever id is passed first.
+        Args:
+            item_id1: First item ID
+            item_id2: Second item ID
+        Returns:
+            str: ``"<smaller>_<larger>"``
+        """
+        smaller, larger = sorted((str(item_id1), str(item_id2)))
+        return f"{smaller}_{larger}"
+    def get_ordered_pair(self, item_id1: str, item_id2: str) -> Tuple[str, str]:
+        """Get ordered pair of item IDs.
+        The ids are converted to strings before comparison, exactly as
+        ``get_pair_key`` does, so the returned order always matches the order
+        encoded in the pair key (and mixed int/str ids no longer raise).
+        Args:
+            item_id1: First item ID
+            item_id2: Second item ID
+        Returns:
+            Tuple[str, str]: Ordered pair of item IDs (min, max), as strings
+        """
+        item_id1, item_id2 = str(item_id1), str(item_id2)
+        return min(item_id1, item_id2), max(item_id1, item_id2)
+    def get_compare_result_from_es(self, pair_key: str) -> List[Dict[str, Any]]:
+        """Return the stored comparison results for one pair.
+        Lookup order: the in-memory cache, then (without Elasticsearch) the
+        local JSONL cache, otherwise an Elasticsearch term query on
+        ``pair_key`` retried with exponential back-off.
+        Args:
+            pair_key: Key produced by ``get_pair_key``
+        Returns:
+            List[Dict[str, Any]]: Result records for the pair; empty when none
+            are known. A non-empty Elasticsearch answer is stored in the cache.
+        Raises:
+            Exception: The last search error once ``max_retries`` is exhausted.
+        """
+        # read from cache
+        with self._cache_lock:
+            if pair_key in self.comparison_results:
+                return self.comparison_results[pair_key]
+        if self.es_index is None:
+            if self.local_cache_enabled and not self._local_cache_loaded:
+                # Load the whole file: the loader marks the local cache as
+                # loaded for every pair, so a load filtered to this pair's ids
+                # would hide the persisted results of all later pairs.
+                self.load_data_to_cache_from_local(None, load_detail=True)
+                with self._cache_lock:
+                    if pair_key in self.comparison_results:
+                        return self.comparison_results[pair_key]
+            return []
+        query_body = {
+            "query": {
+                "term": {
+                    "pair_key": {
+                        "value": pair_key
+                    }
+                }
+            }
+        }
+        trial = 0
+        current_delay = self.initial_retry_delay
+        # Only the network call is retried; a malformed hit fails immediately
+        # instead of sleeping through the whole back-off schedule.
+        while trial < self.max_retries:
+            try:
+                result = self.client.search(index=self.es_index, body=query_body, size=200)
+                break
+            except Exception as e:
+                trial += 1
+                if trial == self.max_retries:
+                    logger.error(f"Failed to get data from es after {self.max_retries} attempts: {str(e)}")
+                    raise
+                logger.warning(f"Failed to get data from es, Request failed (attempt {trial}/{self.max_retries}). "
+                               f"Retrying in {current_delay} seconds. Error: {str(e)}")
+                time.sleep(current_delay)
+                current_delay *= 2
+        else:
+            # Reached only when max_retries <= 0: no query was attempted.
+            return []
+        compare_result = []
+        for hit in result['hits']['hits']:
+            source = hit['_source']
+            record = {
+                "judgment": source['judgment'],
+                "raw_response": source['raw_response'],
+                "timestamp": source["timestamp"],
+                "item_id_a": source['item_id_a'],
+                "item_id_b": source['item_id_b'],
+                'ordered_ids': self.get_ordered_pair(source['item_id_a'], source['item_id_b']),
+                'original_ids': (source['item_id_a'], source['item_id_b'])
+            }
+            # Documents written without violation detection lack this field.
+            if self.detect_msg_violations and 'violative_msg_map_str' in source:
+                record["violative_msg_map_str"] = source['violative_msg_map_str']
+            compare_result.append(record)
+        # An empty answer is deliberately not cached.
+        if compare_result:
+            with self._cache_lock:
+                self.comparison_results[pair_key] = compare_result
+        return compare_result
+    def load_data_to_cache_from_es(self, item_ids: List[str], load_detail=True):
+        """Bulk-load every stored comparison that involves the given items.
+        Without an Elasticsearch index the call is delegated to
+        ``load_data_to_cache_from_local``. Otherwise the ids are queried in
+        batches of 500 through the scroll API, and each hit is merged into the
+        in-memory cache unless a record with the same timestamp is already
+        stored for that pair.
+        Args:
+            item_ids: Item ids to match against ``item_id_a`` / ``item_id_b``
+            load_detail: Also fetch ``raw_response`` for each record
+        Returns:
+            dict: ``pair_key`` -> list of records seen during this call
+            (de-duplicated by timestamp within the call).
+        """
+        if self.es_index is None:
+            return self.load_data_to_cache_from_local(item_ids, load_detail=load_detail)
+        def chunk_list(lst, chunk_size):
+            for i in range(0, len(lst), chunk_size):
+                yield lst[i:i + chunk_size]
+        def _add_if_new(store, key, record):
+            # Records of one pair are told apart by their timestamp.
+            bucket = store.setdefault(key, [])
+            if all(record['timestamp'] != seen.get('timestamp') for seen in bucket):
+                bucket.append(record)
+        comparison_results_temp = {}
+        scroll = "2m"
+        batch_size = 500
+        _source = ["pair_key", "judgment", "timestamp", "item_id_a", "item_id_b"]
+        if self.detect_msg_violations:
+            _source.append("violative_msg_map_str")
+        if load_detail:
+            _source.append("raw_response")
+        for id_batch in chunk_list(item_ids, batch_size):
+            query = {
+                "_source": _source,
+                "query": {
+                    "bool": {
+                        "should": [
+                            { "terms": { "item_id_a.keyword": id_batch }},
+                            { "terms": { "item_id_b.keyword": id_batch }}
+                        ],
+                        "minimum_should_match": 1
+                    }
+                }
+            }
+            # NOTE(review): the scroll context is never cleared explicitly; it
+            # is left to expire after the "2m" keep-alive.
+            page = self.client.search(index=self.es_index, body=query, scroll=scroll, size=5000)
+            while page["hits"]["hits"]:
+                for hit in page["hits"]["hits"]:
+                    source = hit['_source']
+                    pair_key = source['pair_key']
+                    r = {
+                        "judgment": source['judgment'],
+                        "timestamp": source["timestamp"],
+                        "item_id_a": source['item_id_a'],
+                        "item_id_b": source['item_id_b'],
+                        'ordered_ids': self.get_ordered_pair(source['item_id_a'], source['item_id_b']),
+                        'original_ids': (source['item_id_a'], source['item_id_b'])
+                    }
+                    # Documents written without violation detection lack this
+                    # field; skip it rather than abort the whole load.
+                    if self.detect_msg_violations and 'violative_msg_map_str' in source:
+                        r["violative_msg_map_str"] = source['violative_msg_map_str']
+                    if 'raw_response' in source:
+                        r['raw_response'] = source['raw_response']
+                    with self._cache_lock:
+                        _add_if_new(self.comparison_results, pair_key, r)
+                    # Keep a per-call view of what this load returned.
+                    _add_if_new(comparison_results_temp, pair_key, r)
+                page = self.client.scroll(scroll_id=page["_scroll_id"], scroll=scroll)
+        return comparison_results_temp
+    def load_data_to_cache_from_local(self, item_ids: Union[List[str], None], load_detail=True):
+        """Load comparison records from the local JSONL cache file.
+        Blank lines, lines that are not valid JSON, JSON values that are not
+        objects, and objects without both item ids are skipped. Each remaining
+        record is merged into the in-memory cache unless a record with the
+        same timestamp is already stored for that pair.
+        Args:
+            item_ids: Only load records involving one of these ids; ``None``
+                loads every record
+            load_detail: Also keep ``raw_response`` when the record has one
+        Returns:
+            dict: ``pair_key`` -> list of records read during this call; empty
+            when the local cache is disabled or the file does not exist.
+        """
+        if not self.local_cache_enabled or self.local_cache_path is None:
+            return {}
+        if not self.local_cache_path.exists():
+            self._local_cache_loaded = True
+            return {}
+        item_id_set = None if item_ids is None else {str(x) for x in item_ids}
+        def _add_if_new(store, key, record):
+            # Records of one pair are told apart by their timestamp.
+            bucket = store.setdefault(key, [])
+            if all(record["timestamp"] != seen.get("timestamp") for seen in bucket):
+                bucket.append(record)
+        comparison_results_temp = {}
+        with self._cache_lock:
+            self._local_cache_loaded = True
+        with open(self.local_cache_path, "r", encoding="utf-8") as f:
+            for line in f:
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    doc = json.loads(line)
+                except (ValueError, RecursionError):
+                    # Best effort: a corrupt line must not block the others.
+                    continue
+                # A valid JSON scalar or list is not a record either.
+                if not isinstance(doc, dict) or "item_id_a" not in doc or "item_id_b" not in doc:
+                    continue
+                item_id_a = str(doc["item_id_a"])
+                item_id_b = str(doc["item_id_b"])
+                if item_id_set is not None and item_id_a not in item_id_set and item_id_b not in item_id_set:
+                    continue
+                pair_key = doc.get("pair_key", self.get_pair_key(item_id_a, item_id_b))
+                r = {
+                    "judgment": doc.get("judgment"),
+                    "timestamp": doc.get("timestamp"),
+                    "item_id_a": item_id_a,
+                    "item_id_b": item_id_b,
+                    "ordered_ids": self.get_ordered_pair(item_id_a, item_id_b),
+                    "original_ids": (item_id_a, item_id_b)
+                }
+                if self.detect_msg_violations and "violative_msg_map_str" in doc:
+                    r["violative_msg_map_str"] = doc["violative_msg_map_str"]
+                if load_detail and "raw_response" in doc:
+                    r["raw_response"] = doc["raw_response"]
+                with self._cache_lock:
+                    _add_if_new(self.comparison_results, pair_key, r)
+                # Keep a per-call view of what this load returned.
+                _add_if_new(comparison_results_temp, pair_key, r)
+        return comparison_results_temp
+    def write_compare_result_to_es(self, pair_key: str, comparison_result):
+        """Record one comparison result in the cache, local file and Elasticsearch.
+        The result is appended to the in-memory cache and, when the local cache
+        is enabled, to the local JSONL file exactly once, even if indexing into
+        Elasticsearch has to be retried. Indexing is retried with exponential
+        back-off.
+        Args:
+            pair_key: Key produced by ``get_pair_key``
+            comparison_result: Result record with ``judgment``,
+                ``raw_response``, ``timestamp``, ``original_ids`` and, when
+                violation detection is on, ``violative_msg_map_str``
+        Returns:
+            The response of ``client.index`` when an index is configured,
+            otherwise ``None``.
+        Raises:
+            KeyError: If ``comparison_result`` lacks a required field.
+            Exception: The last write error once ``max_retries`` is exhausted.
+        """
+        # Build the document up front: a malformed result cannot be fixed by
+        # retrying, so it should fail before any back-off.
+        doc = {
+            'pair_key': pair_key,
+            'judgment': comparison_result['judgment'],
+            'raw_response': comparison_result['raw_response'],
+            'timestamp': comparison_result['timestamp'],
+            'item_id_a': comparison_result['original_ids'][0],
+            'item_id_b': comparison_result['original_ids'][1],
+        }
+        if self.detect_msg_violations:
+            doc['violative_msg_map_str'] = comparison_result['violative_msg_map_str']
+        cached = False
+        persisted_locally = not (self.local_cache_enabled and self.local_cache_path is not None)
+        trial = 0
+        current_delay = self.initial_retry_delay
+        while trial < self.max_retries:
+            try:
+                with self._cache_lock:
+                    # Guarded by flags so a retry never duplicates the record.
+                    if not cached:
+                        self.comparison_results.setdefault(pair_key, []).append(comparison_result)
+                        cached = True
+                    if not persisted_locally:
+                        with open(self.local_cache_path, "a", encoding="utf-8") as f:
+                            f.write(json.dumps(doc, ensure_ascii=False) + "\n")
+                        persisted_locally = True
+                if self.es_index is not None:
+                    return self.client.index(index=self.es_index, body=doc)
+                return None
+            except Exception as e:
+                trial += 1
+                if trial == self.max_retries:
+                    logger.error(f"Failed to write data to es after {self.max_retries} attempts: {str(e)}")
+                    raise
+                logger.warning(f"Failed to write data to es, Request failed (attempt {trial}/{self.max_retries}). "
+                               f"Retrying in {current_delay} seconds. Error: {str(e)}")
+                time.sleep(current_delay)
+                current_delay *= 2
+    def compare_single_pair(self, pair: Tuple[Dict, Dict], random_swap: bool = True, use_cache: bool = True) -> Dict[str, str]:
+        """Compare a single pair of items and store the result.
+        Args:
+            pair: Tuple of (item1, item2)
+            random_swap: Whether to randomly swap the order of items.
+                        If False, keep the original order.
+                        Default is True for backward compatibility.
+            use_cache: Whether to use cached comparison results if available.
+                      If False, always perform new comparison.
+                      Default is True for backward compatibility.
+        """
+        item1, item2 = pair
+        # Only swap items if random_swap is True
+        if random_swap and random.random() < 0.5:
+            item1, item2 = item2, item1
+        item_id1 = str(item1['item_id'])
+        item_id2 = str(item2['item_id'])
+        pair_key = self.get_pair_key(item_id1, item_id2)
+        ordered_id1, ordered_id2 = self.get_ordered_pair(item_id1, item_id2)
+        # get result from es
+        if use_cache:
+            compare_result = self.get_compare_result_from_es(pair_key)
+            if len(compare_result) >= self.max_comparisons_per_pair:
+                return random.choice(compare_result)
+        # Format items into LLM input
+        formatted_input = self.format_items([item1, item2])
+        messages = [
+            {"role": "system", "content": self.prompt_templates['system_prompt']},
+            {"role": "user", "content": formatted_input}
+        ]
+        trial = 0
+        current_delay = self.initial_retry_delay
+        while trial < self.max_retries:
+            response = None
+            try:
+                response = self.llm.chat_completion(messages=messages, max_tokens=4000, temperature=0.0)
+                result = response['choices'][0]['message']['content']
+                result_info = {
+                    "text": result,
+                    "unit": self.process_unit
+                }
+                judgment, violative_msg_dict = self.parse_judgment(result_info)
+                comparison_result = {
+                    'judgment': judgment,
+                    'raw_response': result,
+                    'timestamp': time.time(),
+                    'ordered_ids': (ordered_id1, ordered_id2),
+                    'original_ids': (item_id1, item_id2),
+                    "item_id_a": str(item_id1),
+                    "item_id_b": str(item_id2)
+                }
+                if self.detect_msg_violations:
+                    # e.g. {"a": [m0,m1], "b": [m0]}
+                    comparison_result['violative_msg_map_str'] = json.dumps(violative_msg_dict)
+                if self.detect_msg_violations and self.log_gpt_io and random.random()<self.log_sample_rate:
+                    with self._cache_lock:
+                        with open(self.data_dir / "tmp_gpt_io.jsonl", "a") as f:
+                            if isinstance(formatted_input, str):
+                                formatted_input_for_regex = formatted_input
+                            else:
+                                # multimodal content: concatenate all text blocks
+                                formatted_input_for_regex = "".join(
+                                    c.get("text", "")
+                                    for c in formatted_input
+                                    if isinstance(c, dict) and c.get("type") == "text"
+                                )
+                            _convs = re.findall(r'<conv_text>(.*?)</conv_text>', formatted_input_for_regex.strip(), re.DOTALL)
+                            _convs = [html.unescape(c).strip() for c in _convs]
+                            _age_info = re.findall(r'<conv_ageinfo>(.*?)</conv_ageinfo>', formatted_input_for_regex, re.DOTALL)
+                            _data = {
+                                'timestamp': time.time(),
+                                "conversations": [{"conv_text": c, "conv_ageinfo": a} for c,a in zip(_convs, _age_info)],
+                                "judgment": judgment,
+                                "violative_msg_map_str": violative_msg_dict
+                            }
+                            f.write(json.dumps(_data, ensure_ascii=False)+"\n")
+                self.write_compare_result_to_es(pair_key, comparison_result)
+                return comparison_result
+            except Exception as e:
+                trial += 1
+                if str(e) == "No result block found" and trial == 2:
+                    logger.error(f"parse error, only try 2 times")
+                    return
+                if trial == self.max_retries:
+                    logger.error(f"Failed to compare pair after {self.max_retries} attempts: {str(e)}")
+                    raise
+                logger.warning(f"Request failed (attempt {trial}/{self.max_retries}). "
+                               f"Retrying in {current_delay} seconds. Error: {str(e)}")
+                if response is not None:
+                    logger.warning(f"Response: {response}")
+                time.sleep(current_delay)
+                current_delay *= 2
+    def compare_pairs(self, pairs: List[Tuple[Dict, Dict]], random_swap: bool = True, use_cache: bool = True, use_tqdm: bool = True) -> List[Tuple[Dict, Dict]]:
+        """Compare multiple pairs of items in parallel.
+        Args:
+            pairs: List of (item1, item2) tuples to compare
+            random_swap: Whether to randomly swap the order of items in each pair.
+                        If False, keep the original order.
+                        Default is True for backward compatibility.
+            use_cache: Whether to use cached comparison results if available.
+                      If False, always perform new comparison.
+                      Default is True for backward compatibility.
+        Returns:
+            List of successfully compared pairs
+        """
+        # Create a new process pool for each batch of comparisons
+        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
+            futures = []
+            for pair in pairs:
+                future = executor.submit(self.compare_single_pair, pair, random_swap=random_swap, use_cache=use_cache)
+                futures.append((future, pair))
+            # Get successfully compared pairs
+            successful_pairs = []
+            for future, pair in tqdm(futures, desc="Comparing pairs", total=len(futures), leave=False, disable=not use_tqdm):
+                try:
+                    result = future.result()
+                    if result:  # If comparison was successful
+                        successful_pairs.append(pair)
+                except Exception as e:
+                    logger.error(f"Error comparing pair: {str(e)}")
+        return successful_pairs
+    def understand_by_pairs(self, item: Dict, compare_results: List[Dict]):
+        from lxml import etree
+        import random
+        rs_high, rs_low = [], []
+        target_id = str(item["item_id"])
+        for pair_key, r in compare_results.items():
+            r = r[0]
+            a, b, j = r["item_id_a"], r["item_id_b"], r["judgment"]
+            if target_id == a and j >= 0:
+                rs_low.append(r)
+            elif target_id == b and j < 0:
+                rs_low.append(r)
+            elif target_id == a and j < 0:
+                rs_high.append(r)
+            elif target_id == b and j >= 0:
+                rs_high.append(r)
+        random.shuffle(rs_high)
+        random.shuffle(rs_low)
+        examples = rs_high[:2] + rs_low[:2]
+        root = etree.Element("analysis_input")
+        target_block = etree.SubElement(root, "target_conversation")
+        xml_target = etree.fromstring(
+            self.format_items([item]).encode("utf-8")
+        )
+        target_block.append(xml_target)
+        comps_block = etree.SubElement(root, "comparisons")
+        for ex in examples:
+            # try:
+            #     comp_xml = etree.fromstring(ex["raw_response"].encode("utf-8"))
+            #     wrapped = etree.SubElement(comps_block, "comparison")
+            #     wrapped.append(comp_xml)
+            # except Exception:
+            txt = etree.SubElement(comps_block, "raw_response")
+            txt.text = etree.CDATA(ex["raw_response"])
+        xml_input = etree.tostring(root, pretty_print=True,
+                                   encoding="utf-8").decode("utf-8")
+        system_prompt = """You are an expert in content safety and moderation.
+        You will receive a set of pairwise comparison results between a **target conversation** and multiple other conversations. Your primary goal is to clearly inform users about the **exact meaning and severity** of the target conversation based solely on these comparisons. Avoid any external assumptions or information.
+        When crafting your analysis, follow these guidelines:
+        * Clearly interpret the **exact meaning** of the target conversation, thoroughly and explicitly derived from the provided comparisons. Provide sufficient details so users clearly grasp the context, intention, and potential implications of the conversation.
+        * Identify explicitly which conversations the target conversation is **more severe than** and which ones it is **less severe than**. Clearly quote or paraphrase each comparison conversation, and provide explicit reasons derived from comparison data.
+        * Ensure all explanations rely **only** on the provided comparison data—**do not speculate or infer beyond what is explicitly given**.
+        * Structure your response clearly and flexibly so users immediately understand:
+        * The precise and detailed meaning of the target conversation.
+        * The explicit reasons behind the conversation's classification as severe or non-severe.
+        * Specific examples of comparisons supporting your severity assessments, clearly quoting or paraphrasing each messages in the conversation.
+        Your output should be in **Markdown format**, providing clear bilingual summaries in **English and Chinese** to ensure consistency for bilingual users.
+        *This detailed analysis aims to provide users with a clear and explicit understanding of the exact meaning of the conversation, its severity, and concrete evidence from comparative conversation or messages.* make sure your response can achieve this
+        ---
+        **Detailed Output Example:**
+        ## 📝 Conversation Analysis
+        ### 🔹 English
+        **Direct translation:**
+        Translate this conversation into English. If the conversation is in English, simply copy the conversation as the output.
+            - Maintain all special tags message ids e.g. <m1> and user_ids e.g. user_0
+            - Do not remove existing new line characters
+            - Explain any cultural references / slang that could be unsafe or harmful in line e.g. user_0: Show me your "taye" [Note: "taye" means butt/ass in Algerian slang]
+            - Provide cultural context (within brackets inline - e.g. [Note: xxx]) when the translation in English sounds unnatural/incoherent
+            - Keep the explanations brief and concise (Keep a single note to a maximum of 10 words).
+        **Interpreted Meaning:**
+        The conversation explicitly expresses [detailed interpretation explicitly based on provided comparisons, clearly explaining context, intent, and implications].
+        **Severity Assessment:**
+        * **More Severe Than:**
+        * "Exact or paraphrased conversation A": because \[explicit reason from comparison clearly indicating severity difference].
+        * "Exact or paraphrased conversation B": because \[explicit reason from comparison clearly indicating severity difference].
+        * **Less Severe Than:**
+        * "Exact or paraphrased conversation C": because \[explicit reason from comparison clearly indicating severity difference].
+        ---
+        ### 🔸 中文
+        **直接翻译：**
+        将此对话翻译成中文。如果对话是中文，只需复制对话作为输出即可。
+            - 保留所有特殊标签消息 ID，例如 <m1> 和用户 ID，例如 user_0
+            - 不要删除现有的换行符
+            - 解释任何可能不安全或有害的文化指涉/俚语，例如 user_0：给我看看你的“taye”[注：“taye”在阿尔及利亚俚语中是屁股的意思]
+            - 如果英语翻译听起来不自然/语无伦次，请提供文化背景（在括号内，例如 [注：xxx]）。
+            - 解释请简洁明了（每条注释最多 10 个字）。
+        **解释性含义**：
+        对话明确表达了\[基于提供的比较结果的详细解释，清晰地解释了上下文、意图和含义]。
+        **严重程度评估**：
+        * **严重程度高于**：
+        * “对话 A 完全一致或转述”：因为\[比较结果的明确原因清楚地表明了严重程度的差异]。
+        * “对话 B 完全一致或转述”：因为\[比较结果的明确原因清楚地表明了严重程度的差异]。
+        * **严重程度低于**：
+        * “对话 C 完全一致或转述”：因为\[比较结果的明确原因清楚地表明了严重程度的差异]。
+        """
+        logger.debug(f"system_prompt: {system_prompt}")
+        logger.debug(f"user_prompt: {xml_input}")
+        messages = [
+            {"role": "system", "content": system_prompt},
+            {"role": "user",   "content": xml_input}
+        ]
+        max_retries = self.max_retries
+        trial = 0
+        current_delay = self.initial_retry_delay
+        response = None
+        while trial < max_retries:
+            try:
+                response = self.llm.chat_completion(messages=messages, max_tokens=4000, temperature=0.0)
+                llm_understanding = response['choices'][0]['message']['content']
+                logger.debug(f"llm response: {llm_understanding}")
+                return llm_understanding
+            except Exception as e:
+                trial += 1
+                if trial == self.max_retries:
+                    logger.error(f"Failed to get data by llm api, {self.max_retries} attempts: {str(e)}")
+                    raise
+                logger.warning(f"Failed to get data by llm api, Request failed (attempt {trial}/{self.max_retries}). "
+                            f"Retrying in {current_delay} seconds. Error: {str(e)}")
+                if response is not None:
+                    logger.warning(f"Response={response}")
+                time.sleep(current_delay)
+                current_delay *= 2
+    def get_comparison_result_by_id(self, item_id: str) -> List[Dict]:
+        if self.es_index is None:
+            if self.local_cache_enabled and not self._local_cache_loaded:
+                self.load_data_to_cache_from_local(None, load_detail=True)
+            item_id = str(item_id)
+            results = []
+            with self._cache_lock:
+                for pair_results in self.comparison_results.values():
+                    for r in pair_results:
+                        if str(r.get("item_id_a")) == item_id or str(r.get("item_id_b")) == item_id:
+                            results.append(r)
+            return results
+        trial = 0
+        current_delay = self.initial_retry_delay
+        while trial < self.max_retries:
+            try:
+                query_body = {
+                    "query": {
+                        "bool": {
+                            "should": [
+                                { "term": { "item_id_a.keyword": item_id }},
+                                { "term": { "item_id_b.keyword": item_id }}
+                            ],
+                            "minimum_should_match": 1
+                        }
+                    }
+                }
+                result = self.client.search(index=self.es_index, body=query_body, size=200)
+                compare_result = []
+                for hit in result['hits']['hits']:
+                    r = {
+                        "judgment": hit['_source']['judgment'],
+                        "raw_response": hit['_source']['raw_response'],
+                        "timestamp": hit['_source']["timestamp"],
+                        "item_id_a": hit['_source']['item_id_a'],
+                        "item_id_b": hit['_source']['item_id_b'],
+                        'ordered_ids': self.get_ordered_pair(hit['_source']['item_id_a'], hit['_source']['item_id_b']),
+                        'original_ids': (hit['_source']['item_id_a'], hit['_source']['item_id_b'])
+                    }
+                    if self.detect_msg_violations:
+                        r["violative_msg_map_str"] = hit['_source']['violative_msg_map_str']
+                    compare_result.append(r)
+                return compare_result
+            except Exception as e:
+                trial += 1
+                if trial == self.max_retries:
+                    logger.error(f"Failed to get data from es after {self.max_retries} attempts: {str(e)}")
+                    raise
+                logger.warning(f"Failed to get data from es, Request failed (attempt {trial}/{self.max_retries}). "
+                               f"Retrying in {current_delay} seconds. Error: {str(e)}")
+                time.sleep(current_delay)
+                current_delay *= 2
+    def get_comparison_count(self, pair_key: str) -> int:
+        """Get the number of comparisons for a pair.
+        Args:
+            pair_key: Key for the pair
+        Returns:
+            int: Number of comparisons for the pair
+        """
+        return len(self.get_comparison_results(pair_key))
+    def get_comparison_results(self, pair_key: str) -> List[Dict]:
+        """Get comparison results for a pair.
+        Args:
+            pair_key: Key for the pair
+        Returns:
+            List[Dict]: List of comparison results for the pair
+        """
+        return self.get_compare_result_from_es(pair_key)
+    def get_compare_information(self, item_ids: List[str], use_tqdm: bool = True) -> Dict[str, Any]:
+        """Get comprehensive comparison information between specified items.
+        Args:
+            item_ids: List of item IDs to get comparison information for
+        Returns:
+            Dict containing:
+            - comparison_results: Dict mapping pair keys to their comparison results
+            - comparison_counts: Dict mapping item IDs to their total comparison counts
+            - pair_comparison_counts: Dict mapping pair keys to their comparison counts
+        """
+        comparison_results = {}
+        comparison_counts = {}
+        pair_comparison_counts = {}
+        item_id_set = set(item_ids)
+        with self._cache_lock:
+            for pair_key, pair_results in tqdm(self.comparison_results.items(), desc="Scan Comparing pairs", total=len(self.comparison_results), leave=False, disable=not use_tqdm):
+                id1, id2 = pair_key.split('_')
+                if id1 in item_id_set and id2 in item_id_set:
+                    comparison_results[pair_key] = list(pair_results)
+                    comparison_counts[id1] = comparison_counts.get(id1, 0) + len(pair_results)
+                    comparison_counts[id2] = comparison_counts.get(id2, 0) + len(pair_results)
+                    pair_comparison_counts[pair_key] = len(pair_results)
+            return {
+                'comparison_results': comparison_results,
+                'comparison_counts': comparison_counts,
+                'pair_comparison_counts': pair_comparison_counts
+            }