Spaces:
Running
Running
| """ | |
| Render model definitions for visualization. | |
| This module defines a stable, serializable intermediate representation | |
| between model outputs and HTML rendering. | |
| """ | |
| from dataclasses import dataclass, field, asdict | |
| from typing import Any, Dict, List, Optional | |
| def _has_control_chars(text: str) -> bool: | |
| if not text: | |
| return False | |
| for ch in text: | |
| code = ord(ch) | |
| if code < 32 or code == 127: | |
| return True | |
| return False | |
@dataclass
class TokenDisplay:
    """Display form of a token: the text to show plus a kind tag.

    Declared as a dataclass so it can be constructed with keyword
    arguments (``TokenDisplay(text=..., kind=...)``) and serialized with
    ``dataclasses.asdict``.
    """

    # Text to show for the token.
    text: str
    # One of "normal" | "control" | "raw".
    kind: str
@dataclass
class TokenInfo:
    """Per-token record of the render model.

    Declared as a dataclass so that the ``field(default_factory=dict)``
    defaults give every instance its own fresh dict and so the record can
    be serialized with ``dataclasses.asdict``.
    """

    # Start/end positions of the token; presumably byte offsets into the
    # source text -- TODO confirm units and whether the end is exclusive.
    byte_start: int
    byte_end: int
    # How the token should be shown.
    display: TokenDisplay
    is_word: bool
    # Optional word identifiers; None when not set.
    word_id: Optional[int] = None
    word_key: Optional[str] = None
    # NOTE(review): presumably a hex rendering of the token's bytes -- confirm.
    bytes_hex: str = ""
    # The mappings below default to empty dicts; their key schema is not
    # visible in this module -- verify against the producer.
    compression: Dict[str, str] = field(default_factory=dict)
    model_tokens: Dict[str, List[List[Any]]] = field(default_factory=dict)
    loss: Dict[str, float] = field(default_factory=dict)
    topk: Dict[str, Any] = field(default_factory=dict)
    raw_delta: float = 0.0
    tuned_delta: float = 0.0
@dataclass
class RenderModel:
    """Top-level render model: the text, its tokens, and free-form metadata.

    Declared as a dataclass because ``to_dict`` relies on
    ``dataclasses.asdict``, which only accepts dataclass instances, and
    because ``meta`` uses ``field(default_factory=dict)``.
    """

    text: str
    tokens: List[TokenInfo]
    # Free-form metadata; defaults to a fresh empty dict per instance.
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return this model as a plain dict.

        ``asdict`` recurses into nested dataclass instances as well as
        lists and dicts, so the result contains only plain containers and
        the leaf values.
        """
        return asdict(self)
def build_display(text: str, is_raw: bool = False) -> TokenDisplay:
    """Wrap *text* in a ``TokenDisplay`` tagged with the matching kind.

    The kind is ``"raw"`` when *is_raw* is true (the text is not inspected
    in that case), ``"control"`` when the text contains a control
    character, and ``"normal"`` otherwise.
    """
    if is_raw:
        kind = "raw"
    elif _has_control_chars(text):
        kind = "control"
    else:
        kind = "normal"
    return TokenDisplay(text=text, kind=kind)