# Compression-Lens / core/render_model.py
# Jellyfish042's picture
# improvements
# 48754a8
"""
Render model definitions for visualization.
This module defines a stable, serializable intermediate representation
between model outputs and HTML rendering.
"""
from dataclasses import dataclass, field, asdict
from typing import Any, Dict, List, Optional
def _has_control_chars(text: str) -> bool:
    """Return True if ``text`` contains an ASCII control character.

    A control character here is any code point below 32 (U+0000-U+001F)
    or DEL (U+007F). Falsy input (``""`` or ``None``) yields False.

    Args:
        text: The string to inspect.

    Returns:
        True when at least one character is a control character,
        otherwise False.
    """
    if not text:
        return False
    # Single-character strings compare by code point, so ``ch < " "`` is
    # equivalent to ``ord(ch) < 32``.
    return any(ch < " " or ch == "\x7f" for ch in text)
@dataclass
class TokenDisplay:
    """Displayable form of a token: the text plus a tag describing it."""

    # The string to show for the token.
    text: str
    # Classification tag; one of "normal", "control" or "raw".
    kind: str
@dataclass
class TokenInfo:
    """Per-token record carried by the render model.

    The first four fields are required; every other field has a default.
    Dict-valued fields default to a fresh empty dict per instance.
    """

    # --- required fields -------------------------------------------------
    # Byte range of the token. Presumably offsets into the encoded source
    # text; TODO confirm the encoding and whether byte_end is exclusive.
    byte_start: int
    byte_end: int
    # How the token should be shown.
    display: TokenDisplay
    # Flag marking the token as a word (exact criteria are decided by the
    # producer, not visible here).
    is_word: bool

    # --- optional word identification -------------------------------------
    word_id: Optional[int] = None
    word_key: Optional[str] = None

    # Presumably a hex rendering of the token's bytes - TODO confirm format.
    bytes_hex: str = ""

    # --- per-token mappings (key scheme is set by the producer; likely
    # model names - verify against caller) ---------------------------------
    compression: Dict[str, str] = field(default_factory=dict)
    model_tokens: Dict[str, List[List[Any]]] = field(default_factory=dict)
    loss: Dict[str, float] = field(default_factory=dict)
    topk: Dict[str, Any] = field(default_factory=dict)

    # --- scalar deltas; meaning is not established in this module ----------
    raw_delta: float = 0.0
    tuned_delta: float = 0.0
@dataclass
class RenderModel:
    """Top-level render model: the text, its tokens, and free-form metadata."""

    # The text being rendered.
    text: str
    # Token records belonging to the text.
    tokens: List[TokenInfo]
    # Arbitrary extra information; defaults to a fresh empty dict.
    meta: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return this model as a plain dict via ``dataclasses.asdict``.

        ``asdict`` recurses into nested dataclass instances, lists and
        dicts, so the contained token records are converted as well.
        """
        as_plain: Dict[str, Any] = asdict(self)
        return as_plain
def build_display(text: str, is_raw: bool = False) -> TokenDisplay:
    """Wrap ``text`` in a ``TokenDisplay`` tagged with the appropriate kind.

    Args:
        text: The string to display.
        is_raw: When true, the result is tagged "raw" and the text is not
            scanned for control characters.

    Returns:
        A ``TokenDisplay`` whose kind is "raw" if ``is_raw`` is set,
        "control" if the text contains a control character, and "normal"
        otherwise.
    """
    if is_raw:
        kind = "raw"
    elif _has_control_chars(text):
        kind = "control"
    else:
        kind = "normal"
    return TokenDisplay(text=text, kind=kind)