""" Fallback segmentation utilities. Used for offline tests or snapshot generation when model tokenizers are unavailable. """ from typing import Dict, List def fallback_token_info(text: str) -> Dict[str, List]: """Return minimal token info using UTF-8 codepoint boundaries.""" boundaries = [0] byte_pos = 0 for ch in text: byte_pos += len(ch.encode("utf-8")) boundaries.append(byte_pos) return { "common_boundaries": boundaries, "qwen_tokens": [], "rwkv_tokens": [], "byte_to_qwen": {}, "byte_to_rwkv": {}, }