Buckets:
MisterAI/LocalAI_Demo_backends / cpu-diffusers.upgrade-tmp /venv /lib /python3.10 /site-packages /wcwidth /_width.py
| """This is a high-level width() supporting terminal output.""" | |
| from typing import Literal | |
| # local | |
| from ._wcwidth import wcwidth | |
| from .bisearch import bisearch | |
| from ._wcswidth import wcswidth | |
| from ._constants import (_EMOJI_ZWJ_SET, | |
| _ISC_VIRAMA_SET, | |
| _CATEGORY_MC_TABLE, | |
| _FITZPATRICK_RANGE, | |
| _REGIONAL_INDICATOR_SET) | |
| from .table_vs16 import VS16_NARROW_TO_WIDE | |
| from .text_sizing import TextSizing, TextSizingParams | |
| from .control_codes import ILLEGAL_CTRL, VERTICAL_CTRL, HORIZONTAL_CTRL, ZERO_WIDTH_CTRL | |
| from .table_grapheme import ISC_CONSONANT | |
| from .escape_sequences import (_SEQUENCE_CLASSIFY, | |
| TEXT_SIZING_PATTERN, | |
| CURSOR_MOVEMENT_SEQUENCE, | |
| INDETERMINATE_EFFECT_SEQUENCE, | |
| strip_sequences) | |
# Length threshold for the 'parse'-mode fast path in width(). Only strings longer than this are
# scanned for cursor-movement controls (BS, TAB, CR, cursor sequences); when none are found the
# mode is downgraded to 'ignore', which avoids character-by-character parsing. The scan is cheap
# relative to long strings, but is pure overhead for short ones such as labels or headings.
_WIDTH_FAST_PATH_MIN_LEN = 20

# Deletion table for str.translate(): every listed code point maps to None and is removed.
# Used by the fast 'ignore' mode to drop control characters before measuring.
_CONTROL_CHAR_TABLE = dict.fromkeys((
    *range(0x00, 0x20),  # C0: NUL through US (including tab)
    0x7F,                # DEL
    *range(0x80, 0xA0),  # C1: U+0080-U+009F
))
def _width_ignored_codes(text: str, ambiguous_width: int = 1) -> int:
    """
    Measure ``text`` for :func:`width` when ``control_codes='ignore'``.

    Escape sequences are removed first, then every character listed in ``_CONTROL_CHAR_TABLE``
    (C0, DEL, and C1 controls) is deleted, and whatever remains is measured by :func:`wcswidth`.

    :param text: String to measure.
    :param ambiguous_width: Forwarded to :func:`wcswidth` as ``ambiguous_width``.
    :returns: The value returned by :func:`wcswidth` for the filtered text.
    """
    without_sequences = strip_sequences(text)
    without_controls = without_sequences.translate(_CONTROL_CHAR_TABLE)
    return wcswidth(without_controls, ambiguous_width=ambiguous_width)
def width(
    text: str,
    *,
    control_codes: Literal['parse', 'strict', 'ignore'] = 'parse',
    tabsize: int = 8,
    ambiguous_width: int = 1,
) -> int:
    r"""
    Return printable width of text containing many kinds of control codes and sequences.

    Unlike :func:`wcswidth`, this function handles most control characters and many popular terminal
    output sequences. Never returns -1.

    :param text: String to measure.
    :param control_codes: How to handle control characters and sequences:

        - ``'parse'`` (default): Track horizontal cursor movement like BS ``\b``, CR ``\r``, TAB
          ``\t``, cursor left and right movement sequences. Vertical movement (LF, VT, FF) and
          indeterminate terminal sequences are zero-width. OSC 66 Kitty Text Sizing protocol, OSC 8
          Hyperlink, and many other kinds of output sequences are parsed for displayed measurements.
        - ``'strict'``: Like parse, but raises :exc:`ValueError` on control characters with
          indeterminate results of the screen or cursor, like clear or vertical movement. Generally,
          these should be handled with a virtual terminal emulator (like 'pyte').
        - ``'ignore'``: All C0 and C1 control characters and escape sequences are measured as
          width 0. This is the fastest measurement for text already filtered or known not to contain
          any kinds of control codes or sequences. TAB ``\t`` is zero-width; to ensure
          tab expansion, pre-process text using :func:`str.expandtabs`.

    :param tabsize: Tab stop width for ``'parse'`` and ``'strict'`` modes. Default is 8.
        Must be positive; a non-positive value makes TAB zero-width.
        Has no effect when ``control_codes='ignore'``.
    :param ambiguous_width: Width to use for East Asian Ambiguous (A)
        characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts.
    :returns: Maximum cursor position reached, "extent", accounting for cursor movement sequences
        present in ``text`` according to given parameters. This represents the rightmost column the
        cursor reaches. Always a non-negative integer.
    :raises ValueError: If ``control_codes='strict'`` and control characters with indeterminate
        effects, such as vertical movement or clear sequences are encountered, or on unexpected
        C0 or C1 control code. Also raised when ``control_codes`` is not one of the valid values;
        this is checked before any measurement, so it applies to every input.

    .. versionadded:: 0.3.0

    .. versionchanged:: 0.7.0
       Expanded strict-mode to raise :exc:`ValueError` when cursor-left movement
       (CSI D) would move beyond the beginning of the string. Previously, cursor-left
       was silently clamped to column 0 in all modes.

       Support horizontal cursor sequences (``cub``, ``cuf``, ``hpa``). Cursor-left (``cub``) or
       backspace (``\b``) now overwrites text. ``column_address`` (``hpa``) and carriage return
       (``\r``) are now parsed, and raise ``ValueError`` when ``control_codes='strict'``.
       A ``column_address`` parameter of ``0`` is treated like ``1`` (the first column).

    Examples::

        >>> width('hello')
        5
        >>> width('コンニチハ')
        10
        >>> width('\x1b[31mred\x1b[0m')
        3
        >>> width('\x1b[31mred\x1b[0m', control_codes='ignore')  # same result (ignored)
        3
        >>> width('123\b4')  # backspace overwrites previous cell (outputs '124')
        3
        >>> width('abc\t')  # tab caused cursor to move to column 8
        8
        >>> width('1\x1b[10C')  # '1' + cursor right 10, cursor ends on column 11
        11
        >>> width('1\x1b[10C', control_codes='ignore')  # faster but wrong in this case
        1
    """
    # pylint: disable=too-complex,too-many-branches,too-many-statements,too-many-locals
    # This could be broken into sub-functions (escape sequences, horizontal movement, and grapheme
    # clustering especially), but for reduced overhead in consideration of this function a likely
    # "hot path", they are inline, breaking many pylint complexity rules.

    # Enforce the documented contract: an unknown mode is an error, not a silent (and slower)
    # variation of 'parse'. Checked first so that the result does not depend on the text.
    if control_codes not in ('parse', 'strict', 'ignore'):
        raise ValueError(
            f"Invalid control_codes value {control_codes!r}, "
            "expected one of 'parse', 'strict', or 'ignore'"
        )

    # Fast path for ASCII printable (no tabs, escapes, or control chars)
    if text.isascii() and text.isprintable():
        return len(text)

    # Fast parse: if no horizontal cursor movements are possible, switch to 'ignore' mode.
    # Only check longer strings - the detection overhead hurts short string performance.
    if control_codes == 'parse' and len(text) > _WIDTH_FAST_PATH_MIN_LEN:
        # Check for cursor-affecting control characters
        if '\b' not in text and '\t' not in text and '\r' not in text:
            # Check for escape sequences, if none contain cursor movement or
            # text sizing, downgrade to 'ignore'
            if '\x1b' not in text or (
                not CURSOR_MOVEMENT_SEQUENCE.search(text)
                and not TEXT_SIZING_PATTERN.search(text)
            ):
                control_codes = 'ignore'

    # Fast path for ignore mode, useful if you know the text is already free of control codes
    if control_codes == 'ignore':
        return _width_ignored_codes(text, ambiguous_width)

    strict = control_codes == 'strict'

    # Track absolute positions: tab stops need modulo on absolute column, CR resets to 0.
    # Initialize max_extent to 0 so backward movement (CR, BS) won't yield negative width.
    current_col = 0
    max_extent = 0
    idx = 0
    text_len = len(text)

    # Select wcwidth call pattern for best lru_cache performance:
    # - ambiguous_width=1 (default): single-arg calls share cache with direct wcwidth() calls
    # - ambiguous_width=2: full positional args needed (results differ, separate cache is correct)
    _wcwidth = wcwidth if ambiguous_width == 1 else lambda c: wcwidth(c, 'auto', ambiguous_width)

    # Grapheme-clustering state:
    # - last_measured_idx / last_measured_ucs: index and code point of the most recent character
    #   that may be modified by a following VS16, Fitzpatrick, or Mc; -2 / -1 mean "none".
    # - last_was_virama: the previous zero-width character was in _ISC_VIRAMA_SET.
    # - conjunct_pending: a consonant was consumed by the virama rule without adding width; one
    #   cell is added when the next positive-width character arrives or at end of text.
    last_measured_idx = -2
    last_measured_ucs = -1
    last_was_virama = False
    conjunct_pending = False

    while idx < text_len:
        char = text[idx]

        # 1. ESC sequences
        if char == '\x1b':
            m = _SEQUENCE_CLASSIFY.match(text, idx)
            if not m:
                # 1a. Errant ESC or unknown sequence: only the first character is zero-width
                idx += 1
            else:
                seq = m.group()
                if strict and INDETERMINATE_EFFECT_SEQUENCE.match(seq):
                    raise ValueError(f"Indeterminate cursor sequence at position {idx}, {seq!r}")
                # 1b. horizontal position absolute (before forward/backward to
                #     avoid other_seq match in _SEQUENCE_CLASSIFY)
                if (hpa_n := m.group('hpa_n')) is not None:
                    target_col = int(hpa_n) if hpa_n else 1
                    if strict:
                        raise ValueError(
                            f"Indeterminate horizontal position at position {idx}, "
                            f"{seq!r} (absolute column unknown)"
                        )
                    # HPA is 1-indexed, convert to 0-indexed. A parameter of 0 is treated like
                    # the default of 1, so the column can never become negative (which would
                    # under-measure following text and skew tab stops).
                    current_col = max(0, target_col - 1)
                # 1c. cursor forward, backward
                elif (cforward_n := m.group('cforward_n')) is not None:
                    current_col += int(cforward_n) if cforward_n else 1
                elif (cbackward_n := m.group('cbackward_n')) is not None:
                    n_backward = int(cbackward_n) if cbackward_n else 1
                    if strict and n_backward > current_col:
                        raise ValueError(
                            f"Cursor left movement at position {idx} would move "
                            f"{n_backward} cells left from column {current_col}, "
                            f"exceeding string start"
                        )
                    current_col = max(0, current_col - n_backward)
                # 1d. OSC 66 Text Sizing — has positive display width
                elif (ts_meta := m.group('ts_meta')) is not None:
                    ts_text = m.group('ts_text')
                    ts_term = m.group('ts_term')
                    assert ts_text is not None and ts_term is not None
                    text_size = TextSizing(
                        TextSizingParams.from_params(ts_meta, control_codes=control_codes),
                        ts_text, ts_term)
                    current_col += text_size.display_width(ambiguous_width)
                # 1e. SGR and other zero-width sequences -- no column advance
                idx = m.end()
            # Escape sequences break VS16 adjacency: reset last-measured state
            last_measured_idx = -2
            last_measured_ucs = -1
            max_extent = max(max_extent, current_col)
            continue

        # 2. Vertical or Illegal control characters zero width or error when 'strict'
        if char in ILLEGAL_CTRL:
            if strict:
                raise ValueError(f"Illegal control character {ord(char):#x} at position {idx}")
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        if char in VERTICAL_CTRL:
            if strict:
                raise ValueError(f"Vertical movement character {ord(char):#x} at position {idx}")
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        # 3. Horizontal movement characters
        if char in HORIZONTAL_CTRL:
            if char == '\t' and tabsize > 0:
                # Advance to the next tab stop; non-positive tabsize leaves TAB zero-width.
                current_col += tabsize - (current_col % tabsize)
            elif char == '\b':
                # Backspace never moves left of column 0.
                if current_col > 0:
                    current_col -= 1
            elif char == '\r':
                if strict:
                    raise ValueError(
                        f"Horizontal movement character \\r at position {idx}: "
                        "indeterminate starting column"
                    )
                current_col = 0
            max_extent = max(max_extent, current_col)
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        # 4. Zero-width control characters
        if char in ZERO_WIDTH_CTRL:
            idx += 1
            last_measured_idx = -2
            last_measured_ucs = -1
            continue

        # 5. Inline grapheme-clustering: ZWJ, VS16, Regional Indicators,
        #    Fitzpatrick, Virama conjuncts, Mc, wcwidth
        ucs = ord(char)

        # ZWJ (U+200D)
        if ucs == 0x200D:
            if last_was_virama:
                # After a virama only the ZWJ itself is consumed; last_was_virama is kept so
                # the following character is still offered to the virama conjunct rule.
                idx += 1
            elif idx + 1 < text_len:
                # Otherwise the ZWJ and the character joined to it are both skipped.
                last_was_virama = False
                idx += 2
            else:
                # Trailing ZWJ with nothing after it.
                last_was_virama = False
                idx += 1
            continue

        # VS16 (U+FE0F): converts preceding narrow character to wide.
        if ucs == 0xFE0F and last_measured_idx >= 0:
            if bisearch(ord(text[last_measured_idx]), VS16_NARROW_TO_WIDE['9.0.0']):
                current_col += 1
                max_extent = max(max_extent, current_col)
            last_measured_idx = -2  # prevent double application
            idx += 1
            continue

        # Regional Indicator & Fitzpatrick (both above BMP)
        if ucs > 0xFFFF:
            if ucs in _REGIONAL_INDICATOR_SET:
                # Count the regional indicators immediately preceding this one: an odd count
                # means this is the second half of a pair, which adds no further width.
                ri_before = 0
                j = idx - 1
                while j >= 0 and ord(text[j]) in _REGIONAL_INDICATOR_SET:
                    ri_before += 1
                    j -= 1
                if ri_before % 2 == 1:
                    last_measured_ucs = ucs
                    idx += 1
                    continue
            elif (_FITZPATRICK_RANGE[0] <= ucs <= _FITZPATRICK_RANGE[1]
                  and last_measured_ucs in _EMOJI_ZWJ_SET):
                # Fitzpatrick modifier directly after a code point in _EMOJI_ZWJ_SET: zero-width.
                idx += 1
                continue

        # Virama conjunct formation
        if last_was_virama and bisearch(ucs, ISC_CONSONANT):
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
            conjunct_pending = True
            idx += 1
            continue

        # Normal character: measure with wcwidth
        w = _wcwidth(char)
        if w > 0:
            if conjunct_pending:
                # Account for the cell deferred by the virama conjunct rule.
                current_col += 1
                conjunct_pending = False
            current_col += w
            max_extent = max(max_extent, current_col)
            last_measured_idx = idx
            last_measured_ucs = ucs
            last_was_virama = False
        elif last_measured_idx >= 0 and bisearch(ucs, _CATEGORY_MC_TABLE):
            # Spacing Combining Mark (Mc) following a base character adds 1
            current_col += 1
            max_extent = max(max_extent, current_col)
            last_measured_idx = -2
            last_was_virama = False
            conjunct_pending = False
        else:
            # Remaining non-positive-width character: only remember whether it was a virama.
            last_was_virama = ucs in _ISC_VIRAMA_SET
        idx += 1

    # A conjunct still pending at end of text contributes its deferred cell.
    if conjunct_pending:
        current_col += 1
        max_extent = max(max_extent, current_col)

    return max_extent
Xet Storage Details
- Size:
- 14.8 kB
- Xet hash:
- 15f7c745a2ca2e0001581c265dba2d48a634fc34dc63640b4ff00adb3a1e0478
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.