Buckets:
MisterAI/LocalAI_Demo_backends / cpu-diffusers.upgrade-tmp /venv /lib /python3.10 /site-packages /wcwidth /_wcwidth.py
| """ | |
| This is a python implementation of wcwidth() and wcswidth(). | |
| https://github.com/jquast/wcwidth | |
| Derived from Markus Kuhn's C code, | |
| This is an implementation of wcwidth() and wcswidth() (defined in | |
| IEEE Std 1002.1-2001) for Unicode. | |
| http://www.opengroup.org/onlinepubs/007904975/functions/wcwidth.html | |
| http://www.opengroup.org/onlinepubs/007904975/functions/wcswidth.html | |
| In fixed-width output devices, Latin characters all occupy a single | |
| "cell" position of equal width, whereas ideographic CJK characters | |
| occupy two such cells. Interoperability between terminal-line | |
| applications and (teletype-style) character terminals using the | |
| UTF-8 encoding requires agreement on which character should advance | |
| the cursor by how many cell positions. No established formal | |
| standards exist at present on which Unicode character shall occupy | |
| how many cell positions on character terminals. These routines are | |
| a first attempt of defining such behavior based on simple rules | |
| applied to data provided by the Unicode Consortium. | |
| For some graphical characters, the Unicode standard explicitly | |
| defines a character-cell width via the definition of the East Asian | |
| FullWidth (F), Wide (W), Half-width (H), and Narrow (Na) classes. | |
| In all these cases, there is no ambiguity about which width a | |
| terminal shall use. For characters in the East Asian Ambiguous (A) | |
| class, the width choice depends purely on a preference of backward | |
| compatibility with either historic CJK or Western practice. | |
| Choosing single-width for these characters is easy to justify as | |
| the appropriate long-term solution, as the CJK practice of | |
| displaying these characters as double-width comes from historic | |
| implementation simplicity (8-bit encoded characters were displayed | |
| single-width and 16-bit ones double-width, even for Greek, | |
| Cyrillic, etc.) and not any typographic considerations. | |
| Much less clear is the choice of width for the Not East Asian | |
| (Neutral) class. Existing practice does not dictate a width for any | |
| of these characters. It would nevertheless make sense | |
| typographically to allocate two character cells to characters such | |
| as for instance EM SPACE or VOLUME INTEGRAL, which cannot be | |
| represented adequately with a single-width glyph. The following | |
| routines at present merely assign a single-cell width to all | |
| neutral characters, in the interest of simplicity. This is not | |
| entirely satisfactory and should be reconsidered before | |
| establishing a formal standard in this area. At the moment, the | |
| decision which Not East Asian (Neutral) characters should be | |
| represented by double-width glyphs cannot yet be answered by | |
| applying a simple rule from the Unicode database content. Setting | |
| up a proper standard for the behavior of UTF-8 character terminals | |
| will require a careful analysis not only of each Unicode character, | |
| but also of each presentation form, something the author of these | |
| routines has avoided to do so far. | |
| http://www.unicode.org/unicode/reports/tr11/ | |
| Latest version: http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c | |
| """ | |
| from __future__ import annotations | |
| # std imports | |
| from functools import lru_cache | |
| # local | |
| from .bisearch import bisearch | |
| from ._constants import _LATEST_VERSION, _AMBIGUOUS_TABLE, _ZERO_WIDTH_TABLE, _WIDE_EASTASIAN_TABLE | |
| def _wcversion_value(ver_string: str) -> tuple[int, ...]: # pragma: no cover | |
| """ | |
| Integer-mapped value of given dotted version string. | |
| .. deprecated:: 0.3.0 | |
| This function is no longer used internally by wcwidth but is retained | |
| for API compatibility with external tools. | |
| :param ver_string: Unicode version string, of form ``n.n.n``. | |
| :returns: tuple of digit tuples, ``tuple(int, [...])``. | |
| """ | |
| retval = tuple(map(int, (ver_string.split('.')))) | |
| return retval | |
| def _wcmatch_version(given_version: str) -> str: # pylint: disable=unused-argument | |
| """ | |
| Return the supported Unicode version level. | |
| .. deprecated:: 0.3.0 | |
| This function now always returns the latest version. | |
| This function is no longer used internally by wcwidth but is retained | |
| for API compatibility with external tools. | |
| :param given_version: Ignored. Any value is accepted for compatibility. | |
| :returns: The latest unicode version string. | |
| """ | |
| return _LATEST_VERSION | |
| # maxsize=1024: western scripts need ~64 unique codepoints per session, but | |
| # CJK sessions may use ~2000 of ~3500 common hanzi/kanji. 1024 accommodates | |
| # heavy CJK use. Performance floor at 32; bisearch is ~100ns per miss. | |
| def wcwidth(wc: str, unicode_version: str = 'auto', ambiguous_width: int = 1) -> int: # pylint: disable=unused-argument | |
| r""" | |
| Given one Unicode codepoint, return its printable length on a terminal. | |
| :param wc: A single Unicode character. | |
| :param unicode_version: Ignored. Retained for backwards compatibility. | |
| .. deprecated:: 0.3.0 | |
| Only the latest Unicode version is now shipped. | |
| :param ambiguous_width: Width to use for East Asian Ambiguous (A) | |
| characters. Default is ``1`` (narrow). Set to ``2`` for CJK contexts | |
| where ambiguous characters display as double-width. See | |
| :ref:`ambiguous_width` for details. | |
| :returns: The width, in cells, necessary to display the character of | |
| Unicode string character, ``wc``. Returns 0 if the ``wc`` argument has | |
| no printable effect on a terminal (such as NUL '\0'), -1 if ``wc`` is | |
| not printable, or has an indeterminate effect on the terminal, such as | |
| a control character. Otherwise, the number of column positions the | |
| character occupies on a graphic terminal (1 or 2) is returned. | |
| See :ref:`Specification` for details of cell measurement. | |
| """ | |
| ucs = ord(wc) if wc else 0 | |
| # small optimization: early return of 1 for printable ASCII, this provides | |
| # approximately 40% performance improvement for mostly-ascii documents, with | |
| # less than 1% impact to others. | |
| if 32 <= ucs < 0x7f: | |
| return 1 | |
| # C0/C1 control characters are -1 for compatibility with POSIX-like calls | |
| if ucs and ucs < 32 or 0x07F <= ucs < 0x0A0: | |
| return -1 | |
| # Zero width | |
| if bisearch(ucs, _ZERO_WIDTH_TABLE): | |
| return 0 | |
| # Wide (F/W categories) | |
| if bisearch(ucs, _WIDE_EASTASIAN_TABLE): | |
| return 2 | |
| # Ambiguous width (A category) - only when ambiguous_width=2 | |
| if ambiguous_width == 2 and bisearch(ucs, _AMBIGUOUS_TABLE): | |
| return 2 | |
| return 1 | |
Xet Storage Details
- Size:
- 6.5 kB
- Xet hash:
- 3722d1328b7860405d9491f4bb19d6e55dae64356c6b67e7a7576a49de6ed211
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.