| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| from __future__ import absolute_import |
| from __future__ import print_function |
| from __future__ import unicode_literals |
| import re |
| import sys |
|
|
|
|
|
|
| from hwp5.dataio import PrimitiveType |
| from hwp5.dataio import UINT32 |
| from hwp5.dataio import UINT16 |
| from hwp5.dataio import UINT8 |
|
|
|
|
# Interpreter major-version flag consulted by the byte-decoding helpers below.
PY3 = sys.version_info[0] == 3
if PY3:
    # Python 3 has no ``unichr`` builtin; ``chr`` takes its place so the rest
    # of the module can call ``unichr`` on either interpreter.
    unichr = chr
|
|
|
|
class CHID(str, metaclass=PrimitiveType):
    """Four-character control id.

    The class-level constants are the known ids. ``decode`` builds an id
    from four raw bytes, reading them in reverse order, so every constant
    must be exactly ``fixed_size`` characters long to be comparable with a
    decoded value.
    """

    # Number of bytes consumed by ``decode``.
    fixed_size = 4

    # Object-like control ids (grouping inferred from the names).
    GSO = 'gso '
    TBL = 'tbl '
    LINE = '$lin'
    RECT = '$rec'
    ELLI = '$ell'
    ARC = '$arc'
    POLY = '$pol'
    CURV = '$cur'
    EQED = 'eqed'
    PICT = '$pic'
    OLE = '$ole'
    CONTAINER = '$con'

    # Other (non-field) control ids.
    SECD = 'secd'
    COLD = 'cold'
    HEADER = 'head'
    FOOTER = 'foot'
    # 'fn' / 'en' are padded with two spaces: a three-character constant
    # could never equal the four-character string produced by ``decode``.
    FN = 'fn  '
    EN = 'en  '
    ATNO = 'atno'
    NWNO = 'nwno'
    PGHD = 'pghd'
    PGCT = 'pgct'
    PGNP = 'pgnp'
    IDXM = 'idxm'
    BOKM = 'bokm'
    TCPS = 'tcps'
    TDUT = 'tdut'
    TCMT = 'tcmt'

    # Field control ids.
    FIELD_UNK = '%unk'
    FIELD_DTE = '%dte'
    FIELD_DDT = '%ddt'
    FIELD_PAT = '%pat'
    FIELD_BMK = '%bmk'
    FIELD_MMG = '%mmg'
    FIELD_XRF = '%xrf'
    FIELD_FMU = '%fmu'
    FIELD_CLK = '%clk'
    FIELD_SMR = '%smr'
    FIELD_USR = '%usr'
    FIELD_HLK = '%hlk'
    FIELD_REVISION_SIGN = '%sig'
    FIELD_REVISION_DELETE = '%%*d'
    FIELD_REVISION_ATTACH = '%%*a'
    FIELD_REVISION_CLIPPING = '%%*C'
    FIELD_REVISION_SAWTOOTH = '%%*S'
    FIELD_REVISION_THINKING = '%%*T'
    FIELD_REVISION_PRAISE = '%%*P'
    FIELD_REVISION_LINE = '%%*L'
    FIELD_REVISION_SIMPLECHANGE = '%%*c'
    FIELD_REVISION_HYPERLINK = '%%*h'
    FIELD_REVISION_LINEATTACH = '%%*A'
    FIELD_REVISION_LINELINK = '%%*i'
    FIELD_REVISION_LINETRANSFER = '%%*t'
    FIELD_REVISION_RIGHTMOVE = '%%*r'
    FIELD_REVISION_LEFTMOVE = '%%*l'
    FIELD_REVISION_TRANSFER = '%%*n'
    FIELD_REVISION_SIMPLEINSERT = '%%*e'
    FIELD_REVISION_SPLIT = '%spl'
    FIELD_REVISION_CHANGE = '%%mr'
    FIELD_MEMO = '%%me'
    FIELD_PRIVATE_INFO_SECURITY = '%cpr'

    @staticmethod
    def decode(bytes, context=None):
        """Build a four-character id from the first four items of *bytes*.

        The items are read in reverse order (index 3 first, index 0 last).

        :param bytes: indexable byte sequence holding at least four items
        :param context: accepted but not used by this method
        :returns: a plain four-character string
        :raises IndexError: if *bytes* holds fewer than four items
        """
        if PY3:
            # Indexing ``bytes`` yields ints on Python 3; map each to a char.
            return (
                chr(bytes[3]) +
                chr(bytes[2]) +
                chr(bytes[1]) +
                chr(bytes[0])
            )
        else:
            # On Python 2 indexing already yields one-character strings.
            return bytes[3] + bytes[2] + bytes[1] + bytes[0]
|
|
|
|
class ControlChar(object):
    """Table of control characters plus helpers to locate and decode them.

    ``chars`` maps a control code to ``(name, kind)``. ``names`` and
    ``kinds`` are the same table keyed by the one-character string for the
    code. After ``_populate()`` runs, each name is also a class attribute
    holding that character (for example ``ControlChar.TAB``).
    """

    class CHAR(object):
        """Kind that occupies a single 2-byte code unit."""
        size = 1

    class INLINE(object):
        """Kind that occupies eight 2-byte code units (16 bytes)."""
        size = 8

    class EXTENDED(object):
        """Kind that occupies eight 2-byte code units (16 bytes)."""
        size = 8

    chars = {0x00: ('NULL', CHAR),
             0x01: ('CTLCHR01', EXTENDED),
             0x02: ('SECTION_COLUMN_DEF', EXTENDED),
             0x03: ('FIELD_START', EXTENDED),
             0x04: ('FIELD_END', INLINE),
             0x05: ('CTLCHR05', INLINE),
             0x06: ('CTLCHR06', INLINE),
             0x07: ('CTLCHR07', INLINE),
             0x08: ('TITLE_MARK', INLINE),
             0x09: ('TAB', INLINE),
             0x0a: ('LINE_BREAK', CHAR),
             0x0b: ('DRAWING_TABLE_OBJECT', EXTENDED),
             0x0c: ('CTLCHR0C', EXTENDED),
             0x0d: ('PARAGRAPH_BREAK', CHAR),
             0x0e: ('CTLCHR0E', EXTENDED),
             0x0f: ('HIDDEN_EXPLANATION', EXTENDED),
             0x10: ('HEADER_FOOTER', EXTENDED),
             0x11: ('FOOT_END_NOTE', EXTENDED),
             0x12: ('AUTO_NUMBER', EXTENDED),
             0x13: ('CTLCHR13', INLINE),
             0x14: ('CTLCHR14', INLINE),
             0x15: ('PAGE_CTLCHR', EXTENDED),
             0x16: ('BOOKMARK', EXTENDED),
             0x17: ('CTLCHR17', EXTENDED),
             0x18: ('HYPHEN', CHAR),
             0x1e: ('NONBREAK_SPACE', CHAR),
             0x1f: ('FIXWIDTH_SPACE', CHAR)}
    names = {unichr(code): name for code, (name, _kind) in chars.items()}
    kinds = {unichr(code): kind for code, (_name, kind) in chars.items()}

    # A byte in 0x00-0x1f followed by a zero byte. Note that this range
    # includes 0x19-0x1d, which have no entry in ``chars``.
    REGEX_CONTROL_CHAR = re.compile(b'[\x00-\x1f]\x00')

    @classmethod
    def _populate(cls):
        """Expose every control character as a class attribute by name."""
        for ch, name in cls.names.items():
            setattr(cls, name, ch)

    @classmethod
    def find(cls, data, start_idx):
        """Locate the next control character in *data*.

        :param data: byte string of 2-byte code units
        :param start_idx: byte offset at which to start searching
        :returns: ``(start, end)`` byte offsets of the control character;
            ``(len(data), len(data))`` when none is found. ``end`` is
            computed from the kind's size and is not clamped to
            ``len(data)``.
        """
        search = cls.REGEX_CONTROL_CHAR.search
        while True:
            m = search(data, start_idx)
            if m is None:
                data_len = len(data)
                return data_len, data_len
            i = m.start()
            if i & 1:
                # A hit at an odd offset straddles two code units; it is
                # not a control character, so resume just past it.
                start_idx = i + 1
                continue
            # ord() on a length-1 slice gives the byte value on both
            # Python 2 and Python 3, so no interpreter check is needed.
            char = unichr(ord(m.group()[:1]))
            # Codes the regex matches but the table omits (0x19-0x1d) are
            # stepped over as one code unit instead of raising KeyError.
            size = cls.kinds.get(char, cls.CHAR).size
            return i, i + (size * 2)

    @classmethod
    def decode(cls, bytes):
        """Decode one control character from its raw bytes.

        :param bytes: raw bytes of the control character, starting with its
            2-byte code
        :returns: ``dict(code=...)`` for single-unit kinds;
            ``dict(code=..., param=...)`` for TAB;
            ``dict(code=..., chid=..., param=...)`` for every other
            8-unit kind
        """
        code = UINT16.decode(bytes[0:2])
        ch = unichr(code)
        # Codes missing from the table are treated as single-unit CHARs,
        # consistent with find().
        if cls.kinds.get(ch, cls.CHAR).size != 8:
            return dict(code=code)

        # The 12 bytes that follow the 2-byte code.
        payload = bytes[2:2 + 12]
        if ch == ControlChar.TAB:
            param = dict(width=UINT32.decode(payload[0:4]),
                         unknown0=UINT8.decode(payload[4:5]),
                         unknown1=UINT8.decode(payload[5:6]),
                         unknown2=payload[6:])
            return dict(code=code, param=param)

        chid = CHID.decode(payload[0:4])
        param = payload[4:12]
        return dict(code=code, chid=chid, param=param)

    @classmethod
    def get_kind_by_code(cls, code):
        """Return the kind class registered for *code*.

        :raises KeyError: if *code* has no entry in the table
        """
        ch = unichr(code)
        return cls.kinds[ch]

    @classmethod
    def get_name_by_code(cls, code):
        """Return the name for *code*, or ``'CTLCHR%02x' % code`` if unlisted."""
        ch = unichr(code)
        return cls.names.get(ch, 'CTLCHR%02x' % code)


# Install the named attributes (ControlChar.TAB, ...) that decode() relies on.
ControlChar._populate()
|
|