File size: 7,707 Bytes
9d7cf7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Dict

import numpy as np
from numpy import ndarray

from typing import Union, List, Tuple, Optional
from dataclasses import dataclass

@dataclass
class TokenizeInput():
    """Skeleton description handed to a tokenizer."""

    # joint positions, (J, 3)
    joints: ndarray

    # parent index of every joint, length J; entry 0 may be None
    # (it is replaced by 0 when building `bones`)
    parents: List[Union[None, int]]

    # string of class in tokenizer
    cls: Optional[str]=None

    # optional names of the joints
    joint_names: Optional[List[str]]=None

    @property
    def J(self) -> int:
        """Number of joints, i.e. the first dimension of `joints`."""
        return self.joints.shape[0]

    @property
    def branch(self) -> ndarray:
        """
        Boolean mask of length J.

        Entry i is True when the parent of joint i is not joint i-1;
        entry 0 is always False. The mask is computed on first access and
        cached on the instance as `_branch`.
        """
        try:
            return self._branch
        except AttributeError:
            pass
        flags = [
            idx != 0 and self.parents[idx] != idx - 1
            for idx in range(self.J)
        ]
        self._branch = np.array(flags, dtype=bool)
        return self._branch

    @property
    def bones(self):
        """
        Parent position and own position of every joint, concatenated
        along axis 1. The first joint is paired with itself.
        """
        # work on a copy so the stored parent list is left untouched
        parent_idx = self.parents.copy()
        parent_idx[0] = 0
        parent_pos = self.joints[parent_idx]
        return np.concatenate((parent_pos, self.joints), axis=1)

    @property
    def num_bones(self):
        """Number of rows of `bones`."""
        bone_array = self.bones
        return bone_array.shape[0]

@dataclass
class DetokenizeOutput():
    """Skeleton reconstructed from a token sequence."""

    # original tokens
    tokens: ndarray

    # (J, 6), (parent position, position)
    bones: ndarray

    # (J), parent of each bone
    parents: List[int]

    # string of class in tokenizer
    cls: Optional[str]=None

    # names of joints
    joint_names: Optional[List[str]]=None

    # NOTE(review): presumably the (low, high) coordinate range used by the
    # tokenizer's discretization - confirm against the tokenizer implementation
    continuous_range: Optional[Tuple[float, float]]=None

    @property
    def joints(self) -> ndarray:
        """Position of every joint: columns 3.. of `bones`."""
        return self.bones[:, 3:]

    @property
    def p_joints(self) -> ndarray:
        """Parent position of every joint: columns ..3 of `bones`."""
        return self.bones[:, :3]

    @property
    def num_bones(self) -> int:
        """Number of rows of `bones`."""
        return self.bones.shape[0]

    @property
    def J(self) -> int:
        """Number of joints; identical to `num_bones`."""
        return self.bones.shape[0]

    def _get_parents(self) -> List[int]:
        """
        Recover parent indices from bone geometry.

        For bone i the parent is the earlier bone (index < i) whose joint
        position is closest to the parent position stored in bone i. When
        several earlier joints are equally close, the one with the largest
        index wins. Bone 0 has no earlier bone and gets -1.

        Returns:
            list of J parent indices
        """
        heads = self.bones[:, 3:]
        parents = []
        for (i, bone) in enumerate(self.bones):
            p_joint = bone[:3]
            # start from +inf (not a finite magic number) so that a parent is
            # found no matter how large the coordinates are
            dis = np.inf
            pid = -1
            # walk backwards with a strict '<' so ties keep the latest index
            for j in reversed(range(i)):
                n_dis = ((heads[j] - p_joint)**2).sum()
                if n_dis < dis:
                    pid = j
                    dis = n_dis
            parents.append(pid)
        return parents

class Tokenizer(ABC):
    """
    Abstract base class for tokenizers.

    `parse`, `tokenize`, `detokenize` and `vocab_size` are abstract and must
    be provided by subclasses. Every other member is an optional hook that
    raises NotImplementedError until it is overridden.
    """

    @classmethod
    @abstractmethod
    def parse(cls, **kwags) -> 'Tokenizer':
        """Create a tokenizer from keyword arguments."""

    @abstractmethod
    def tokenize(self, input: TokenizeInput) -> ndarray:
        """Convert a TokenizeInput into an array of tokens."""

    @abstractmethod
    def detokenize(self, ids: ndarray, **kwargs) -> DetokenizeOutput:
        """Convert an array of tokens into a DetokenizeOutput."""

    @property
    @abstractmethod
    def vocab_size(self) -> int:
        """The vocabulary size"""
        raise NotImplementedError

    @property
    def pad(self):
        """Optional 'pad' attribute; not provided by the base class."""
        owner = type(self).__name__
        raise NotImplementedError("{} has no attribute 'pad'".format(owner))

    @property
    def bos(self):
        """Optional 'bos' attribute; not provided by the base class."""
        owner = type(self).__name__
        raise NotImplementedError("{} has no attribute 'bos'".format(owner))

    @property
    def eos(self):
        """Optional 'eos' attribute; not provided by the base class."""
        owner = type(self).__name__
        raise NotImplementedError("{} has no attribute 'eos'".format(owner))

    # optional hooks: subclasses override the ones they support

    def cls_name_to_token(self, cls: str) -> int:
        raise NotImplementedError

    def next_posible_token(self, ids: ndarray) -> List[int]:
        raise NotImplementedError

    def bones_in_sequence(self, ids: ndarray) -> int:
        raise NotImplementedError

    def make_cls_head(self, **kwargs) -> List[int]:
        raise NotImplementedError

def _extrude_direction(head: ndarray, parent_head: ndarray, strict: bool) -> ndarray:
    # offset from the parent's head to this head; falls back to +Z when the
    # two coincide (or asserts in strict mode)
    d = head - parent_head
    length = np.linalg.norm(d)
    if strict:
        assert length > 1e-9, 'two joints in the same point found'
    elif length <= 1e-9:
        d = np.array([0., 0., 1.])
    return d

def make_skeleton(
    joints: ndarray,
    p_joints: ndarray,
    tails_dict: Dict[int, ndarray],
    convert_leaf_bones_to_tails: bool,
    extrude_tail_for_leaf: bool,
    extrude_tail_for_branch: bool,
    extrude_scale: float=0.5,
    strict: bool=False,
) -> Tuple[ndarray, ndarray, List[int], List[int]]:
    '''
    Build bones, parents and tails from joint positions.

    The parent of joint i (i > 0) is the earlier joint closest to
    p_joints[i]; on ties the joint with the largest index wins. Joint 0 is
    the root and has parent -1.

    Tail of joint i:
        - exactly one child: the child's position
        - no child (leaf): the joint itself, or the joint extruded along the
          direction parent -> joint if extrude_tail_for_leaf
        - several children (branch): the joint itself, or if
          extrude_tail_for_branch the joint extruded along parent -> joint;
          the root has no parent and is extruded along +Z by the average
          distance to its children instead
    Extruded offsets are multiplied by extrude_scale. When the direction
    parent -> joint has (near) zero length, +Z is used instead.

    Args:
        joints: heads of bones

        p_joints: parent position of joints

        tails_dict: tail position of the i-th joint. Modified in place:
            every index in range(J) is (re)written by this function.

        convert_leaf_bones_to_tails: remove leaf bones and make them tails of their parents

        extrude_tail_for_leaf: add a tail for leaf bone

        extrude_tail_for_branch: add a tail for joint with multiple children

        extrude_scale: length scale of tail offset

        strict: if true, raise error when there are joints in the same location

    Returns:
        bones, tails, available_bones_id, parents

        bones: one row per joint, (parent position, position)
        tails: one row per joint
        available_bones_id: indices of the bones that are kept (all of them
            unless convert_leaf_bones_to_tails drops the leaves)
        parents: parent index per joint, -1 for the root

    Raises:
        AssertionError: incompatible flags, mismatched input lengths, or
            coincident joints while strict is true
        ValueError: no joints given (raised by np.stack), or no parent can
            be chosen because the distances are not finite
    '''
    assert not (convert_leaf_bones_to_tails and extrude_tail_for_leaf), 'cannot extrude tail for leaf when convert_leaf_bones_to_tails is True'
    assert joints.shape[0] == p_joints.shape[0]

    # build parents
    bones = [] # (parent_position, position)
    parents = []
    for (i, joint) in enumerate(joints):
        if i == 0:
            bones.append(np.concatenate([joint, joint])) # root
            parents.append(-1)
            continue
        p_joint = p_joints[i]
        # +inf rather than a finite magic number, so a parent is found no
        # matter how far apart the joints are
        dis = np.inf
        pid = None
        # walk backwards with a strict '<' so ties keep the latest index
        for j in reversed(range(i)):
            n_dis = ((joints[j] - p_joint)**2).sum()
            if n_dis < dis:
                pid = j
                dis = n_dis
        if pid is None:
            raise ValueError('cannot find a parent for joint {}: no finite distance'.format(i))
        bones.append(np.concatenate([joints[pid], joint]))
        parents.append(pid)
    bones = np.stack(bones)
    heads = bones[:, 3:]

    children = defaultdict(list)
    for (i, pid) in enumerate(parents):
        if pid == -1:
            continue
        children[pid].append(i)

    available_bones_id = []
    if convert_leaf_bones_to_tails:
        for (i, pid) in enumerate(parents):
            if len(children[i]) != 0:
                available_bones_id.append(i)
                continue
            # dropped leaf: its head becomes the tail of its parent
            tails_dict[pid] = heads[i]
    else:
        available_bones_id = list(range(bones.shape[0]))

    # every joint falls in exactly one case: single child, leaf or branch
    for (i, pid) in enumerate(parents):
        kids = children[i]
        head = heads[i]
        if len(kids) == 1:
            # assign new tail: the only child's head
            tails_dict[i] = heads[kids[0]]
            continue
        extrude = extrude_tail_for_leaf if len(kids) == 0 else extrude_tail_for_branch
        if not extrude:
            tails_dict[i] = head
            continue
        if len(kids) > 1 and pid == -1:
            # root branch: no parent direction available, so go up +Z by the
            # average child distance. d must be an offset, not a position,
            # otherwise the root position is counted twice below.
            av_len = np.linalg.norm(heads[kids] - head, axis=1).mean()
            d = np.array([0., 0., av_len])
        else:
            # a leaf with pid == -1 only happens for a single-joint skeleton;
            # heads[-1] is then the joint itself and the degenerate case applies
            d = _extrude_direction(head, heads[pid], strict)
        tails_dict[i] = head + d * extrude_scale

    tails = np.stack([tails_dict[i] for i in range(bones.shape[0])])
    return bones, tails, available_bones_id, parents