def format_predictions(words, predictions):
    """Pair each word with its predicted label.

    Args:
        words: Iterable of tokens.
        predictions: Iterable of labels, aligned position-by-position
            with ``words``.

    Returns:
        A list of ``(word, label)`` tuples. Pairing stops at the end of the
        shorter input, so surplus items in the longer one are dropped.
    """
    return list(zip(words, predictions))
|
|
def process_predictions(predictions):
    """Replace underscores inside each word with spaces, keeping its label.

    Exactly one ``(word, label)`` pair is produced per input pair: a
    compound token such as ``"Ha_Noi"`` becomes the single entry
    ``"Ha Noi"``; it is not split into several entries.

    Args:
        predictions: Iterable of ``(word, label)`` pairs.

    Returns:
        A new list of ``(word, label)`` tuples with ``'_'`` replaced by
        ``' '`` in every word.
    """
    # str.replace leaves words without an underscore unchanged, so no
    # separate membership check is needed.
    return [(word.replace('_', ' '), label) for word, label in predictions]
|
|
|
|
def combine_entities(predictions):
    """Merge each ``B-X`` token with the ``I-X`` tokens that directly follow it.

    Args:
        predictions: Iterable of ``(word, label)`` pairs with string labels.

    Returns:
        A list of ``(text, label)`` tuples. A ``B-X`` token and the run of
        ``I-X`` tokens (same ``X``) right after it become one entry whose
        text is the words joined by single spaces and whose label is the
        opening ``B-X`` label. Every other token, including an ``I-`` token
        that does not continue an open entity of the same type, is emitted
        unchanged and closes any open entity.
    """
    combined = []
    entity_words = []
    entity_label = None

    def flush():
        # Emit the open entity (if any) and reset the accumulator.
        nonlocal entity_words, entity_label
        if entity_words:
            combined.append((' '.join(entity_words), entity_label))
        entity_words = []
        entity_label = None

    for word, label in predictions:
        if label.startswith('B-'):
            # A new entity begins; close the previous one first.
            flush()
            entity_words.append(word)
            entity_label = label
        elif (
            label.startswith('I-')
            and entity_label is not None
            and label[2:] == entity_label[2:]
        ):
            # Continuation of the open entity (same type after the prefix).
            entity_words.append(word)
        else:
            # Outside token, orphan I- tag, or I- tag of a different type.
            flush()
            combined.append((word, label))

    flush()
    return combined
|
|
|
|
|
|
|
|
def remove_B_prefix(entities):
    """Strip a leading ``'B-'`` from every label that has one.

    Args:
        entities: Iterable of ``(word, label)`` pairs with string labels.

    Returns:
        A new list of ``(word, label)`` tuples. Labels starting with
        ``'B-'`` lose those two characters; all other labels (including
        ``'I-'`` labels) are passed through unchanged.
    """
    return [
        (word, label[2:] if label.startswith('B-') else label)
        for word, label in entities
    ]
|
|
|
|
def combine_i_tags(tokens_labels):
    """Merge consecutive ``I-X`` tokens of the same type into one entry.

    Args:
        tokens_labels: Iterable of ``(token, label)`` pairs with string
            labels.

    Returns:
        A list of ``(text, label)`` tuples. Each maximal run of adjacent
        tokens labelled ``I-X`` with the same ``X`` becomes one entry whose
        text is the tokens joined by single spaces and whose label is ``X``
        (the ``'I-'`` prefix is removed). Tokens whose label does not start
        with ``'I-'`` are emitted unchanged and end any run in progress.
    """
    combined = []
    run_tokens = []
    run_label = None

    def flush():
        # Emit the run in progress (if any) and reset the accumulator.
        nonlocal run_tokens, run_label
        if run_tokens:
            combined.append((' '.join(run_tokens), run_label))
        run_tokens = []
        run_label = None

    for token, label in tokens_labels:
        if not label.startswith('I-'):
            flush()
            combined.append((token, label))
            continue

        tag = label[2:]
        # A different entity type ends the current run before this token
        # starts a new one.
        if run_label is not None and run_label != tag:
            flush()
        run_tokens.append(token)
        run_label = tag

    flush()
    return combined
|
|
| |
|
|
| |
| |
|
|
|
|
|
|