Instructions to use refactai/codify_medium_multi with libraries, inference providers, notebooks, and local apps. Follow these links to get started.

Libraries

How to use refactai/codify_medium_multi with Transformers:

# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-generation", model="refactai/codify_medium_multi", trust_remote_code=True)

# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("refactai/codify_medium_multi", trust_remote_code=True, dtype="auto")

Notebooks
Google Colab
Kaggle
Local Apps

vLLM

How to use refactai/codify_medium_multi with vLLM:

Install from pip and serve model

# Install vLLM from pip:
pip install vllm
# Start the vLLM server:
vllm serve "refactai/codify_medium_multi"
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "refactai/codify_medium_multi",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker

docker model run hf.co/refactai/codify_medium_multi

SGLang

How to use refactai/codify_medium_multi with SGLang:

Install from pip and serve model

# Install SGLang from pip:
pip install sglang
# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "refactai/codify_medium_multi" \
    --host 0.0.0.0 \
    --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "refactai/codify_medium_multi",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Use Docker images

docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "refactai/codify_medium_multi" \
        --host 0.0.0.0 \
        --port 30000
# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/completions" \
	-H "Content-Type: application/json" \
	--data '{
		"model": "refactai/codify_medium_multi",
		"prompt": "Once upon a time,",
		"max_tokens": 512,
		"temperature": 0.5
	}'

Docker Model Runner
How to use refactai/codify_medium_multi with Docker Model Runner:
```
docker model run hf.co/refactai/codify_medium_multi
```

codify_medium_multi / codify / tokenization_codify_fast.py

smallcloudteam

add module

6146892 over 3 years ago

raw

history blame

3.64 kB

	import json
	from typing import TYPE_CHECKING, List, Optional, Tuple

	from tokenizers import pre_tokenizers

	from transformers.tokenization_utils_base import BatchEncoding
	from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
	from transformers.utils import logging


	# Only needed for the "Conversation" annotation used further down; guarding
	# the import keeps it out of the runtime import path.
	if TYPE_CHECKING:
	    from transformers.pipelines.conversational import Conversation


	# Module-level logger obtained through the `logging` helper imported from
	# `transformers.utils` (not the standard-library `logging` module).
	logger = logging.get_logger(__name__)

	# Maps the tokenizer constructor argument name to the file name stored in a
	# model repository.
	VOCAB_FILES_NAMES = {"tokenizer_file": "tokenizer.json"}

	# Per-checkpoint download locations of the serialized tokenizer.
	# NOTE: the URLs use "/resolve/main/" (raw file content); "/blob/main/" points
	# at the Hub's HTML file viewer and would not return the JSON file itself.
	PRETRAINED_VOCAB_FILES_MAP = {
	    "tokenizer_file": {
	        "smallcloudai/codify_medium_multi": (
	            "https://huggingface.co/smallcloudai/codify_medium_multi/resolve/main/tokenizer.json"
	        ),
	        "smallcloudai/codify_3b_multi": (
	            "https://huggingface.co/smallcloudai/codify_3b_multi/resolve/main/tokenizer.json"
	        ),
	    },
	}


	class CodifyTokenizerFast(PreTrainedTokenizerFast):
	    """Fast tokenizer for Codify checkpoints, built on ``PreTrainedTokenizerFast``.

	    The unknown, beginning-of-sequence and end-of-sequence tokens all default
	    to ``<|endoftext|>``. Pretokenized input (``is_split_into_words=True``) is
	    only accepted when the tokenizer was created with ``add_prefix_space=True``.
	    """

	    vocab_files_names = VOCAB_FILES_NAMES
	    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
	    model_input_names = ["input_ids", "attention_mask"]
	    # No slow (pure-Python) counterpart is declared for this tokenizer.
	    slow_tokenizer_class = None

	    def __init__(
	        self,
	        vocab_file=None,
	        merges_file=None,
	        tokenizer_file=None,
	        unk_token="<|endoftext|>",
	        bos_token="<|endoftext|>",
	        eos_token="<|endoftext|>",
	        add_prefix_space=False,
	        **kwargs
	    ):
	        """Initialize the tokenizer and align the backend pre-tokenizer.

	        Args:
	            vocab_file: Forwarded positionally to the parent constructor.
	            merges_file: Forwarded positionally to the parent constructor.
	            tokenizer_file: Forwarded to the parent constructor.
	            unk_token: Unknown token string.
	            bos_token: Beginning-of-sequence token string.
	            eos_token: End-of-sequence token string.
	            add_prefix_space: Requested ``add_prefix_space`` setting for the
	                backend pre-tokenizer; also stored on the instance.
	            **kwargs: Extra keyword arguments forwarded to the parent.
	        """
	        super().__init__(
	            vocab_file,
	            merges_file,
	            tokenizer_file=tokenizer_file,
	            unk_token=unk_token,
	            bos_token=bos_token,
	            eos_token=eos_token,
	            add_prefix_space=add_prefix_space,
	            **kwargs,
	        )

	        # The serialized pre-tokenizer may carry a different add_prefix_space
	        # value than the one requested; if so, rebuild it from its own state
	        # with the requested value.
	        pre_tok_state = json.loads(self.backend_tokenizer.pre_tokenizer.__getstate__())
	        if pre_tok_state.get("add_prefix_space", add_prefix_space) != add_prefix_space:
	            pre_tok_class = getattr(pre_tokenizers, pre_tok_state.pop("type"))
	            pre_tok_state["add_prefix_space"] = add_prefix_space
	            self.backend_tokenizer.pre_tokenizer = pre_tok_class(**pre_tok_state)

	        self.add_prefix_space = add_prefix_space

	    def _require_prefix_space_for_pretokenized(self, is_split_into_words) -> None:
	        """Reject pretokenized input unless ``add_prefix_space`` is enabled.

	        Raises:
	            ValueError: If ``is_split_into_words`` is truthy while the
	                tokenizer was created with ``add_prefix_space=False``.
	        """
	        if is_split_into_words and not self.add_prefix_space:
	            raise ValueError(
	                f"You need to instantiate {self.__class__.__name__} with add_prefix_space=True to use it with"
	                " pretokenized inputs."
	            )

	    def _batch_encode_plus(self, *args, **kwargs) -> BatchEncoding:
	        """Validate the ``is_split_into_words`` option, then defer to the parent.

	        Raises:
	            ValueError: For pretokenized input without ``add_prefix_space=True``.
	        """
	        self._require_prefix_space_for_pretokenized(kwargs.get("is_split_into_words", False))
	        return super()._batch_encode_plus(*args, **kwargs)

	    def _encode_plus(self, *args, **kwargs) -> BatchEncoding:
	        """Validate the ``is_split_into_words`` option, then defer to the parent.

	        Raises:
	            ValueError: For pretokenized input without ``add_prefix_space=True``.
	        """
	        self._require_prefix_space_for_pretokenized(kwargs.get("is_split_into_words", False))
	        return super()._encode_plus(*args, **kwargs)

	    def save_vocabulary(self, save_directory: str, filename_prefix: Optional[str] = None) -> Tuple[str, ...]:
	        """Save the backend tokenizer model and return the written file paths.

	        Args:
	            save_directory: Directory handed to the backend model's ``save``.
	            filename_prefix: Optional name passed to the backend as ``name``.

	        Returns:
	            A tuple built from whatever the backend ``save`` call returns.
	        """
	        files = self._tokenizer.model.save(save_directory, name=filename_prefix)
	        return tuple(files)

	    def _build_conversation_input_ids(self, conversation: "Conversation") -> List[int]:
	        """Encode a conversation as token ids, each turn followed by EOS.

	        This corresponds to DialoGPT variants of models. When the result is
	        longer than ``model_max_length`` only the trailing ``model_max_length``
	        ids are kept.
	        """
	        input_ids = []
	        # The speaker flag yielded by iter_texts() is not needed here.
	        for _is_user, text in conversation.iter_texts():
	            input_ids.extend(self.encode(text, add_special_tokens=False) + [self.eos_token_id])

	        if len(input_ids) > self.model_max_length:
	            input_ids = input_ids[-self.model_max_length :]
	        return input_ids