import json
from typing import Dict, Iterable, List


class Tokenizer:
    """Character-level tokenizer backed by a token -> id vocabulary.

    ``encode`` looks up each character of the input individually, so only
    single-character vocabulary entries can ever be produced by it.
    """

    def __init__(self, vocab: Dict[str, int], unk_id: int = 0) -> None:
        """Build the forward and reverse lookup tables.

        Args:
            vocab: Mapping from token string to integer id. It is stored by
                reference (not copied); the reverse table is built once
                here, so later mutations of ``vocab`` are not reflected in
                ``id_to_token``.
            unk_id: Id emitted by ``encode`` for characters that are not in
                ``vocab``. Defaults to 0.
        """
        self.token_to_id = vocab
        # If several tokens share an id, the last one iterated wins.
        self.id_to_token = {
            token_id: token for token, token_id in vocab.items()
        }
        self.unk_id = unk_id

    @classmethod
    def load(cls, path, unk_id: int = 0) -> "Tokenizer":
        """Create a tokenizer from a UTF-8 JSON file.

        The file's top-level object must contain a ``"token_to_id"`` entry
        holding the vocabulary mapping.

        Args:
            path: Location of the JSON file, passed to ``open``.
            unk_id: Forwarded to the constructor; id used for unknown
                characters. Defaults to 0.

        Raises:
            KeyError: If the loaded object has no ``"token_to_id"`` key.
        """
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
        return cls(data["token_to_id"], unk_id=unk_id)

    def encode(self, text: str) -> List[int]:
        """Return one id per character of ``text``.

        Characters missing from the vocabulary map to ``self.unk_id``.
        """
        lookup = self.token_to_id.get
        unk_id = self.unk_id
        return [lookup(char, unk_id) for char in text]

    def decode(self, tokens: Iterable[int]) -> str:
        """Return the concatenation of the tokens for the ids in ``tokens``.

        Ids with no vocabulary entry contribute nothing to the result.
        """
        lookup = self.id_to_token.get
        return "".join([lookup(token_id, "") for token_id in tokens])