import json
import os

import tiktoken
import torch
from safetensors.torch import load_file

from modeling_hrm_cosmicfish import HRMCosmicFish, HRMCosmicFishConfig


def load_model(model_dir, device="cpu"):
    """Load an HRMCosmicFish checkpoint and its tokenizer from a directory.

    Reads ``config.json`` and ``model.safetensors`` from *model_dir*, builds
    the model from the stored hyperparameters (with dropout forced to 0.0),
    loads the weights, moves the model to *device* and switches it to eval
    mode.

    Args:
        model_dir: Directory containing ``config.json`` and
            ``model.safetensors``.
        device: Device passed to ``model.to``. Defaults to ``"cpu"``.

    Returns:
        A ``(model, tokenizer)`` tuple, where the tokenizer is tiktoken's
        ``"gpt2"`` encoding.

    Raises:
        KeyError: If ``config.json`` lacks one or more required
            hyperparameters.
    """
    # Hyperparameters copied verbatim from config.json into the model config.
    required_keys = (
        "vocab_size",
        "n_embd",
        "block_size",
        "n_head",
        "n_kv_head",
        "n_input_layers",
        "n_output_layers",
        "hrm_H_layers",
        "hrm_L_layers",
        "hrm_H_cycles",
        "hrm_L_cycles",
        "hrm_max_steps",
    )

    # Explicit encoding so the read does not depend on the platform locale.
    with open(os.path.join(model_dir, "config.json"), encoding="utf-8") as f:
        cfg = json.load(f)

    # Report every missing key at once instead of failing on the first one.
    missing = [key for key in required_keys if key not in cfg]
    if missing:
        raise KeyError(f"config.json is missing required keys: {missing}")

    # Dropout is always 0.0 here, regardless of what config.json contains.
    config = HRMCosmicFishConfig(
        **{key: cfg[key] for key in required_keys},
        dropout=0.0,
    )

    state_dict = load_file(os.path.join(model_dir, "model.safetensors"))
    model = HRMCosmicFish(config)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    tokenizer = tiktoken.get_encoding("gpt2")
    return model, tokenizer


def generate(model, tokenizer, prompt, device="cpu", max_new_tokens=100, temperature=0.7, top_k=40):
    """Generate text from *prompt* and return the decoded result.

    The prompt is encoded with *tokenizer*, wrapped into a batch of one on
    *device*, and passed to ``model.generate`` with gradients disabled. The
    first row of the returned tensor is decoded back to a string.

    Args:
        model: Object exposing ``generate(idx, max_new_tokens=..., temperature=...,
            top_k=...)`` that returns a batch of token ids.
        tokenizer: Object exposing ``encode(str)`` and ``decode(list_of_ids)``.
        prompt: Text to condition on.
        device: Device the input tensor is created on. Defaults to ``"cpu"``.
        max_new_tokens: Forwarded to ``model.generate``.
        temperature: Forwarded to ``model.generate``.
        top_k: Forwarded to ``model.generate``.

    Returns:
        The decoded first row of whatever ``model.generate`` returns
        (presumably the prompt followed by its continuation; confirm
        against the model implementation).

    Raises:
        ValueError: If *prompt* encodes to zero tokens.
    """
    tokens = tokenizer.encode(prompt)
    if not tokens:
        # A (1, 0) input gives the model nothing to condition on; fail early
        # with a clear message rather than deep inside model.generate.
        raise ValueError("prompt must encode to at least one token")

    # Nesting the list adds the batch dimension; creating the tensor on the
    # target device avoids a CPU tensor followed by a copy.
    idx = torch.tensor([tokens], dtype=torch.long, device=device)
    with torch.no_grad():
        output = model.generate(
            idx,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            top_k=top_k,
        )
    return tokenizer.decode(output[0].tolist())


if __name__ == "__main__":
    # Smoke test: load the checkpoint from the current directory (on the
    # default CPU device) and print one completion per sample prompt.
    model, tokenizer = load_model(".")
    prompts = [
        "What is the capital of France?",
        "What is artificial intelligence?",
        "What does def fibonacci(n): do?",
    ]
    for prompt in prompts:
        result = generate(model, tokenizer, prompt)
        print(f"Prompt: {prompt}")
        print(f"Output: {result}")
        print()  # blank line between prompt/output pairs