| | --- |
| | license: apache-2.0 |
| | --- |
| | |
| | Here is the code used to create this tiny model: |
| |
|
| | ```python |
| | import os |
| | import torch |
| | torch.set_default_dtype(torch.bfloat16) |
| | from transformers import AutoTokenizer, AutoConfig, Cohere2ForCausalLM, AutoModelForCausalLM |
| | |
| | model_id = "CohereLabs/tiny-aya-base" |
| | config = AutoConfig.from_pretrained(model_id) |
| | |
| | config.num_hidden_layers=2 |
| | config.layer_types=[ |
| | "sliding_attention", |
| | "full_attention", |
| | ] |
| | config.num_attention_heads=4 |
| | config.hidden_size=4 |
| | config.intermediate_size=5 |
| | |
| | model = Cohere2ForCausalLM(config) |
| | tokenizer = AutoTokenizer.from_pretrained(model_id) |
| | |
| | output_dir = "./tiny-random-aya-base/" |
| | os.makedirs(output_dir, exist_ok=True) |
| | model.save_pretrained(output_dir, safe_serialization=True) |
| | tokenizer.save_pretrained(output_dir) |
| | ``` |
| |
|