import torch
import numpy as np
from transformers import AutoModelForCausalLM, AutoTokenizer
import coremltools as ct

# Checkpoint to convert: German LLaMmlein 1B causal language model.
MODEL_ID = "LSX-UniWue/LLaMmlein_1B"

# Download/load the pretrained weights and the matching tokenizer.
model = AutoModelForCausalLM.from_pretrained("LSX-UniWue/LLaMmlein_1B")
tokenizer = AutoTokenizer.from_pretrained("LSX-UniWue/LLaMmlein_1B")

# Switch to inference mode (disables dropout etc.) before tracing.
model.eval()

# Short German sample prompt; its token ids serve as the example input
# for torch.jit.trace further down.
text = "Ein Beispieltext"
inputs = tokenizer(text, return_tensors="pt")
| |
|
| | |
class ModelWrapper(torch.nn.Module):
    """Adapter that reduces the wrapped model's output to its logits tensor.

    torch.jit.trace works best with a module whose forward returns plain
    tensors; the wrapped model returns an object carrying a ``.logits``
    attribute, so this wrapper unpacks just that field.
    """

    def __init__(self, model):
        super().__init__()
        # Registered as a submodule when `model` is an nn.Module.
        self.model = model

    def forward(self, input_ids):
        # Forward the token ids and keep only the logits.
        outputs = self.model(input_ids)
        return outputs.logits
| |
|
| | |
# Wrap the model so tracing sees a tensor-in / tensor-out interface.
wrapped_model = ModelWrapper(model)

# FIX: trace under no_grad — tracing is inference-only, and recording
# autograd state for a ~1B-parameter model wastes memory and can pull
# grad-tracking ops into the captured graph.
with torch.no_grad():
    traced_model = torch.jit.trace(wrapped_model, inputs.input_ids)

# Convert the TorchScript graph to a Core ML ML Program (fp16, iOS 16+,
# allowed to run on CPU/GPU/Neural Engine).
# NOTE(review): the input shape is pinned to the example prompt's exact
# token count, so the converted model accepts only that sequence length;
# use ct.RangeDim for a flexible sequence axis if variable-length input
# is needed — confirm against the deployment requirements.
model_mlpackage = ct.convert(
    traced_model,
    inputs=[
        ct.TensorType(
            name="input_ids",
            shape=inputs.input_ids.shape,
            dtype=np.int32,
        )
    ],
    source="pytorch",
    minimum_deployment_target=ct.target.iOS16,
    convert_to="mlprogram",
    compute_precision=ct.precision.FLOAT16,
    compute_units=ct.ComputeUnit.ALL,
)

# Persist the converted package next to the script.
model_mlpackage.save("LLaMmlein_1B.mlpackage")
| |
|