| | import os |
| | from typing import Dict, Tuple, Union, Optional |
| |
|
| | from torch.nn import Module |
| | from transformers import AutoModel |
| |
|
| |
|
def auto_configure_device_map(num_gpus: int) -> Dict[str, int]:
    """Build a module-name -> GPU-index map spreading the model over GPUs.

    The map pins ``transformer.word_embeddings``,
    ``transformer.final_layernorm`` and ``lm_head`` to GPU 0 and assigns the
    28 ``transformer.layers.{i}`` entries to GPUs in ascending order.  The
    pinned modules are budgeted as 2 "layers", so 30 slots in total are
    divided across ``num_gpus`` devices and GPU 0 receives correspondingly
    fewer transformer layers.

    Args:
        num_gpus: Number of GPUs to distribute over; must be at least 1.

    Returns:
        A dict mapping each module name to the index of the GPU it is
        placed on.

    Raises:
        ValueError: If ``num_gpus`` is smaller than 1.
    """
    # Validate explicitly instead of relying on ``assert`` (removed under
    # ``python -O``) or on a ZeroDivisionError from the division below.
    if num_gpus < 1:
        raise ValueError(f"num_gpus must be at least 1, got {num_gpus}")

    num_trans_layers = 28
    # Budget slots charged to GPU 0 for the pinned (non-layer) modules.
    num_pinned_slots = 2
    per_gpu_layers = (num_trans_layers + num_pinned_slots) / num_gpus

    # NOTE(review): embeddings, final layernorm and lm_head are all kept on
    # GPU 0 -- presumably so model inputs and outputs share one device;
    # confirm against the model implementation before changing this.
    device_map = {
        'transformer.word_embeddings': 0,
        'transformer.final_layernorm': 0,
        'lm_head': 0,
    }

    used = num_pinned_slots
    gpu_target = 0
    for i in range(num_trans_layers):
        # Current GPU has reached its share: move on to the next one.
        if used >= per_gpu_layers:
            gpu_target += 1
            used = 0
        device_map[f'transformer.layers.{i}'] = gpu_target
        used += 1

    return device_map
| |
|
| |
|
def load_model_on_gpus(checkpoint_path: Union[str, os.PathLike], num_gpus: int = 2,
                       device_map: Optional[Dict[str, int]] = None, **kwargs) -> Module:
    """Load a checkpoint in half precision and place it on one or more GPUs.

    Args:
        checkpoint_path: Passed to ``AutoModel.from_pretrained`` as the
            checkpoint to load.
        num_gpus: Number of GPUs to spread the model over.  Used only when
            ``device_map`` is not given.
        device_map: Optional explicit module-name -> GPU-index placement.
            When omitted and ``num_gpus`` is 2 or more, one is generated with
            ``auto_configure_device_map(num_gpus)``.
        **kwargs: Forwarded unchanged to ``AutoModel.from_pretrained``.

    Returns:
        The loaded model: moved with ``.cuda()`` on the single-device path,
        otherwise the result of ``accelerate.dispatch_model``.

    Note:
        The checkpoint is always loaded with ``trust_remote_code=True``.
    """
    # Single-device path: fewer than two GPUs requested and no explicit map.
    if num_gpus < 2 and device_map is None:
        return AutoModel.from_pretrained(
            checkpoint_path, trust_remote_code=True, **kwargs
        ).half().cuda()

    # accelerate is imported lazily; only the multi-GPU path needs it.
    from accelerate import dispatch_model

    half_model = AutoModel.from_pretrained(
        checkpoint_path, trust_remote_code=True, **kwargs
    ).half()

    placement = auto_configure_device_map(num_gpus) if device_map is None else device_map
    return dispatch_model(half_model, device_map=placement)
| |
|
| |
|
| |
|