| import torch |
| from diffsynth import load_state_dict |
| from safetensors.torch import save_file |
|
|
|
|
class SingleKVCacheModel(torch.nn.Module):
    """Hold one key tensor and one value tensor as module parameters.

    Both tensors are registered as ``torch.nn.Parameter`` objects, so they
    appear in ``state_dict()`` under the names ``k`` and ``v``.

    Args:
        shape: Shape handed to ``torch.zeros`` for each of the two tensors.
    """

    def __init__(self, shape):
        super().__init__()
        # Both parameters start out as all-zero tensors of the same shape.
        self.k = torch.nn.Parameter(torch.zeros(shape))
        self.v = torch.nn.Parameter(torch.zeros(shape))

    def forward(self):
        """Return the stored parameters as a ``(k, v)`` tuple."""
        return (self.k, self.v)
|
|
|
|
class StaticKVCacheModel(torch.nn.Module):
    """Keep one ``(k, v)`` parameter pair per named block.

    Block names are ``double_0 .. double_{num_double_blocks - 1}`` followed
    by ``single_0 .. single_{num_single_blocks - 1}``. The pair for the
    block at position ``i`` lives in ``self.cache[i]``, so its parameters are
    stored in the state dict as ``cache.{i}.k`` and ``cache.{i}.v``.

    Args:
        num_double_blocks: Number of ``double_*`` entries (default 5).
        num_single_blocks: Number of ``single_*`` entries (default 20).
        cache_shape: Shape of every key and value tensor
            (default ``(1, 4608, 24, 128)``).
    """

    def __init__(self, num_double_blocks=5, num_single_blocks=20, cache_shape=(1, 4608, 24, 128)):
        super().__init__()
        double_names = [f"double_{i}" for i in range(num_double_blocks)]
        single_names = [f"single_{i}" for i in range(num_single_blocks)]
        # Order matters: the position in this list is the index into self.cache.
        self.block_names = double_names + single_names
        self.cache = torch.nn.ModuleList(
            [SingleKVCacheModel(cache_shape) for _ in self.block_names]
        )

    def load_from_kv_cache(self, kv_cache):
        """Copy tensors from ``kv_cache`` into this module's parameters.

        Args:
            kv_cache: Mapping from block name to an indexable whose element
                ``0`` is the key tensor and element ``1`` is the value tensor.

        Raises:
            KeyError: If a name in ``self.block_names`` is missing from
                ``kv_cache``.
        """
        state_dict = {}
        for block_id, block_name in enumerate(self.block_names):
            entry = kv_cache[block_name]
            state_dict[f"cache.{block_id}.k"] = entry[0]
            state_dict[f"cache.{block_id}.v"] = entry[1]
        # load_state_dict is called with its defaults, so the tensors have to
        # be compatible with the registered parameters.
        self.load_state_dict(state_dict)

    @torch.no_grad()
    def process_inputs(self, **kwargs):
        """Ignore all keyword arguments and return an empty dict."""
        return {}

    def forward(self, **kwargs):
        """Return ``{"kv_cache": {block_name: (k, v)}}`` for every block.

        Keyword arguments are accepted but not used.
        """
        kv_cache = {
            block_name: cache()
            for block_name, cache in zip(self.block_names, self.cache)
        }
        return {"kv_cache": kv_cache}
|
|
|
|
def convert_from_kv_cache(kv_cache, path):
    """Write a KV cache to ``path`` as a ``StaticKVCacheModel`` state dict.

    A default-constructed ``StaticKVCacheModel`` is cast to ``torch.bfloat16``,
    filled from ``kv_cache`` via ``load_from_kv_cache``, and its state dict
    is written with ``safetensors.torch.save_file``.

    Args:
        kv_cache: Mapping from block name to an indexable holding the key
            tensor at index 0 and the value tensor at index 1.
        path: Destination file passed straight to ``save_file``.
    """
    # Cast first so the loaded values end up stored in bfloat16.
    model = StaticKVCacheModel().to(torch.bfloat16)
    model.load_from_kv_cache(kv_cache)
    save_file(model.state_dict(), path)
|
|
|
|
class DataAnnotator:
    """Pass-through callable that returns its keyword arguments unchanged."""

    def __call__(self, **kwargs):
        """Return the received keyword arguments as a dict."""
        return kwargs
|
|
|
|
# Module-level template settings: the model class, the file name of its
# weights, and the data-processor class.
# NOTE(review): presumably read by an external template loader - confirm.
TEMPLATE_MODEL = StaticKVCacheModel
TEMPLATE_MODEL_PATH = "model.safetensors"
TEMPLATE_DATA_PROCESSOR = DataAnnotator
|
|