Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff / bundle / evaluation / benchmarks.py
| """Benchmark scaffolds for TinyMind PureField.""" | |
| from __future__ import annotations | |
| import time | |
| from typing import Iterable | |
| import torch | |
| import torch.nn as nn | |
| from model.config import OmegaConfig | |
| from model.sparse_int4 import export_sparse_int4_model | |
# Context lengths (in tokens) exercised by the long-context smoke run, ascending.
LONG_CONTEXT_TARGETS = (1_024, 8_192, 32_768, 128_000)


def build_context_smoke_plan(max_length: int | None = None) -> list[int]:
    """Return the context lengths to smoke-test, optionally capped.

    Args:
        max_length: Inclusive upper bound on the returned lengths. ``None``
            (the default) keeps every entry of ``LONG_CONTEXT_TARGETS``.

    Returns:
        A new list with the targets that are ``<= max_length``, in the order
        they appear in ``LONG_CONTEXT_TARGETS``. The list is empty when no
        target fits under the cap.
    """
    plan = list(LONG_CONTEXT_TARGETS)
    if max_length is not None:
        plan = [target for target in plan if target <= max_length]
    return plan
def run_purefield_context_smoke(
    model: nn.Module,
    cfg: OmegaConfig,
    lengths: Iterable[int] | None = None,
    device: torch.device | str = "cpu",
) -> list[dict]:
    """Run one forward pass per context length and report timing/health stats.

    The model is moved to ``device`` and put in eval mode; both calls act on
    the caller's module. For each length a single random sequence is pushed
    through the model with autograd disabled, since this is an inference-only
    check and building a graph would only waste memory on long contexts.

    Args:
        model: Module whose forward returns a mapping containing a
            ``"logits"`` tensor and, optionally, ``"kv_caches"``. When
            present and non-empty, the first cache entry is queried for the
            ``"memory"`` and ``"local_k"`` keys.
        cfg: Configuration object; only ``cfg.vocab_size`` is read here.
        lengths: Context lengths (in tokens) to test. When ``None`` the
            default plan from ``build_context_smoke_plan()`` is used.
        device: Device the model and inputs are placed on.

    Returns:
        One dict per tested length with the keys ``context_tokens``,
        ``elapsed_s``, ``prefill_tokens_per_sec``, ``logits_finite``,
        ``memory_shape`` and ``local_window_tokens``.
    """
    model = model.to(device).eval()
    # CUDA kernels launch asynchronously; without an explicit synchronize the
    # wall-clock window would only cover the launch, not the actual compute.
    sync_cuda = torch.device(device).type == "cuda"
    plan = list(lengths) if lengths is not None else build_context_smoke_plan()

    rows: list[dict] = []
    for length in plan:
        n_tokens = int(length)
        # Lower bound 4 presumably skips reserved/special token ids — TODO confirm.
        input_ids = torch.randint(4, cfg.vocab_size, (1, n_tokens), device=device)

        if sync_cuda:
            torch.cuda.synchronize(device)
        t0 = time.perf_counter()
        with torch.no_grad():
            out = model(input_ids)
        if sync_cuda:
            torch.cuda.synchronize(device)
        # Clamp so the throughput division below can never divide by zero.
        elapsed = max(time.perf_counter() - t0, 1e-9)

        caches = out.get("kv_caches", [])
        first_cache = caches[0] if caches else {}
        memory = first_cache.get("memory")
        local_k = first_cache.get("local_k")

        rows.append(
            {
                "context_tokens": n_tokens,
                "elapsed_s": elapsed,
                "prefill_tokens_per_sec": float(n_tokens / elapsed),
                "logits_finite": bool(torch.isfinite(out["logits"]).all().item()),
                "memory_shape": list(memory.shape) if memory is not None else [],
                # Assumes dim 1 of local_k is the token axis — TODO confirm.
                "local_window_tokens": int(local_k.shape[1]) if local_k is not None else 0,
            }
        )
    return rows
def _tensor_nbytes(tensor: torch.Tensor) -> int:
    """Return the storage size of ``tensor`` in bytes (elements x bytes per element)."""
    return int(tensor.numel()) * tensor.element_size()


def summarize_int4_export(model: nn.Module, cfg: OmegaConfig) -> dict:
    """Export ``model`` via ``export_sparse_int4_model`` and summarize the artifact.

    The size estimate sums the byte size of each exported layer's
    ``packed_weight``, ``metadata``, ``scales`` and, when the layer has one,
    ``bias``. Every tensor is measured as ``numel() * element_size()`` so the
    estimate stays correct regardless of the dtype used for packed storage.

    Args:
        model: Module handed to ``export_sparse_int4_model``.
        cfg: Configuration object; only ``cfg.quality_gate_delta`` is read.

    Returns:
        A dict with ``format`` and ``user_alias`` copied from the artifact,
        the number of exported ``layers``, ``artifact_mb_estimate`` (bytes
        divided by 1024 * 1024) and the ``quality_gate_delta`` that was used.
    """
    artifact = export_sparse_int4_model(model, quality_gate_delta=cfg.quality_gate_delta)
    layers = artifact["layers"]

    total_bytes = 0
    for layer in layers.values():
        total_bytes += _tensor_nbytes(layer.packed_weight)
        total_bytes += _tensor_nbytes(layer.metadata)
        total_bytes += _tensor_nbytes(layer.scales)
        # Bias is optional on an exported layer; absent or None adds nothing.
        bias = getattr(layer, "bias", None)
        if bias is not None:
            total_bytes += _tensor_nbytes(bias)

    return {
        "format": artifact["format"],
        "user_alias": artifact["user_alias"],
        "layers": len(layers),
        "artifact_mb_estimate": total_bytes / (1024 * 1024),
        "quality_gate_delta": cfg.quality_gate_delta,
    }
Xet Storage Details
- Size: 2.6 kB
- Xet hash: 3fd3053e9af50cae1932db310e7edab2fdc6401e56a0619c4f2fdead1eec2bc1
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.