bbkdevops's picture
download
raw
2.6 kB
"""Benchmark scaffolds for TinyMind PureField."""
from __future__ import annotations
import time
from typing import Iterable
import torch
import torch.nn as nn
from model.config import OmegaConfig
from model.sparse_int4 import export_sparse_int4_model
# Context lengths (in tokens) exercised by the long-context smoke benchmark,
# listed in ascending order.
LONG_CONTEXT_TARGETS = (1_024, 8_192, 32_768, 128_000)


def build_context_smoke_plan(max_length: int | None = None) -> list[int]:
    """Return the context lengths to benchmark, optionally capped.

    Args:
        max_length: Largest context length to keep (inclusive). ``None``
            keeps every entry of ``LONG_CONTEXT_TARGETS``.

    Returns:
        A new list of target lengths in the order they appear in
        ``LONG_CONTEXT_TARGETS``; empty when no target fits under the cap.
    """
    if max_length is None:
        # Copy so callers can mutate the plan without touching the constant.
        return list(LONG_CONTEXT_TARGETS)
    return [length for length in LONG_CONTEXT_TARGETS if length <= max_length]
def _sync_if_cuda(device: torch.device | str) -> None:
    """Wait for queued CUDA work on *device* to finish; no-op on other devices."""
    resolved = torch.device(device)
    if resolved.type == "cuda" and torch.cuda.is_available():
        torch.cuda.synchronize(resolved)


@torch.no_grad()
def run_purefield_context_smoke(
    model: nn.Module,
    cfg: OmegaConfig,
    lengths: Iterable[int] | None = None,
    device: torch.device | str = "cpu",
) -> list[dict]:
    """Run one forward pass per context length and record basic health stats.

    The model is moved to ``device`` and put in eval mode, then called once
    per length on a random ``(1, length)`` batch of token ids.

    Args:
        model: Module whose forward returns a mapping containing ``"logits"``
            and, optionally, ``"kv_caches"``.
        cfg: Configuration; only ``cfg.vocab_size`` is read here (exclusive
            upper bound for the sampled token ids).
        lengths: Context lengths to test. ``None`` uses
            ``build_context_smoke_plan()``.
        device: Device on which the model and inputs are placed.

    Returns:
        One dict per length with the keys ``context_tokens``, ``elapsed_s``,
        ``prefill_tokens_per_sec``, ``logits_finite``, ``memory_shape`` and
        ``local_window_tokens``. The last two are ``[]`` and ``0`` when the
        first cache entry is absent or lacks the corresponding tensor.
    """
    model = model.to(device).eval()
    plan = lengths if lengths is not None else build_context_smoke_plan()
    rows: list[dict] = []
    for raw_length in plan:
        length = int(raw_length)
        # Ids are sampled from [4, vocab_size); presumably ids 0-3 are
        # reserved special tokens -- TODO confirm against the tokenizer.
        input_ids = torch.randint(4, cfg.vocab_size, (1, length), device=device)

        # CUDA kernels launch asynchronously: without a sync on both sides
        # the timer would capture launch overhead, not the forward pass.
        _sync_if_cuda(device)
        t0 = time.perf_counter()
        out = model(input_ids)
        _sync_if_cuda(device)
        # Clamp so the throughput division below can never hit zero.
        elapsed = max(time.perf_counter() - t0, 1e-9)

        caches = out.get("kv_caches", [])
        first_cache = caches[0] if caches else None
        if first_cache is None:
            # Missing, empty, or placeholder cache: report empty stats.
            first_cache = {}
        memory = first_cache.get("memory")
        local_k = first_cache.get("local_k")

        rows.append(
            {
                "context_tokens": length,
                "elapsed_s": elapsed,
                "prefill_tokens_per_sec": float(length / elapsed),
                "logits_finite": bool(torch.isfinite(out["logits"]).all().item()),
                "memory_shape": list(memory.shape) if memory is not None else [],
                # Assumes dim 1 of local_k is the token axis -- TODO confirm.
                "local_window_tokens": int(local_k.shape[1]) if local_k is not None else 0,
            }
        )
    return rows
def _tensor_nbytes(tensor: torch.Tensor) -> int:
    """Return the storage size of *tensor* in bytes (elements x element size)."""
    return int(tensor.numel()) * tensor.element_size()


def summarize_int4_export(model: nn.Module, cfg: OmegaConfig) -> dict:
    """Export *model* with ``export_sparse_int4_model`` and summarize the result.

    Args:
        model: Module handed to ``export_sparse_int4_model``.
        cfg: Configuration; only ``cfg.quality_gate_delta`` is read, both as
            the export argument and echoed back in the summary.

    Returns:
        A dict with the artifact's ``format`` and ``user_alias``, the number
        of exported ``layers``, an ``artifact_mb_estimate`` (MiB) and the
        ``quality_gate_delta`` used. The estimate sums, per layer, the bytes
        of ``packed_weight``, ``metadata``, ``scales`` and, when present,
        ``bias``.
    """
    artifact = export_sparse_int4_model(model, quality_gate_delta=cfg.quality_gate_delta)
    layers = artifact["layers"]

    total_bytes = 0
    for layer in layers.values():
        # Every tensor is sized as numel * element_size so the estimate stays
        # correct even if packed_weight is not stored in a 1-byte dtype.
        total_bytes += _tensor_nbytes(layer.packed_weight)
        total_bytes += _tensor_nbytes(layer.metadata)
        total_bytes += _tensor_nbytes(layer.scales)
        bias = getattr(layer, "bias", None)  # bias is optional on a layer
        if bias is not None:
            total_bytes += _tensor_nbytes(bias)

    return {
        "format": artifact["format"],
        "user_alias": artifact["user_alias"],
        "layers": len(layers),
        "artifact_mb_estimate": total_bytes / (1024 * 1024),
        "quality_gate_delta": cfg.quality_gate_delta,
    }

Xet Storage Details

Size:
2.6 kB
·
Xet hash:
3fd3053e9af50cae1932db310e7edab2fdc6401e56a0619c4f2fdead1eec2bc1

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.