Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /evaluation /benchmarks.py

bbkdevops

about 1 month ago

download

raw

2.6 kB

	"""Benchmark scaffolds for TinyMind PureField."""

	from __future__ import annotations

	import time
	from typing import Iterable

	import torch
	import torch.nn as nn

	from model.config import OmegaConfig
	from model.sparse_int4 import export_sparse_int4_model


	# Context lengths, in tokens, that the smoke benchmark steps through by
	# default (smallest first).
	LONG_CONTEXT_TARGETS = (
	    1_024,
	    8_192,
	    32_768,
	    128_000,  # decimal 128k, not 131_072
	)


	def build_context_smoke_plan(max_length: int | None = None) -> list[int]:
	    """Return the context lengths to benchmark, optionally capped.

	    Args:
	        max_length: Inclusive upper bound on the lengths to keep. ``None``
	            keeps every entry of ``LONG_CONTEXT_TARGETS``.

	    Returns:
	        A new list with the selected lengths, in the order they appear in
	        ``LONG_CONTEXT_TARGETS``. The list is empty when every target
	        exceeds ``max_length``.
	    """
	    plan = list(LONG_CONTEXT_TARGETS)
	    if max_length is not None:
	        plan = [tokens for tokens in plan if tokens <= max_length]
	    return plan


	@torch.no_grad()
	def run_purefield_context_smoke(
	    model: nn.Module,
	    cfg: OmegaConfig,
	    lengths: Iterable[int] | None = None,
	    device: torch.device | str = "cpu",
	) -> list[dict]:
	    """Time one forward pass per context length and record basic health stats.

	    The model is moved to ``device`` and put in eval mode; it is left in that
	    state when the function returns. Gradients are disabled for the whole run.

	    Args:
	        model: Module called as ``model(input_ids)``. Its output must support
	            ``out["logits"]`` and ``out.get("kv_caches", [])``.
	        cfg: Configuration; only ``cfg.vocab_size`` is read here.
	        lengths: Context lengths to run. ``None`` falls back to
	            ``build_context_smoke_plan()``.
	        device: Device on which the model and the random inputs are placed.

	    Returns:
	        One dict per length with the keys ``context_tokens``, ``elapsed_s``,
	        ``prefill_tokens_per_sec``, ``logits_finite``, ``memory_shape`` and
	        ``local_window_tokens``. The last two are taken from the first entry
	        of ``kv_caches`` and default to ``[]`` / ``0`` when that entry or its
	        ``"memory"`` / ``"local_k"`` item is missing.
	    """
	    target = torch.device(device)
	    # CUDA kernels are launched asynchronously, so the host clock must only be
	    # read once the device has drained its queue; otherwise the measurement
	    # covers kernel launch rather than the forward pass itself.
	    needs_sync = target.type == "cuda"

	    model = model.to(target).eval()
	    plan = lengths if lengths is not None else build_context_smoke_plan()

	    rows: list[dict] = []
	    for length in plan:
	        n_tokens = int(length)
	        # Ids are drawn from [4, vocab_size); the lower bound presumably skips
	        # reserved special-token ids -- TODO confirm against the tokenizer.
	        input_ids = torch.randint(4, cfg.vocab_size, (1, n_tokens), device=target)

	        if needs_sync:
	            torch.cuda.synchronize(target)
	        t0 = time.perf_counter()
	        out = model(input_ids)
	        if needs_sync:
	            torch.cuda.synchronize(target)
	        # Floor the duration so the throughput division can never hit zero.
	        elapsed = max(time.perf_counter() - t0, 1e-9)

	        caches = out.get("kv_caches", [])
	        first_cache = caches[0] if caches else {}
	        memory = first_cache.get("memory")
	        local_k = first_cache.get("local_k")

	        rows.append(
	            {
	                "context_tokens": n_tokens,
	                "elapsed_s": elapsed,
	                "prefill_tokens_per_sec": float(n_tokens / elapsed),
	                "logits_finite": bool(torch.isfinite(out["logits"]).all().item()),
	                "memory_shape": list(memory.shape) if memory is not None else [],
	                "local_window_tokens": int(local_k.shape[1]) if local_k is not None else 0,
	            }
	        )
	    return rows


	def summarize_int4_export(model: nn.Module, cfg: OmegaConfig) -> dict:
	    """Export ``model`` with ``export_sparse_int4_model`` and summarize the result.

	    Args:
	        model: Module handed to ``export_sparse_int4_model``.
	        cfg: Configuration; ``cfg.quality_gate_delta`` is forwarded to the
	            exporter and echoed in the summary.

	    Returns:
	        A dict with the artifact's ``format`` and ``user_alias``, the number of
	        exported ``layers``, ``artifact_mb_estimate`` (tensor bytes divided by
	        1024 * 1024) and ``quality_gate_delta``.
	    """
	    artifact = export_sparse_int4_model(model, quality_gate_delta=cfg.quality_gate_delta)
	    layers = artifact["layers"]

	    total_bytes = 0
	    for layer in layers.values():
	        tensors = [layer.packed_weight, layer.metadata, layer.scales]
	        bias = getattr(layer, "bias", None)
	        if bias is not None:
	            tensors.append(bias)
	        # Size every tensor as numel * element_size so the estimate stays right
	        # whatever dtype the packed weights are stored in (for a 1-byte dtype
	        # this equals the plain element count).
	        total_bytes += sum(int(t.numel()) * t.element_size() for t in tensors)

	    return {
	        "format": artifact["format"],
	        "user_alias": artifact["user_alias"],
	        "layers": len(layers),
	        "artifact_mb_estimate": total_bytes / (1024 * 1024),
	        "quality_gate_delta": cfg.quality_gate_delta,
	    }

Xet Storage Details

Size: 2.6 kB
Xet hash: 3fd3053e9af50cae1932db310e7edab2fdc6401e56a0619c4f2fdead1eec2bc1

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.