bbkdevops's picture
download
raw
3.39 kB
"""Long-memory evidence helpers for TinyMind PureField."""
from __future__ import annotations
import json
import math
from pathlib import Path
from typing import Iterable, Mapping
def _shape_numel(shape: list[int]) -> int:
total = 1
for value in shape:
total *= int(value)
return total
def compute_memory_efficiency(rows: Iterable[Mapping], dtype_bytes: int = 4) -> dict:
    """Summarise context smoke rows into a memory-efficiency report.

    Every row is copied into a plain ``dict`` and the copies are ordered by
    ``int(row["context_tokens"])``. The first row after sorting (smallest
    context) supplies the reference ``memory_shape``.

    Args:
        rows: Mappings that must contain ``context_tokens`` and may contain
            ``memory_shape`` (default ``[]``), ``logits_finite`` (default
            ``False``) and ``local_window_tokens`` (default ``0``).
        dtype_bytes: Bytes per state element; multiplied by the element
            count of the reference shape to obtain ``state_bytes``.

    Returns:
        A dict with the fixed claim text, ``constant_memory`` (True only
        when every row has the reference shape *and* a truthy
        ``logits_finite``), the sorted context lengths, state size figures,
        the largest local window, and a ratio of the largest context to the
        largest local window (window clamped to at least 1).

    Raises:
        ValueError: If *rows* is empty.
        KeyError: If a row has no ``context_tokens`` entry.
    """
    ordered = [dict(row) for row in rows]
    ordered.sort(key=lambda entry: int(entry["context_tokens"]))
    if not ordered:
        raise ValueError("expected at least one context smoke row")

    shapes = [list(entry.get("memory_shape", [])) for entry in ordered]
    reference_shape = shapes[0]
    shapes_match = all(shape == reference_shape for shape in shapes)
    finite_all = all(bool(entry.get("logits_finite", False)) for entry in ordered)

    element_size = int(dtype_bytes)
    state_bytes = _shape_numel(reference_shape) * element_size

    # Rows are sorted ascending, so the ends of the list are the extremes.
    token_counts = [int(entry["context_tokens"]) for entry in ordered]
    min_context = token_counts[0]
    max_context = token_counts[-1]

    local_window_max = max(
        int(entry.get("local_window_tokens", 0)) for entry in ordered
    )

    return {
        "claim": "TinyMind PureField recurrent memory state remains constant over measured context lengths.",
        "world_best_claim": False,
        "constant_memory": shapes_match and finite_all,
        "contexts": token_counts,
        "min_context_tokens": min_context,
        "max_context_tokens": max_context,
        "memory_shape": reference_shape,
        "state_bytes": state_bytes,
        "dtype_bytes": element_size,
        "local_window_max": local_window_max,
        # Denominators are clamped to 1 so zero-valued inputs cannot divide by zero.
        "state_bytes_per_token_at_min_context": state_bytes / max(min_context, 1),
        "state_bytes_per_token_at_max_context": state_bytes / max(max_context, 1),
        "compression_ratio_vs_full_kv_proxy": max_context / max(local_window_max, 1),
        "finite_all": finite_all,
    }
def _markdown(report: Mapping) -> str:
lines = [
"# TinyMind Long-Memory Evidence",
"",
f"- Claim: {report['claim']}",
"- World-best claim: not asserted",
f"- Constant recurrent memory: {report['constant_memory']}",
f"- Measured contexts: {report['contexts']}",
f"- Max context tokens: {report['max_context_tokens']}",
f"- Memory shape: {report['memory_shape']}",
f"- State bytes: {report['state_bytes']}",
f"- State bytes/token at max context: {report['state_bytes_per_token_at_max_context']:.6f}",
f"- Local window max: {report['local_window_max']}",
f"- Full-KV proxy compression ratio: {report['compression_ratio_vs_full_kv_proxy']:.2f}x",
"",
"This report proves a measured local property only. External rank-1 comparison is still required before any best-in-world claim.",
"",
]
return "\n".join(lines)
def write_long_memory_report(
    rows: Iterable[Mapping],
    json_path: str | Path,
    dtype_bytes: int = 4,
) -> dict:
    """Compute the memory-efficiency report and write it as JSON and Markdown.

    The JSON goes to *json_path*; the Markdown companion goes to the same
    path with its suffix replaced by ``.md``. Missing parent directories are
    created.

    Args:
        rows: Context smoke rows, forwarded to ``compute_memory_efficiency``.
        json_path: Destination of the JSON report.
        dtype_bytes: Bytes per state element, forwarded unchanged.

    Returns:
        The report dict extended with ``json_path`` and ``markdown_path``
        (both as strings).

    Raises:
        ValueError: If *json_path* already ends in ``.md`` (the Markdown
            file would overwrite the JSON), or if the report cannot be
            computed from *rows*.
    """
    out = Path(json_path)
    markdown_path = out.with_suffix(".md")
    if markdown_path == out:
        # Both writes would target one file, silently destroying the JSON.
        raise ValueError(
            f"json_path must not use the '.md' suffix reserved for the Markdown report: {out}"
        )

    # Build the report before touching the filesystem so invalid rows do not
    # leave an empty output directory behind.
    report = compute_memory_efficiency(rows, dtype_bytes=dtype_bytes)

    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(
        json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True),
        encoding="utf-8",
    )
    markdown_path.write_text(_markdown(report), encoding="utf-8")
    return {"json_path": str(out), "markdown_path": str(markdown_path), **report}

Xet Storage Details

Size:
3.39 kB
·
Xet hash:
e54a6973cf534cc740da1cb895a01dbdedf8c04e5a44962584cf60c678b3ceeb

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.