bbkdevops's picture
download
raw
6.88 kB
"""RTX 3090 runtime governor for TinyMind 12B-class experiments.
This module does not promise impossible perpetual full-load perfection. It
turns the hardware boundary into explicit gates: use compressed 12B-class
PureField/ReGenesis, keep full dense Adam blocked, reserve VRAM headroom, and
surface telemetry needed for long-running watchdogs.
"""
from __future__ import annotations
from datetime import datetime, timezone
import csv
import io
import json
from pathlib import Path
import subprocess
from evaluation.model_sizing import build_12b_preflight
def query_nvidia_smi() -> dict:
    """Query ``nvidia-smi`` once and return telemetry for the first GPU row.

    Returns:
        On success, a dict with ``available=True`` plus ``name``,
        ``memory_total_mb``, ``memory_free_mb``, ``power_limit_w``,
        ``temperature_c`` and the ``command`` that was run.
        ``power_limit_w`` is ``None`` when nvidia-smi reports a non-numeric
        placeholder for it.

        On any failure (binary missing, timeout, non-zero exit, empty or
        malformed output, non-numeric memory/temperature), a dict with
        ``available=False``, an ``error`` string and the ``command``.
        This function reports problems through the returned dict rather than
        by raising.
    """
    command = [
        "nvidia-smi",
        "--query-gpu=name,memory.total,memory.free,power.limit,temperature.gpu",
        "--format=csv,noheader,nounits",
    ]

    def unavailable(message: str) -> dict:
        return {"available": False, "error": message, "command": command}

    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=10, check=False)
    except (OSError, subprocess.SubprocessError) as exc:
        return unavailable(str(exc))
    if proc.returncode != 0:
        return unavailable(proc.stderr.strip())

    rows = list(csv.reader(io.StringIO(proc.stdout.strip())))
    if not rows:
        return unavailable("nvidia-smi returned no rows")

    # Only the first row (first GPU listed) is used.
    cells = [cell.strip() for cell in rows[0][:5]]
    if len(cells) < 5:
        # A short row would otherwise blow up the 5-way unpack below.
        return unavailable(f"nvidia-smi returned {len(cells)} fields, expected 5: {rows[0]!r}")
    name, mem_total, mem_free, power_limit, temp = cells

    # Memory and temperature must be numeric; a placeholder such as "[N/A]"
    # is reported as a failure dict instead of an uncaught ValueError.
    try:
        memory_total_mb = float(mem_total)
        memory_free_mb = float(mem_free)
        temperature_c = float(temp)
    except ValueError as exc:
        return unavailable(f"nvidia-smi returned a non-numeric value: {exc}")

    # A missing power limit alone does not make the reading unusable.
    try:
        power_limit_w = float(power_limit)
    except ValueError:
        power_limit_w = None

    return {
        "available": True,
        "name": name,
        "memory_total_mb": memory_total_mb,
        "memory_free_mb": memory_free_mb,
        "power_limit_w": power_limit_w,
        "temperature_c": temperature_c,
        "command": command,
    }
def _risk_level(gpu: dict, required_vram_gb: float, max_temp_c: float) -> str:
    """Classify GPU telemetry as ``"unknown"``, ``"block"``, ``"throttle"`` or ``"run"``.

    Args:
        gpu: Telemetry dict; read keys are ``available``, ``memory_free_mb``
            and ``temperature_c``.
        required_vram_gb: Free-VRAM floor in GB.
        max_temp_c: Temperature ceiling in degrees Celsius.

    Returns:
        ``"unknown"`` when the GPU is not marked available; otherwise the
        first tier whose limit is violated, or ``"run"`` if none is.
    """
    if not gpu.get("available"):
        return "unknown"

    # Missing readings default to the worst case (no free VRAM, 999 C).
    free_vram_gb = float(gpu.get("memory_free_mb", 0.0)) / 1024.0
    temperature_c = float(gpu.get("temperature_c", 999.0))

    # Tiers are checked strictest first: (VRAM floor, temperature ceiling, verdict).
    tiers = (
        (required_vram_gb, max_temp_c, "block"),
        (required_vram_gb + 2.0, max_temp_c - 8.0, "throttle"),
    )
    for vram_floor_gb, temp_ceiling_c, verdict in tiers:
        if free_vram_gb < vram_floor_gb or temperature_c >= temp_ceiling_c:
            return verdict
    return "run"
def build_gpu_runtime_governor(
    out_dir: str | Path,
    preflight_path: str | Path | None = None,
    max_temp_c: float = 82.0,
    min_free_vram_gb: float | None = None,
    telemetry: dict | None = None,
) -> dict:
    """Build the runtime-governor report and write it as JSON and Markdown.

    Args:
        out_dir: Directory for the report files; created if missing.
        preflight_path: Optional path to an existing preflight JSON. When it
            is not given or does not exist, a preflight is built under
            ``out_dir / "preflight"`` via ``build_12b_preflight``.
        max_temp_c: Temperature ceiling used for the risk decision and
            recorded in the watchdog section.
        min_free_vram_gb: Explicit free-VRAM floor in GB. When ``None`` the
            floor is ``max(6.0, int4_raw_weights_gb + 4.0)``.
        telemetry: Pre-collected GPU telemetry dict. When ``None``,
            ``query_nvidia_smi()`` is called.

    Returns:
        The report dict, including ``json_path`` and ``markdown_path`` of the
        files written.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    if preflight_path and Path(preflight_path).exists():
        preflight = json.loads(Path(preflight_path).read_text(encoding="utf-8"))
    else:
        preflight = build_12b_preflight(out / "preflight")

    # Compare against None so an explicitly supplied (even empty) telemetry
    # dict is honoured instead of silently falling back to the real GPU.
    gpu = telemetry if telemetry is not None else query_nvidia_smi()

    purefield_vram = preflight["purefield_vram"]
    int4_vram_gb = float(purefield_vram["int4_raw_weights_gb"])
    bf16_vram_gb = float(purefield_vram["bf16_weights_gb"])
    if min_free_vram_gb is not None:
        required_vram_gb = float(min_free_vram_gb)
    else:
        required_vram_gb = max(6.0, int4_vram_gb + 4.0)

    risk = _risk_level(gpu, required_vram_gb, max_temp_c)
    # bool() keeps the decision field a real boolean whatever type the
    # preflight stored for the feasibility flag.
    run_allowed = risk in {"run", "throttle"} and bool(
        purefield_vram["rtx_3090_24gb_int4_or_adapter_feasible"]
    )

    report = {
        "schema_version": "tinymind-rtx3090-runtime-governor-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "hardware_target": "RTX 3090 24GB",
        "model_target": "TinyMind PureField/ReGenesis 12B-class compressed runtime",
        "gpu": gpu,
        "preflight": {
            "path": preflight.get("report_path"),
            "dense_class_params": preflight["dense_class_params"],
            "purefield_estimated_params": preflight["purefield_estimated_params"],
            "bf16_weights_gb": bf16_vram_gb,
            "int4_raw_weights_gb": int4_vram_gb,
        },
        "runtime_profile": {
            "precision": "int4_4x8_pairwise_sparse",
            "training_mode": "adapter_or_bitsharp_delta_only",
            "full_dense_adam_training": "blocked",
            "microbatch": 1,
            "gradient_checkpointing": True,
            "cpu_nvme_offload": True,
            "max_persistent_tokens": 10_000_000,
            "exact_memory": "Evidence Ledger + ReGenesis-KV; no full 10M KV cache growth.",
        },
        "watchdog": {
            "max_temp_c": max_temp_c,
            "min_free_vram_gb": required_vram_gb,
            "poll_seconds": 20,
            "actions": [
                "pause_or_checkpoint_on_throttle",
                "stop_before_oom_when_free_vram_below_floor",
                "write_resume_state_every_eval_interval",
                "rerun_quality_gate_after_quantized_export",
            ],
        },
        "decision": {
            "risk_level": risk,
            "run_12b_compressed_on_3090_allowed": run_allowed,
            "dense_12b_full_train_on_3090_allowed": False,
            "reason": (
                "12B-class dense Adam is blocked; compressed INT4/adapter/offload path is allowed "
                "only while telemetry stays within watchdog limits."
            ),
        },
        "claim_gate": {
            "no_bottleneck_forever_claim_allowed": False,
            "stable_long_run_claim_requires_soak_test_hours": 24,
            "world_best_performance_claim_allowed": False,
        },
    }

    json_path = out / "rtx3090_runtime_governor_report.json"
    md_path = out / "rtx3090_runtime_governor_report.md"
    # Paths are recorded before serialising so both output files carry them.
    report["json_path"] = str(json_path)
    report["markdown_path"] = str(md_path)
    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
def _markdown(report: dict) -> str:
    """Render the governor report dict as a Markdown summary.

    Reads the ``gpu``, ``preflight``, ``decision``, ``runtime_profile`` and
    ``watchdog`` sections of ``report`` and returns the document text,
    terminated by a single trailing newline.
    """
    gpu_info = report["gpu"]
    preflight = report["preflight"]
    verdict = report["decision"]

    # Free VRAM is only meaningful when the GPU telemetry is marked available.
    if gpu_info.get("available"):
        vram_line = f"- Free VRAM GB: {float(gpu_info.get('memory_free_mb', 0.0)) / 1024.0:.2f}"
    else:
        vram_line = "- Free VRAM GB: unknown"

    doc = [
        "# TinyMind RTX 3090 Runtime Governor",
        "",
        f"- GPU: {gpu_info.get('name', 'unavailable')}",
        vram_line,
        f"- Temperature C: {gpu_info.get('temperature_c', 'unknown')}",
        f"- Dense class params: {preflight['dense_class_params']:,}",
        f"- Compressed PureField estimate: {preflight['purefield_estimated_params']:,}",
        f"- INT4 raw weights GB: {preflight['int4_raw_weights_gb']:.4f}",
        f"- Risk level: {verdict['risk_level']}",
        f"- Run compressed 12B on 3090 allowed: {verdict['run_12b_compressed_on_3090_allowed']}",
        f"- Dense 12B full train allowed: {verdict['dense_12b_full_train_on_3090_allowed']}",
        "- Forever no-bottleneck claim allowed: false",
    ]

    # Each trailing section is a heading framed by blank lines, followed by
    # one "- key: value" bullet per entry.
    sections = (
        ("## Runtime Profile", "runtime_profile"),
        ("## Watchdog", "watchdog"),
    )
    for heading, section_key in sections:
        doc.extend(["", heading, ""])
        doc.extend(f"- {key}: {value}" for key, value in report[section_key].items())

    return "\n".join(doc) + "\n"

Xet Storage Details

Size:
6.88 kB
·
Xet hash:
666dee97d067738ced93e6d29c4ca9559f94fe0be88f3452e38a0994fb7927fd

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.