Buckets:

bbkdevops
/

unicosys-hypergraph-bucket

Files

xet

bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /evaluation /gpu_runtime_governor.py

bbkdevops

about 1 month ago

download

raw

6.88 kB

	"""RTX 3090 runtime governor for TinyMind 12B-class experiments.

	This module does not promise impossible perpetual full-load perfection. It
	turns the hardware boundary into explicit gates: use compressed 12B-class
	PureField/ReGenesis, keep full dense Adam blocked, reserve VRAM headroom, and
	surface telemetry needed for long-running watchdogs.
	"""

	from __future__ import annotations

	from datetime import datetime, timezone
	import csv
	import io
	import json
	from pathlib import Path
	import subprocess

	from evaluation.model_sizing import build_12b_preflight


	def query_nvidia_smi() -> dict:
	    """Read name, memory, power limit and temperature of one GPU via ``nvidia-smi``.

	    Only the first row of the ``nvidia-smi`` output is used, so on a
	    multi-GPU host the telemetry describes the first GPU listed.

	    Returns:
	        A dict that always contains ``available`` (bool) and ``command``
	        (the argv that was executed). When ``available`` is true it also
	        contains ``name``, ``memory_total_mb``, ``memory_free_mb``,
	        ``power_limit_w`` and ``temperature_c`` (all numeric fields as
	        floats). When ``available`` is false it contains ``error`` instead.

	    A missing binary, a timeout, a non-zero exit status, empty output, a
	    short row or a non-numeric field are all reported through the
	    ``available: False`` result rather than raised.
	    """
	    command = [
	        "nvidia-smi",
	        "--query-gpu=name,memory.total,memory.free,power.limit,temperature.gpu",
	        "--format=csv,noheader,nounits",
	    ]

	    def unavailable(message: str) -> dict:
	        return {"available": False, "error": message, "command": command}

	    try:
	        proc = subprocess.run(command, capture_output=True, text=True, timeout=10, check=False)
	    except (OSError, subprocess.SubprocessError) as exc:
	        return unavailable(str(exc))
	    if proc.returncode != 0:
	        # Keep stderr as the error text, but never report an empty message.
	        return unavailable(proc.stderr.strip() or f"nvidia-smi exited with status {proc.returncode}")

	    rows = list(csv.reader(io.StringIO(proc.stdout.strip())))
	    if not rows:
	        return unavailable("nvidia-smi returned no rows")

	    cells = [cell.strip() for cell in rows[0][:5]]
	    if len(cells) < 5:
	        return unavailable(f"nvidia-smi returned {len(cells)} fields, expected 5: {rows[0]!r}")
	    name, mem_total, mem_free, power_limit, temp = cells

	    try:
	        memory_total_mb = float(mem_total)
	        memory_free_mb = float(mem_free)
	        power_limit_w = float(power_limit)
	        temperature_c = float(temp)
	    except ValueError:
	        # A field that is not a number (presumably a placeholder such as
	        # "[N/A]" for an unsupported query) must not crash the caller.
	        return unavailable(f"nvidia-smi returned a non-numeric field: {rows[0]!r}")

	    return {
	        "available": True,
	        "name": name,
	        "memory_total_mb": memory_total_mb,
	        "memory_free_mb": memory_free_mb,
	        "power_limit_w": power_limit_w,
	        "temperature_c": temperature_c,
	        "command": command,
	    }


	def _risk_level(
	    gpu: dict,
	    required_vram_gb: float,
	    max_temp_c: float,
	    vram_margin_gb: float = 2.0,
	    temp_margin_c: float = 8.0,
	) -> str:
	    """Classify GPU telemetry as ``"unknown"``, ``"block"``, ``"throttle"`` or ``"run"``.

	    Args:
	        gpu: Telemetry dict; reads ``available``, ``memory_free_mb`` and
	            ``temperature_c``.
	        required_vram_gb: Free-VRAM floor in GB.
	        max_temp_c: Temperature ceiling in degrees C.
	        vram_margin_gb: Extra free VRAM above the floor needed to avoid
	            throttling.
	        temp_margin_c: Degrees below the ceiling at which throttling starts.

	    Returns:
	        ``"unknown"`` when ``gpu["available"]`` is missing or falsy;
	        ``"block"`` when free VRAM is below the floor or the temperature is
	        at or above the ceiling; ``"throttle"`` when either value is inside
	        its margin; otherwise ``"run"``.
	    """
	    if not gpu.get("available"):
	        return "unknown"

	    # Missing readings default to the unsafe side: 0 MB free and 999 C both
	    # end in "block".
	    free_gb = float(gpu.get("memory_free_mb", 0.0)) / 1024.0
	    temp = float(gpu.get("temperature_c", 999.0))

	    if free_gb < required_vram_gb or temp >= max_temp_c:
	        return "block"
	    if free_gb < required_vram_gb + vram_margin_gb or temp >= max_temp_c - temp_margin_c:
	        return "throttle"
	    return "run"


	def build_gpu_runtime_governor(
	    out_dir: str | Path,
	    preflight_path: str | Path | None = None,
	    max_temp_c: float = 82.0,
	    min_free_vram_gb: float | None = None,
	    telemetry: dict | None = None,
	) -> dict:
	    """Build the RTX 3090 runtime-governor report and write it as JSON and Markdown.

	    Args:
	        out_dir: Directory for the report files; created if missing.
	        preflight_path: Optional path to a preflight JSON file. When it is
	            not given or does not exist, a preflight is built with
	            ``build_12b_preflight(out_dir / "preflight")``.
	        max_temp_c: Temperature ceiling passed to the risk check and
	            recorded in the watchdog section.
	        min_free_vram_gb: Free-VRAM floor in GB. When ``None`` the floor is
	            the INT4 raw weight size plus 4 GB, but never below 6 GB.
	        telemetry: Pre-collected GPU telemetry dict. When ``None`` the GPU
	            is queried with ``query_nvidia_smi()``. An explicitly passed
	            dict is used as-is, even when empty.

	    Returns:
	        The report dict, including ``json_path`` and ``markdown_path`` of
	        the two files written to ``out_dir``.

	    The preflight mapping must provide ``dense_class_params``,
	    ``purefield_estimated_params`` and, under ``purefield_vram``, the keys
	    ``int4_raw_weights_gb``, ``bf16_weights_gb`` and
	    ``rtx_3090_24gb_int4_or_adapter_feasible``.
	    """
	    out = Path(out_dir)
	    out.mkdir(parents=True, exist_ok=True)

	    preflight_file = Path(preflight_path) if preflight_path else None
	    if preflight_file is not None and preflight_file.exists():
	        preflight = json.loads(preflight_file.read_text(encoding="utf-8"))
	    else:
	        preflight = build_12b_preflight(out / "preflight")

	    # ``is not None`` so that an injected (possibly empty) telemetry dict is
	    # never silently replaced by a live nvidia-smi query.
	    gpu = telemetry if telemetry is not None else query_nvidia_smi()

	    vram = preflight["purefield_vram"]
	    int4_vram_gb = float(vram["int4_raw_weights_gb"])
	    bf16_vram_gb = float(vram["bf16_weights_gb"])
	    if min_free_vram_gb is not None:
	        required_vram_gb = float(min_free_vram_gb)
	    else:
	        required_vram_gb = max(6.0, int4_vram_gb + 4.0)
	    risk = _risk_level(gpu, required_vram_gb, max_temp_c)

	    # bool() keeps the decision field a real boolean even when the preflight
	    # stores the feasibility flag as some other truthy/falsy value.
	    run_allowed = risk in {"run", "throttle"} and bool(vram["rtx_3090_24gb_int4_or_adapter_feasible"])

	    report = {
	        "schema_version": "tinymind-rtx3090-runtime-governor-v1",
	        "created_at": datetime.now(timezone.utc).isoformat(),
	        "hardware_target": "RTX 3090 24GB",
	        "model_target": "TinyMind PureField/ReGenesis 12B-class compressed runtime",
	        "gpu": gpu,
	        "preflight": {
	            "path": preflight.get("report_path"),
	            "dense_class_params": preflight["dense_class_params"],
	            "purefield_estimated_params": preflight["purefield_estimated_params"],
	            "bf16_weights_gb": bf16_vram_gb,
	            "int4_raw_weights_gb": int4_vram_gb,
	        },
	        "runtime_profile": {
	            "precision": "int4_4x8_pairwise_sparse",
	            "training_mode": "adapter_or_bitsharp_delta_only",
	            "full_dense_adam_training": "blocked",
	            "microbatch": 1,
	            "gradient_checkpointing": True,
	            "cpu_nvme_offload": True,
	            "max_persistent_tokens": 10_000_000,
	            "exact_memory": "Evidence Ledger + ReGenesis-KV; no full 10M KV cache growth.",
	        },
	        "watchdog": {
	            "max_temp_c": max_temp_c,
	            "min_free_vram_gb": required_vram_gb,
	            "poll_seconds": 20,
	            "actions": [
	                "pause_or_checkpoint_on_throttle",
	                "stop_before_oom_when_free_vram_below_floor",
	                "write_resume_state_every_eval_interval",
	                "rerun_quality_gate_after_quantized_export",
	            ],
	        },
	        "decision": {
	            "risk_level": risk,
	            "run_12b_compressed_on_3090_allowed": run_allowed,
	            "dense_12b_full_train_on_3090_allowed": False,
	            "reason": (
	                "12B-class dense Adam is blocked; compressed INT4/adapter/offload path is allowed "
	                "only while telemetry stays within watchdog limits."
	            ),
	        },
	        "claim_gate": {
	            "no_bottleneck_forever_claim_allowed": False,
	            "stable_long_run_claim_requires_soak_test_hours": 24,
	            "world_best_performance_claim_allowed": False,
	        },
	    }

	    json_path = out / "rtx3090_runtime_governor_report.json"
	    md_path = out / "rtx3090_runtime_governor_report.md"
	    # The paths are added before serialising so both files and the returned
	    # dict agree on where the report lives.
	    report["json_path"] = str(json_path)
	    report["markdown_path"] = str(md_path)
	    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
	    md_path.write_text(_markdown(report), encoding="utf-8")
	    return report


	def _markdown(report: dict) -> str:
	    """Render a governor report dict as a Markdown summary.

	    Args:
	        report: Report mapping; reads ``gpu``, ``preflight``, ``decision``,
	            ``runtime_profile``, ``watchdog`` and, when present,
	            ``claim_gate``.

	    Returns:
	        Markdown text ending in a newline: a header, one bullet per summary
	        field, then one bullet per entry of ``runtime_profile`` and
	        ``watchdog``.
	    """
	    gpu = report["gpu"]
	    pre = report["preflight"]
	    decision = report["decision"]

	    if gpu.get("available"):
	        free_vram_line = f"- Free VRAM GB: {float(gpu.get('memory_free_mb', 0.0)) / 1024.0:.2f}"
	    else:
	        free_vram_line = "- Free VRAM GB: unknown"

	    # Take the claim from the report so the Markdown cannot disagree with
	    # the JSON; a report without a claim gate is rendered as "false".
	    claim_gate = report.get("claim_gate") or {}
	    forever_claim = bool(claim_gate.get("no_bottleneck_forever_claim_allowed", False))

	    lines = [
	        "# TinyMind RTX 3090 Runtime Governor",
	        "",
	        f"- GPU: {gpu.get('name', 'unavailable')}",
	        free_vram_line,
	        f"- Temperature C: {gpu.get('temperature_c', 'unknown')}",
	        f"- Dense class params: {pre['dense_class_params']:,}",
	        f"- Compressed PureField estimate: {pre['purefield_estimated_params']:,}",
	        f"- INT4 raw weights GB: {pre['int4_raw_weights_gb']:.4f}",
	        f"- Risk level: {decision['risk_level']}",
	        f"- Run compressed 12B on 3090 allowed: {decision['run_12b_compressed_on_3090_allowed']}",
	        f"- Dense 12B full train allowed: {decision['dense_12b_full_train_on_3090_allowed']}",
	        f"- Forever no-bottleneck claim allowed: {str(forever_claim).lower()}",
	        "",
	        "## Runtime Profile",
	        "",
	    ]
	    lines.extend(f"- {key}: {value}" for key, value in report["runtime_profile"].items())
	    lines.extend(["", "## Watchdog", ""])
	    lines.extend(f"- {key}: {value}" for key, value in report["watchdog"].items())
	    return "\n".join(lines) + "\n"

Xet Storage Details

Size:: 6.88 kB
Xet hash:: 666dee97d067738ced93e6d29c4ca9559f94fe0be88f3452e38a0994fb7927fd

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.