Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff/bundle/evaluation/gpu_runtime_governor.py
| """RTX 3090 runtime governor for TinyMind 12B-class experiments. | |
| This module does not promise impossible perpetual full-load perfection. It | |
| turns the hardware boundary into explicit gates: use compressed 12B-class | |
| PureField/ReGenesis, keep full dense Adam blocked, reserve VRAM headroom, and | |
| surface telemetry needed for long-running watchdogs. | |
| """ | |
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import csv | |
| import io | |
| import json | |
| from pathlib import Path | |
| import subprocess | |
| from evaluation.model_sizing import build_12b_preflight | |
def query_nvidia_smi() -> dict:
    """Read basic telemetry for the first GPU listed by ``nvidia-smi``.

    Runs ``nvidia-smi`` with a CSV query for name, total/free memory, power
    limit and temperature, then parses the first output row only.

    Returns:
        A dict that always carries ``available`` and ``command``.

        * On success ``available`` is True and the dict also holds ``name``,
          ``memory_total_mb``, ``memory_free_mb``, ``power_limit_w`` and
          ``temperature_c``. ``power_limit_w`` is ``None`` when the tool
          prints a non-numeric value for it.
        * On an expected failure (tool missing, timeout, non-zero exit, empty
          output, short row, non-numeric memory/temperature) ``available`` is
          False and ``error`` holds a human-readable message. These failures
          are reported through the dict rather than raised.
    """
    command = [
        "nvidia-smi",
        "--query-gpu=name,memory.total,memory.free,power.limit,temperature.gpu",
        "--format=csv,noheader,nounits",
    ]

    def unavailable(reason: str) -> dict:
        # Every failure path shares one shape so callers only test "available".
        return {"available": False, "error": reason, "command": command}

    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=10, check=False)
    except (OSError, subprocess.SubprocessError) as exc:
        return unavailable(str(exc))
    if proc.returncode != 0:
        return unavailable(proc.stderr.strip())

    rows = list(csv.reader(io.StringIO(proc.stdout.strip())))
    if not rows:
        return unavailable("nvidia-smi returned no rows")

    cells = [cell.strip() for cell in rows[0][:5]]
    if len(cells) < 5:
        # A truncated row used to surface as an unpacking ValueError.
        return unavailable(
            f"nvidia-smi returned {len(cells)} of 5 expected fields: {rows[0]!r}"
        )
    name, mem_total, mem_free, power_limit, temp = cells

    # Memory and temperature feed the risk decision, so they must be numeric.
    try:
        memory_total_mb = float(mem_total)
        memory_free_mb = float(mem_free)
        temperature_c = float(temp)
    except ValueError as exc:
        return unavailable(f"nvidia-smi returned a non-numeric value: {exc}")

    # The power limit is informational only; nvidia-smi can print a placeholder
    # such as "[N/A]" for fields a device does not report, so tolerate that
    # instead of discarding otherwise usable telemetry.
    try:
        power_limit_w = float(power_limit)
    except ValueError:
        power_limit_w = None

    return {
        "available": True,
        "name": name,
        "memory_total_mb": memory_total_mb,
        "memory_free_mb": memory_free_mb,
        "power_limit_w": power_limit_w,
        "temperature_c": temperature_c,
        "command": command,
    }
| def _risk_level(gpu: dict, required_vram_gb: float, max_temp_c: float) -> str: | |
| if not gpu.get("available"): | |
| return "unknown" | |
| free_gb = float(gpu.get("memory_free_mb", 0.0)) / 1024.0 | |
| temp = float(gpu.get("temperature_c", 999.0)) | |
| if free_gb < required_vram_gb or temp >= max_temp_c: | |
| return "block" | |
| if free_gb < required_vram_gb + 2.0 or temp >= max_temp_c - 8.0: | |
| return "throttle" | |
| return "run" | |
def build_gpu_runtime_governor(
    out_dir: str | Path,
    preflight_path: str | Path | None = None,
    max_temp_c: float = 82.0,
    min_free_vram_gb: float | None = None,
    telemetry: dict | None = None,
) -> dict:
    """Build the RTX 3090 runtime-governor report and write it to ``out_dir``.

    Args:
        out_dir: Directory for the report files; created if missing.
        preflight_path: Optional path to a preflight JSON file. When it is
            not given or does not exist, a preflight is generated with
            ``build_12b_preflight(out_dir / "preflight")``.
        max_temp_c: Temperature ceiling passed to the risk check and recorded
            in the watchdog section.
        min_free_vram_gb: Free-VRAM floor in GB. When ``None`` the floor is
            ``max(6.0, int4_raw_weights_gb + 4.0)`` from the preflight.
        telemetry: Pre-collected GPU telemetry dict. When ``None``,
            ``query_nvidia_smi()`` is called. Any dict that is passed,
            including an empty one, is used as given.

    Returns:
        The report dict, including ``json_path`` and ``markdown_path`` of the
        two files written.

    Raises:
        KeyError: If the preflight lacks a key read here (``purefield_vram``
            and its entries, ``dense_class_params``,
            ``purefield_estimated_params``).
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    if preflight_path and Path(preflight_path).exists():
        preflight = json.loads(Path(preflight_path).read_text(encoding="utf-8"))
    else:
        preflight = build_12b_preflight(out / "preflight")

    # Compare against None rather than truthiness: an injected empty telemetry
    # dict must not silently trigger a probe of the real hardware.
    gpu = telemetry if telemetry is not None else query_nvidia_smi()

    purefield_vram = preflight["purefield_vram"]
    int4_vram_gb = float(purefield_vram["int4_raw_weights_gb"])
    bf16_vram_gb = float(purefield_vram["bf16_weights_gb"])
    if min_free_vram_gb is not None:
        required_vram_gb = float(min_free_vram_gb)
    else:
        required_vram_gb = max(6.0, int4_vram_gb + 4.0)

    risk = _risk_level(gpu, required_vram_gb, max_temp_c)
    # bool() keeps the decision a strict true/false in the JSON report even if
    # the preflight flag is some other truthy or falsy value.
    run_allowed = bool(
        risk in {"run", "throttle"}
        and purefield_vram["rtx_3090_24gb_int4_or_adapter_feasible"]
    )

    report = {
        "schema_version": "tinymind-rtx3090-runtime-governor-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "hardware_target": "RTX 3090 24GB",
        "model_target": "TinyMind PureField/ReGenesis 12B-class compressed runtime",
        "gpu": gpu,
        "preflight": {
            "path": preflight.get("report_path"),
            "dense_class_params": preflight["dense_class_params"],
            "purefield_estimated_params": preflight["purefield_estimated_params"],
            "bf16_weights_gb": bf16_vram_gb,
            "int4_raw_weights_gb": int4_vram_gb,
        },
        "runtime_profile": {
            "precision": "int4_4x8_pairwise_sparse",
            "training_mode": "adapter_or_bitsharp_delta_only",
            "full_dense_adam_training": "blocked",
            "microbatch": 1,
            "gradient_checkpointing": True,
            "cpu_nvme_offload": True,
            "max_persistent_tokens": 10_000_000,
            "exact_memory": "Evidence Ledger + ReGenesis-KV; no full 10M KV cache growth.",
        },
        "watchdog": {
            "max_temp_c": max_temp_c,
            "min_free_vram_gb": required_vram_gb,
            "poll_seconds": 20,
            "actions": [
                "pause_or_checkpoint_on_throttle",
                "stop_before_oom_when_free_vram_below_floor",
                "write_resume_state_every_eval_interval",
                "rerun_quality_gate_after_quantized_export",
            ],
        },
        "decision": {
            "risk_level": risk,
            "run_12b_compressed_on_3090_allowed": run_allowed,
            "dense_12b_full_train_on_3090_allowed": False,
            "reason": (
                "12B-class dense Adam is blocked; compressed INT4/adapter/offload path is allowed "
                "only while telemetry stays within watchdog limits."
            ),
        },
        "claim_gate": {
            "no_bottleneck_forever_claim_allowed": False,
            "stable_long_run_claim_requires_soak_test_hours": 24,
            "world_best_performance_claim_allowed": False,
        },
    }

    json_path = out / "rtx3090_runtime_governor_report.json"
    md_path = out / "rtx3090_runtime_governor_report.md"
    # Record the output locations before serialising so both files and the
    # returned dict agree.
    report["json_path"] = str(json_path)
    report["markdown_path"] = str(md_path)
    json_path.write_text(
        json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True),
        encoding="utf-8",
    )
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
| def _markdown(report: dict) -> str: | |
| gpu = report["gpu"] | |
| pre = report["preflight"] | |
| decision = report["decision"] | |
| lines = [ | |
| "# TinyMind RTX 3090 Runtime Governor", | |
| "", | |
| f"- GPU: {gpu.get('name', 'unavailable')}", | |
| f"- Free VRAM GB: {float(gpu.get('memory_free_mb', 0.0)) / 1024.0:.2f}" if gpu.get("available") else "- Free VRAM GB: unknown", | |
| f"- Temperature C: {gpu.get('temperature_c', 'unknown')}", | |
| f"- Dense class params: {pre['dense_class_params']:,}", | |
| f"- Compressed PureField estimate: {pre['purefield_estimated_params']:,}", | |
| f"- INT4 raw weights GB: {pre['int4_raw_weights_gb']:.4f}", | |
| f"- Risk level: {decision['risk_level']}", | |
| f"- Run compressed 12B on 3090 allowed: {decision['run_12b_compressed_on_3090_allowed']}", | |
| f"- Dense 12B full train allowed: {decision['dense_12b_full_train_on_3090_allowed']}", | |
| "- Forever no-bottleneck claim allowed: false", | |
| "", | |
| "## Runtime Profile", | |
| "", | |
| ] | |
| for key, value in report["runtime_profile"].items(): | |
| lines.append(f"- {key}: {value}") | |
| lines.extend(["", "## Watchdog", ""]) | |
| for key, value in report["watchdog"].items(): | |
| lines.append(f"- {key}: {value}") | |
| return "\n".join(lines) + "\n" | |
Xet Storage Details
- Size: 6.88 kB
- Xet hash: 666dee97d067738ced93e6d29c4ca9559f94fe0be88f3452e38a0994fb7927fd
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.