Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /train /evo_continue.py
| from __future__ import annotations | |
| from dataclasses import dataclass | |
| from datetime import datetime, timezone | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
# Directory one level above the folder that contains this module.
ROOT = Path(__file__).resolve().parents[1]


@dataclass
class EvoContinuePlan:
    """Inputs for one queued follow-up training stage.

    The first four fields are required; the rest default to locations under
    ``ROOT``. Instances are consumed by ``write_evo_continue_plan``, which
    renders them into a PowerShell launcher and a JSON plan report.
    """

    # PID the generated script polls until it exits; values <= 0 skip the wait.
    wait_pid: int
    # Adapter directory to resume from; must contain ``adapter_config.json``.
    base_adapter: Path
    # Training dataset path; the launcher refuses to start if it is missing.
    dataset: Path
    # Passed to the training script as ``--output``.
    output_adapter: Path
    # Forwarded as ``--max-steps``.
    max_steps: int = 400
    # Forwarded as ``--max-seq-length``.
    max_seq_length: int = 2048
    # Parent directory for the timestamped run directory (log + run manifest).
    run_root: Path = ROOT / "reports" / "qlora_runs"
    # Interpreter used to launch the training script.
    python_exe: Path = ROOT / ".venv312_cuda" / "Scripts" / "python.exe"
    # Training entry point executed by the launcher.
    train_script: Path = ROOT / "model" / "tinymind-12b" / "train_12b_qlora.py"
    # Optional manifest summarised into the plan report when the file exists.
    data_manifest: Path = ROOT / "reports" / "dataset_quality_governor" / "dataset_quality_governor_manifest.json"
def _read_json(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 encoded JSON document stored at ``path``.

    Raises whatever ``open`` or ``json`` raise for a missing or malformed file.
    """
    with path.open(encoding="utf-8") as handle:
        return json.load(handle)
def _purity_summary(manifest: dict[str, Any]) -> dict[str, Any]:
    """Condense a dataset-quality manifest into the figures kept in the plan.

    Missing keys and JSON ``null`` values are both treated as zero / empty, so
    an empty or partially filled manifest yields an all-zero summary instead
    of raising.

    Args:
        manifest: Parsed manifest; may be empty.

    Returns:
        A dict with ``input_records`` (kept + rejected), ``kept_records``,
        ``rejected_records``, ``rejection_rate`` (rounded to 6 decimals, 0.0
        when there are no records), ``hard_noise_rejected`` and the
        ``domain_counts`` / ``reject_counts`` mappings from the manifest.
    """
    kept = int(manifest.get("kept_records") or 0)
    rejected = int(manifest.get("rejected_records") or 0)
    total = kept + rejected
    # `or {}` also covers an explicit null, which `.get(key, {})` would pass through.
    reject_counts = manifest.get("reject_counts") or {}
    domain_counts = manifest.get("domain_counts") or {}
    # Rejection reasons that are summed into the "hard noise" figure.
    hard_noise_reasons = ("secret_like_token", "encoded_blob", "repetition_loop", "symbol_noise")
    hard_noise = sum(int(reject_counts.get(reason) or 0) for reason in hard_noise_reasons)
    return {
        "input_records": total,
        "kept_records": kept,
        "rejected_records": rejected,
        # Guard the division: an empty manifest has no records at all.
        "rejection_rate": round(rejected / total, 6) if total else 0.0,
        "hard_noise_rejected": hard_noise,
        "domain_counts": domain_counts,
        "reject_counts": reject_counts,
    }
def _ps_escape(value: str | Path) -> str:
    """Escape ``value`` for use inside a PowerShell single-quoted string.

    PowerShell accepts the typographic quotes U+2018, U+2019, U+201A and
    U+201B as single-quote delimiters in addition to the ASCII apostrophe, so
    each of them is doubled; escaping only ``'`` would let a path containing a
    curly quote terminate the literal early.

    Args:
        value: Text or path to embed; converted with ``str()``.

    Returns:
        The text with every single-quote character doubled. The surrounding
        quotes are not added.
    """
    text = str(value)
    for quote in ("'", "\u2018", "\u2019", "\u201a", "\u201b"):
        text = text.replace(quote, quote * 2)
    return text
def write_evo_continue_plan(plan: EvoContinuePlan, out_dir: str | Path) -> dict[str, Any]:
    """Write a queued follow-up training launcher and its JSON plan report.

    Two files are created in ``out_dir`` (the directory is created if needed):

    * ``run_evo_after_current.ps1`` - a PowerShell script that waits for
      ``plan.wait_pid`` to exit, checks that the base adapter and dataset
      exist, runs the training script with all output redirected to a log,
      and keeps a run manifest updated with the current status.
    * ``evo_continue_plan.json`` - the report that is also returned.

    This function only writes files; it does not start the script.

    Args:
        plan: Paths and hyper-parameters of the stage to queue.
        out_dir: Directory that receives the script and the plan report.

    Returns:
        The plan report, with ``plan_path`` added after the file is written.

    Raises:
        TypeError, ValueError: If ``wait_pid``, ``max_steps`` or
            ``max_seq_length`` cannot be converted to ``int``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)
    data_manifest = _read_json(plan.data_manifest) if plan.data_manifest.exists() else {}

    # These values are interpolated into the script unquoted, so make sure
    # they really are integers before any file is written.
    wait_pid = int(plan.wait_pid)
    max_steps = int(plan.max_steps)
    max_seq_length = int(plan.max_seq_length)

    # One clock reading, so the run-directory stamp and `created_at` agree.
    now = datetime.now(timezone.utc)
    stamp = now.strftime("%Y%m%d_%H%M%S")
    run_dir = plan.run_root / f"tinymind_12b_evo_continue_{stamp}"
    log_path = run_dir / "train.log"
    run_manifest = run_dir / "evo_run_manifest.json"
    script_path = out / "run_evo_after_current.ps1"
    plan_path = out / "evo_continue_plan.json"

    # Literal PowerShell braces are doubled because this is an f-string.
    script = f"""param()
$ErrorActionPreference = "Stop"
$waitPid = {wait_pid}
$baseAdapter = '{_ps_escape(plan.base_adapter)}'
$dataset = '{_ps_escape(plan.dataset)}'
$output = '{_ps_escape(plan.output_adapter)}'
$runDir = '{_ps_escape(run_dir)}'
$log = '{_ps_escape(log_path)}'
$manifestPath = '{_ps_escape(run_manifest)}'
New-Item -ItemType Directory -Force -Path $runDir | Out-Null
$manifest = [ordered]@{{
    schema_version = "tinymind-evo-continue-run-v1"
    created_at = (Get-Date).ToString("o")
    wait_pid = $waitPid
    base_adapter = $baseAdapter
    dataset = $dataset
    output_adapter = $output
    max_steps = {max_steps}
    max_seq_length = {max_seq_length}
    log = $log
    status = "waiting_for_prior_evo_stage"
    claim_gate = [ordered]@{{
        training_started = $false
        training_completed = $false
        external_eval_completed = $false
        world_best_claim_allowed = $false
    }}
}}
$manifest | ConvertTo-Json -Depth 8 | Set-Content -Path $manifestPath -Encoding UTF8
while ($waitPid -gt 0 -and (Get-Process -Id $waitPid -ErrorAction SilentlyContinue)) {{
    Start-Sleep -Seconds 60
}}
if (-not (Test-Path -LiteralPath (Join-Path $baseAdapter "adapter_config.json"))) {{
    $manifest.status = "blocked_missing_base_adapter"
    $manifest.finished_at = (Get-Date).ToString("o")
    $manifest | ConvertTo-Json -Depth 8 | Set-Content -Path $manifestPath -Encoding UTF8
    throw "Base adapter is not ready: $baseAdapter"
}}
if (-not (Test-Path -LiteralPath $dataset)) {{
    $manifest.status = "blocked_missing_dataset"
    $manifest.finished_at = (Get-Date).ToString("o")
    $manifest | ConvertTo-Json -Depth 8 | Set-Content -Path $manifestPath -Encoding UTF8
    throw "Dataset is not ready: $dataset"
}}
$manifest.status = "training_started"
$manifest.started_at = (Get-Date).ToString("o")
$manifest.claim_gate.training_started = $true
$manifest | ConvertTo-Json -Depth 8 | Set-Content -Path $manifestPath -Encoding UTF8
# Redirected native stderr becomes a terminating error under "Stop" on
# Windows PowerShell 5.1, which would abort before the exit code is recorded.
$ErrorActionPreference = "Continue"
& '{_ps_escape(plan.python_exe)}' '{_ps_escape(plan.train_script)}' `
    --dataset $dataset `
    --resume-adapter $baseAdapter `
    --output $output `
    --max-steps {max_steps} `
    --max-seq-length {max_seq_length} *> $log
$exitCode = $LASTEXITCODE
$ErrorActionPreference = "Stop"
$manifest.exit_code = $exitCode
$manifest.finished_at = (Get-Date).ToString("o")
if ($exitCode -eq 0) {{
    $manifest.status = "training_completed"
    $manifest.claim_gate.training_completed = $true
}} else {{
    $manifest.status = "training_failed"
}}
$manifest | ConvertTo-Json -Depth 8 | Set-Content -Path $manifestPath -Encoding UTF8
exit $exitCode
"""
    # newline="\n" keeps the script's line endings as written on every platform.
    script_path.write_text(script, encoding="utf-8", newline="\n")

    report = {
        "schema_version": "tinymind-evo-continue-plan-v1",
        "created_at": now.isoformat(),
        "script_path": str(script_path),
        "planned_run_dir": str(run_dir),
        "planned_run_manifest": str(run_manifest),
        "planned_log": str(log_path),
        "wait_pid": wait_pid,
        "base_adapter": str(plan.base_adapter),
        "dataset": str(plan.dataset),
        "output_adapter": str(plan.output_adapter),
        "max_steps": max_steps,
        "max_seq_length": max_seq_length,
        "data_purity": _purity_summary(data_manifest),
        "evo_policy": {
            "optimize_for": [
                "lower eval loss on clean governed data",
                "lower repetition and secret-leak risk",
                "continued adapter improvement without overwriting prior evidence",
            ],
            "stop_rule": "Do not claim frontier/world-best until raw external gates and saved evals pass.",
            "resource_rule": "Use queued 3090 QLoRA stages; do not interrupt active training.",
        },
        "claim_gate": {
            "evo_training_queued": False,
            "external_rank1_claim_allowed": False,
            "reason": "This plan can continue training, but capability claims require completed train/eval and external results.",
        },
    }
    plan_path.write_text(json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True), encoding="utf-8")
    # Added after serialisation, so the file on disk does not contain its own path.
    report["plan_path"] = str(plan_path)
    return report
Xet Storage Details
- Size: 6.35 kB
- Xet hash: bcdbbaa70edb606347ced7e9de8278edf3226f5daba869d726eb3990738e0e87
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.