Buckets:
| from __future__ import annotations | |
| from pathlib import Path | |
| from eval.certification import DEFAULT_BASELINE | |
| from n21.config import load_structured | |
| from n21.config import write_json | |
| from n21.settings import SHFT_WORKSPACE_ROOT | |
| from observability.audit_log import utc_now | |
def record_baseline(
    run_dir: Path,
    *,
    run_id: str,
    model_id: str,
    env: str,
    task: str,
) -> dict[str, object]:
    """Record the dry-run baseline report for a run and return it.

    The report is written to three places:

    * ``<run_dir>/eval/baseline_report.json`` (via ``write_json``)
    * ``<SHFT_WORKSPACE_ROOT>/registry/models/baseline_manifest.json``
      (via ``write_json``; the same payload as the run-level report)
    * ``<run_dir>/eval/baseline_report.md`` (a Markdown summary)

    The ``eval_suite`` section is taken from the frozen-suite validation file
    under ``registry/eval_suites`` when that file exists; otherwise a
    hard-coded dry-run fixture description is used. In both cases the scores
    are the ``DEFAULT_BASELINE`` fixture values, not live model outputs.

    Args:
        run_dir: Directory of the run; the ``eval`` subdirectory is created
            if it does not exist.
        run_id: Identifier of the run, stored in the report.
        model_id: Identifier of the model, stored in the report.
        env: Environment label, stored in the report.
        task: Task label, stored in the report.

    Returns:
        The report dictionary that was written to disk. Its ``"scores"``
        entry is a copy of ``DEFAULT_BASELINE``, so mutating the returned
        report does not alter the shared module-level default.

    Raises:
        KeyError: presumably, if the validation file exists but lacks one of
            the fields read below (assumes ``load_structured`` returns a
            plain mapping -- confirm against ``n21.config``).
    """
    suite_validation_path = (
        SHFT_WORKSPACE_ROOT
        / "registry"
        / "eval_suites"
        / "linvest21_frozen_eval_v0_validation.json"
    )
    eval_suite: dict[str, object]
    if suite_validation_path.exists():
        # A validation record is available: describe the frozen suite itself.
        suite = load_structured(suite_validation_path)
        eval_suite = {
            "name": suite["eval_suite_id"],
            "version": suite["version"],
            "hash": suite["sha256"],
            "sample_count": suite["sample_count"],
            "frozen": suite["status"] == "frozen",
            "task_counts": suite["task_counts"],
        }
        scoring_mode = "dry_run_fixture_scores_on_frozen_suite_structure"
    else:
        # No validation record: fall back to the fixture suite description.
        eval_suite = {
            "name": "shft_dry_run_baseline_suite",
            "version": "0.1.0",
            "hash": "dry_run_fixture_v0_1_0",
            "sample_count": 100,
            "frozen": True,
        }
        scoring_mode = "dry_run_fixture"

    # Snapshot the shared default so the report (which is returned to the
    # caller) never aliases the module-level DEFAULT_BASELINE object.
    scores = dict(DEFAULT_BASELINE)

    report: dict[str, object] = {
        "run_id": run_id,
        "status": "baseline_recorded",
        "model_id": model_id,
        "env": env,
        "task": task,
        "baseline_role": "first_linvest21_fingpt_bootstrap",
        "scores": scores,
        "eval_suite": eval_suite,
        "scoring": {
            "mode": scoring_mode,
            "training_loss_used_as_evidence": False,
            "notes": [
                "This is the baseline readiness artifact for the bootstrapped Linvest21 FinGPT model.",
                "Live baseline scoring must replace these fixture scores with actual model outputs before production fine-tuning decisions.",
            ],
        },
        "created_at": utc_now(),
    }

    eval_dir = run_dir / "eval"
    # Create the directory before the first write into it; whether write_json
    # creates missing parents is not visible from this module.
    eval_dir.mkdir(parents=True, exist_ok=True)

    write_json(eval_dir / "baseline_report.json", report)
    write_json(SHFT_WORKSPACE_ROOT / "registry" / "models" / "baseline_manifest.json", report)

    md = [
        "# Baseline Evaluation Report",
        "",
        f"Run: `{run_id}`",
        f"Model: `{model_id}`",
        f"Status: `{report['status']}`",
        f"Task: `{task}`",
        f"Environment: `{env}`",
        "",
        "## Scores",
        "",
    ]
    # Render from the same snapshot stored in the JSON report so both
    # artifacts always agree.
    md.extend(f"- {key}: {value}" for key, value in scores.items())
    md.extend(
        [
            "",
            "## Notes",
            "",
            "- This dry-run baseline is recorded before any Linvest21 fine-tuning iteration.",
            "- Live baseline evaluation must use frozen prompts and actual model outputs.",
        ]
    )
    (eval_dir / "baseline_report.md").write_text("\n".join(md) + "\n", encoding="utf-8")
    return report
Xet Storage Details
- Size: 3.01 kB
- Xet hash: e08f1df88f5c7fed1cbc249aa64a93a5c4c95c939231da68005ffb0583c17de4
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.