Buckets:

linvest21
/

shft-artifacts

Files

xet

linvest21/shft-artifacts / code /self_healing_finetuning /eval /baseline.py

linvest21

4 days ago

download

raw

3.01 kB

	from __future__ import annotations

	from pathlib import Path

	from eval.certification import DEFAULT_BASELINE
	from n21.config import load_structured
	from n21.config import write_json
	from n21.settings import SHFT_WORKSPACE_ROOT
	from observability.audit_log import utc_now


	def record_baseline(
	    run_dir: Path,
	    *,
	    run_id: str,
	    model_id: str,
	    env: str,
	    task: str,
	) -> dict[str, object]:
	    """Record the baseline evaluation report for a run and return it.

	    The report carries the fixture scores from ``DEFAULT_BASELINE`` together
	    with a description of the evaluation suite. When the frozen-suite
	    validation file exists under ``SHFT_WORKSPACE_ROOT`` its metadata is used;
	    otherwise a hard-coded dry-run suite description is substituted.

	    The same report payload is handed to ``write_json`` for
	    ``<run_dir>/eval/baseline_report.json`` and for
	    ``<SHFT_WORKSPACE_ROOT>/registry/models/baseline_manifest.json``, and a
	    Markdown summary is written to ``<run_dir>/eval/baseline_report.md``.

	    Args:
	        run_dir: Directory of the current run; artifacts go in its ``eval``
	            subdirectory, which is created if missing.
	        run_id: Identifier of the run, copied into the report.
	        model_id: Identifier of the model, copied into the report.
	        env: Environment label, copied into the report.
	        task: Task label, copied into the report.

	    Returns:
	        The report dictionary that was written to disk.

	    Raises:
	        KeyError: Presumably, if the suite validation file is present but
	            lacks one of the expected keys (assumes ``load_structured``
	            returns a plain mapping -- TODO confirm).
	    """
	    eval_dir = run_dir / "eval"
	    suite_validation_path = (
	        SHFT_WORKSPACE_ROOT / "registry" / "eval_suites" / "linvest21_frozen_eval_v0_validation.json"
	    )

	    if suite_validation_path.exists():
	        suite = load_structured(suite_validation_path)
	        eval_suite = {
	            "name": suite["eval_suite_id"],
	            "version": suite["version"],
	            "hash": suite["sha256"],
	            "sample_count": suite["sample_count"],
	            "frozen": suite["status"] == "frozen",
	            "task_counts": suite["task_counts"],
	        }
	        scoring_mode = "dry_run_fixture_scores_on_frozen_suite_structure"
	    else:
	        eval_suite = {
	            "name": "shft_dry_run_baseline_suite",
	            "version": "0.1.0",
	            "hash": "dry_run_fixture_v0_1_0",
	            "sample_count": 100,
	            "frozen": True,
	        }
	        scoring_mode = "dry_run_fixture"

	    # Snapshot the shared default so that callers mutating the returned
	    # report cannot alter the module-level DEFAULT_BASELINE for later runs.
	    # NOTE(review): shallow copy; assumes score values are immutable scalars.
	    scores = dict(DEFAULT_BASELINE)

	    report = {
	        "run_id": run_id,
	        "status": "baseline_recorded",
	        "model_id": model_id,
	        "env": env,
	        "task": task,
	        "baseline_role": "first_linvest21_fingpt_bootstrap",
	        "scores": scores,
	        "eval_suite": eval_suite,
	        "scoring": {
	            "mode": scoring_mode,
	            "training_loss_used_as_evidence": False,
	            "notes": [
	                "This is the baseline readiness artifact for the bootstrapped Linvest21 FinGPT model.",
	                "Live baseline scoring must replace these fixture scores with actual model outputs before production fine-tuning decisions.",
	            ],
	        },
	        "created_at": utc_now(),
	    }

	    # Create the output directory before the first write into it, rather
	    # than relying on write_json to do so.
	    eval_dir.mkdir(parents=True, exist_ok=True)
	    write_json(eval_dir / "baseline_report.json", report)
	    write_json(SHFT_WORKSPACE_ROOT / "registry" / "models" / "baseline_manifest.json", report)

	    md = [
	        "# Baseline Evaluation Report",
	        "",
	        f"Run: `{run_id}`",
	        f"Model: `{model_id}`",
	        f"Status: `{report['status']}`",
	        f"Task: `{task}`",
	        f"Environment: `{env}`",
	        "",
	        "## Scores",
	        "",
	    ]
	    # Render from the same snapshot that went into the JSON report so the
	    # two artifacts cannot disagree.
	    md.extend(f"- {key}: {value}" for key, value in scores.items())
	    md.extend(
	        [
	            "",
	            "## Notes",
	            "",
	            "- This dry-run baseline is recorded before any Linvest21 fine-tuning iteration.",
	            "- Live baseline evaluation must use frozen prompts and actual model outputs.",
	        ]
	    )
	    (eval_dir / "baseline_report.md").write_text("\n".join(md) + "\n", encoding="utf-8")
	    return report

Xet Storage Details

Size: 3.01 kB
Xet hash: e08f1df88f5c7fed1cbc249aa64a93a5c4c95c939231da68005ffb0583c17de4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.