linvest21's picture
download
raw
3.01 kB
from __future__ import annotations
from pathlib import Path
from eval.certification import DEFAULT_BASELINE
from n21.config import load_structured
from n21.config import write_json
from n21.settings import SHFT_WORKSPACE_ROOT
from observability.audit_log import utc_now
def record_baseline(
    run_dir: Path,
    *,
    run_id: str,
    model_id: str,
    env: str,
    task: str,
) -> dict[str, object]:
    """Record the dry-run baseline report for a run and return it.

    The report combines the caller-supplied identifiers, a snapshot of
    ``DEFAULT_BASELINE`` as the scores, and a descriptor of the eval suite.
    It is written to three locations:

    * ``run_dir / "eval" / "baseline_report.json"``
    * ``SHFT_WORKSPACE_ROOT / "registry" / "models" / "baseline_manifest.json"``
      (the same path on every call)
    * ``run_dir / "eval" / "baseline_report.md"`` (human-readable summary)

    The eval-suite descriptor is read from the frozen-suite validation file
    under ``SHFT_WORKSPACE_ROOT / "registry" / "eval_suites"`` when that file
    exists; otherwise a hard-coded dry-run fixture descriptor is used (which
    has no ``task_counts`` entry).

    Args:
        run_dir: Directory of the run; the ``eval`` subdirectory is created
            if it does not exist.
        run_id: Identifier stored in the report and shown in the markdown.
        model_id: Identifier of the model the baseline is recorded for.
        env: Environment label stored in the report.
        task: Task label stored in the report.

    Returns:
        The report dictionary that was written to disk.

    Note:
        The validation file is indexed with fixed keys (``eval_suite_id``,
        ``version``, ``sha256``, ``sample_count``, ``status``,
        ``task_counts``); a file missing any of them makes the lookup fail.
        NOTE(review): presumably ``load_structured`` returns a dict, in which
        case this surfaces as ``KeyError`` -- confirm.
    """
    suite_validation_path = (
        SHFT_WORKSPACE_ROOT
        / "registry"
        / "eval_suites"
        / "linvest21_frozen_eval_v0_validation.json"
    )
    if suite_validation_path.exists():
        suite = load_structured(suite_validation_path)
        eval_suite = {
            "name": suite["eval_suite_id"],
            "version": suite["version"],
            "hash": suite["sha256"],
            "sample_count": suite["sample_count"],
            "frozen": suite["status"] == "frozen",
            "task_counts": suite["task_counts"],
        }
        scoring_mode = "dry_run_fixture_scores_on_frozen_suite_structure"
    else:
        # No validation file on disk: describe the built-in fixture suite.
        eval_suite = {
            "name": "shft_dry_run_baseline_suite",
            "version": "0.1.0",
            "hash": "dry_run_fixture_v0_1_0",
            "sample_count": 100,
            "frozen": True,
        }
        scoring_mode = "dry_run_fixture"

    # Snapshot the scores so the returned report does not alias the
    # module-level DEFAULT_BASELINE; a caller mutating report["scores"]
    # must not change the shared default for subsequent calls.
    scores = dict(DEFAULT_BASELINE)

    report = {
        "run_id": run_id,
        "status": "baseline_recorded",
        "model_id": model_id,
        "env": env,
        "task": task,
        "baseline_role": "first_linvest21_fingpt_bootstrap",
        "scores": scores,
        "eval_suite": eval_suite,
        "scoring": {
            "mode": scoring_mode,
            "training_loss_used_as_evidence": False,
            "notes": [
                "This is the baseline readiness artifact for the bootstrapped Linvest21 FinGPT model.",
                "Live baseline scoring must replace these fixture scores with actual model outputs before production fine-tuning decisions.",
            ],
        },
        "created_at": utc_now(),
    }

    # Create the output directory before the first write into it, rather
    # than relying on write_json to create missing parents.
    eval_dir = run_dir / "eval"
    eval_dir.mkdir(parents=True, exist_ok=True)

    write_json(eval_dir / "baseline_report.json", report)
    write_json(SHFT_WORKSPACE_ROOT / "registry" / "models" / "baseline_manifest.json", report)

    md = [
        "# Baseline Evaluation Report",
        "",
        f"Run: `{run_id}`",
        f"Model: `{model_id}`",
        f"Status: `{report['status']}`",
        f"Task: `{task}`",
        f"Environment: `{env}`",
        "",
        "## Scores",
        "",
    ]
    md.extend(f"- {key}: {value}" for key, value in scores.items())
    md.extend(
        [
            "",
            "## Notes",
            "",
            "- This dry-run baseline is recorded before any Linvest21 fine-tuning iteration.",
            "- Live baseline evaluation must use frozen prompts and actual model outputs.",
        ]
    )
    (eval_dir / "baseline_report.md").write_text("\n".join(md) + "\n", encoding="utf-8")
    return report

Xet Storage Details

Size:
3.01 kB
·
Xet hash:
e08f1df88f5c7fed1cbc249aa64a93a5c4c95c939231da68005ffb0583c17de4

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.