Buckets:
| from __future__ import annotations | |
# Baseline gate thresholds, grouped by section. resolve_thresholds() copies
# each section per call and layers environment/task overrides on the copy,
# so this table is the starting point and is not modified by the resolver.
# NOTE(review): the "_max"/"_min" suffixes presumably mark upper/lower bounds
# enforced by a gate evaluator that is not part of this module -- confirm there.
DEFAULT_THRESHOLDS = {
    # Compliance-style limits; every "*_blocked_findings_max" defaults to 0.
    "hard_gates": {
        "pii_blocked_findings_max": 0,
        "mnpi_blocked_findings_max": 0,
        "license_blocked_findings_max": 0,
        "mandatory_prompt_replay_pass_rate": 1.0,
    },
    # Benchmark thresholds; the only section resolve_thresholds() adjusts
    # per task.
    "quality_gates": {
        "aggregate_fineval_min": 0.88,
        "financebench_regression_abs_max": 0.02,
        "convfinqa_numeric_accuracy_min": 0.85,
        "phrasebank_macro_f1_min": 0.90,
        "private_prompt_replay_pass_rate_min": 0.95,
    },
    # Requirements relative to a comparison run.
    # NOTE(review): units of the "*_pct" deltas (percent vs. fraction) are
    # not visible here -- confirm against the consumer.
    "improvement_gates": {
        "require_prod_comparison": True,
        "aggregate_score_delta_min_abs": 0.005,
        "aggregate_score_delta_min_pct": 0.50,
        "private_replay_delta_min_abs": 0.0,
        "private_replay_delta_min_pct": 0.0,
        "no_critical_regression": True,
        "max_task_regression_abs": 0.02,
        "require_rationale": True,
    },
}
# Flat per-environment settings, keyed by environment name.
# resolve_thresholds() looks the profile up by its `env` argument and treats
# an unknown name as an empty profile. Keys that share a name with an entry
# in DEFAULT_THRESHOLDS are threshold overrides; the remaining keys
# ("allow_mock_providers", "require_human_approval") have no default there.
ENV_PROFILES = {
    "dev": {
        "aggregate_fineval_min": 0.75,
        "allow_mock_providers": True,
        "require_human_approval": False,
    },
    "stage": {
        "aggregate_fineval_min": 0.85,
        "require_human_approval": False,
        "require_prod_comparison": True,
    },
    "prod": {
        "aggregate_fineval_min": 0.88,
        "mandatory_prompt_replay_pass_rate": 1.0,
        "require_human_approval": True,
    },
}
def resolve_thresholds(env: str, task: str | None = None) -> dict[str, object]:
    """Build the effective gate thresholds for an environment and optional task.

    Starts from a per-call copy of ``DEFAULT_THRESHOLDS``, layers the
    ``ENV_PROFILES`` entry for *env* on top, then applies the task-specific
    quality-gate values.

    Args:
        env: Key into ``ENV_PROFILES``. An unknown name selects no profile,
            so only the defaults (and any *task* adjustment) are returned.
        task: ``"quantitative_qa"`` sets ``convfinqa_numeric_accuracy_min``
            to 0.88 and ``"sentiment"`` sets ``phrasebank_macro_f1_min`` to
            0.90. Any other value, including ``None``, changes nothing.

    Returns:
        A new dict with one sub-dict per section of ``DEFAULT_THRESHOLDS``,
        plus an ``"environment"`` sub-dict when the profile contains keys
        that are not quality gates. The module-level tables are not mutated.
    """
    # Copy one level deep: section values are scalars, so this is enough to
    # keep per-call overrides from leaking into DEFAULT_THRESHOLDS.
    resolved: dict[str, object] = {
        section: dict(gates) for section, gates in DEFAULT_THRESHOLDS.items()
    }
    gate_sections = list(resolved.values())
    quality = resolved["quality_gates"]
    environment: dict[str, object] = {}

    for key, value in ENV_PROFILES.get(env, {}).items():
        # Apply the override in every section that defines the key. Checking
        # only quality_gates would silently drop overrides for keys owned by
        # hard_gates or improvement_gates (e.g. "require_prod_comparison").
        for gates in gate_sections:
            if key in gates:
                gates[key] = value
        # Everything that is not a quality gate is also reported under
        # "environment", which is where such keys have always been exposed.
        if key not in quality:
            environment[key] = value

    # Only add the section when the profile contributed to it, so an unknown
    # env still yields exactly the default sections.
    if environment:
        resolved["environment"] = environment

    # Task-specific values are applied last and therefore win over the profile.
    if task == "quantitative_qa":
        quality["convfinqa_numeric_accuracy_min"] = 0.88
    elif task == "sentiment":
        quality["phrasebank_macro_f1_min"] = 0.90

    return resolved
Xet Storage Details
- Size: 1.88 kB
- Xet hash: 93215e1dc946065492635299c45ed7a90ca7874f27d7eabe1e6b9187fcb3b714
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.