Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-8b-remote-handoff /bundle /data /native_transfer_curriculum.py
| from __future__ import annotations | |
| from datetime import datetime, timezone | |
| import json | |
| from pathlib import Path | |
| from typing import Any | |
# Ordered curriculum stages. Each runtime-repair sample is expanded into one
# training row per stage. An entry carries the stage identifier, a label for
# the layer span it is associated with, the instruction that opens the user
# prompt, and the loss weight copied into the row metadata.
STAGES = [
    dict(
        id="stem_perception",
        layer_span="embedding+cnn_stem",
        instruction=(
            "Identify what changed from raw_response to repaired_response. "
            "Focus on visible token/schema differences."
        ),
        loss_weight=1.2,
    ),
    dict(
        id="mid_reasoning",
        layer_span="purefield_mid_layers",
        instruction=(
            "Explain the missing invariant and why the repaired response "
            "is more correct."
        ),
        loss_weight=1.6,
    ),
    dict(
        id="tool_schema",
        layer_span="tool_and_format_lanes",
        instruction=(
            "Emit the repaired response exactly. "
            "Preserve JSON, code, math, and evidence constraints."
        ),
        loss_weight=2.3,
    ),
    dict(
        id="self_assessment",
        layer_span="self_assessment_core",
        instruction=(
            "State the verification rule that would catch the raw failure "
            "before final output."
        ),
        loss_weight=1.8,
    ),
]
| def _read(path: str | Path) -> dict[str, Any]: | |
| return json.loads(Path(path).read_text(encoding="utf-8")) | |
| def _samples(report: dict[str, Any]) -> list[dict[str, Any]]: | |
| for result in report.get("results", []): | |
| if result.get("model_id") == "TinyMind-12B-LoRA" or result.get("source") == "local_tinymind_adapter": | |
| return [dict(row) for row in result.get("samples", [])] | |
| results = report.get("results") or [] | |
| return [dict(row) for row in (results[0].get("samples", []) if results else [])] | |
def _event_samples(report: dict[str, Any]) -> list[dict[str, Any]]:
    """Keep only the samples that record a runtime repair.

    A sample is kept when its ``constraint_events``, ``raw_response`` and
    ``response`` values are all truthy; the order of the samples is
    preserved.
    """
    required = ("constraint_events", "raw_response", "response")
    kept: list[dict[str, Any]] = []
    for candidate in _samples(report):
        if all(candidate.get(field) for field in required):
            kept.append(candidate)
    return kept
| def _assistant_for_stage(stage: dict[str, Any], sample: dict[str, Any]) -> str: | |
| raw = str(sample["raw_response"]).strip() | |
| repaired = str(sample["response"]).strip() | |
| events = ", ".join(str(event) for event in sample.get("constraint_events", [])) | |
| stage_id = stage["id"] | |
| if stage_id == "stem_perception": | |
| return f"raw_response differs from repaired_response because runtime event(s) `{events}` corrected missing or invalid surface structure." | |
| if stage_id == "mid_reasoning": | |
| return f"The raw response is not sufficient because `{events}` indicates a broken invariant. The repaired response satisfies the task constraints." | |
| if stage_id == "tool_schema": | |
| return repaired | |
| if stage_id == "self_assessment": | |
| return f"Before final output, verify the response against `{events}`; reject or repair it if the check fails." | |
| return repaired or raw | |
def _row(sample: dict[str, Any], stage: dict[str, Any], variant: int) -> dict[str, Any]:
    """Assemble one chat-format training row for a sample/stage pair.

    Args:
        sample: Runtime-repair sample row.
        stage: Stage description providing ``instruction``, ``id``,
            ``layer_span`` and ``loss_weight``.
        variant: Variant index; it is only recorded in the metadata and does
            not change the messages.

    Returns:
        A dict with ``source``, ``category``, ``messages`` (system, user,
        assistant) and ``metadata``.
    """
    category = str(sample.get("axis") or "unknown")
    raw_text = str(sample.get("raw_response") or "")
    repaired_text = str(sample.get("response") or "")
    event_names = [str(item) for item in sample.get("constraint_events", [])]

    # The event list is rendered with its Python list repr inside the prompt.
    prompt = "\n".join(
        [
            f"{stage['instruction']}",
            f"axis: {category}",
            f"constraint_events: {event_names}",
            "raw_response:",
            f"{raw_text}",
            "",
            "repaired_response:",
            f"{repaired_text}",
        ]
    )
    system_message = {
        "role": "system",
        "content": (
            "You are TinyMind-native DeepWeave learning to internalize runtime repairs. "
            "Do not claim external superiority; learn the invariant and produce the repaired behavior."
        ),
    }
    user_message = {"role": "user", "content": prompt}
    assistant_message = {
        "role": "assistant",
        "content": _assistant_for_stage(stage, sample),
    }
    metadata = {
        "teacher_system": "mistral_lora_runtime_repair",
        "axis": category,
        "variant": variant,
        "native_stage": stage["id"],
        "layer_span": stage["layer_span"],
        "constraint_events": event_names,
        "loss_weight": float(stage["loss_weight"]),
        "quality_tags": ["native_transfer", "runtime_event_distill", "deepweave_t0_candidate"],
    }
    return {
        "source": "tinymind_native_transfer_from_runtime_events",
        "category": category,
        "messages": [system_message, user_message, assistant_message],
        "metadata": metadata,
    }
def build_native_transfer_curriculum(
    out_dir: str | Path,
    *,
    probe_report: str | Path,
    variants_per_event: int = 4,
) -> dict[str, Any]:
    """Expand runtime-repair samples into an SFT curriculum and a report.

    Every event sample found in ``probe_report`` yields one row per entry of
    ``STAGES`` per variant index. The rows are written as JSON Lines to
    ``native_transfer_curriculum.jsonl`` and a summary report is written to
    ``native_transfer_curriculum_report.json``, both inside ``out_dir``.

    Args:
        out_dir: Output directory; created, with parents, if absent.
        probe_report: Path of the JSON probe report to read.
        variants_per_event: Number of rows emitted per sample and stage.

    Returns:
        The report dict that was also written to disk, including the paths
        of both output files.

    Raises:
        ValueError: If ``variants_per_event`` is not positive.
    """
    if variants_per_event <= 0:
        raise ValueError("variants_per_event must be positive")

    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    events = _event_samples(_read(probe_report))
    rows: list[dict[str, Any]] = [
        _row(sample, stage, variant)
        for sample in events
        for stage in STAGES
        for variant in range(variants_per_event)
    ]

    sft_path = out / "native_transfer_curriculum.jsonl"
    # newline="\n" keeps the JSONL file LF-terminated on every platform.
    with sft_path.open("w", encoding="utf-8", newline="\n") as handle:
        handle.writelines(
            json.dumps(row, ensure_ascii=False, sort_keys=True) + "\n" for row in rows
        )

    # Use the same "unknown" fallback the rows use for their category, so the
    # summary never lists "None" for samples that the JSONL labels "unknown".
    axes = sorted({str(sample.get("axis") or "unknown") for sample in events})

    result: dict[str, Any] = {
        "schema": "tinymind.native_transfer_curriculum.v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "probe_report": str(probe_report),
        "sft_path": str(sft_path),
        "summary": {
            "event_sample_count": len(events),
            "stage_count": len(STAGES),
            "variants_per_event": variants_per_event,
            "sft_rows": len(rows),
            "axes": axes,
        },
        "claim_gate": {
            "raw_native_claim_allowed": False,
            "world_best_claim_allowed": False,
            "reason": "This curriculum transfers repaired behavior into native training; it is not model performance evidence.",
        },
    }
    json_path = out / "native_transfer_curriculum_report.json"
    result["json_path"] = str(json_path)
    json_path.write_text(
        json.dumps(result, ensure_ascii=False, indent=2, sort_keys=True) + "\n",
        encoding="utf-8",
    )
    return result
Xet Storage Details
- Size:
- 6.11 kB
- Xet hash:
- cb1338769eb14003ff113d911ed94549f8643cad652e4d490fab9e609e2c4564
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.