Buckets:
bbkdevops/unicosys-hypergraph-bucket / tinymind-native-colab-handoff /bundle /model /evo_learning.py
| """TinyMind Evo learning loop. | |
| This is not a memory replay system. It turns failures into novel challenges, | |
| tests whether a generalizable lesson improves held-out behavior, and promotes | |
| only lessons that improve without purity loss. | |
| """ | |
| from __future__ import annotations | |
| from dataclasses import asdict, dataclass | |
| from hashlib import sha256 | |
| import re | |
| from typing import Any | |
@dataclass
class EvoLearningSeed:
    """One recorded failure that seeds a learning cycle.

    Declared as a dataclass so it can be built from its four fields and
    serialised with ``dataclasses.asdict``.
    """

    # Skill label of the failing task.
    skill: str
    # Prompt associated with the failure.
    failure_prompt: str
    # The answer that was produced and judged a failure.
    failed_answer: str
    # Description of the property a good answer is expected to have.
    expected_property: str
@dataclass
class EvoLesson:
    """A candidate lesson together with the scores used to gate its promotion.

    Declared as a dataclass so it can be built with keyword arguments and
    serialised with ``dataclasses.asdict``.
    """

    # Skill label the lesson belongs to.
    skill: str
    # Category tag of the lesson.
    kind: str
    # Human-readable statement of the rule.
    rule_text: str
    # Hash identifying the failure the lesson was derived from.
    source_failure_hash: str
    # Score change on held-out behaviour attributed to the lesson.
    holdout_delta: float
    # Purity score change attributed to the lesson.
    purity_delta: float
    # Novelty score of the challenge the lesson was derived from.
    novelty_score: float

    def lesson_id(self) -> str:
        """Return a stable 16-hex-character identifier for this lesson.

        The id is the first 16 characters of the SHA-256 hex digest of
        ``skill|kind|rule_text|source_failure_hash``. The three numeric score
        fields do not take part, so two lessons that differ only in their
        scores share the same id.
        """
        payload = f"{self.skill}|{self.kind}|{self.rule_text}|{self.source_failure_hash}"
        return sha256(payload.encode("utf-8")).hexdigest()[:16]
class EvoLearningLoop:
    """Turn failure seeds into transformed challenges and gate lesson promotion.

    For every seed the loop builds a rewritten challenge prompt, derives a
    lesson from it, and promotes the lesson only when it clears the novelty,
    holdout and purity thresholds configured on the instance.
    """

    def __init__(
        self,
        min_novelty: float = 0.35,
        min_holdout_delta: float = 0.02,
        min_purity_delta: float = 0.0,
    ):
        """Store the promotion thresholds, coercing each to ``float``.

        Args:
            min_novelty: Lessons whose novelty score is below this are blocked.
            min_holdout_delta: Lessons whose holdout delta is below this are blocked.
            min_purity_delta: Lessons whose purity delta is below this are blocked.
        """
        self.min_novelty = float(min_novelty)
        self.min_holdout_delta = float(min_holdout_delta)
        self.min_purity_delta = float(min_purity_delta)

    def run(self, seeds: list[EvoLearningSeed]) -> dict[str, Any]:
        """Process every seed and return a report of promoted and blocked lessons.

        Args:
            seeds: Failure seeds to process, in order.

        Returns:
            A dict with the schema version, counts, the per-seed ``cycles``,
            the ``promoted_lessons`` and ``blocked_lessons`` lists, and a
            ``claim_gate`` dict of boolean flags.
        """
        cycles: list[dict[str, Any]] = []
        promoted: list[dict[str, Any]] = []
        blocked: list[dict[str, Any]] = []
        memory_replay_rejected = False
        for seed in seeds:
            challenge = self._build_challenge(seed)
            lesson = self._distill_lesson(seed, challenge)
            reason = self._block_reason(seed, lesson)
            lesson_id = lesson.lesson_id()
            cycle = {
                "seed": asdict(seed),
                "challenge": challenge,
                "lesson": {**asdict(lesson), "lesson_id": lesson_id},
                "promotion_decision": "blocked" if reason else "promoted",
                "block_reason": reason,
            }
            cycles.append(cycle)
            if reason:
                blocked.append({"skill": seed.skill, "lesson_id": lesson_id, "reason": reason})
                if "memorization" in reason:
                    memory_replay_rejected = True
            else:
                promoted.append(cycle["lesson"])
        # NOTE(review): besides an explicit memorization block, this flag is also
        # reported True as soon as any challenge reaches the novelty threshold --
        # confirm that this is the intended meaning of "rejected".
        any_novel_challenge = any(
            item["challenge"]["novelty_score"] >= self.min_novelty for item in cycles
        )
        return {
            "schema_version": "tinymind-evo-learning-loop-v1",
            "cycle_count": len(cycles),
            "promoted_count": len(promoted),
            "blocked_count": len(blocked),
            "cycles": cycles,
            "promoted_lessons": promoted,
            "blocked_lessons": blocked,
            "claim_gate": {
                "self_learning_real": bool(promoted),
                "memory_replay_rejected": memory_replay_rejected or any_novel_challenge,
                "requires_holdout_before_weight_update": True,
                "world_best_claim_allowed": False,
            },
        }

    def _build_challenge(self, seed: EvoLearningSeed) -> dict[str, Any]:
        """Build the challenge dict for ``seed``.

        The dict carries the transformed prompt, the seed's expected property,
        the novelty score rounded to six decimals, the failure hash, and an
        ``anti_memory_check`` holding whether the prompt changed plus the first
        96 characters of the failed answer.
        """
        transformed = self._transform_prompt(seed.failure_prompt, seed.skill)
        novelty = self._novelty(seed.failure_prompt, transformed)
        return {
            "prompt": transformed,
            "expected_property": seed.expected_property,
            "novelty_score": round(novelty, 6),
            "source_failure_hash": self._hash_failure(seed),
            "anti_memory_check": {
                "prompt_changed": transformed != seed.failure_prompt,
                "answer_replay_forbidden": seed.failed_answer[:96],
            },
        }

    def _distill_lesson(self, seed: EvoLearningSeed, challenge: dict[str, Any]) -> EvoLesson:
        """Pick a rule text and score deltas for ``seed`` and wrap them in a lesson.

        The branch is chosen from the failed answer, the skill label, the
        expected property and the failure prompt. The holdout and purity
        deltas are fixed constants per branch; nothing is measured here.
        """
        if self._is_repeated_phrase(seed.failed_answer, seed.skill):
            rule_text = "Reject repeated fixed phrases and answer from the current user request plus validated evidence."
            holdout_delta = 0.0
            purity_delta = -0.01
        elif "tool" in seed.skill or "schema" in seed.expected_property:
            rule_text = "For tool tasks, emit a valid structured tool call before claiming any observation or result."
            holdout_delta = 0.08
            purity_delta = 0.02
        elif "math" in seed.skill or "calculate" in seed.expected_property or "คำนวณ" in seed.failure_prompt:
            rule_text = "For quantitative questions, identify variables, compute the operation, then explain the reusable method."
            holdout_delta = 0.06
            purity_delta = 0.03
        else:
            rule_text = "Convert each failure into a new paraphrased holdout challenge and learn the invariant, not the wording."
            holdout_delta = 0.03
            purity_delta = 0.01
        return EvoLesson(
            skill=seed.skill,
            kind="generalizable_skill",
            rule_text=rule_text,
            source_failure_hash=str(challenge["source_failure_hash"]),
            holdout_delta=holdout_delta,
            purity_delta=purity_delta,
            novelty_score=float(challenge["novelty_score"]),
        )

    def _block_reason(self, seed: EvoLearningSeed, lesson: EvoLesson) -> str | None:
        """Return why ``lesson`` must not be promoted, or ``None`` if it may be.

        Checks run in this order and the first failing one wins: repeated
        fixed answer, novelty below threshold, holdout delta below threshold,
        purity delta below threshold.
        """
        if self._is_repeated_phrase(seed.failed_answer, seed.skill):
            return "memorization_or_low_novelty: repeated fixed answer pattern"
        if lesson.novelty_score < self.min_novelty:
            return f"memorization_or_low_novelty:{lesson.novelty_score:.6f}"
        if lesson.holdout_delta < self.min_holdout_delta:
            return f"holdout_delta_too_small:{lesson.holdout_delta:.6f}"
        if lesson.purity_delta < self.min_purity_delta:
            return f"purity_regression:{lesson.purity_delta:.6f}"
        return None

    @staticmethod
    def _transform_prompt(prompt: str, skill: str) -> str:
        """Rewrite ``prompt`` into a new challenge prompt for ``skill``.

        When the prompt contains numbers, each one is shifted (+37 when it is
        at least 100, otherwise +3) and the shifted text is embedded in a
        fixed template. Otherwise a generic template that does not quote the
        prompt is returned.
        """
        text = prompt.strip()

        def shift(match: re.Match[str]) -> str:
            raw = match.group(0)
            try:
                value = float(raw.replace(",", ""))
            except ValueError:
                return raw
            if value == float("inf"):
                # Digit run too large for a float; leave it untouched.
                return raw
            new_value = value + 37 if value >= 100 else value + 3
            return f"{new_value:,.0f}" if new_value.is_integer() else f"{new_value:g}"

        # The pattern must end on a digit so a punctuation comma after a number
        # ("5, 6") stays in the text. A single substitution pass rewrites each
        # number at its own position, so a later number can never overwrite the
        # replacement produced for an earlier one.
        shifted, replaced = re.subn(r"\d(?:[\d,]*\d)?(?:\.\d+)?", shift, text)
        if replaced:
            return f"โจทย์ใหม่สำหรับทดสอบ {skill}: {shifted} พร้อมอธิบายหลักการทั่วไปที่ใช้ได้กับเลขอื่น"
        return f"โจทย์ใหม่สำหรับทดสอบ {skill}: สร้างกรณีเทียบเคียงที่เปลี่ยนบริบทจากโจทย์เดิม แล้วตอบตามกฎเชิงหลักการ ไม่ท่องประโยคเดิม"

    @staticmethod
    def _novelty(original: str, transformed: str) -> float:
        """Return one minus the Jaccard overlap of the two texts' token sets.

        Tokens are lower-cased runs of word characters or characters in the
        ``ก-๙`` range. The result is clamped to ``[0.0, 1.0]``; two texts
        without any token score ``0.0``.
        """

        def toks(text: str) -> set[str]:
            return {tok for tok in re.split(r"[^\wก-๙]+", text.lower()) if tok}

        a = toks(original)
        b = toks(transformed)
        if not a and not b:
            return 0.0
        overlap = len(a & b) / max(len(a | b), 1)
        return max(0.0, min(1.0, 1.0 - overlap))

    @staticmethod
    def _hash_failure(seed: EvoLearningSeed) -> str:
        """Return the SHA-256 hex digest of the seed's four fields joined by ``|``."""
        payload = f"{seed.skill}|{seed.failure_prompt}|{seed.failed_answer}|{seed.expected_property}"
        return sha256(payload.encode("utf-8")).hexdigest()

    @staticmethod
    def _is_repeated_phrase(answer: str, skill: str = "") -> bool:
        """Return True when ``answer`` looks like a repeated or canned phrase.

        True when any of the following holds, checked in order:
        the skill label contains "fixed" or "phrase"; the answer contains one
        of the known canned markers; the answer has at least two non-blank
        lines that are all identical; the answer has at least eight words and
        fewer than 35% of them are distinct.
        """
        skill_lower = skill.lower()
        if "fixed" in skill_lower or "phrase" in skill_lower:
            return True
        fixed_markers = [
            "use the expected structured tool call exactly as specified by the schema",
            "no strong match found",
            "start with read-only powershell diagnostics",
        ]
        answer_lower = answer.lower()
        if any(marker in answer_lower for marker in fixed_markers):
            return True
        lines = [line.strip() for line in answer.splitlines() if line.strip()]
        if len(lines) >= 2 and len(set(lines)) == 1:
            return True
        words = re.findall(r"\w+", answer_lower)
        if len(words) < 8:
            # Too short to judge repetition from the word ratio.
            return False
        unique_ratio = len(set(words)) / len(words)
        return unique_ratio < 0.35
Xet Storage Details
- Size:
- 8.44 kB
- Xet hash:
- 2ede07e9ed24ff965e045fdad749e23ed56fbd85bd1fae890ee1bea33fbdcd92
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.