Spaces:
Sleeping
Sleeping
| """ | |
| 对模型输出做轻量静态校验(不依赖外部库)。 | |
| 目标: | |
| - 提前发现“缺少 mGDL / 缺少核心模块 / 输出里存在占位符”等高频失败 | |
| - 为“最小修改修复”提供结构化的 FAIL 列表 | |
| """ | |
| import re | |
| from typing import Dict, List, Optional, Tuple | |
# Matches one Markdown fenced code block and captures its body (group 1).
# An optional language tag after the opening fence is skipped. The opening
# fence line may end in LF or CRLF, so replies with Windows line endings are
# still recognised as containing a code block.
_FENCE_RE = re.compile(r"```(?:[a-zA-Z0-9_-]+)?\r?\n(.*?)```", re.DOTALL)

# Regex character class for a "greater-than" separator: ASCII ">" or the
# fullwidth form U+FF1E (written as an escape that the `re` module resolves).
# The previous class listed ASCII ">" twice, so the fullwidth sign was never
# accepted.
# NOTE(review): presumably the fullwidth sign was lost when this file's text
# was Unicode-normalized -- confirm against the upstream source.
_CJK_GT = r"[>\uFF1E]"
def _extract_mgdl_block(text: Optional[str]) -> str:
    """Return the part of a model reply that most resembles mGDL source.

    Fenced code blocks are scanned in order. The first block containing
    ``(game `` is returned, stripped of surrounding whitespace. If no block
    contains it, the last block that starts with ``(define_game`` is returned.
    When the reply has no fenced blocks at all, the whole reply is returned
    unchanged provided it contains ``(game ``.

    Args:
        text: Raw model reply; ``None`` and ``""`` are accepted.

    Returns:
        The selected mGDL text, or ``""`` when nothing qualifies.
    """
    if not text:
        return ""
    blocks = _FENCE_RE.findall(text)
    if not blocks:
        # No fenced block: fall back to searching the whole reply.
        return text if "(game " in text else ""
    fallback = ""
    for raw in blocks:
        candidate = (raw or "").strip()
        if "(game " in candidate:
            return candidate
        # A block starting with "(game " has already been returned above, so
        # only the alternative root form needs checking here.
        if candidate.startswith("(define_game"):
            fallback = candidate
    return fallback
def _has_any(text: str, needles: List[str]) -> bool:
    """Report whether at least one of *needles* occurs as a substring of *text*.

    A falsy *text* (``None`` or ``""``) is searched as the empty string.
    """
    haystack = text if text else ""
    for needle in needles:
        if needle in haystack:
            return True
    return False
def _re_search(pattern: str, text: str) -> bool:
    """Report whether *pattern* matches anywhere in *text*.

    The search runs with ``re.DOTALL``, so ``.`` also matches newlines and a
    pattern may span several lines. A falsy *text* is searched as the empty
    string.
    """
    subject = text if text else ""
    match = re.search(pattern, subject, flags=re.DOTALL)
    return match is not None
def validate_mahjong_response(text: str) -> List[Dict[str, str]]:
    """Run lightweight static checks over a model reply and list what failed.

    The checks are presence tests (substring or regex), not logical proofs.
    They run in a fixed order:

    1. Empty reply: a single ``EMPTY`` error is returned immediately.
    2. Unexpanded placeholders such as ``<PID>``: ``PLACEHOLDER`` error.
    3. mGDL extraction: if no mGDL text is found, ``NO_MGDL`` is added and the
       remaining checks are skipped.
    4. Natural-language declarations in the full reply (design log, hand-delta
       table, minimal turn simulation, hand sizes, response priority, turn
       order, discard rights, win triggers): warnings.
    5. Required mGDL modules: one ``MISSING_MODULES`` error naming them all.
    6. Further mGDL tokens (special mechanics, tile total, invariants):
       warnings.

    Args:
        text: Full model reply; ``None`` or whitespace-only counts as empty.

    Returns:
        A list of dicts with ``code``, ``level`` (``"error"`` or
        ``"warning"``) and ``message`` keys, in check order. The list is
        empty when every check passes.
    """
    if not text or not text.strip():
        return [{"code": "EMPTY", "level": "error", "message": "模型返回空内容"}]

    findings: List[Dict[str, str]] = []

    def record(code: str, level: str, message: str) -> None:
        # Single place where an issue entry is built, so every entry has the
        # same three keys.
        findings.append({"code": code, "level": level, "message": message})

    # Placeholders must have been expanded to concrete values by the model.
    if _has_any(text, ["<PID>", "<NextPID>", "<Variant_Name>", "<custom>"]):
        record(
            "PLACEHOLDER",
            "error",
            "输出包含占位符(如 <PID>/<Variant_Name>),需展开为实际值(A1/A2/A3/A4 等)。",
        )

    mgdl = _extract_mgdl_block(text)
    if not mgdl:
        record("NO_MGDL", "error", "未检测到 mGDL 代码块或 (game ...) 根节点。")
        return findings

    # Mechanisms that change the draw/discard rhythm; when one is mentioned,
    # it is expected to appear somewhere after the minimal-simulation heading.
    rhythm_terms = ["连续摸", "摸三打三", "海底漫游", "海捞阶段", "海捞区"]
    rhythm_sim_pattern = (
        r"(最小回合推演).*(" + "|".join(map(re.escape, rhythm_terms)) + ")"
    )

    # Each entry is (declaration is missing, issue code, message). The order
    # of the entries is the order in which warnings are reported.
    narrative_checks = [
        (
            "设计日志(创新推演摘要)" not in text,
            "NO_DESIGN_LOG",
            "未检测到“设计日志(创新推演摘要)”段落;Phase-1 建议补齐融合清单/冲突桥接/推演摘要/落地映射。",
        ),
        (
            not _has_any(text, ["动作—手牌变化—轮次影响表", "动作-手牌变化-轮次影响表"]),
            "NO_HAND_DELTA_TABLE",
            "未检测到《动作—手牌变化—轮次影响表》;该表用于避免“出牌后手牌不变”等守恒错误,建议补齐。",
        ),
        (
            "最小回合推演" not in text,
            "NO_MIN_SIMULATION",
            "未检测到“最小回合推演”(普通/碰/杠三段);建议补齐以验证手牌守恒与轮次控制。",
        ),
        (
            not _re_search(r"(起手|初始).*13\s*张", text),
            "NO_START_HAND_13",
            "未显式声明“标准起手 13 张”(麻将机制说明的基础逻辑);建议在基础规则中补一句。",
        ),
        (
            not _re_search(r"(摸|抓).*(14\s*张)", text),
            "NO_DRAW_TO_14",
            "未显式声明“摸牌后手牌为 14 张”(基础逻辑);建议补充以便审计守恒。",
        ),
        (
            not _re_search(r"(打|弃|出).*(回到|恢复|为).*(13\s*张)", text),
            "NO_DISCARD_BACK_13",
            "未显式声明“打牌后手牌回到 13 张”(基础逻辑);建议补充以便审计守恒。",
        ),
        (
            # Response priority; the separator symbol comes from _CJK_GT.
            not _re_search(rf"胡.*{_CJK_GT}.*碰.*{_CJK_GT}.*杠.*{_CJK_GT}.*吃", text),
            "NO_PRIORITY_ORDER",
            "未显式声明“胡>碰>杠>吃”的响应优先级(基础逻辑);建议补齐以避免争议场景。",
        ),
        (
            not _has_any(text, ["仅能吃上家", "只能吃上家", "只可吃上家"]),
            "NO_CHI_UPWIND_ONLY",
            "未显式声明“吃仅能吃上家牌”(基础逻辑);建议补齐。",
        ),
        (
            not _has_any(text, ["庄家-下家-对家-上家", "庄家→下家→对家→上家", "庄家 → 下家 → 对家 → 上家"]),
            "NO_TURN_ORDER_BASE",
            "未显式声明“庄家-下家-对家-上家”的行牌顺序(基础逻辑);建议补齐。",
        ),
        (
            not _has_any(text, ["由碰牌的玩家继续出牌", "由吃/碰者继续出牌", "碰后由碰者出牌"]),
            "NO_POST_PENG_RIGHTS",
            "未显式声明“碰/吃后由碰/吃者继续出牌”的出牌权规则(基础逻辑);建议补齐。",
        ),
        (
            not _has_any(text, ["由杠牌的玩家摸牌后继续出牌", "杠后由杠者补牌后继续出牌", "杠后由杠者继续出牌"]),
            "NO_POST_KONG_RIGHTS",
            "未显式声明“杠后由杠者补牌/摸牌后继续出牌”的出牌权规则(基础逻辑);建议补齐。",
        ),
        (
            # Both win triggers must be mentioned.
            "自摸" not in text or "点炮" not in text,
            "NO_ZIMO_DIANPAO",
            "未同时出现“自摸/点炮”两种胡牌触发方式(基础逻辑);建议补齐。",
        ),
        (
            _has_any(text, rhythm_terms) and not _re_search(rhythm_sim_pattern, text),
            "NO_SPECIAL_MIN_SIM",
            "检测到改变摸打节奏的机制(如 摸三打三/连续摸/海捞),但未看到对应机制的额外“最小回合推演”;建议补齐以验证守恒。",
        ),
    ]
    for is_missing, code, message in narrative_checks:
        if is_missing:
            record(code, "warning", message)

    # Core modules that must all appear in the extracted mGDL text.
    required_markers = [
        "(game_variant",
        "(players",
        "(tileset",
        "(extensions",
        "(seats",
        "(turn_order",
        "(setup",
        "(actions",
        "(win_rules",
        "(scoring",
        "(fan_table",
        "(settlement",
        "(invariants",
    ]
    absent = [marker for marker in required_markers if marker not in mgdl]
    if absent:
        record(
            "MISSING_MODULES",
            "error",
            "mGDL 缺少核心模块: {0}".format(", ".join(absent)),
        )

    # Remaining token-level checks against the mGDL text, same tuple layout
    # as narrative_checks.
    structure_checks = [
        (
            "(special_mechanics" not in mgdl and "extensions.special_mechanics" in text,
            "SPECIAL_MECH_MISMATCH",
            "自然语言提到 special_mechanics,但 mGDL 中未出现 (special_mechanics ...) 或 extensions.special_mechanics 结构。",
        ),
        (
            "(total" not in mgdl,
            "NO_TILE_TOTAL",
            "tileset 中未检测到 (total N),容易导致牌数不自洽。",
        ),
        (
            "tile_conservation" not in mgdl,
            "NO_TILE_CONSERVATION",
            "(invariants ...) 中未检测到 tile_conservation;建议补齐以显式声明牌数守恒。",
        ),
        (
            "hand_size_stable" not in mgdl,
            "NO_HAND_SIZE_STABLE",
            "(invariants ...) 中未检测到 hand_size_stable(或等价声明);建议补齐以约束回合结束手牌稳定值。",
        ),
    ]
    for is_missing, code, message in structure_checks:
        if is_missing:
            record(code, "warning", message)

    return findings
def format_issues_for_llm(issues: List[Dict[str, str]]) -> str:
    """Render issues as a numbered plain-text list, one issue per line.

    Each line has the form ``N. [CODE] MESSAGE`` with ``N`` starting at 1.
    A missing ``code`` or ``message`` key is rendered as ``None``. Leading
    and trailing whitespace of the joined result is stripped.

    Args:
        issues: Issue dicts carrying ``code`` and ``message`` keys.

    Returns:
        The formatted list, or ``""`` when *issues* is empty.
    """
    if not issues:
        return ""
    template = "{0}. [{1}] {2}"
    numbered = (
        template.format(position, issue.get("code"), issue.get("message"))
        for position, issue in enumerate(issues, start=1)
    )
    return "\n".join(numbered).strip()