bbkdevops's picture
download
raw
6.02 kB
"""Sandbox Tool Core gate and evidence report."""
from __future__ import annotations
from datetime import datetime, timezone
import json
from pathlib import Path
import tempfile
from model.sandbox_tool_core import SandboxToolCore, SandboxToolPolicy
from model.sandbox_sdk_profile import build_sdk_inventory
def build_sandbox_tool_core_eval(out_dir: str | Path) -> dict:
    """Run the sandbox tool core through a fixed probe sequence and report on it.

    A fresh sandbox root is created inside ``out_dir`` and a fixed list of
    tool calls is issued against a ``SandboxToolCore`` rooted there: calls
    that are expected to succeed first, followed by three calls that are
    expected to be refused (a path escaping the root, a ``powershell``
    command, and an unknown tool name). Every call result, together with the
    records read back from the core's ledger file, is reduced to a named
    boolean check. The overall gate passes only when every check is truthy.

    Args:
        out_dir: Directory that receives the sandbox root and the two report
            files; it is created (with parents) when missing. The path
            ``out_dir / "sdk_inventory"`` is handed to ``build_sdk_inventory``.

    Returns:
        The report dictionary. The same data is written to
        ``sandbox_tool_core_eval_report.json`` and summarised in
        ``sandbox_tool_core_eval_report.md`` inside ``out_dir``.
    """
    out = Path(out_dir)
    out.mkdir(parents=True, exist_ok=True)

    # The sandbox root lives under the output directory so the evidence stays
    # next to the report that references it.
    sandbox_root = Path(tempfile.mkdtemp(prefix="tinymind_sandbox_tool_core_", dir=out))
    policy = SandboxToolPolicy(max_write_bytes=16_384, max_files_per_project=16, cmd_timeout_s=3.0)
    core = SandboxToolCore(sandbox_root, policy=policy)
    sdk_inventory = build_sdk_inventory(out / "sdk_inventory")

    # Positional arguments for each core.call, in execution order. One-element
    # tuples are calls made without an argument mapping at all; that is kept
    # distinct from passing an explicit empty mapping.
    probes = [
        ("core.manifest",),
        ("lua.eval", {"code": "local x = 7\nlocal y = x * 6\nreturn y"}),
        ("fs.write", {"path": "notes/pure.txt", "content": "sandbox evidence\n"}),
        ("fs.read", {"path": "notes/pure.txt"}),
        ("project.create", {"name": "demo", "files": {"README.md": "# Demo\n", "src/main.py": "print('ok')\n"}}),
        ("cmd.run", {"argv": ["echo", "tool-core"]}),
        ("sandbox.env.create", {"name": "build-a"}),
        ("sandbox.env.file_put", {"name": "build-a", "path": "input.txt", "content": "artifact\n"}),
        ("sandbox.env.file_get", {"name": "build-a", "path": "input.txt"}),
        ("sandbox.env.run", {"name": "build-a", "argv": ["echo", "env-ok"]}),
        ("sandbox.env.run_detached", {"name": "build-a", "argv": ["echo", "stream-ok"]}),
        ("sandbox.env.snapshot", {"name": "build-a", "snapshot": "clean-a"}),
        ("sandbox.env.fork", {"source": "build-a", "child": "build-b"}),
        ("sandbox.env.stop", {"name": "build-a"}),
        ("sandbox.env.dashboard",),
        ("sandbox.env.resources",),
        # The remaining probes are expected to be rejected.
        ("fs.write", {"path": "../escape.txt", "content": "bad"}),
        ("cmd.run", {"argv": ["powershell", "-NoProfile", "-Command", "Write-Output bad"]}),
        ("unknown.tool", {}),
    ]
    calls = [core.call(*probe) for probe in probes]

    # Name each result once instead of indexing ``calls`` by position below.
    (
        manifest,
        lua_eval,
        fs_write,
        fs_read,
        project,
        cmd_echo,
        env_create,
        env_put,
        env_get,
        env_run,
        env_detached,
        env_snapshot,
        env_fork,
        env_stop,
        dashboard,
        resources,
        escape_write,
        blocked_cmd,
        unknown_tool,
    ) = calls

    # The ledger is read back as one JSON document per line.
    ledger_text = core.ledger_path.read_text(encoding="utf-8")
    ledger_records = list(map(json.loads, ledger_text.splitlines()))

    def succeeded(call: dict) -> bool:
        # Identity test on purpose: only a literal True counts as success.
        return call["ok"] is True

    def refused_with(call: dict, code: str) -> bool:
        # A refusal must be a literal False and carry the expected error code.
        return call["ok"] is False and call.get("error") == code

    checks = {
        "manifest_exposes_policy": succeeded(manifest) and "policy" in manifest.get("result", {}),
        "lua_subset_executes": succeeded(lua_eval) and lua_eval.get("result") == 42,
        "file_roundtrip_inside_root": (
            succeeded(fs_write)
            and succeeded(fs_read)
            and fs_read.get("result") == "sandbox evidence\n"
        ),
        "project_scaffold_inside_root": (
            succeeded(project) and (sandbox_root / "demo" / "src" / "main.py").exists()
        ),
        "allowlisted_cmd_runs": succeeded(cmd_echo) and "tool-core" in cmd_echo.get("stdout", ""),
        "isolated_env_created": succeeded(env_create),
        "file_api_roundtrip": (
            succeeded(env_put)
            and succeeded(env_get)
            and env_get.get("result", {}).get("content") == "artifact\n"
        ),
        "isolated_env_runs_command": succeeded(env_run),
        "detached_command_started": (
            succeeded(env_detached) and "stdout_path" in env_detached.get("result", {})
        ),
        "isolated_env_snapshot_saved": succeeded(env_snapshot),
        "isolated_env_forked": succeeded(env_fork),
        # ``auto_snapshot`` may be absent or falsy; ``or {}`` keeps the lookup safe.
        "sandbox_stop_auto_snapshot": (
            succeeded(env_stop)
            and (env_stop.get("result", {}).get("auto_snapshot") or {}).get("ok") is True
        ),
        "dashboard_present": succeeded(dashboard) and "usage" in dashboard.get("result", {}),
        "resource_accounting_present": (
            succeeded(resources)
            and "sandbox_provisioned_memory_mb" in resources.get("result", {})
        ),
        "path_escape_rejected": refused_with(escape_write, "path_escape"),
        "non_allowlisted_cmd_rejected": refused_with(blocked_cmd, "command_not_allowlisted"),
        "unknown_tool_rejected": refused_with(unknown_tool, "unknown_tool"),
        "ledger_hashes_present": all(
            record.get("input_sha256") and record.get("output_sha256")
            for record in ledger_records
        ),
        "ledger_count_matches_calls": len(ledger_records) == len(calls),
    }
    passed = all(checks.values())

    json_path = out / "sandbox_tool_core_eval_report.json"
    md_path = out / "sandbox_tool_core_eval_report.md"

    report = {
        "schema_version": "tinymind-sandbox-tool-core-eval-v1",
        "created_at": datetime.now(timezone.utc).isoformat(),
        "sandbox_root": str(sandbox_root),
        "ledger_path": str(core.ledger_path),
        "tool_manifest": core.manifest(),
        "sdk_inventory": sdk_inventory,
        "checks": checks,
        "calls": calls,
        "claim_gate": {
            "sandbox_tool_core_ready": passed,
            # Hard-coded: this report never licenses an unrestricted-host claim.
            "host_unrestricted_execution_claim_allowed": False,
            "reason": "The core is intentionally policy-gated and audited; unrestricted host execution is not a valid safety or reliability claim.",
        },
    }
    # The report records where its own serialised copies live.
    report.update(json_path=str(json_path), markdown_path=str(md_path))

    serialised = json.dumps(report, ensure_ascii=False, indent=2, sort_keys=True)
    json_path.write_text(serialised, encoding="utf-8")
    md_path.write_text(_markdown(report), encoding="utf-8")
    return report
def _markdown(report: dict) -> str:
    """Render the eval report as a short Markdown summary.

    Args:
        report: Report dictionary providing ``claim_gate`` (with
            ``sandbox_tool_core_ready`` and
            ``host_unrestricted_execution_claim_allowed``), ``ledger_path``,
            ``sdk_inventory`` (with ``json_path``) and ``checks``.

    Returns:
        Markdown text ending in a newline: a title, four summary bullets,
        and one bullet per entry of ``checks`` in the mapping's own order.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    gate = report["claim_gate"]
    summary = [
        "# TinyMind Sandbox Tool Core Eval",
        "",
        f"- Sandbox tool core ready: {gate['sandbox_tool_core_ready']}",
        f"- Ledger: {report['ledger_path']}",
        f"- SDK inventory: {report['sdk_inventory']['json_path']}",
        f"- Host unrestricted execution claim allowed: {gate['host_unrestricted_execution_claim_allowed']}",
        "",
        "## Checks",
        "",
    ]
    check_rows = [f"- {name}: {passed}" for name, passed in report["checks"].items()]
    return "\n".join(summary + check_rows) + "\n"

Xet Storage Details

Size:
6.02 kB
·
Xet hash:
4959d15b7b8f5002a6b7cfa528e233a663a63af538e2865614ef746fab40f31c

Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.