[ { "test_id": "AgentTool_1442", "prompt": "Use AgentTool to do something", "expected_tool": "AgentTool", "expected_params": {}, "tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.", "difficulty": "hard" }, { "test_id": "AgentTool_9133", "prompt": "Call AgentTool", "expected_tool": "AgentTool", "expected_params": {}, "tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.", "difficulty": "easy" }, { "test_id": "AgentTool_5334", "prompt": "Use AgentTool to do something", "expected_tool": "AgentTool", "expected_params": {}, "tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.", "difficulty": "easy" }, { "test_id": "AgentTool_7066", "prompt": "Call AgentTool", "expected_tool": "AgentTool", "expected_params": {}, "tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.", "difficulty": "hard" }, { "test_id": "AgentTool_5142", "prompt": "Call AgentTool", "expected_tool": "AgentTool", "expected_params": {}, "tool_description": "Format one agent line for the agent_listing_delta attachment message:\n`- type: whenToUse (Tools: ...)`.", "difficulty": "hard" }, { "test_id": "AskUserQuestionTool_8221", "prompt": "Execute AskUserQuestionTool", "expected_tool": "AskUserQuestionTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "AskUserQuestionTool_8297", "prompt": "Call AskUserQuestionTool", "expected_tool": "AskUserQuestionTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "AskUserQuestionTool_1059", "prompt": "Call AskUserQuestionTool", "expected_tool": "AskUserQuestionTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "AskUserQuestionTool_7626", "prompt": "Call AskUserQuestionTool", "expected_tool": "AskUserQuestionTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "AskUserQuestionTool_5238", "prompt": "Call AskUserQuestionTool", "expected_tool": "AskUserQuestionTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "BashTool_5334", "prompt": "Please run npm test", "expected_tool": "BashTool", "expected_params": { "command": "npm test" }, "tool_description": "", "difficulty": "medium" }, { "test_id": "BashTool_8558", "prompt": "Run: git status", "expected_tool": "BashTool", "expected_params": { "command": "git status" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "BashTool_8252", "prompt": "Execute npm test", "expected_tool": "BashTool", "expected_params": { "command": "npm test" }, "tool_description": "", "difficulty": "medium" }, { "test_id": "BashTool_4920", "prompt": "Run: make build", "expected_tool": "BashTool", "expected_params": { "command": "make build" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "BashTool_6768", "prompt": "Please run ls -la", "expected_tool": "BashTool", "expected_params": { "command": "ls -la" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "BriefTool_3514", "prompt": "Execute BriefTool", "expected_tool": "BriefTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "BriefTool_2493", "prompt": "Call BriefTool", "expected_tool": "BriefTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "BriefTool_3819", "prompt": "Use BriefTool to do something", "expected_tool": "BriefTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "BriefTool_8934", "prompt": "Call BriefTool", "expected_tool": "BriefTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "BriefTool_1272", "prompt": "Use BriefTool to do something", "expected_tool": "BriefTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ConfigTool_6666", "prompt": "Execute ConfigTool", "expected_tool": "ConfigTool", "expected_params": {}, "tool_description": "Generate the prompt documentation from the registry", "difficulty": "hard" }, { "test_id": "ConfigTool_6890", "prompt": "Use ConfigTool to do something", "expected_tool": "ConfigTool", "expected_params": {}, "tool_description": "Generate the prompt documentation from the registry", "difficulty": "hard" }, { "test_id": "ConfigTool_7721", "prompt": "Call ConfigTool", "expected_tool": "ConfigTool", "expected_params": {}, "tool_description": "Generate the prompt documentation from the registry", "difficulty": "medium" }, { "test_id": "ConfigTool_4292", "prompt": "Call ConfigTool", "expected_tool": "ConfigTool", "expected_params": {}, "tool_description": "Generate the prompt documentation from the registry", "difficulty": "medium" }, { "test_id": "ConfigTool_1324", "prompt": "Call ConfigTool", "expected_tool": "ConfigTool", "expected_params": {}, "tool_description": "Generate the prompt documentation from the registry", "difficulty": "medium" }, { "test_id": "EnterPlanModeTool_8599", "prompt": "Call EnterPlanModeTool", "expected_tool": "EnterPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "EnterPlanModeTool_3574", "prompt": "Call EnterPlanModeTool", "expected_tool": "EnterPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "EnterPlanModeTool_9203", "prompt": "Execute EnterPlanModeTool", "expected_tool": "EnterPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "EnterPlanModeTool_9218", "prompt": "Call EnterPlanModeTool", "expected_tool": "EnterPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "EnterPlanModeTool_2127", "prompt": "Call EnterPlanModeTool", "expected_tool": "EnterPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "EnterWorktreeTool_7189", "prompt": "Call EnterWorktreeTool", "expected_tool": "EnterWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "EnterWorktreeTool_8708", "prompt": "Use EnterWorktreeTool to do something", "expected_tool": "EnterWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "EnterWorktreeTool_4825", "prompt": "Execute EnterWorktreeTool", "expected_tool": "EnterWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "EnterWorktreeTool_6763", "prompt": "Use EnterWorktreeTool to do something", "expected_tool": "EnterWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "EnterWorktreeTool_6925", "prompt": "Execute EnterWorktreeTool", "expected_tool": "EnterWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ExitPlanModeTool_7793", "prompt": "Use ExitPlanModeTool to do something", "expected_tool": "ExitPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ExitPlanModeTool_2469", "prompt": "Use ExitPlanModeTool to do something", "expected_tool": "ExitPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "ExitPlanModeTool_8270", "prompt": "Call ExitPlanModeTool", "expected_tool": "ExitPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "ExitPlanModeTool_7710", "prompt": "Call ExitPlanModeTool", "expected_tool": "ExitPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "ExitPlanModeTool_8976", "prompt": "Call ExitPlanModeTool", "expected_tool": "ExitPlanModeTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "ExitWorktreeTool_1725", "prompt": "Use ExitWorktreeTool to do something", "expected_tool": "ExitWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "ExitWorktreeTool_2783", "prompt": "Use ExitWorktreeTool to do something", "expected_tool": "ExitWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "ExitWorktreeTool_5194", "prompt": "Use ExitWorktreeTool to do something", "expected_tool": "ExitWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ExitWorktreeTool_7674", "prompt": "Execute ExitWorktreeTool", "expected_tool": "ExitWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "ExitWorktreeTool_1423", "prompt": "Call ExitWorktreeTool", "expected_tool": "ExitWorktreeTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "FileEditTool_3126", "prompt": "Execute FileEditTool", "expected_tool": "FileEditTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "FileEditTool_7681", "prompt": "Use FileEditTool to do something", "expected_tool": "FileEditTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileEditTool_2291", "prompt": "Call FileEditTool", "expected_tool": "FileEditTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileEditTool_6300", "prompt": "Execute FileEditTool", "expected_tool": "FileEditTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileEditTool_9155", "prompt": "Execute FileEditTool", "expected_tool": "FileEditTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileReadTool_5478", "prompt": "Read README.md", "expected_tool": "FileReadTool", "expected_params": { "file_path": "README.md" }, "tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.", "difficulty": "easy" }, { "test_id": "FileReadTool_2066", "prompt": "Show me the contents of src/main.py", "expected_tool": "FileReadTool", "expected_params": { "file_path": "src/main.py" }, "tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.", "difficulty": "hard" }, { "test_id": "FileReadTool_2018", "prompt": "Show me the contents of config.yaml", "expected_tool": "FileReadTool", "expected_params": { "file_path": "config.yaml" }, "tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.", "difficulty": "easy" }, { "test_id": "FileReadTool_7683", "prompt": "Show me the contents of tests/test_api.py", "expected_tool": "FileReadTool", "expected_params": { "file_path": "tests/test_api.py" }, "tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.", "difficulty": "hard" }, { "test_id": "FileReadTool_4435", "prompt": "Show me the contents of README.md", "expected_tool": "FileReadTool", "expected_params": { "file_path": "README.md" }, "tool_description": "Renders the Read tool prompt template. The caller (FileReadTool) supplies\nthe runtime-computed parts.", "difficulty": "hard" }, { "test_id": "FileWriteTool_3729", "prompt": "Create a new file README.md with content: console.log('test');", "expected_tool": "FileWriteTool", "expected_params": { "file_path": "README.md", "content": "console.log('test');" }, "tool_description": "", "difficulty": "medium" }, { "test_id": "FileWriteTool_2575", "prompt": "Create a new file config.yaml with content: console.log('test');", "expected_tool": "FileWriteTool", "expected_params": { "file_path": "config.yaml", "content": "console.log('test');" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileWriteTool_7630", "prompt": "Write this to src/main.py: console.log('test');", "expected_tool": "FileWriteTool", "expected_params": { "file_path": "src/main.py", "content": "console.log('test');" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileWriteTool_6387", "prompt": "Save the following as README.md: console.log('test');", "expected_tool": "FileWriteTool", "expected_params": { "file_path": "README.md", "content": "console.log('test');" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "FileWriteTool_1230", "prompt": "Save the following as src/index.js: console.log('test');", "expected_tool": "FileWriteTool", "expected_params": { "file_path": "src/index.js", "content": "console.log('test');" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "GlobTool_9441", "prompt": "Find all **/*.py files", "expected_tool": "GlobTool", "expected_params": { "pattern": "**/*.py" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "GlobTool_6788", "prompt": "List files matching **/*.test.*", "expected_tool": "GlobTool", "expected_params": { "pattern": "**/*.test.*" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "GlobTool_5774", "prompt": "Find all **/*.md files", "expected_tool": "GlobTool", "expected_params": { "pattern": "**/*.md" }, "tool_description": "", "difficulty": "medium" }, { "test_id": "GlobTool_8080", "prompt": "Search for files like src/**/*.ts", "expected_tool": "GlobTool", "expected_params": { "pattern": "src/**/*.ts" }, "tool_description": "", "difficulty": "hard" }, { "test_id": "GlobTool_8749", "prompt": "Search for files like **/*.py", "expected_tool": "GlobTool", "expected_params": { "pattern": "**/*.py" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "GrepTool_5985", "prompt": "Search for **/*.js in lib", "expected_tool": "GrepTool", "expected_params": { "pattern": "**/*.js", "directory": "lib" }, "tool_description": "", "difficulty": "medium" }, { "test_id": "GrepTool_8524", "prompt": "Locate **/*.test.* in the codebase", "expected_tool": "GrepTool", "expected_params": { "pattern": "**/*.test.*" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "GrepTool_1452", "prompt": "Locate src/**/*.ts in the codebase", "expected_tool": "GrepTool", "expected_params": { "pattern": "src/**/*.ts" }, "tool_description": "", "difficulty": "medium" }, { "test_id": "GrepTool_5666", "prompt": "Search for **/*.md in tests", "expected_tool": "GrepTool", "expected_params": { "pattern": "**/*.md", "directory": "tests" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "GrepTool_4387", "prompt": "Find all lib/**/*.py", "expected_tool": "GrepTool", "expected_params": { "pattern": "lib/**/*.py" }, "tool_description": "", "difficulty": "easy" }, { "test_id": "LSPTool_6162", "prompt": "Execute LSPTool", "expected_tool": "LSPTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "LSPTool_4317", "prompt": "Execute LSPTool", "expected_tool": "LSPTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "LSPTool_6968", "prompt": "Call LSPTool", "expected_tool": "LSPTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "LSPTool_3243", "prompt": "Call LSPTool", "expected_tool": "LSPTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "LSPTool_8575", "prompt": "Call LSPTool", "expected_tool": "LSPTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "ListMcpResourcesTool_7113", "prompt": "Call ListMcpResourcesTool", "expected_tool": "ListMcpResourcesTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ListMcpResourcesTool_4269", "prompt": "Execute ListMcpResourcesTool", "expected_tool": "ListMcpResourcesTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ListMcpResourcesTool_9727", "prompt": "Execute ListMcpResourcesTool", "expected_tool": "ListMcpResourcesTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ListMcpResourcesTool_3347", "prompt": "Execute ListMcpResourcesTool", "expected_tool": "ListMcpResourcesTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "ListMcpResourcesTool_7536", "prompt": "Call ListMcpResourcesTool", "expected_tool": "ListMcpResourcesTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "MCPTool_6306", "prompt": "Call MCPTool", "expected_tool": "MCPTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "MCPTool_2877", "prompt": "Use MCPTool to do something", "expected_tool": "MCPTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "MCPTool_1314", "prompt": "Execute MCPTool", "expected_tool": "MCPTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "MCPTool_3246", "prompt": "Call MCPTool", "expected_tool": "MCPTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "MCPTool_1594", "prompt": "Call MCPTool", "expected_tool": "MCPTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "NotebookEditTool_3700", "prompt": "Call NotebookEditTool", "expected_tool": "NotebookEditTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "NotebookEditTool_5392", "prompt": "Execute NotebookEditTool", "expected_tool": "NotebookEditTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "NotebookEditTool_4026", "prompt": "Execute NotebookEditTool", "expected_tool": "NotebookEditTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "NotebookEditTool_7197", "prompt": "Execute NotebookEditTool", "expected_tool": "NotebookEditTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "NotebookEditTool_8062", "prompt": "Use NotebookEditTool to do something", "expected_tool": "NotebookEditTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "PowerShellTool_3052", "prompt": "Call PowerShellTool", "expected_tool": "PowerShellTool", "expected_params": {}, "tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.", "difficulty": "hard" }, { "test_id": "PowerShellTool_1988", "prompt": "Use PowerShellTool to do something", "expected_tool": "PowerShellTool", "expected_params": {}, "tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.", "difficulty": "hard" }, { "test_id": "PowerShellTool_5390", "prompt": "Call PowerShellTool", "expected_tool": "PowerShellTool", "expected_params": {}, "tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.", "difficulty": "easy" }, { "test_id": "PowerShellTool_5212", "prompt": "Execute PowerShellTool", "expected_tool": "PowerShellTool", "expected_params": {}, "tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.", "difficulty": "easy" }, { "test_id": "PowerShellTool_7271", "prompt": "Use PowerShellTool to do something", "expected_tool": "PowerShellTool", "expected_params": {}, "tool_description": "Version-specific syntax guidance. The model's training data covers both\neditions but it can't tell which one it's targeting, so it either emits\npwsh-7 syntax on 5.1 (parser error \u2192 exit 1) or needlessly avoids && on 7.", "difficulty": "hard" }, { "test_id": "ReadMcpResourceTool_2705", "prompt": "Execute ReadMcpResourceTool", "expected_tool": "ReadMcpResourceTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ReadMcpResourceTool_2891", "prompt": "Call ReadMcpResourceTool", "expected_tool": "ReadMcpResourceTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "ReadMcpResourceTool_7780", "prompt": "Execute ReadMcpResourceTool", "expected_tool": "ReadMcpResourceTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "ReadMcpResourceTool_2602", "prompt": "Use ReadMcpResourceTool to do something", "expected_tool": "ReadMcpResourceTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ReadMcpResourceTool_3579", "prompt": "Use ReadMcpResourceTool to do something", "expected_tool": "ReadMcpResourceTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "RemoteTriggerTool_1621", "prompt": "Call RemoteTriggerTool", "expected_tool": "RemoteTriggerTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "RemoteTriggerTool_1093", "prompt": "Call RemoteTriggerTool", "expected_tool": "RemoteTriggerTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "RemoteTriggerTool_2536", "prompt": "Execute RemoteTriggerTool", "expected_tool": "RemoteTriggerTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "RemoteTriggerTool_5464", "prompt": "Use RemoteTriggerTool to do something", "expected_tool": "RemoteTriggerTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "RemoteTriggerTool_6075", "prompt": "Use RemoteTriggerTool to do something", "expected_tool": "RemoteTriggerTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "ScheduleCronTool_4909", "prompt": "Execute ScheduleCronTool", "expected_tool": "ScheduleCronTool", "expected_params": {}, "tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul", "difficulty": "hard" }, { "test_id": "ScheduleCronTool_6081", "prompt": "Use ScheduleCronTool to do something", "expected_tool": "ScheduleCronTool", "expected_params": {}, "tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul", "difficulty": "medium" }, { "test_id": "ScheduleCronTool_6686", "prompt": "Call ScheduleCronTool", "expected_tool": "ScheduleCronTool", "expected_params": {}, "tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul", "difficulty": "medium" }, { "test_id": "ScheduleCronTool_5952", "prompt": "Execute ScheduleCronTool", "expected_tool": "ScheduleCronTool", "expected_params": {}, "tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul", "difficulty": "easy" }, { "test_id": "ScheduleCronTool_3141", "prompt": "Execute ScheduleCronTool", "expected_tool": "ScheduleCronTool", "expected_params": {}, "tool_description": "Unified gate for the cron scheduling system. Combines the build-time\n`feature('AGENT_TRIGGERS')` flag (dead code elimination) with the runtime\n`tengu_kairos_cron` GrowthBook gate on a 5-minute refresh window.\n\nAGENT_TRIGGERS is independently shippable from KAIROS \u2014 the cron module\ngraph (cronSchedul", "difficulty": "easy" }, { "test_id": "SendMessageTool_7741", "prompt": "Call SendMessageTool", "expected_tool": "SendMessageTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "SendMessageTool_4050", "prompt": "Execute SendMessageTool", "expected_tool": "SendMessageTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "SendMessageTool_5206", "prompt": "Execute SendMessageTool", "expected_tool": "SendMessageTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "SendMessageTool_9082", "prompt": "Execute SendMessageTool", "expected_tool": "SendMessageTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SendMessageTool_5311", "prompt": "Call SendMessageTool", "expected_tool": "SendMessageTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SkillTool_7072", "prompt": "Call SkillTool", "expected_tool": "SkillTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SkillTool_7411", "prompt": "Use SkillTool to do something", "expected_tool": "SkillTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SkillTool_9504", "prompt": "Execute SkillTool", "expected_tool": "SkillTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "SkillTool_9091", "prompt": "Execute SkillTool", "expected_tool": "SkillTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SkillTool_8646", "prompt": "Use SkillTool to do something", "expected_tool": "SkillTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "SleepTool_2251", "prompt": "Execute SleepTool", "expected_tool": "SleepTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SleepTool_6839", "prompt": "Use SleepTool to do something", "expected_tool": "SleepTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "SleepTool_6919", "prompt": "Use SleepTool to do something", "expected_tool": "SleepTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "SleepTool_5972", "prompt": "Execute SleepTool", "expected_tool": "SleepTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "SleepTool_3416", "prompt": "Execute SleepTool", "expected_tool": "SleepTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskCreateTool_6455", "prompt": "Call TaskCreateTool", "expected_tool": "TaskCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskCreateTool_1883", "prompt": "Use TaskCreateTool to do something", "expected_tool": "TaskCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskCreateTool_4722", "prompt": "Call TaskCreateTool", "expected_tool": "TaskCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskCreateTool_5107", "prompt": "Use TaskCreateTool to do something", "expected_tool": "TaskCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskCreateTool_9309", "prompt": "Call TaskCreateTool", "expected_tool": "TaskCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskGetTool_2106", "prompt": "Call TaskGetTool", "expected_tool": "TaskGetTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskGetTool_7353", "prompt": "Execute TaskGetTool", "expected_tool": "TaskGetTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskGetTool_5818", "prompt": "Execute TaskGetTool", "expected_tool": "TaskGetTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskGetTool_8987", "prompt": "Call TaskGetTool", "expected_tool": "TaskGetTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskGetTool_1243", "prompt": "Call TaskGetTool", "expected_tool": "TaskGetTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskListTool_2131", "prompt": "Use TaskListTool to do something", "expected_tool": "TaskListTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskListTool_6609", "prompt": "Call TaskListTool", "expected_tool": "TaskListTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskListTool_8918", "prompt": "Use TaskListTool to do something", "expected_tool": "TaskListTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskListTool_3153", "prompt": "Execute TaskListTool", "expected_tool": "TaskListTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskListTool_9305", "prompt": "Use TaskListTool to do something", "expected_tool": "TaskListTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskOutputTool_9136", "prompt": "Use TaskOutputTool to do something", "expected_tool": "TaskOutputTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskOutputTool_6266", "prompt": "Use TaskOutputTool to do something", "expected_tool": "TaskOutputTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskOutputTool_1758", "prompt": "Call TaskOutputTool", "expected_tool": "TaskOutputTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskOutputTool_5708", "prompt": "Call TaskOutputTool", "expected_tool": "TaskOutputTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskOutputTool_6261", "prompt": "Execute TaskOutputTool", "expected_tool": "TaskOutputTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskStopTool_1356", "prompt": "Call TaskStopTool", "expected_tool": "TaskStopTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskStopTool_9732", "prompt": "Execute TaskStopTool", "expected_tool": "TaskStopTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskStopTool_5560", "prompt": "Call TaskStopTool", "expected_tool": "TaskStopTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskStopTool_1508", "prompt": "Call TaskStopTool", "expected_tool": "TaskStopTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TaskStopTool_1536", "prompt": "Use TaskStopTool to do something", "expected_tool": "TaskStopTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskUpdateTool_4080", "prompt": "Execute TaskUpdateTool", "expected_tool": "TaskUpdateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskUpdateTool_8394", "prompt": "Execute TaskUpdateTool", "expected_tool": "TaskUpdateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskUpdateTool_6087", "prompt": "Call TaskUpdateTool", "expected_tool": "TaskUpdateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TaskUpdateTool_9395", "prompt": "Use TaskUpdateTool to do something", "expected_tool": "TaskUpdateTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TaskUpdateTool_5167", "prompt": "Call TaskUpdateTool", "expected_tool": "TaskUpdateTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TeamCreateTool_9102", "prompt": "Use TeamCreateTool to do something", "expected_tool": "TeamCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TeamCreateTool_9269", "prompt": "Call TeamCreateTool", "expected_tool": "TeamCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TeamCreateTool_8424", "prompt": "Use TeamCreateTool to do something", "expected_tool": "TeamCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TeamCreateTool_8193", "prompt": "Use TeamCreateTool to do something", "expected_tool": "TeamCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TeamCreateTool_5576", "prompt": "Call TeamCreateTool", "expected_tool": "TeamCreateTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TeamDeleteTool_2955", "prompt": "Execute TeamDeleteTool", "expected_tool": "TeamDeleteTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TeamDeleteTool_6029", "prompt": "Use TeamDeleteTool to do something", "expected_tool": "TeamDeleteTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TeamDeleteTool_6039", "prompt": "Execute TeamDeleteTool", "expected_tool": "TeamDeleteTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TeamDeleteTool_4346", "prompt": "Use TeamDeleteTool to do something", "expected_tool": "TeamDeleteTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TeamDeleteTool_7920", "prompt": "Call TeamDeleteTool", "expected_tool": "TeamDeleteTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "TodoWriteTool_8435", "prompt": "Use TodoWriteTool to do something", "expected_tool": "TodoWriteTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TodoWriteTool_4402", "prompt": "Use TodoWriteTool to do something", "expected_tool": "TodoWriteTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TodoWriteTool_7554", "prompt": "Execute TodoWriteTool", "expected_tool": "TodoWriteTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "TodoWriteTool_3137", "prompt": "Use TodoWriteTool to do something", "expected_tool": "TodoWriteTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "TodoWriteTool_5772", "prompt": "Call TodoWriteTool", "expected_tool": "TodoWriteTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "ToolSearchTool_4685", "prompt": "Call ToolSearchTool", "expected_tool": "ToolSearchTool", "expected_params": {}, "tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi", "difficulty": "easy" }, { "test_id": "ToolSearchTool_8253", "prompt": "Use ToolSearchTool to do something", "expected_tool": "ToolSearchTool", "expected_params": {}, "tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi", "difficulty": "medium" }, { "test_id": "ToolSearchTool_2353", "prompt": "Call ToolSearchTool", "expected_tool": "ToolSearchTool", "expected_params": {}, "tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi", "difficulty": "easy" }, { "test_id": "ToolSearchTool_5736", "prompt": "Execute ToolSearchTool", "expected_tool": "ToolSearchTool", "expected_params": {}, "tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi", "difficulty": "easy" }, { "test_id": "ToolSearchTool_8159", "prompt": "Call ToolSearchTool", "expected_tool": "ToolSearchTool", "expected_params": {}, "tool_description": "Check if a tool should be deferred (requires ToolSearch to load).\nA tool is deferred if:\n- It's an MCP tool (always deferred - workflow-specific)\n- It has shouldDefer: true\n\nA tool is NEVER deferred if it has alwaysLoad: true (MCP tools set this via\n_meta['anthropic/alwaysLoad']). This check runs fi", "difficulty": "hard" }, { "test_id": "WebFetchTool_8507", "prompt": "Execute WebFetchTool", "expected_tool": "WebFetchTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "WebFetchTool_2518", "prompt": "Use WebFetchTool to do something", "expected_tool": "WebFetchTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "WebFetchTool_7285", "prompt": "Use WebFetchTool to do something", "expected_tool": "WebFetchTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "WebFetchTool_4143", "prompt": "Execute WebFetchTool", "expected_tool": "WebFetchTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "WebFetchTool_2209", "prompt": "Call WebFetchTool", "expected_tool": "WebFetchTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "WebSearchTool_5308", "prompt": "Use WebSearchTool to do something", "expected_tool": "WebSearchTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "WebSearchTool_7978", "prompt": "Use WebSearchTool to do something", "expected_tool": "WebSearchTool", "expected_params": {}, "tool_description": "", "difficulty": "easy" }, { "test_id": "WebSearchTool_4077", "prompt": "Use WebSearchTool to do something", "expected_tool": "WebSearchTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" }, { "test_id": "WebSearchTool_8521", "prompt": "Use WebSearchTool to do something", "expected_tool": "WebSearchTool", "expected_params": {}, "tool_description": "", "difficulty": "medium" }, { "test_id": "WebSearchTool_5236", "prompt": "Execute WebSearchTool", "expected_tool": "WebSearchTool", "expected_params": {}, "tool_description": "", "difficulty": "hard" } ]