Stack-2-9-finetuned / samples /benchmarks /test_token_efficiency.py
walidsobhie-code
refactor: Squeeze folders further - cleaner structure
65888d5
#!/usr/bin/env python3
"""
Benchmarks for Stack 2.9 - Token Efficiency Tests
Token optimization benchmarks.
"""
import pytest
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch
# Add stack_cli to path
sys.path.insert(0, str(Path(__file__).parent.parent / "stack_cli"))
from stack_cli.agent import StackAgent, create_agent, AgentResponse
class TestTokenUsage:
    """Sanity checks on agent responses and generated workspace context."""

    def test_response_token_efficiency(self):
        """A simple read request must come back with non-empty content."""
        # Stand-in tool: calling it returns a tiny successful payload.
        fake_tool = MagicMock(return_value={"success": True, "content": "x"})

        # NOTE(review): both patches replace names on the defining modules;
        # confirm StackAgent resolves them there and not through references
        # it imported earlier.
        context_patch = patch('stack_cli.context.create_context_manager')
        tool_patch = patch('stack_cli.tools.get_tool', return_value=fake_tool)

        with context_patch, tool_patch:
            agent = StackAgent()
            reply = agent.process("read test.py")

            # The reply has to carry some content.
            assert reply.content is not None
            assert len(reply.content) > 0

    def test_context_truncation(self):
        """The workspace context is returned as a string."""
        factory_patch = patch('stack_cli.context.create_context_manager')
        module_path_patch = patch('stack_cli.context.Path')
        # Every pathlib existence check reports "missing" while this is active.
        exists_patch = patch.object(Path, 'exists', return_value=False)

        with factory_patch, module_path_patch, exists_patch:
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            workspace_text = manager.get_workspace_context()

            # The generated context must be a formatted string.
            assert isinstance(workspace_text, str)
class TestPromptEfficiency:
    """Checks that intent parsing and tool selection return usable results."""

    def test_intent_parsing_tokens(self):
        """Parsing "read test.py" yields an intent/confidence result."""
        from stack_cli.agent import QueryUnderstanding

        parser = QueryUnderstanding()
        result = parser.parse("read test.py")

        # The parsed result must expose both fields ...
        assert "intent" in result
        assert "confidence" in result
        # ... and classify the query as a file read.
        assert result["intent"] == "file_read"

    def test_tool_selection_tokens(self):
        """Selecting tools for the "file_read" intent yields a non-empty list."""
        from stack_cli.agent import ToolSelector

        selector = ToolSelector()
        tools = selector.select("file_read", {})

        # isinstance() requires the expected type as its second argument;
        # the selection is expected to be a list of tools.
        assert isinstance(tools, list)
        assert len(tools) > 0
class TestResponseEfficiency:
    """Size checks on text produced by the response generator."""

    def test_response_generation_size(self):
        """A response built from one successful read call is a bounded string."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator, ToolCall

            generator = ResponseGenerator()
            read_call = ToolCall(
                tool_name="read",
                arguments={"path": "test.py"},
                result={"success": True, "content": "test content"},
                success=True,
            )

            text = generator.generate([read_call], "file_read", {})

            # Reasonable output: a string that is neither empty nor
            # excessively long.
            assert isinstance(text, str)
            assert len(text) > 0
            assert len(text) < 10000

    def test_clarification_efficiency(self):
        """A clarification prompt is a string shorter than 200 characters."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator

            question = ResponseGenerator().generate_clarification("Which file?")

            # Clarifications are meant to stay concise.
            assert isinstance(question, str)
            assert len(question) < 200
class TestContextTokenEfficiency:
    """Size limits on what the context manager reports."""

    def test_context_summary_size(self):
        """The context summary serializes to fewer than 10000 JSON characters."""
        import json

        factory_patch = patch('stack_cli.context.create_context_manager')
        module_path_patch = patch('stack_cli.context.Path')
        # Every pathlib existence check reports "missing" while this is active.
        exists_patch = patch.object(Path, 'exists', return_value=False)

        with factory_patch, module_path_patch, exists_patch:
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            summary = manager.get_context_summary()

            # json.dumps doubles as the check that the summary is
            # JSON-serializable.
            encoded = json.dumps(summary)
            assert len(encoded) < 10000

    def test_workspace_context_size(self):
        """The workspace context stays under 10000 characters."""
        factory_patch = patch('stack_cli.context.create_context_manager')
        module_path_patch = patch('stack_cli.context.Path')
        exists_patch = patch.object(Path, 'exists', return_value=False)

        with factory_patch, module_path_patch, exists_patch:
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            workspace_text = manager.get_workspace_context()

            assert len(workspace_text) < 10000
class TestToolSchemasEfficiency:
    """Checks on the size and shape of the tool schemas."""

    def test_schemas_compactness(self):
        """All tool schemas together serialize to under 50000 JSON characters."""
        import json

        from stack_cli.tools import get_tool_schemas

        encoded = json.dumps(get_tool_schemas())

        assert len(encoded) < 50000

    def test_schema_required_fields(self):
        """Each schema carries its top-level keys and a typed parameter spec."""
        from stack_cli.tools import get_tool_schemas

        top_level_keys = ("name", "description", "parameters")
        parameter_keys = ("type", "properties")

        for schema in get_tool_schemas():
            # Minimal fields every schema has to provide.
            for key in top_level_keys:
                assert key in schema

            # The parameter spec needs at least a type and its properties.
            parameters = schema["parameters"]
            for key in parameter_keys:
                assert key in parameters
class TestConversationEfficiency:
    """Checks that conversation state does not grow without bound."""

    def test_history_truncation(self):
        """After 50 queries the history holds at most 100 entries."""
        fake_tool = MagicMock(return_value={"success": True})
        context_patch = patch('stack_cli.context.create_context_manager')
        tool_patch = patch('stack_cli.tools.get_tool', return_value=fake_tool)

        with context_patch, tool_patch:
            agent = StackAgent()

            # Drive a long conversation through the agent.
            for turn in range(50):
                agent.process(f"query {turn}")

            # The history may be truncated, but must not exceed the cap.
            assert len(agent.conversation_history) <= 100

    def test_summary_efficiency(self):
        """A session summary after 10 messages is under 1000 JSON characters."""
        import json

        with patch('stack_cli.context.create_context_manager'):
            agent = StackAgent()
            session = agent.context_manager.session

            # Seed the session with a handful of user messages.
            for number in range(10):
                session.add_message("user", f"message {number}")

            encoded = json.dumps(session.get_summary())

            # The summary is expected to stay compact.
            assert len(encoded) < 1000
class TestTokenOptimization:
    """Checks on response capping and context brevity."""

    def test_response_capping(self):
        """A response built from 10000 characters of content stays under 15000."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator, ToolCall

            generator = ResponseGenerator()

            # Oversized payload that the generator is expected to cap.
            oversized = "x" * 10000
            read_call = ToolCall(
                tool_name="read",
                arguments={"path": "test.py"},
                result={"success": True, "content": oversized},
                success=True,
            )

            text = generator.generate([read_call], "file_read", {})

            assert len(text) < 15000

    def test_context_truncation_strategy(self):
        """The workspace context has fewer than 100 newline-separated lines."""
        factory_patch = patch('stack_cli.context.create_context_manager')
        module_path_patch = patch('stack_cli.context.Path')
        # With every existence check reporting "missing", the context is
        # expected to be minimal.
        exists_patch = patch.object(Path, 'exists', return_value=False)

        with factory_patch, module_path_patch, exists_patch:
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            workspace_text = manager.get_workspace_context()

            # Conciseness is measured in newline-separated lines.
            line_total = len(workspace_text.split('\n'))
            assert line_total < 100
if __name__ == "__main__":
    # Run this file's tests verbosely and hand pytest's exit code to the
    # shell, so a failing run is visible to callers of the script.
    sys.exit(pytest.main([__file__, "-v"]))