Stack-2-9-finetuned / samples /benchmarks /test_token_efficiency.py

walidsobhie-code

refactor: Squeeze folders further - cleaner structure

65888d5 22 days ago

9.75 kB

	#!/usr/bin/env python3
	"""
	Benchmarks for Stack 2.9 - Token Efficiency Tests
	Token optimization benchmarks.
	"""

	import pytest
	import sys
	from pathlib import Path
	from unittest.mock import MagicMock, patch

	# Prepend "<parent of this file's directory>/stack_cli" to sys.path so the
	# project imports below can be attempted without installing the package.
	# NOTE(review): the imports below use the `stack_cli.` package prefix, which
	# resolves only if the directory *containing* stack_cli is importable —
	# confirm this entry matches the current folder layout.
	sys.path.insert(0, str(Path(__file__).parent.parent / "stack_cli"))

	from stack_cli.agent import StackAgent, create_agent, AgentResponse


	class TestTokenUsage:
	    """Token usage checks for agent responses and workspace context."""

	    def test_response_token_efficiency(self):
	        """Processing a read request yields a non-empty response body."""
	        with patch('stack_cli.context.create_context_manager'), \
	                patch('stack_cli.tools.get_tool') as mock_get_tool:
	            # The patched get_tool returns a stub callable with a tiny payload.
	            mock_get_tool.return_value = MagicMock(
	                return_value={"success": True, "content": "x"}
	            )

	            agent = StackAgent()
	            reply = agent.process("read test.py")

	            # The reply must carry some content.
	            assert reply.content is not None
	            assert len(reply.content) > 0

	    def test_context_truncation(self):
	        """Workspace context built for /tmp comes back as a string."""
	        with patch('stack_cli.context.create_context_manager'), \
	                patch('stack_cli.context.Path'), \
	                patch.object(Path, 'exists', return_value=False):
	            from stack_cli.context import ContextManager

	            manager = ContextManager("/tmp")
	            workspace_context = manager.get_workspace_context()

	            # The context is expected as formatted text.
	            assert isinstance(workspace_context, str)


	class TestPromptEfficiency:
	    """Check the intent parser and tool selector on a simple read request."""

	    def test_intent_parsing_tokens(self):
	        """Parsing "read test.py" reports a ``file_read`` intent with a confidence."""
	        from stack_cli.agent import QueryUnderstanding

	        qu = QueryUnderstanding()
	        result = qu.parse("read test.py")

	        # The parse result must expose both keys and classify the query as a read.
	        assert "intent" in result
	        assert "confidence" in result
	        assert result["intent"] == "file_read"

	    def test_tool_selection_tokens(self):
	        """Selecting tools for ``file_read`` returns a non-empty list."""
	        from stack_cli.agent import ToolSelector

	        ts = ToolSelector()
	        tools = ts.select("file_read", {})

	        # The selector is expected to hand back a concrete list of tools;
	        # isinstance() needs that expected type as its second argument.
	        assert isinstance(tools, list)
	        assert len(tools) > 0


	class TestResponseEfficiency:
	    """Size checks on text produced by ResponseGenerator."""

	    def test_response_generation_size(self):
	        """One successful read call renders to a non-empty string under 10000 chars."""
	        with patch('stack_cli.context.create_context_manager'):
	            from stack_cli.agent import ResponseGenerator, ToolCall

	            generator = ResponseGenerator()
	            read_call = ToolCall(
	                tool_name="read",
	                arguments={"path": "test.py"},
	                result={"success": True, "content": "test content"},
	                success=True,
	            )

	            rendered = generator.generate([read_call], "file_read", {})

	            assert isinstance(rendered, str)
	            # Non-empty, yet below the 10000-character ceiling.
	            assert 0 < len(rendered) < 10000

	    def test_clarification_efficiency(self):
	        """A clarification prompt is a string shorter than 200 characters."""
	        with patch('stack_cli.context.create_context_manager'):
	            from stack_cli.agent import ResponseGenerator

	            generator = ResponseGenerator()
	            question = generator.generate_clarification("Which file?")

	            # Clarifications are meant to stay short.
	            assert isinstance(question, str)
	            assert len(question) < 200


	class TestContextTokenEfficiency:
	    """Upper bounds on the size of ContextManager output for /tmp."""

	    def test_context_summary_size(self):
	        """The context summary serializes to fewer than 10000 JSON characters."""
	        import json

	        with patch('stack_cli.context.create_context_manager'), \
	                patch('stack_cli.context.Path'), \
	                patch.object(Path, 'exists', return_value=False):
	            from stack_cli.context import ContextManager

	            manager = ContextManager("/tmp")
	            summary = manager.get_context_summary()

	            # json.dumps also fails the test if the summary is not serializable.
	            encoded = json.dumps(summary)
	            assert len(encoded) < 10000

	    def test_workspace_context_size(self):
	        """The workspace context is shorter than 10000 characters."""
	        with patch('stack_cli.context.create_context_manager'), \
	                patch('stack_cli.context.Path'), \
	                patch.object(Path, 'exists', return_value=False):
	            from stack_cli.context import ContextManager

	            manager = ContextManager("/tmp")
	            workspace_context = manager.get_workspace_context()

	            assert len(workspace_context) < 10000


	class TestToolSchemasEfficiency:
	    """Size and shape checks on the schemas from get_tool_schemas()."""

	    def test_schemas_compactness(self):
	        """All schemas together serialize to fewer than 50000 JSON characters."""
	        import json
	        from stack_cli.tools import get_tool_schemas

	        payload = json.dumps(get_tool_schemas())

	        assert len(payload) < 50000

	    def test_schema_required_fields(self):
	        """Each schema carries name, description and typed parameters."""
	        from stack_cli.tools import get_tool_schemas

	        for entry in get_tool_schemas():
	            # Top-level keys every schema must expose.
	            for key in ("name", "description", "parameters"):
	                assert key in entry

	            # The parameters object needs its own type and properties keys.
	            parameters = entry["parameters"]
	            for key in ("type", "properties"):
	                assert key in parameters


	class TestConversationEfficiency:
	    """Bounds on conversation history length and session summary size."""

	    def test_history_truncation(self):
	        """After 50 processed queries the history holds at most 100 entries."""
	        with patch('stack_cli.context.create_context_manager'), \
	                patch('stack_cli.tools.get_tool') as mock_get_tool:
	            # The patched get_tool returns a stub that reports success.
	            mock_get_tool.return_value = MagicMock(return_value={"success": True})

	            agent = StackAgent()
	            for index in range(50):
	                agent.process(f"query {index}")

	            # Upper bound on the recorded history length.
	            assert len(agent.conversation_history) <= 100

	    def test_summary_efficiency(self):
	        """A session summary after 10 user messages stays under 1000 JSON chars."""
	        import json

	        with patch('stack_cli.context.create_context_manager'):
	            agent = StackAgent()
	            session = agent.context_manager.session

	            # Seed the session with a handful of user messages.
	            for index in range(10):
	                session.add_message("user", f"message {index}")

	            encoded = json.dumps(session.get_summary())

	            assert len(encoded) < 1000


	class TestTokenOptimization:
	    """Size limits on a response built from a large tool result and on workspace context."""

	    def test_response_capping(self):
	        """A 10000-character tool result renders to fewer than 15000 characters."""
	        with patch('stack_cli.context.create_context_manager'):
	            from stack_cli.agent import ResponseGenerator, ToolCall

	            generator = ResponseGenerator()
	            oversized = "x" * 10000
	            read_call = ToolCall(
	                tool_name="read",
	                arguments={"path": "test.py"},
	                result={"success": True, "content": oversized},
	                success=True,
	            )

	            rendered = generator.generate([read_call], "file_read", {})

	            # The rendered response has to stay below the ceiling.
	            assert len(rendered) < 15000

	    def test_context_truncation_strategy(self):
	        """Workspace context for /tmp spans fewer than 100 lines."""
	        with patch('stack_cli.context.create_context_manager'), \
	                patch('stack_cli.context.Path'), \
	                patch.object(Path, 'exists', return_value=False):
	            from stack_cli.context import ContextManager

	            manager = ContextManager("/tmp")
	            workspace_context = manager.get_workspace_context()

	            # Count newline-separated lines of the generated context.
	            line_count = len(workspace_context.split('\n'))
	            assert line_count < 100


	if __name__ == "__main__":
	    # Hand pytest's exit status to the shell so a direct run of this file
	    # reports failure when any test fails, instead of always exiting 0.
	    raise SystemExit(pytest.main([__file__, "-v"]))