File size: 9,747 Bytes

b6ae7b8

#!/usr/bin/env python3
"""
Benchmarks for Stack 2.9 - Token Efficiency Tests
Token optimization benchmarks.
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch

# Add stack_cli to path
sys.path.insert(0, str(Path(__file__).parent.parent / "stack_cli"))

from stack_cli.agent import StackAgent, create_agent, AgentResponse


class TestTokenUsage:
    """Basic checks on agent response content and workspace context output."""

    def test_response_token_efficiency(self):
        """Processing a read request yields a response with non-empty content."""
        with patch('stack_cli.context.create_context_manager'), \
                patch('stack_cli.tools.get_tool') as get_tool_patch:
            # Every call to the patched get_tool hands back this stub tool.
            fake_tool = MagicMock(return_value={"success": True, "content": "x"})
            get_tool_patch.return_value = fake_tool

            reply = StackAgent().process("read test.py")

            # The reply must carry some content.
            assert reply.content is not None
            assert len(reply.content) > 0

    def test_context_truncation(self):
        """Workspace context is returned as a string."""
        # Path is replaced inside stack_cli.context and Path.exists is forced
        # to report False for the duration of the test.
        with patch('stack_cli.context.create_context_manager'), \
                patch('stack_cli.context.Path'), \
                patch.object(Path, 'exists', return_value=False):
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            rendered = manager.get_workspace_context()

            # The context is expected to be formatted text.
            assert isinstance(rendered, str)


class TestPromptEfficiency:
    """Tests for intent parsing and tool selection output."""

    def test_intent_parsing_tokens(self):
        """Parsing "read test.py" reports the file_read intent.

        The parse result must expose both an "intent" and a "confidence"
        entry, and the intent must be "file_read".
        """
        from stack_cli.agent import QueryUnderstanding

        qu = QueryUnderstanding()

        result = qu.parse("read test.py")

        # Result should have required fields
        assert "intent" in result
        assert "confidence" in result
        assert result["intent"] == "file_read"

    def test_tool_selection_tokens(self):
        """Selecting tools for the file_read intent returns a non-empty list."""
        from stack_cli.agent import ToolSelector

        ts = ToolSelector()

        tools = ts.select("file_read", {})

        # isinstance() requires the expected type as its second argument; the
        # previous one-argument call raised TypeError before checking anything.
        assert isinstance(tools, list)
        assert len(tools) > 0


class TestResponseEfficiency:
    """Size checks on text produced by ResponseGenerator."""

    def test_response_generation_size(self):
        """A response built from one successful read call is a bounded string."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator, ToolCall

            generator = ResponseGenerator()
            read_call = ToolCall(
                tool_name="read",
                arguments={"path": "test.py"},
                result={"success": True, "content": "test content"},
                success=True,
            )

            text = generator.generate([read_call], "file_read", {})

            # Non-empty text, but well short of 10000 characters.
            assert isinstance(text, str)
            assert 0 < len(text) < 10000

    def test_clarification_efficiency(self):
        """A clarification prompt is a string shorter than 200 characters."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator

            prompt = ResponseGenerator().generate_clarification("Which file?")

            # Clarifications are meant to stay short.
            assert isinstance(prompt, str)
            assert len(prompt) < 200


class TestContextTokenEfficiency:
    """Size limits on what ContextManager reports."""

    def test_context_summary_size(self):
        """The context summary serializes to fewer than 10000 JSON characters."""
        import json

        with patch('stack_cli.context.create_context_manager'), \
                patch('stack_cli.context.Path'), \
                patch.object(Path, 'exists', return_value=False):
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")

            # json.dumps doubles as a check that the summary is serializable.
            encoded = json.dumps(manager.get_context_summary())

            assert len(encoded) < 10000

    def test_workspace_context_size(self):
        """The workspace context is shorter than 10000 characters."""
        with patch('stack_cli.context.create_context_manager'), \
                patch('stack_cli.context.Path'), \
                patch.object(Path, 'exists', return_value=False):
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            rendered = manager.get_workspace_context()

            assert len(rendered) < 10000


class TestToolSchemasEfficiency:
    """Size and shape checks on the tool schemas."""

    def test_schemas_compactness(self):
        """All schemas together serialize to fewer than 50000 JSON characters."""
        from stack_cli.tools import get_tool_schemas

        all_schemas = get_tool_schemas()

        import json
        encoded = json.dumps(all_schemas)

        assert len(encoded) < 50000

    def test_schema_required_fields(self):
        """Each schema has name/description/parameters with type and properties."""
        from stack_cli.tools import get_tool_schemas

        for entry in get_tool_schemas():
            # Top-level keys every schema must provide.
            for key in ("name", "description", "parameters"):
                assert key in entry

            # Keys required inside the parameters section.
            spec = entry["parameters"]
            for key in ("type", "properties"):
                assert key in spec


class TestConversationEfficiency:
    """Bounds on conversation history and session summary size."""

    def test_history_truncation(self):
        """After 50 queries the conversation history holds at most 100 entries."""
        with patch('stack_cli.context.create_context_manager'), \
                patch('stack_cli.tools.get_tool') as get_tool_patch:
            get_tool_patch.return_value = MagicMock(return_value={"success": True})

            agent = StackAgent()

            # Feed the agent a run of distinct queries.
            for query in (f"query {i}" for i in range(50)):
                agent.process(query)

            # The history must stay bounded rather than grow without limit.
            assert len(agent.conversation_history) <= 100

    def test_summary_efficiency(self):
        """A session summary after 10 messages is under 1000 JSON characters."""
        with patch('stack_cli.context.create_context_manager'):
            session = StackAgent().context_manager.session

            # Populate the session with a handful of user messages.
            for i in range(10):
                session.add_message("user", f"message {i}")

            digest = session.get_summary()

            import json
            encoded = json.dumps(digest)

            assert len(encoded) < 1000


class TestTokenOptimization:
    """Upper bounds on response length and workspace context line count."""

    def test_response_capping(self):
        """A response for a 10000-character read result stays under 15000 chars."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator, ToolCall

            generator = ResponseGenerator()

            # Oversized tool output that the generator is expected to cap.
            payload = "x" * 10000
            big_read = ToolCall(
                tool_name="read",
                arguments={"path": "test.py"},
                result={"success": True, "content": payload},
                success=True,
            )

            text = generator.generate([big_read], "file_read", {})

            assert len(text) < 15000

    def test_context_truncation_strategy(self):
        """The workspace context spans fewer than 100 lines."""
        with patch('stack_cli.context.create_context_manager'), \
                patch('stack_cli.context.Path'), \
                patch.object(Path, 'exists', return_value=False):
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")

            # Path.exists reports False throughout, so little context is expected.
            rendered = manager.get_workspace_context()
            line_count = len(rendered.split('\n'))

            assert line_count < 100


if __name__ == "__main__":
    # pytest.main() returns the exit code instead of exiting the process;
    # pass it to sys.exit() so a failing run produces a non-zero status.
    sys.exit(pytest.main([__file__, "-v"]))