File size: 9,747 Bytes
b6ae7b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
#!/usr/bin/env python3
"""
Benchmarks for Stack 2.9 - Token Efficiency Tests
Token optimization benchmarks.
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch

# Add stack_cli to path
sys.path.insert(0, str(Path(__file__).parent.parent / "stack_cli"))

from stack_cli.agent import StackAgent, create_agent, AgentResponse


class TestTokenUsage:
    """Benchmarks for the token footprint of basic agent operations."""

    def test_response_token_efficiency(self):
        """A simple read request produces a response with non-empty content."""
        context_patch = patch('stack_cli.context.create_context_manager')
        tool_patch = patch('stack_cli.tools.get_tool')
        with context_patch, tool_patch as mock_get_tool:
            # Every tool lookup resolves to a stub reporting a successful read.
            mock_get_tool.return_value = MagicMock(
                return_value={"success": True, "content": "x"}
            )

            reply = StackAgent().process("read test.py")

            # The agent must hand back some content.
            assert reply.content is not None
            assert len(reply.content) > 0

    def test_context_truncation(self):
        """The workspace context is returned as a string."""
        context_patch = patch('stack_cli.context.create_context_manager')
        path_patch = patch('stack_cli.context.Path')
        # Path.exists is forced to report False for the duration of the test.
        exists_patch = patch.object(Path, 'exists', return_value=False)
        with context_patch, path_patch, exists_patch:
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")
            workspace_context = manager.get_workspace_context()

            # The generated context is expected to be formatted text.
            assert isinstance(workspace_context, str)


class TestPromptEfficiency:
    """Test prompt efficiency."""

    def test_intent_parsing_tokens(self):
        """Test that parsing a read request reports intent and confidence."""
        from stack_cli.agent import QueryUnderstanding

        qu = QueryUnderstanding()

        # Parse should be efficient
        result = qu.parse("read test.py")

        # Result should have required fields
        assert "intent" in result
        assert "confidence" in result
        assert result["intent"] == "file_read"

    def test_tool_selection_tokens(self):
        """Test that selecting tools for a file read returns a non-empty list."""
        from stack_cli.agent import ToolSelector

        ts = ToolSelector()

        # Selection should be minimal
        tools = ts.select("file_read", {})

        # Should return list of tools. isinstance() requires the expected type
        # as its second argument; calling it with one argument raises TypeError
        # before the assertion is ever evaluated.
        assert isinstance(tools, list)
        assert len(tools) > 0


class TestResponseEfficiency:
    """Benchmarks for the size of generated responses."""

    def test_response_generation_size(self):
        """A response built from one successful read is non-empty and bounded."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator, ToolCall

            generator = ResponseGenerator()

            # A single successful "read" tool invocation to summarise.
            read_call = ToolCall(
                tool_name="read",
                arguments={"path": "test.py"},
                result={"success": True, "content": "test content"},
                success=True
            )

            output = generator.generate([read_call], "file_read", {})

            # Output must be text, non-empty, and below the 10000-char ceiling.
            assert isinstance(output, str)
            assert len(output) > 0
            assert len(output) < 10000

    def test_clarification_efficiency(self):
        """A clarification prompt is a string shorter than 200 characters."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator

            question = ResponseGenerator().generate_clarification("Which file?")

            # Clarifications are expected to stay short.
            assert isinstance(question, str)
            assert len(question) < 200


class TestContextTokenEfficiency:
    """Benchmarks for the size of context data produced by ContextManager."""

    def test_context_summary_size(self):
        """The JSON-serialized context summary stays under 10000 characters."""
        import json

        context_patch = patch('stack_cli.context.create_context_manager')
        path_patch = patch('stack_cli.context.Path')
        # Path.exists is forced to report False for the duration of the test.
        exists_patch = patch.object(Path, 'exists', return_value=False)
        with context_patch, path_patch, exists_patch:
            from stack_cli.context import ContextManager

            manager = ContextManager("/tmp")

            # json.dumps doubles as a check that the summary is serializable.
            payload = json.dumps(manager.get_context_summary())

            assert len(payload) < 10000

    def test_workspace_context_size(self):
        """The workspace context stays under 10000 characters."""
        context_patch = patch('stack_cli.context.create_context_manager')
        path_patch = patch('stack_cli.context.Path')
        exists_patch = patch.object(Path, 'exists', return_value=False)
        with context_patch, path_patch, exists_patch:
            from stack_cli.context import ContextManager

            workspace_context = ContextManager("/tmp").get_workspace_context()

            # Upper bound on the rendered context length.
            assert len(workspace_context) < 10000


class TestToolSchemasEfficiency:
    """Benchmarks for the size and shape of the tool schemas."""

    def test_schemas_compactness(self):
        """All tool schemas together serialize to under 50000 characters."""
        import json

        from stack_cli.tools import get_tool_schemas

        payload = json.dumps(get_tool_schemas())

        # Upper bound on the serialized schema size.
        assert len(payload) < 50000

    def test_schema_required_fields(self):
        """Every schema carries the expected top-level and parameter keys."""
        from stack_cli.tools import get_tool_schemas

        top_level_keys = ("name", "description", "parameters")
        parameter_keys = ("type", "properties")

        for schema in get_tool_schemas():
            # Top-level keys each schema must expose.
            for key in top_level_keys:
                assert key in schema

            # Keys the nested parameter description must expose.
            params = schema["parameters"]
            for key in parameter_keys:
                assert key in params


class TestConversationEfficiency:
    """Benchmarks for the growth of conversation state."""

    def test_history_truncation(self):
        """History holds at most 100 entries after 50 processed queries."""
        context_patch = patch('stack_cli.context.create_context_manager')
        tool_patch = patch('stack_cli.tools.get_tool')
        with context_patch, tool_patch as mock_get_tool:
            # Every tool lookup resolves to a stub reporting success.
            mock_get_tool.return_value = MagicMock(return_value={"success": True})

            agent = StackAgent()

            # Drive a long run of queries through the agent.
            for index in range(50):
                agent.process(f"query {index}")

            # The history is allowed to be truncated but must stay bounded.
            assert len(agent.conversation_history) <= 100

    def test_summary_efficiency(self):
        """A session summary after ten messages serializes to under 1000 chars."""
        with patch('stack_cli.context.create_context_manager'):
            session = StackAgent().context_manager.session

            # Populate the session with a handful of user messages.
            for index in range(10):
                session.add_message("user", f"message {index}")

            import json
            payload = json.dumps(session.get_summary())

            # The serialized summary is expected to remain compact.
            assert len(payload) < 1000


class TestTokenOptimization:
    """Benchmarks for strategies that keep output size bounded."""

    def test_response_capping(self):
        """A response built from a 10000-char read stays under 15000 chars."""
        with patch('stack_cli.context.create_context_manager'):
            from stack_cli.agent import ResponseGenerator, ToolCall

            generator = ResponseGenerator()

            # Oversized tool output that the response should not blow up on.
            long_content = "x" * 10000
            read_call = ToolCall(
                tool_name="read",
                arguments={"path": "test.py"},
                result={"success": True, "content": long_content},
                success=True
            )

            output = generator.generate([read_call], "file_read", {})

            # Upper bound on the generated response length.
            assert len(output) < 15000

    def test_context_truncation_strategy(self):
        """The workspace context spans fewer than 100 lines."""
        context_patch = patch('stack_cli.context.create_context_manager')
        path_patch = patch('stack_cli.context.Path')
        # Path.exists is forced to report False for the duration of the test.
        exists_patch = patch.object(Path, 'exists', return_value=False)
        with context_patch, path_patch, exists_patch:
            from stack_cli.context import ContextManager

            workspace_context = ContextManager("/tmp").get_workspace_context()

            # Count newline-separated lines of the rendered context.
            line_count = len(workspace_context.split('\n'))

            assert line_count < 100


if __name__ == "__main__":
    # Running the module directly executes just this file under pytest,
    # with verbose output.
    pytest_args = [__file__, "-v"]
    pytest.main(pytest_args)