File size: 10,332 Bytes

b6ae7b8

#!/usr/bin/env python3
"""
Integration Tests for Stack 2.9 Self-Evolution
Self-improvement cycle tests.
"""

import pytest
import sys
from pathlib import Path
from unittest.mock import MagicMock, patch, AsyncMock

# Add stack_cli to path
sys.path.insert(0, str(Path(__file__).parent.parent / "stack_cli"))

from stack_cli.agent import StackAgent, SelfReflection, create_agent, AgentResponse


class TestSelfReflection:
    """Test self-reflection functionality."""

    def test_reflection_high_confidence(self):
        """Test reflection with high confidence."""
        sr = SelfReflection()
        
        tool_calls = [
            MagicMock(success=True, tool_name="read"),
            MagicMock(success=True, tool_name="write")
        ]
        
        result = sr.reflect("test query", tool_calls, "Good response with content")
        
        assert result["needs_reflection"] is False
        assert result["confidence"] >= 0.7

    def test_reflection_low_confidence(self):
        """Test reflection with failures."""
        sr = SelfReflection()
        
        tool_calls = [
            MagicMock(success=False, tool_name="read", error="Not found")
        ]
        
        result = sr.reflect("test query", tool_calls, "Short")
        
        assert result["needs_reflection"] is True
        assert result["failed_calls"] > 0

    def test_reflection_suggestion(self):
        """Test reflection provides suggestions."""
        sr = SelfReflection()
        
        tool_calls = [
            MagicMock(success=False, tool_name="read", error="Failed")
        ]
        
        result = sr.reflect("test query", tool_calls, "Short")
        
        assert result.get("suggestion") is not None


class TestSelfImprovementCycle:
    """Test self-improvement cycle."""

    def test_agent_learns_from_errors(self):
        """Test agent learns from errors."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                # First call fails
                call_count = [0]
                
                def tool_func(**kwargs):
                    call_count[0] += 1
                    if call_count[0] == 1:
                        return {"success": False, "error": "First attempt failed"}
                    return {"success": True}
                
                mock_get_tool.return_value = tool_func
                
                agent = StackAgent()
                
                # First attempt
                response1 = agent.process("read test.py")
                
                # Should have some response even on failure
                assert response1 is not None

    def test_conversation_history_tracking(self):
        """Test conversation history is tracked."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                # Process multiple queries
                agent.process("query 1")
                agent.process("query 2")
                agent.process("query 3")
                
                assert len(agent.conversation_history) == 3

    def test_reflection_updates_confidence(self):
        """Test reflection updates confidence score."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                response = agent.process("test query")
                
                # Confidence should be set
                assert response.confidence is not None
                assert 0.0 <= response.confidence <= 1.0


class TestAdaptiveToolSelection:
    """Test adaptive tool selection."""

    def test_tool_selection_based_on_intent(self):
        """Test tools are selected based on intent."""
        from stack_cli.agent import ToolSelector
        
        ts = ToolSelector()
        
        # Different intents should select different tools
        tools_file = ts.select("file_read", {})
        tools_git = ts.select("git_operation", {})
        tools_web = ts.select("web_search", {})
        
        assert tools_file != tools_git
        assert tools_git != tools_web

    def test_parameter_extraction_improves(self):
        """Test parameter extraction works across queries."""
        from stack_cli.agent import ToolSelector
        
        ts = ToolSelector()
        
        # Same tool with different queries
        params1 = ts.get_tool_parameters("read", "read test.py", {})
        params2 = ts.get_tool_parameters("read", "view main.py", {})
        
        # Both should extract path
        assert "path" in params1 or "path" in params2


class TestContextAwareImprovement:
    """Test context-aware improvements."""

    def test_context_influences_response(self):
        """Test context influences response generation."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True, "content": "data"})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                response1 = agent.process("read test.py")
                response2 = agent.process("read test.py")
                
                # Responses should be consistent
                assert response1.content is not None

    def test_session_memory_persists(self):
        """Test session memory persists across queries."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                # Process query
                agent.process("first query")
                
                # Session should have recorded tool usage
                session = agent.context_manager.session
                assert session is not None


class TestSelfEvolutionIntegration:
    """Test complete self-evolution integration."""

    def test_full_self_improvement_loop(self):
        """Test complete self-improvement loop."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                # 1. Process query
                response = agent.process("test query")
                
                # 2. Check reflection
                assert response is not None
                
                # 3. History is updated
                assert len(agent.conversation_history) > 0

    def test_error_recovery_improves(self):
        """Test error recovery improves over time."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                error_count = [0]
                
                def tool_func(**kwargs):
                    error_count[0] += 1
                    if error_count[0] <= 2:
                        return {"success": False, "error": f"Error {error_count[0]}"}
                    return {"success": True}
                
                mock_get_tool.return_value = tool_func
                
                agent = StackAgent()
                
                # First attempts may fail
                for _ in range(3):
                    agent.process("test")
                
                # Should have recorded history
                assert len(agent.conversation_history) >= 0

    def test_performance_tracking(self):
        """Test performance is tracked."""
        with patch('stack_cli.context.create_context_manager'):
            agent = StackAgent()
            
            # Get session summary
            summary = agent.context_manager.session.get_summary()
            
            assert "messages_count" in summary
            assert "tools_used_count" in summary


class TestContinuousLearning:
    """Test continuous learning aspects."""

    def test_query_patterns_learned(self):
        """Test query patterns are tracked."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                # Similar queries
                agent.process("read file1.py")
                agent.process("read file2.py")
                agent.process("read file3.py")
                
                # History shows pattern
                history = agent.conversation_history
                assert len(history) == 3

    def test_tool_usage_stats(self):
        """Test tool usage statistics."""
        with patch('stack_cli.context.create_context_manager'):
            with patch('stack_cli.tools.get_tool') as mock_get_tool:
                mock_tool = MagicMock(return_value={"success": True})
                mock_get_tool.return_value = mock_tool
                
                agent = StackAgent()
                
                # Use various tools
                agent.process("read test.py")
                agent.process("run pytest")
                agent.process("git status")
                
                # Session tracks tools
                session = agent.context_manager.session
                assert session is not None


if __name__ == "__main__":
    pytest.main([__file__, "-v"])