| """ |
| Stack 2.9 - Pattern-Based AI Coding Assistant |
| HuggingFace Spaces Demo |
| |
| A Gradio interface for Stack 2.9 powered by Qwen2.5-Coder-7B |
| with tool integration and pattern memory. |
| """ |
|
|
import json
import os
import time
from datetime import datetime
from typing import Dict, Generator, List, Optional

import gradio as gr
|
|
| |
| |
| |
|
|
class SelfEvolutionMemory:
    """In-memory pattern store that records chat interactions for the demo.

    Nothing is persisted: all state lives on the instance.

    Attributes:
        conversations: One dict per recorded interaction (timestamp, user
            input, assistant response, tools used, interaction id).
        learned_patterns: Maps a tool name to ``{"count", "contexts",
            "last_used"}`` usage data.
        code_snippets: Responses that contained a Markdown code fence.
        preferences: Reserved dict; not written by any method of this class.
        interaction_count: Number of interactions recorded so far.
    """

    def __init__(self):
        self.conversations = []
        self.learned_patterns = {}
        self.code_snippets = []
        self.preferences = {}
        self.interaction_count = 0

    def add_interaction(
        self,
        user_input: str,
        assistant_response: str,
        tools_used: Optional[List[str]] = None,
    ):
        """Record one user/assistant exchange and learn from it.

        Args:
            user_input: The message the user sent.
            assistant_response: The text returned to the user.
            tools_used: Names of the tools associated with this exchange;
                ``None`` is treated as "no tools".
        """
        # Copy so later mutation of the caller's list cannot alter the record.
        tools = list(tools_used) if tools_used else []
        self.interaction_count += 1
        self.conversations.append({
            "timestamp": datetime.now().isoformat(),
            "user_input": user_input,
            "assistant_response": assistant_response,
            "tools_used": tools,
            "interaction_id": self.interaction_count,
        })
        self._learn_from_interaction(user_input, assistant_response, tools)

    def _learn_from_interaction(self, user_input: str, response: str, tools: List[str]):
        """Update tool-usage patterns and collect responses containing code."""
        for tool in tools:
            pattern = self.learned_patterns.setdefault(tool, {"count": 0, "contexts": []})
            pattern["count"] += 1
            # Only the first 100 characters of the prompt are kept as context.
            pattern["contexts"].append(user_input[:100])
            # Recency marker so get_stats() can report the most recent tools.
            pattern["last_used"] = self.interaction_count

        # A Markdown code fence marks the response as containing code.
        if "```" in response:
            self.code_snippets.append({
                "timestamp": datetime.now().isoformat(),
                "snippet": response,
            })

    def get_context(self) -> str:
        """Return a Markdown summary of the memory for inclusion in a prompt.

        The summary lists the interaction count, up to five tools ordered by
        usage count (highest first), and the number of stored code snippets.
        """
        context_parts = [f"## Pattern Memory ({self.interaction_count} interactions)"]

        if self.learned_patterns:
            context_parts.append("\n### Tool Usage Patterns:")
            most_used = sorted(
                self.learned_patterns.items(),
                key=lambda item: item[1]["count"],
                reverse=True,
            )[:5]
            for tool, data in most_used:
                context_parts.append(f"- {tool}: used {data['count']} times")

        if self.code_snippets:
            context_parts.append(f"\n### Learned {len(self.code_snippets)} code patterns")

        return "\n".join(context_parts)

    def get_stats(self) -> Dict:
        """Return summary counters plus up to five most recently used tools.

        ``recent_tools`` is ordered most recent first.
        """
        by_recency = sorted(
            self.learned_patterns.items(),
            key=lambda item: item[1].get("last_used", 0),
            reverse=True,
        )
        return {
            "total_interactions": self.interaction_count,
            "tool_patterns": len(self.learned_patterns),
            "code_snippets": len(self.code_snippets),
            "recent_tools": [tool for tool, _ in by_recency[:5]],
        }
|
|
|
|
| |
# Process-wide memory instance shared by the model prompt builder and the chat handlers.
memory = SelfEvolutionMemory()
|
|
| |
| |
| |
|
|
class Tool:
    """A named, described wrapper around an async callable.

    Attributes:
        name: Identifier of the tool.
        description: Human-readable summary of what the tool does.
        func: The coroutine function that implements the tool.
    """

    def __init__(self, name: str, description: str, func):
        self.name = name
        self.description = description
        self.func = func

    def __repr__(self) -> str:
        return f"{type(self).__name__}(name={self.name!r}, description={self.description!r})"

    async def execute(self, *args, **kwargs):
        """Await the wrapped callable with the given arguments and return its result."""
        return await self.func(*args, **kwargs)
|
|
|
|
| |
async def tool_file_read(path: str) -> str:
    """Read a text file and return at most its first 5000 characters.

    Args:
        path: Path of the file to read.

    Returns:
        The file content (truncated to 5000 characters), or a human-readable
        error message if the file is missing or cannot be read. Errors are
        reported in the return value rather than raised.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            # Read only what is returned instead of loading the whole file.
            return f.read(5000)
    except FileNotFoundError:
        return f"File not found: {path}"
    except Exception as e:
        return f"Error reading file: {e}"
|
|
|
|
async def tool_file_write(path: str, content: str) -> str:
    """Write text to a file, creating missing parent directories first.

    Args:
        path: Destination file path; an existing file is overwritten.
        content: Text to write.

    Returns:
        A success message, or an error message if the write failed. Errors
        are reported in the return value rather than raised.
    """
    try:
        parent = os.path.dirname(path)
        if parent:  # A bare filename has no parent directory to create.
            os.makedirs(parent, exist_ok=True)
        with open(path, "w", encoding="utf-8") as f:
            f.write(content)
        return f"Successfully wrote to {path}"
    except Exception as e:
        return f"Error writing file: {e}"
|
|
|
|
async def tool_git_status() -> str:
    """Run ``git status --short`` in the current working directory.

    Returns:
        The command output, ``"No changes"`` when git succeeds with empty
        output, or a ``"Git error: ..."`` message when git cannot be run,
        times out, or exits with a non-zero status (for example outside a
        repository).
    """
    import subprocess

    try:
        result = subprocess.run(
            ["git", "status", "--short"],
            capture_output=True,
            text=True,
            cwd=os.getcwd(),
            timeout=30,
        )
    except Exception as e:
        return f"Git error: {e}"

    # A failing git prints nothing on stdout; without this check the failure
    # would be reported as "No changes".
    if result.returncode != 0:
        detail = result.stderr.strip() or f"git exited with status {result.returncode}"
        return f"Git error: {detail}"
    return result.stdout or "No changes"
|
|
|
|
async def tool_web_search(query: str) -> str:
    """Return placeholder search results for ``query``.

    No network request is made: the returned text is a fixed template that
    echoes the query and lists three dummy results.
    """
    return f"π Search results for '{query}':\n\n1. [Result 1] - Description here\n2. [Result 2] - Description here\n3. [Result 3] - Description here\n\n(Install brave-search to enable real search)"
|
|
|
|
async def tool_run_command(cmd: str) -> str:
    """Run a shell command and return its output.

    Args:
        cmd: Command line passed to the system shell.

    Returns:
        The command's stdout when stderr is empty; otherwise a combined
        ``Output:`` / ``Errors:`` report. If the command cannot be started
        or exceeds the 30-second timeout, a ``"Command error: ..."`` message
        is returned instead of raising.
    """
    import subprocess

    try:
        # SECURITY: shell=True executes `cmd` verbatim through the shell.
        # Never pass untrusted input here without sandboxing or an allow-list.
        result = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=30)
    except Exception as e:
        return f"Command error: {e}"

    if result.stderr:
        return f"Output:\n{result.stdout}\n\nErrors:\n{result.stderr}"
    return result.stdout
|
|
|
|
async def tool_create_directory(path: str) -> str:
    """Create a directory, including any missing parents.

    An already existing directory is not an error.

    Returns:
        A confirmation message, or ``"Error: ..."`` if creation failed.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as e:
        return f"Error: {e}"
    return f"Directory created: {path}"
|
|
|
|
async def tool_list_directory(path: str = ".") -> str:
    """List up to 50 entries of a directory, one per line.

    Entries are sorted by name so the output (and which entries survive the
    50-entry cut-off) is deterministic. Directories are suffixed with ``/``.

    Args:
        path: Directory to list; defaults to the current directory.

    Returns:
        The newline-joined listing, or ``"Error: ..."`` if the directory
        cannot be listed.
    """
    try:
        names = sorted(os.listdir(path))[:50]
    except Exception as e:
        return f"Error: {e}"
    return "\n".join(
        f"π {name}/" if os.path.isdir(os.path.join(path, name)) else f"π {name}"
        for name in names
    )
|
|
|
|
| |
# Registry of the available tools, keyed by tool name.
TOOLS = {
    "file_read": Tool("file_read", "Read a file from the filesystem", tool_file_read),
    "file_write": Tool("file_write", "Write content to a file", tool_file_write),
    "git_status": Tool("git_status", "Check git repository status", tool_git_status),
    "web_search": Tool("web_search", "Search the web for information", tool_web_search),
    "run_command": Tool("run_command", "Execute a shell command", tool_run_command),
    "create_directory": Tool("create_directory", "Create a new directory", tool_create_directory),
    "list_directory": Tool("list_directory", "List files in a directory", tool_list_directory),
}
|
|
|
|
def get_tool_descriptions() -> str:
    """Return a Markdown bullet list with one ``- **name**: description`` line per registered tool."""
    return "\n".join(f"- **{tool.name}**: {tool.description}" for tool in TOOLS.values())
|
|
|
|
| |
| |
| |
|
|
class StackModel:
    """Stack 2.9 model interface using transformers.

    Attributes:
        model_id: Hugging Face model identifier to load.
        model: The loaded causal LM, or ``None`` until ``load()`` completes.
        tokenizer: The loaded tokenizer, or ``None`` until ``load()`` completes.
        pipeline: Unused placeholder kept for compatibility.
    """

    _NOT_LOADED_MESSAGE = "Model not loaded. Please wait for initialization."

    def __init__(self, model_id: str = "Qwen/Qwen2.5-Coder-7B-Instruct"):
        self.model_id = model_id
        self.model = None
        self.tokenizer = None
        self.pipeline = None

    def load(self):
        """Load the tokenizer and the 4-bit quantized model.

        Uses NF4 quantization with double quantization and float16 compute.
        Both attributes are published only after both loads succeed, so a
        concurrent caller never observes a tokenizer without a model.
        """
        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
        import torch

        print(f"Loading {self.model_id}...")

        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
        )

        tokenizer = AutoTokenizer.from_pretrained(
            self.model_id,
            trust_remote_code=True,
        )

        model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True,
        )

        # Tokenizer first: external readiness checks look at `self.model`.
        self.tokenizer = tokenizer
        self.model = model

        print("Model loaded successfully!")

    def _is_ready(self) -> bool:
        """Return True once both the model and the tokenizer are loaded."""
        return self.model is not None and self.tokenizer is not None

    def _build_prompt(self, prompt: str, detailed: bool) -> str:
        """Assemble the system prompt, tool list, memory context and user turn.

        Args:
            prompt: The user's message.
            detailed: When True use the long system prompt (capabilities and
                instructions); otherwise the short one used for streaming.
        """
        if detailed:
            system_prompt = f"""You are Stack 2.9 - a pattern-based AI coding assistant.

## Available Tools
{get_tool_descriptions()}

## Your Capabilities
- Write, read, and execute code
- Use git for version control
- Search the web for information
- Create and manage files
- Execute shell commands

## Self-Evolution
You learn from each interaction. After responding, summarize what tools you used.

{memory.get_context()}

## Instructions
1. Be helpful and concise
2. Use tools when needed
3. Learn from the conversation
4. Provide code examples when relevant

Now respond to the user:"""
        else:
            system_prompt = f"""You are Stack 2.9 - a pattern-based AI coding assistant.

## Available Tools
{get_tool_descriptions()}

## Self-Evolution Memory
{memory.get_context()}

Now respond to the user:"""

        return f"{system_prompt}\n\nUser: {prompt}\n\nAssistant:"

    def generate(self, prompt: str, max_tokens: int = 512, temperature: float = 0.7) -> str:
        """Generate a complete response for ``prompt``.

        Args:
            prompt: The user's message.
            max_tokens: Maximum number of new tokens to generate.
            temperature: Sampling temperature; a value <= 0 selects greedy
                decoding instead of sampling.

        Returns:
            The generated text with surrounding whitespace stripped, or a
            fixed "not loaded" message if the model is not ready yet.
        """
        if not self._is_ready():
            return self._NOT_LOADED_MESSAGE

        full_prompt = self._build_prompt(prompt, detailed=True)
        inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)

        sampling = temperature > 0
        generation_kwargs = {
            "max_new_tokens": max_tokens,
            "do_sample": sampling,
            "repetition_penalty": 1.1,
        }
        if sampling:
            generation_kwargs["temperature"] = temperature
            generation_kwargs["top_p"] = 0.9

        outputs = self.model.generate(**inputs, **generation_kwargs)

        # Decode only the newly generated tokens. Splitting the full decoded
        # text on "Assistant:" would cut off any reply containing that string.
        prompt_length = inputs["input_ids"].shape[-1]
        response = self.tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return response.strip()

    def generate_streaming(self, prompt: str, max_tokens: int = 512, temperature: float = 0.7):
        """Generate a response token by token.

        Args:
            prompt: The user's message.
            max_tokens: Maximum number of tokens to generate.
            temperature: Sampling temperature; a value <= 0 selects greedy
                decoding.

        Yields:
            The decoded text of each generated token. Generation stops at the
            tokenizer's EOS token (which is not yielded) or after
            ``max_tokens`` tokens. If the model is not ready, a single
            "not loaded" message is yielded.
        """
        if not self._is_ready():
            yield self._NOT_LOADED_MESSAGE
            return

        import torch  # Imported lazily, as in load().

        full_prompt = self._build_prompt(prompt, detailed=False)
        inputs = self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)

        eos_token_id = self.tokenizer.eos_token_id
        step_input = inputs.input_ids
        past_key_values = None

        for _ in range(max_tokens):
            with torch.no_grad():
                # With the KV cache, only the newest token is fed after the
                # first step instead of re-running the whole sequence.
                outputs = self.model(
                    input_ids=step_input,
                    past_key_values=past_key_values,
                    use_cache=True,
                )
            past_key_values = outputs.past_key_values
            next_token_logits = outputs.logits[:, -1, :]

            if temperature > 0:
                probs = torch.softmax(next_token_logits / temperature, dim=-1)
                next_token = torch.multinomial(probs, num_samples=1)
            else:
                next_token = torch.argmax(next_token_logits, dim=-1, keepdim=True)

            if next_token.item() == eos_token_id:
                break

            yield self.tokenizer.decode(next_token[0], skip_special_tokens=True)
            step_input = next_token
|
|
|
|
| |
# Global model handle; stays None until a model has been loaded.
model = None
|
|
|
|
def initialize_model(model_id: Optional[str] = None):
    """Load the model and publish it as the global ``model``.

    Args:
        model_id: Optional Hugging Face model id; when omitted the
            ``StackModel`` default is used.

    Returns:
        The loaded ``StackModel``, or ``None`` if loading failed. On failure
        the error is printed and the global ``model`` is left unchanged.
    """
    global model
    try:
        candidate = StackModel(model_id) if model_id else StackModel()
        candidate.load()
    except Exception as e:
        print(f"Failed to load model: {e}")
        return None
    # Publish only after a successful load, so the global never refers to a
    # half-initialised instance.
    model = candidate
    return model
|
|
|
|
| |
| |
| |
|
|
def format_tools_used(tools_used: List[str]) -> str:
    """Format tool names as a Markdown footer for a chat response.

    Returns:
        An empty string when no tools were used; otherwise two newlines
        followed by a "Tools Used" line with the comma-separated tool names.
    """
    if not tools_used:
        return ""
    return f"\n\nπ§ **Tools Used**: {', '.join(tools_used)}"
|
|
|
|
def chat_response(message: str, history: List[List[str]]) -> str:
    """Generate the assistant's reply to a chat message.

    Args:
        message: The user's message.
        history: Prior chat turns; accepted for interface compatibility and
            not used here.

    Returns:
        The reply text. While the model is still loading a fixed placeholder
        string is returned; the same type as the normal path, so callers can
        always append the result to the chat history.
    """
    if model is None or model.model is None:
        return "β³ Model is loading. Please wait..."

    # Keyword heuristics (plain substring matches on the lower-cased message)
    # that decide which tools are reported as used for this message.
    # NOTE(review): short keywords such as 'ls' or 'git' also match inside
    # longer words ("tools", "digit"); confirm whether that is intended.
    tool_keywords = (
        ("git_status", ('git status', 'git')),
        ("web_search", ('search', 'find', 'look up')),
        ("list_directory", ('list files', 'directory', 'ls')),
        ("run_command", ('run ', 'execute', 'command')),
    )
    message_lower = message.lower()
    tools_used = [
        tool
        for tool, keywords in tool_keywords
        if any(kw in message_lower for kw in keywords)
    ]

    try:
        response = model.generate(message, max_tokens=512)
    except Exception as e:
        # Surface generation failures in the chat instead of crashing the UI.
        response = f"I encountered an error: {str(e)}"

    response += format_tools_used(tools_used)

    memory.add_interaction(message, response, tools_used)

    return response
|
|
|
|
def chat_response_stream(message: str, history: List[List[str]]) -> Generator:
    """Generate the assistant's reply incrementally.

    Args:
        message: The user's message.
        history: Prior chat turns; accepted for interface compatibility and
            not used here.

    Yields:
        The response text accumulated so far, once per generated token, and
        finally once more with the "Tools Used" footer when any tool keyword
        matched. While the model is loading a single placeholder string is
        yielded. The interaction is recorded in memory after the final yield.
    """
    if model is None or model.model is None:
        yield "β³ Model is loading. Please wait..."
        return

    # Keyword heuristics (substring matches on the lower-cased message).
    # 'look up' and the run_command entry mirror chat_response.
    tool_keywords = (
        ("git_status", ('git status', 'git')),
        ("web_search", ('search', 'find', 'look up')),
        ("list_directory", ('list', 'directory')),
        ("run_command", ('run ', 'execute', 'command')),
    )
    message_lower = message.lower()
    tools_used = [
        tool
        for tool, keywords in tool_keywords
        if any(kw in message_lower for kw in keywords)
    ]

    full_response = ""
    try:
        for token in model.generate_streaming(message, max_tokens=256):
            full_response += token
            yield full_response
    except Exception as e:
        # Same error handling as the non-streaming path: report in the chat.
        full_response += f"I encountered an error: {str(e)}"
        yield full_response

    if tools_used:
        full_response += format_tools_used(tools_used)
        yield full_response

    memory.add_interaction(message, full_response, tools_used)
|
|
|
|
| |
# Sample prompts offered in the UI's "Example Prompts" section.
EXAMPLE_PROMPTS = [
    "Hello! What can you help me with?",
    "Check git status of this repository",
    "Search for best practices for Python async programming",
    "List the files in the current directory",
    "Write a simple Python function to calculate fibonacci",
    "How do I use Git to create a new branch?",
    "What's your memory of our conversation?",
]
|
|
|
|
def create_gradio_app():
    """Build and return the Gradio Blocks application.

    The layout consists of a header, a memory-stats panel, the chatbot, a
    message box with a send button, a clear button, example prompts, a
    collapsible memory view, and an "about" footer. Chat history is handled
    as a list of ``[user_message, assistant_response]`` pairs.

    Returns:
        The configured ``gr.Blocks`` app (not yet launched).
    """
    with gr.Blocks(
        title="Stack 2.9 - Pattern-Based AI Coding Assistant",
        # Soft accepts named hues (primary_hue / secondary_hue), not
        # *_color keywords; "indigo" is the palette of the former hex values.
        theme=gr.themes.Soft(
            primary_hue="indigo",
            secondary_hue="indigo",
        ),
    ) as app:

        # Header
        gr.Markdown(
            "\n"
            "# π Stack 2.9 - Pattern-Based AI Coding Assistant\n"
            "\n"
            "Powered by **Qwen2.5-Coder-7B** with 4-bit quantization\n"
            "\n"
            "---\n"
        )

        # Memory statistics panel
        with gr.Row():
            with gr.Column(scale=1):
                stats_display = gr.Markdown(
                    "π **Memory Stats**\n\n- Interactions: 0\n- Tools learned: 0\n- Code patterns: 0",
                    elem_id="stats",
                )
            with gr.Column(scale=3):
                pass

        # Chat area
        chatbot = gr.Chatbot(
            height=500,
            show_copy_button=True,
            bubble_full_width=False,
        )

        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Ask me anything...",
                scale=4,
                lines=3,
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        with gr.Row():
            clear_btn = gr.Button("ποΈ Clear Chat")

        gr.Examples(
            examples=EXAMPLE_PROMPTS,
            inputs=msg,
            label="Example Prompts",
        )

        with gr.Accordion("π§  Self-Evolution Memory", open=False):
            memory_display = gr.Textbox(
                label="Memory Content",
                lines=10,
                interactive=False,
            )

        def respond(message, history):
            """Append the assistant's reply to the history and clear the textbox."""
            history = list(history or [])
            if not message or not message.strip():
                # Nothing to send; leave the conversation untouched.
                return "", history
            response = chat_response(message, history)
            # Build a new list rather than mutating the component's value.
            return "", history + [[message, response]]

        def update_stats():
            """Render the current memory statistics as Markdown."""
            stats = memory.get_stats()
            recent = ', '.join(stats['recent_tools']) if stats['recent_tools'] else 'None'
            return "\n".join([
                "π **Memory Stats**",
                "",
                f"- **Interactions**: {stats['total_interactions']}",
                f"- **Tool Patterns**: {stats['tool_patterns']}",
                f"- **Code Snippets**: {stats['code_snippets']}",
                "",
                f"**Recent Tools**: {recent}",
            ])

        def update_memory():
            """Return the memory's prompt context for the memory textbox."""
            return memory.get_context()

        def clear_chat():
            """Reset the chat history and the message box."""
            return [], ""

        # Only the button registers the "send" API name; registering the same
        # name on both events would create a duplicate endpoint name.
        submit_btn.click(respond, [msg, chatbot], [msg, chatbot], api_name="send")
        msg.submit(respond, [msg, chatbot], [msg, chatbot])

        clear_btn.click(clear_chat, outputs=[chatbot, msg])

        # Refresh the stats and memory views whenever the chat changes.
        chatbot.change(update_stats, outputs=[stats_display])
        chatbot.change(update_memory, outputs=[memory_display])

        # Footer
        gr.Markdown(
            "\n"
            "---\n"
            "### About Stack 2.9\n"
            "\n"
            "Stack 2.9 is a pattern-based AI coding assistant that:\n"
            "- π Uses **Qwen2.5-Coder-7B** (4-bit, ~4GB VRAM)\n"
            "- π οΈ Integrates **7 tools** (file, git, web, search, shell)\n"
            "- π§  Remembers interactions and learns patterns\n"
            "- β‘ Provides fast, streaming responses\n"
            "\n"
            "Deployed on **HuggingFace Spaces** with Gradio\n"
        )

    return app
|
|
|
|
| |
| |
| |
|
|
# Script entry point: parse CLI options, start loading the model in the
# background, then build and launch the Gradio app.
if __name__ == "__main__":
    import argparse
    import threading

    parser = argparse.ArgumentParser(description="Stack 2.9 - HuggingFace Spaces Demo")
    parser.add_argument("--share", action="store_true", help="Create a public share link")
    parser.add_argument("--port", type=int, default=7860, help="Port to run on")
    parser.add_argument("--model", type=str, default="Qwen/Qwen2.5-Coder-7B-Instruct", help="Model ID")
    args = parser.parse_args()

    print("=" * 50)
    print("π Stack 2.9 - Pattern-Based AI Coding Assistant")
    print("=" * 50)
    print(f"Model: {args.model}")
    print("Loading model...")

    def load_model_thread():
        """Load the model selected with --model and publish it globally."""
        global model
        try:
            # Construct the model here so the --model option is honoured.
            candidate = StackModel(args.model)
            candidate.load()
        except Exception as e:
            print(f"Failed to load model: {e}")
            return
        model = candidate

    # Daemon thread: a slow or stuck model load must not keep the process
    # alive after the web server has stopped.
    loader_thread = threading.Thread(target=load_model_thread, daemon=True)
    loader_thread.start()

    app = create_gradio_app()

    print(f"\nπ Launching Gradio on port {args.port}...")
    print("π Note: Model loads in background. Chat will work once loaded.\n")

    app.launch(
        server_name="0.0.0.0",
        server_port=args.port,
        share=args.share,
    )