walidsobhie-code
refactor: Squeeze folders further - cleaner structure
65888d5
"""
Stack 2.9 - Pattern-Based AI Coding Assistant
HuggingFace Spaces Demo
A Gradio interface for Stack 2.9 powered by Qwen2.5-Coder-7B
with tool integration and pattern memory.
"""
import json
import os
import time
from datetime import datetime
from typing import Dict, Generator, List, Optional

import gradio as gr
# ============================================================
# Pattern Memory System
# ============================================================
class SelfEvolutionMemory:
    """Volatile, per-process record of chat turns and tool-usage tallies.

    All state lives in plain Python containers on the instance; nothing in
    this class writes it anywhere else.
    """

    def __init__(self):
        # Every tracked category starts out empty.
        self.interaction_count = 0
        self.conversations = []
        self.code_snippets = []
        self.learned_patterns = {}
        self.preferences = {}

    def add_interaction(self, user_input: str, assistant_response: str, tools_used: List[str] = None):
        """Store one user/assistant exchange and refresh the derived tallies.

        ``tools_used`` may be omitted or ``None``; it is recorded as an
        empty list in that case.
        """
        tools = tools_used or []
        self.interaction_count += 1
        self.conversations.append(
            {
                "timestamp": datetime.now().isoformat(),
                "user_input": user_input,
                "assistant_response": assistant_response,
                "tools_used": tools,
                "interaction_id": self.interaction_count,
            }
        )
        self._learn_from_interaction(user_input, assistant_response, tools)

    def _learn_from_interaction(self, user_input: str, response: str, tools: List[str]):
        """Update per-tool counters and collect responses containing code fences."""
        prompt_excerpt = user_input[:100]
        for tool_name in tools:
            entry = self.learned_patterns.setdefault(tool_name, {"count": 0, "contexts": []})
            entry["count"] += 1
            entry["contexts"].append(prompt_excerpt)

        # A Markdown code fence anywhere in the reply marks it as a snippet.
        if "```" in response:
            self.code_snippets.append(
                {"timestamp": datetime.now().isoformat(), "snippet": response}
            )

    def get_context(self) -> str:
        """Render a short Markdown summary of what has been recorded so far."""
        lines = [f"## Pattern Memory ({self.interaction_count} interactions)"]

        if self.learned_patterns:
            lines.append("\n### Tool Usage Patterns:")
            ranked = sorted(
                self.learned_patterns.items(),
                key=lambda pair: pair[1]["count"],
                reverse=True,
            )
            # Only the five most frequently used tools are listed.
            lines.extend(
                f"- {name}: used {info['count']} times" for name, info in ranked[:5]
            )

        if self.code_snippets:
            lines.append(f"\n### Learned {len(self.code_snippets)} code patterns")

        return "\n".join(lines)

    def get_stats(self) -> Dict:
        """Return headline counters as a plain dictionary."""
        first_tools = list(self.learned_patterns)[:5]
        return {
            "total_interactions": self.interaction_count,
            "tool_patterns": len(self.learned_patterns),
            "code_snippets": len(self.code_snippets),
            "recent_tools": first_tools,
        }


# Single shared memory object used by the model prompt builder and the UI.
memory = SelfEvolutionMemory()
# ============================================================
# Tool System
# ============================================================
class Tool:
    """Named wrapper around an awaitable callable.

    ``func`` must be awaitable when called, because ``execute`` awaits its
    result.
    """

    def __init__(self, name: str, description: str, func):
        self.name, self.description, self.func = name, description, func

    async def execute(self, *args, **kwargs):
        """Await the wrapped callable with the given arguments and return its result."""
        outcome = await self.func(*args, **kwargs)
        return outcome
# Tool implementations (simplified for demo)
async def tool_file_read(path: str, max_chars: int = 5000) -> str:
    """Return up to ``max_chars`` characters from the start of a text file.

    Args:
        path: File to open in text mode.
        max_chars: Upper bound on the number of characters returned.

    Returns:
        The file's leading text, or a human-readable error string if the
        file is missing or cannot be read. Errors are never raised.
    """
    try:
        with open(path, 'r') as f:
            # A bounded read avoids pulling a huge file into memory only to
            # discard everything after the cap.
            return f.read(max_chars)
    except FileNotFoundError:
        return f"File not found: {path}"
    except Exception as e:
        return f"Error reading file: {str(e)}"
async def tool_file_write(path: str, content: str) -> str:
    """Write ``content`` to ``path``, creating parent directories as needed.

    Returns a status string; any failure is reported in the string rather
    than raised.
    """
    try:
        # A bare filename has no directory part; fall back to the current dir.
        parent = os.path.dirname(path) or "."
        os.makedirs(parent, exist_ok=True)
        with open(path, 'w') as handle:
            handle.write(content)
    except Exception as exc:
        return f"Error writing file: {exc}"
    return f"Successfully wrote to {path}"
async def tool_git_status() -> str:
    """Run ``git status --short`` in the current working directory.

    Returns:
        The short status output, ``"No changes"`` when git succeeds with no
        output, or a ``"Git error: ..."`` string when git cannot be run or
        exits with a non-zero status.
    """
    import subprocess

    try:
        result = subprocess.run(
            ["git", "status", "--short"],
            capture_output=True,
            text=True,
            timeout=30,  # do not let a stuck git process block the caller forever
        )
    except Exception as e:
        return f"Git error: {str(e)}"

    # A non-zero exit (e.g. not inside a repository) produces empty stdout;
    # without this check that would be misreported as "No changes".
    if result.returncode != 0:
        detail = result.stderr.strip() or f"git exited with status {result.returncode}"
        return f"Git error: {detail}"

    return result.stdout or "No changes"
async def tool_web_search(query: str) -> str:
    """Return placeholder search results for ``query``.

    No network request is made; the function only formats a canned,
    three-item result list that echoes the query back.
    """
    # Return a demo response since we can't make actual API calls
    return (
        f"🔍 Search results for '{query}':\n\n"
        "1. [Result 1] - Description here\n"
        "2. [Result 2] - Description here\n"
        "3. [Result 3] - Description here\n\n"
        "(Install brave-search to enable real search)"
    )
async def tool_run_command(cmd: str) -> str:
    """Execute ``cmd`` through the shell and return its captured output.

    When the command writes to stderr, both streams are returned under
    "Output:" / "Errors:" headings; otherwise only stdout is returned.
    Failures to run (including the 30-second timeout) come back as a
    "Command error: ..." string.
    """
    import subprocess

    try:
        # NOTE(review): shell=True runs the string verbatim in a shell, so
        # callers must not pass untrusted input here - confirm who can reach
        # this tool.
        completed = subprocess.run(
            cmd, shell=True, capture_output=True, text=True, timeout=30
        )
        if completed.stderr:
            return f"Output:\n{completed.stdout}\n\nErrors:\n{completed.stderr}"
        return completed.stdout
    except Exception as exc:
        return f"Command error: {exc}"
async def tool_create_directory(path: str) -> str:
    """Create ``path`` (and any missing parents); an existing directory is fine.

    Returns a status string; failures are reported as "Error: ..." rather
    than raised.
    """
    try:
        os.makedirs(path, exist_ok=True)
    except Exception as exc:
        return f"Error: {exc}"
    return f"Directory created: {path}"
async def tool_list_directory(path: str = ".") -> str:
    """List up to 50 entries of ``path``, one per line, in sorted order.

    Directories are shown as ``📁 name/`` and everything else as ``📄 name``.
    Failures (missing path, permission problems, ...) are returned as an
    "Error: ..." string instead of being raised.
    """
    try:
        # os.listdir gives no ordering guarantee; sort first so the 50-entry
        # cut-off and the displayed order are deterministic.
        items = sorted(os.listdir(path))[:50]
        lines = [
            f"📁 {name}/" if os.path.isdir(os.path.join(path, name)) else f"📄 {name}"
            for name in items
        ]
        return "\n".join(lines)
    except Exception as e:
        return f"Error: {str(e)}"
# Registry of available tools, keyed by each tool's own name.
TOOLS = {
    tool.name: tool
    for tool in (
        Tool("file_read", "Read a file from the filesystem", tool_file_read),
        Tool("file_write", "Write content to a file", tool_file_write),
        Tool("git_status", "Check git repository status", tool_git_status),
        Tool("web_search", "Search the web for information", tool_web_search),
        Tool("run_command", "Execute a shell command", tool_run_command),
        Tool("create_directory", "Create a new directory", tool_create_directory),
        Tool("list_directory", "List files in a directory", tool_list_directory),
    )
}
def get_tool_descriptions() -> str:
    """Render every registered tool as a Markdown bullet, one per line."""
    bullets = []
    for tool in TOOLS.values():
        bullets.append(f"- **{tool.name}**: {tool.description}")
    return "\n".join(bullets)
# ============================================================
# Model Interface
# ============================================================
class StackModel:
    """Thin wrapper around a Hugging Face causal LM used by the demo.

    The heavy dependencies (``transformers``, ``torch``) are imported lazily
    inside the methods that need them, so constructing an instance is cheap.
    ``load`` must complete before ``generate`` / ``generate_streaming``
    produce real output; until then both return a fixed "not loaded" message.
    """

    _NOT_LOADED_MESSAGE = "Model not loaded. Please wait for initialization."

    def __init__(self, model_id: str = "Qwen/Qwen2.5-Coder-7B-Instruct"):
        self.model_id = model_id
        self.model = None
        self.tokenizer = None
        self.pipeline = None

    def load(self):
        """Load the tokenizer and the 4-bit quantized model for ``model_id``."""
        from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
        import torch

        print(f"Loading {self.model_id}...")

        # 4-bit quantization config for 16GB GPU
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4"
        )

        # Load tokenizer
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.model_id,
            trust_remote_code=True
        )

        # Load model with quantization
        self.model = AutoModelForCausalLM.from_pretrained(
            self.model_id,
            quantization_config=bnb_config,
            device_map="auto",
            trust_remote_code=True
        )
        print("Model loaded successfully!")

    def _is_ready(self) -> bool:
        """Return True only when both tokenizer and model weights are available."""
        # load() assigns the tokenizer before the model, so checking the
        # tokenizer alone would let a request through while the weights are
        # still loading (or after they failed to load).
        return self.tokenizer is not None and self.model is not None

    def _tokenize_prompt(self, system_prompt: str, prompt: str):
        """Build the full text prompt and tokenize it onto the model's device."""
        full_prompt = f"{system_prompt}\n\nUser: {prompt}\n\nAssistant:"
        return self.tokenizer(full_prompt, return_tensors="pt").to(self.model.device)

    @staticmethod
    def _generation_kwargs(max_tokens: int, temperature: float) -> dict:
        """Return the sampling settings shared by both generation paths."""
        kwargs = {"max_new_tokens": max_tokens, "repetition_penalty": 1.1}
        if temperature and temperature > 0:
            kwargs.update(do_sample=True, temperature=temperature, top_p=0.9)
        else:
            # Sampling with a non-positive temperature is invalid; fall back
            # to greedy decoding instead of failing.
            kwargs.update(do_sample=False)
        return kwargs

    def generate(self, prompt: str, max_tokens: int = 512, temperature: float = 0.7) -> str:
        """Generate a complete response for ``prompt``.

        Args:
            prompt: The user's message.
            max_tokens: Maximum number of new tokens to generate.
            temperature: Sampling temperature; values <= 0 use greedy decoding.

        Returns:
            The generated reply text, or a fixed notice when the model has
            not finished loading.
        """
        if not self._is_ready():
            return self._NOT_LOADED_MESSAGE

        # Build the prompt with system and tools
        system_prompt = f"""You are Stack 2.9 - a pattern-based AI coding assistant.
## Available Tools
{get_tool_descriptions()}
## Your Capabilities
- Write, read, and execute code
- Use git for version control
- Search the web for information
- Create and manage files
- Execute shell commands
## Self-Evolution
You learn from each interaction. After responding, summarize what tools you used.
{memory.get_context()}
## Instructions
1. Be helpful and concise
2. Use tools when needed
3. Learn from the conversation
4. Provide code examples when relevant
Now respond to the user:"""

        inputs = self._tokenize_prompt(system_prompt, prompt)
        outputs = self.model.generate(
            **inputs,
            **self._generation_kwargs(max_tokens, temperature)
        )

        # Decode only the newly generated tokens. Splitting the full decoded
        # text on "Assistant:" would cut the reply short whenever the model
        # itself emits that marker.
        prompt_length = inputs["input_ids"].shape[-1]
        new_tokens = outputs[0][prompt_length:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    def generate_streaming(self, prompt: str, max_tokens: int = 512, temperature: float = 0.7):
        """Generate a response incrementally, yielding text chunks as they arrive.

        Generation runs in a background thread through ``model.generate``
        with a ``TextIteratorStreamer``, using the same sampling settings as
        ``generate``.

        Args:
            prompt: The user's message.
            max_tokens: Maximum number of new tokens to generate.
            temperature: Sampling temperature; values <= 0 use greedy decoding.

        Yields:
            Successive pieces of the reply text.

        Raises:
            Exception: Whatever ``model.generate`` raised in the worker
                thread, re-raised once the stream has drained.
        """
        if not self._is_ready():
            yield self._NOT_LOADED_MESSAGE
            return

        from threading import Thread
        from transformers import TextIteratorStreamer

        system_prompt = f"""You are Stack 2.9 - a pattern-based AI coding assistant.
## Available Tools
{get_tool_descriptions()}
## Self-Evolution Memory
{memory.get_context()}
Now respond to the user:"""

        inputs = self._tokenize_prompt(system_prompt, prompt)
        streamer = TextIteratorStreamer(
            self.tokenizer,
            skip_prompt=True,
            skip_special_tokens=True
        )
        generation_kwargs = dict(inputs)
        generation_kwargs.update(self._generation_kwargs(max_tokens, temperature))
        generation_kwargs["streamer"] = streamer

        failures = []

        def _run_generation():
            try:
                self.model.generate(**generation_kwargs)
            except Exception as exc:
                failures.append(exc)
                # Unblock the consumer; otherwise iterating the streamer
                # would wait forever for text that will never arrive.
                streamer.end()

        worker = Thread(target=_run_generation, daemon=True)
        worker.start()
        for chunk in streamer:
            if chunk:
                yield chunk
        worker.join()

        if failures:
            raise failures[0]
# Global model instance; remains None until a model has loaded successfully.
model = None


def initialize_model(model_id: Optional[str] = None):
    """Create and load the shared model, publishing it as the global ``model``.

    Args:
        model_id: Optional Hugging Face model id. When omitted, the
            ``StackModel`` default is used.

    Returns:
        The loaded ``StackModel``, or ``None`` if construction or loading
        failed (the failure is printed, not raised).
    """
    global model
    try:
        candidate = StackModel(model_id) if model_id else StackModel()
        candidate.load()
    except Exception as e:
        print(f"Failed to load model: {e}")
        return None
    # Publish only after a successful load so the global never refers to a
    # half-initialised instance.
    model = candidate
    return model
# ============================================================
# Gradio Interface
# ============================================================
def format_tools_used(tools_used: List[str]) -> str:
    """Return a Markdown footer naming the tools used, or "" when there are none.

    The footer starts with a blank line so it can be appended directly to a
    response.
    """
    if not tools_used:
        return ""
    # The wrench emoji was previously stored as mis-decoded bytes.
    return f"\n\n🔧 **Tools Used**: {', '.join(tools_used)}"
def chat_response(message: str, history: List[List[str]]) -> str:
    """Produce the assistant's reply to ``message`` as a single string.

    Args:
        message: The user's message.
        history: Prior chat turns. Accepted for interface compatibility; it
            is not read here.

    Returns:
        The reply text (with a "Tools Used" footer when keywords matched),
        or a loading notice when the model is not ready. The return value is
        always a string so callers can append it to the chat history.
    """
    global model, memory

    if model is None or model.model is None:
        return "⏳ Model is loading. Please wait..."

    # Keyword heuristics: these only label which tools the message refers
    # to; no tool is executed here.
    message_lower = message.lower()
    words = set(message_lower.split())
    tools_used = []
    if 'git' in message_lower:
        tools_used.append("git_status")
    if any(kw in message_lower for kw in ('search', 'find', 'look up')):
        tools_used.append("web_search")
    # 'ls' must be a whole word; as a substring it matches "tools", "also", ...
    if 'list files' in message_lower or 'directory' in message_lower or 'ls' in words:
        tools_used.append("list_directory")
    if any(kw in message_lower for kw in ('run ', 'execute', 'command')):
        tools_used.append("run_command")

    # Generate response
    try:
        response = model.generate(message, max_tokens=512)
    except Exception as e:
        response = f"I encountered an error: {str(e)}"

    # Add tools used to response
    response += format_tools_used(tools_used)

    # Record in memory
    memory.add_interaction(message, response, tools_used)
    return response
def chat_response_stream(message: str, history: List[List[str]]) -> Generator[str, None, None]:
    """Stream the assistant's reply, yielding the accumulated text so far.

    Args:
        message: The user's message.
        history: Prior chat turns. Accepted for interface compatibility; it
            is not read here.

    Yields:
        The full response text accumulated up to that point, so each value
        can replace the previous one in the UI.
    """
    global model, memory

    if model is None or model.model is None:
        yield "⏳ Model is loading. Please wait..."
        return

    full_response = ""

    # Keyword heuristics: these only label which tools the message refers
    # to; no tool is executed here.
    message_lower = message.lower()
    tools_used = []
    if 'git' in message_lower:
        tools_used.append("git_status")
    if any(kw in message_lower for kw in ('search', 'find')):
        tools_used.append("web_search")
    if any(kw in message_lower for kw in ('list', 'directory')):
        tools_used.append("list_directory")

    # Stream the response; report generation failures in the chat instead of
    # letting the exception escape into the UI event handler.
    try:
        for token in model.generate_streaming(message, max_tokens=256):
            full_response += token
            yield full_response
    except Exception as e:
        full_response = f"I encountered an error: {str(e)}"
        yield full_response

    # Add tools used
    if tools_used:
        full_response += format_tools_used(tools_used)
        yield full_response

    # Record in memory
    memory.add_interaction(message, full_response, tools_used)
# Example prompts for the UI; passed to gr.Examples in create_gradio_app as
# one-click inputs for the message box.
EXAMPLE_PROMPTS = [
    "Hello! What can you help me with?",
    "Check git status of this repository",
    "Search for best practices for Python async programming",
    "List the files in the current directory",
    "Write a simple Python function to calculate fibonacci",
    "How do I use Git to create a new branch?",
    "What's your memory of our conversation?",
]
def create_gradio_app():
    """Build and return the Gradio Blocks application (without launching it).

    The layout has a header, a memory-stats panel, the chatbot with a
    message box and send/clear buttons, example prompts, a collapsible view
    of the pattern memory, and a footer.
    """
    with gr.Blocks(
        title="Stack 2.9 - Pattern-Based AI Coding Assistant",
        # Soft() is configured through *_hue arguments; it has no
        # primary_color / secondary_color / tertiary_color parameters. The
        # previous hex values (#6366f1, #818cf8, #a5b4fc) are indigo shades.
        theme=gr.themes.Soft(
            primary_hue="indigo",
            secondary_hue="indigo"
        )
    ) as app:
        # Header
        gr.Markdown("""
# 🚀 Stack 2.9 - Pattern-Based AI Coding Assistant
Powered by **Qwen2.5-Coder-7B** with 4-bit quantization
---
""")

        # Memory stats display
        with gr.Row():
            with gr.Column(scale=1):
                stats_display = gr.Markdown(
                    "📊 **Memory Stats**\n\n- Interactions: 0\n- Tools learned: 0\n- Code patterns: 0",
                    elem_id="stats"
                )
            with gr.Column(scale=3):
                pass  # Spacer

        # Chat interface
        chatbot = gr.Chatbot(
            height=500,
            show_copy_button=True,
            bubble_full_width=False
        )
        with gr.Row():
            msg = gr.Textbox(
                label="Message",
                placeholder="Ask me anything...",
                scale=4,
                lines=3
            )
            submit_btn = gr.Button("Send", variant="primary", scale=1)

        # Clear button
        with gr.Row():
            clear_btn = gr.Button("🗑️ Clear Chat")

        # Example prompts
        gr.Examples(
            examples=EXAMPLE_PROMPTS,
            inputs=msg,
            label="Example Prompts"
        )

        # Memory visualization
        with gr.Accordion("🧠 Self-Evolution Memory", open=False):
            memory_display = gr.Textbox(
                label="Memory Content",
                lines=10,
                interactive=False
            )

        # Functions
        def respond(message, history):
            """Answer ``message`` and return (cleared textbox, new history)."""
            history = history or []
            # Ignore blank submissions instead of generating for empty input.
            if not message or not message.strip():
                return "", history
            response = chat_response(message, history)
            # Build a new list rather than mutating the component's value.
            return "", history + [[message, response]]

        def update_stats():
            """Render the current memory counters as Markdown."""
            stats = memory.get_stats()
            recent = ', '.join(stats['recent_tools']) if stats['recent_tools'] else 'None'
            # Blank lines keep the heading, the list, and the trailing line
            # as separate Markdown blocks (matching the initial panel text).
            return "\n".join([
                "📊 **Memory Stats**",
                "",
                f"- **Interactions**: {stats['total_interactions']}",
                f"- **Tool Patterns**: {stats['tool_patterns']}",
                f"- **Code Snippets**: {stats['code_snippets']}",
                "",
                f"**Recent Tools**: {recent}",
            ])

        def update_memory():
            """Return the textual memory summary for the accordion."""
            return memory.get_context()

        def clear_chat():
            """Reset the chat history and the message box."""
            return [], ""

        # Button click handlers
        submit_btn.click(respond, [msg, chatbot], [msg, chatbot], api_name="send")
        msg.submit(respond, [msg, chatbot], [msg, chatbot], api_name="send")
        clear_btn.click(clear_chat, outputs=[chatbot, msg])

        # Refresh the stats and memory panels whenever the chat changes
        chatbot.change(update_stats, outputs=[stats_display])
        chatbot.change(update_memory, outputs=[memory_display])

        # Footer
        gr.Markdown("""
---
### About Stack 2.9
Stack 2.9 is a pattern-based AI coding assistant that:
- 🔍 Uses **Qwen2.5-Coder-7B** (4-bit, ~4GB VRAM)
- 🛠️ Integrates **7 tools** (file, git, web, search, shell)
- 🧠 Remembers interactions and learns patterns
- ⚡ Provides fast, streaming responses
Deployed on **HuggingFace Spaces** with Gradio
""")
    return app
# ============================================================
# Main Entry Point
# ============================================================
if __name__ == "__main__":
    # Script entry point: parse CLI options, start loading the model in the
    # background, then build and serve the Gradio UI.
    import argparse
    import threading

    parser = argparse.ArgumentParser(description="Stack 2.9 - HuggingFace Spaces Demo")
    parser.add_argument("--share", action="store_true", help="Create a public share link")
    parser.add_argument("--port", type=int, default=7860, help="Port to run on")
    parser.add_argument("--model", type=str, default="Qwen/Qwen2.5-Coder-7B-Instruct", help="Model ID")
    args = parser.parse_args()

    print("=" * 50)
    print("🚀 Stack 2.9 - Pattern-Based AI Coding Assistant")
    print("=" * 50)
    print(f"Model: {args.model}")
    print("Loading model...")

    def load_model_thread():
        """Load the model named by --model and publish it as the global."""
        global model
        try:
            # Honour --model; previously the flag was parsed and printed but
            # the default model id was always loaded.
            candidate = StackModel(model_id=args.model)
            candidate.load()
        except Exception as exc:
            print(f"Failed to load model: {exc}")
            return
        model = candidate

    # Load in a daemon thread so the UI can start immediately and a slow or
    # stuck load does not keep the process alive after the server stops.
    loader_thread = threading.Thread(target=load_model_thread, daemon=True)
    loader_thread.start()

    # Create and launch app
    app = create_gradio_app()
    print(f"\n🚀 Launching Gradio on port {args.port}...")
    print("📝 Note: Model loads in background. Chat will work once loaded.\n")
    app.launch(
        server_name="0.0.0.0",
        server_port=args.port,
        share=args.share
    )