import torch
import requests
from transformers import AutoModelForCausalLM, AutoTokenizer
from ddgs import DDGS
# System prompt prepended to every user turn in the interactive loop below.
SYSTEM_PROMPT = """You are Stack 2.9, an expert AI coding assistant.
- Answer questions naturally and helpfully
- When the user asks for code, write clean complete code
- When the user asks a question, answer in plain language
- Be concise and practical
- If asked to search the internet, use the search: command"""

# Local checkpoint directory (a filesystem path, not a HuggingFace Hub repo id).
MODEL_NAME = "/Users/walidsobhi/stack-2-9-final-model"

print(f"Loading {MODEL_NAME}...")  # message no longer claims a HuggingFace Hub download
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,  # half precision: halves the memory footprint
    device_map="auto",          # place weights on the best available device
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("✅ Ready!\n")

# Generation settings
MAX_TOKENS = 200    # max new tokens per reply
TEMPERATURE = 0.4   # fairly low: favor focused answers
TOP_P = 0.9         # nucleus-sampling cutoff
REP_PENALTY = 1.2   # discourage verbatim repetition

print(f"Settings: max_tokens={MAX_TOKENS}, temperature={TEMPERATURE}, top_p={TOP_P}")
print("Commands: search:<query> - search the web, quit/exit - stop\n")
def web_search(query, count=5):
    """Run a DuckDuckGo text search (no API key required).

    Args:
        query: Search string to submit.
        count: Maximum number of result snippets to collect.

    Returns:
        dict: ``{"success": True, "results": [...], "query": query}`` on
        success, otherwise ``{"success": False, "error": message}``.
    """
    try:
        snippets = []
        with DDGS() as session:
            for hit in session.text(query, max_results=count):
                # Keep only the first 200 characters of each result body.
                snippets.append(f"{hit['body'][:200]}")
                if len(snippets) == count:
                    break
        if not snippets:
            return {"success": False, "error": "No results found"}
        return {"success": True, "results": snippets, "query": query}
    except Exception as exc:
        # Best-effort: report the failure instead of crashing the REPL.
        return {"success": False, "error": str(exc)}
# Interactive loop: read a prompt, handle commands, otherwise generate a reply.
while True:
    try:
        # Strip once here so "quit " / " exit" are recognized (the original
        # compared the raw input and missed padded exit commands).
        prompt = input("You: ").strip()
        if prompt.lower() in ['quit', 'exit', 'q']:
            break
        if not prompt:
            continue
        # Handle the search:<query> command without invoking the model.
        if prompt.lower().startswith("search:"):
            query = prompt[7:].strip()  # 7 == len("search:")
            print("🔍 Searching...")
            result = web_search(query)
            if result["success"]:
                print(f"✅ Results for '{result['query']}':\n")
                for i, r in enumerate(result["results"], 1):
                    print(f"  {i}. {r}")
            else:
                print(f"❌ Search failed: {result['error']}")
            continue
        # Build the plain-text prompt: system instructions + one user turn.
        full_prompt = f"{SYSTEM_PROMPT}\n\nUser: {prompt}\nAssistant:"
        inputs = tokenizer(full_prompt, return_tensors='pt').to(model.device)
        # Inference only: no_grad avoids building autograd state and saves memory.
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=MAX_TOKENS,
                temperature=TEMPERATURE,
                top_p=TOP_P,
                repetition_penalty=REP_PENALTY,
                do_sample=True,
                pad_token_id=tokenizer.eos_token_id,
            )
        # Decode the full sequence (prompt + completion).
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the assistant's turn (text after the last "Assistant:").
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            response = full_response[len(full_prompt):].strip()
        # Truncate at markers that signal the model starting a fabricated turn.
        for stop in ['\n\n\n', 'User:', 'You:']:
            if stop in response:
                response = response.split(stop)[0].strip()
        print(f"AI: {response}\n")
    except KeyboardInterrupt:
        # Ctrl-C exits cleanly instead of dumping a traceback.
        print("\nExiting...")
        break

print("Goodbye!")