| import torch |
| import requests |
| from transformers import AutoModelForCausalLM, AutoTokenizer |
| from ddgs import DDGS |
|
|
# System prompt prepended verbatim to every user turn (see the REPL below);
# the "search:" command mentioned here is intercepted before the model sees it.
SYSTEM_PROMPT = """You are Stack 2.9, an expert AI coding assistant.
- Answer questions naturally and helpfully
- When the user asks for code, write clean complete code
- When the user asks a question, answer in plain language
- Be concise and practical
- If asked to search the internet, use the search: command"""


# Checkpoint to load. NOTE(review): this is a local filesystem path, although
# the load message below says "from HuggingFace" — confirm which is intended.
MODEL_NAME = "/Users/walidsobhi/stack-2-9-final-model"
|
|
# Load the fine-tuned causal LM and its tokenizer from the local checkpoint.
# float16 halves memory vs. the float32 default; device_map="auto" lets
# accelerate place the weights on the best available device(s).
print(f"Loading {MODEL_NAME}...")  # MODEL_NAME is a local path, not a Hub repo id
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
print("✅ Ready!\n")  # was a mojibake-garbled checkmark split across two lines
|
|
| |
# Sampling hyperparameters passed to model.generate() in the REPL below.
MAX_TOKENS = 200       # hard cap on newly generated tokens per reply
TEMPERATURE = 0.4      # low-ish temperature: mostly deterministic, slightly varied
TOP_P = 0.9            # nucleus sampling cutoff
REP_PENALTY = 1.2      # discourages the model from looping on itself


print(f"Settings: max_tokens={MAX_TOKENS}, temperature={TEMPERATURE}, top_p={TOP_P}")
print("Commands: search:<query> - search the web, quit/exit - stop\n")
|
|
def web_search(query, count=5):
    """Search the web with DuckDuckGo (no API key required).

    Args:
        query: Search terms to submit.
        count: Maximum number of result snippets to return (default 5).

    Returns:
        dict: On success ``{"success": True, "results": [snippet, ...],
        "query": query}`` where each snippet is the first 200 characters of a
        result body; on failure ``{"success": False, "error": message}``.
        Never raises — all exceptions are folded into the error dict.
    """
    try:
        with DDGS() as ddgs:
            # max_results already caps the iterator, so the original manual
            # len() check + break was redundant; a comprehension suffices.
            results = [r["body"][:200] for r in ddgs.text(query, max_results=count)]

        if results:
            return {"success": True, "results": results, "query": query}
        return {"success": False, "error": "No results found"}
    except Exception as e:
        # Broad catch is deliberate: the interactive caller expects an error
        # dict it can print, never a propagated exception.
        return {"success": False, "error": str(e)}
|
|
| |
# Interactive REPL: read a line, dispatch to web search or the model, print
# the reply. Exits on quit/exit/q, Ctrl-C, or EOF (Ctrl-D / closed stdin).
while True:
    try:
        prompt = input("You: ")
        if prompt.lower() in ['quit', 'exit', 'q']:
            break
        if not prompt.strip():  # ignore blank lines
            continue

        # "search:<query>" is intercepted here and never reaches the model.
        if prompt.lower().startswith("search:"):
            query = prompt[7:].strip()
            print("🔍 Searching...")  # emoji restored from mojibake
            result = web_search(query)
            if result["success"]:
                print(f"✅ Results for '{result['query']}':\n")
                for i, r in enumerate(result["results"], 1):
                    print(f" {i}. {r}")
            else:
                print(f"❌ Search failed: {result['error']}")
            continue

        # Single-turn prompt (no chat template, no history), sampled reply.
        full_prompt = f"{SYSTEM_PROMPT}\n\nUser: {prompt}\nAssistant:"
        inputs = tokenizer(full_prompt, return_tensors='pt').to(model.device)
        outputs = model.generate(
            **inputs,
            max_new_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
            top_p=TOP_P,
            repetition_penalty=REP_PENALTY,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id  # silences the missing-pad warning
        )

        # decode() yields prompt + completion; keep only the completion.
        full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        if "Assistant:" in full_response:
            response = full_response.split("Assistant:")[-1].strip()
        else:
            # Fallback: slice off the prompt by length (assumes decode round-trips
            # the prompt text exactly — TODO confirm for this tokenizer).
            response = full_response[len(full_prompt):].strip()

        # Truncate if the model starts hallucinating a new conversation turn.
        for stop in ['\n\n\n', 'User:', 'You:']:
            if stop in response:
                response = response.split(stop)[0].strip()

        print(f"AI: {response}\n")

    except (KeyboardInterrupt, EOFError):
        # EOFError added: bare input() on a closed stdin previously crashed
        # with a traceback instead of exiting cleanly.
        print("\nExiting...")
        break

print("Goodbye!")
|
|