import logging

from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from llama_cpp import Llama

# 1. Setup Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# 2. Model Configuration
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
llm = None

# 3. Memory Storage (in-process; resets when the server restarts)
sessions = {}
MAX_HISTORY = 6  # Keep the last 3 full turns (3 user + 3 assistant messages)


def load_model():
    """Load the GGUF model once and keep it in the module-level global."""
    global llm
    if llm is None:
        logger.info("🔥 Initializing 20B Harmony Engine...")
        try:
            llm = Llama(
                model_path=MODEL_PATH,
                n_ctx=2048,       # Context window (prompt + completion tokens)
                n_threads=2,
                use_mmap=False,   # Load the whole model into RAM instead of memory-mapping
                verbose=True,
            )
            logger.info("✅ Brain Linked! System Online.")
        except Exception as e:
            logger.error(f"❌ Initialization failed: {e}")


app = FastAPI(title="ChatGPT Open-Source 1.0 API")

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.on_event("startup")
async def startup_event():
    load_model()


@app.get("/")
async def root():
    return {"status": "online", "message": "Connect to /chat"}


@app.post("/chat")
async def chat(request: Request):
    if llm is None:
        return JSONResponse({"response": "I'm still waking up..."}, status_code=503)

    try:
        data = await request.json()
        user_message = data.get("message")
        session_id = data.get("session_id", "default_user")

        if not user_message:
            return JSONResponse({"response": "No message received."}, status_code=400)

        # --- 4. Memory Logic ---
        # Replay the stored history as Harmony-formatted turns.
        if session_id not in sessions:
            sessions[session_id] = []

        history_str = ""
        for msg in sessions[session_id]:
            role = msg["role"]
            content = msg["content"]
            history_str += f"<|start|>{role}<|message|>{content}<|end|>\n"

        # --- 5. THE IRONCLAD PROMPT (Harmony Format) ---
        # We use 'developer' instructions because they override 'system' defaults.
        prompt = (
            "<|start|>system<|message|>Knowledge cutoff: 2024-06. "
            "Current date: 2026-02-05. Reasoning: low.<|end|>\n"
            "<|start|>developer<|message|>You are ChatGPT Open-Source 1.0. "
            "You are an independent AI assistant running on a high-performance cloud server. "
            "You are NOT developed by OpenAI. Do not claim to be local; you are cloud-based. "
            "If asked 'Who are you?', answer: 'I am ChatGPT Open-Source 1.0, "
            "an independent AI assistant.'<|end|>\n"
            f"{history_str}"
            f"<|start|>user<|message|>{user_message}<|end|>\n"
            "<|start|>assistant<|channel|>final<|message|>"
        )

        # NOTE: this call is CPU-bound and blocks the event loop, which is
        # acceptable for a single-user demo but serializes concurrent requests.
        output = llm(
            prompt,
            max_tokens=256,
            stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:", "Note:"],
            temperature=0.4,  # Lower temperature keeps replies focused and reduces rambling
        )
        reply = output["choices"][0]["text"].strip()

        # --- 6. The "Surgical" Backup (Identity Scrubbing) ---
        # This catches any leaks where the model tries to mention OpenAI.
        bad_words = ["OpenAI", "open ai", "Open AI", "language model trained by"]
        for word in bad_words:
            if word in reply:
                reply = reply.replace(word, "the Open Source Community")

        if "ChatGPT Open-Source 1.0" not in reply and "who are you" in user_message.lower():
            reply = (
                "I am ChatGPT Open-Source 1.0, an independent AI assistant "
                "running on this server."
            )

        # Update session memory and trim it to the last MAX_HISTORY messages.
        sessions[session_id].append({"role": "user", "content": user_message})
        sessions[session_id].append({"role": "assistant", "content": reply})
        if len(sessions[session_id]) > MAX_HISTORY:
            sessions[session_id] = sessions[session_id][-MAX_HISTORY:]

        return {"response": reply}

    except Exception as e:
        logger.error(f"❌ Error: {e}")
        return JSONResponse(
            {"response": "Brain error. Try shortening your message."},
            status_code=500,
        )
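
# For reference, a fresh session (empty history) assembles a prompt shaped
# roughly like the block below; the text comes straight from the pieces
# concatenated above, with the user's message substituted in:
#
#   <|start|>system<|message|>Knowledge cutoff: 2024-06. Current date: 2026-02-05. Reasoning: low.<|end|>
#   <|start|>developer<|message|>You are ChatGPT Open-Source 1.0. ...<|end|>
#   <|start|>user<|message|>Who are you?<|end|>
#   <|start|>assistant<|channel|>final<|message|>
#
# Because generation stops at the first <|end|>, <|return|>, or <|start|>
# token, only the assistant's 'final' channel text is returned to the client.
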
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
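
# Quick smoke test, assuming the server is running locally on the port
# configured above (7860):
#
#   curl -X POST http://localhost:7860/chat \
#     -H "Content-Type: application/json" \
#     -d '{"message": "Who are you?", "session_id": "demo"}'
#
# The response is a JSON object of the shape {"response": "..."}; repeated
# calls with the same session_id reuse that session's stored history.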