# Source: Hugging Face Space by CooLLaMACEO — "Update app.py", commit 93d727b (verified)
import os
import logging
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from llama_cpp import Llama
# 1. Setup Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# 2. Model Configuration
# Path to the quantized GGUF weights that load_model() hands to llama_cpp.
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
# Lazily initialized by load_model() at startup; None means "model not ready"
# and /chat answers 503 until it is set.
llm = None
# 3. Memory Storage
# In-process chat history, keyed by session_id -> list of
# {"role": ..., "content": ...} dicts. Not persisted across restarts.
sessions = {}
MAX_HISTORY = 6 # Keep last 3 full turns
def load_model():
    """Instantiate the GGUF model once and store it in the module-global ``llm``.

    Idempotent: does nothing if the model is already loaded. Initialization
    failures are logged and swallowed so the server still starts; ``llm``
    stays ``None`` and /chat keeps answering 503 until a successful load.
    """
    global llm
    if llm is not None:
        return
    logger.info("🔥 Initializing 20B Harmony Engine...")
    try:
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=2048,
            n_threads=2,
            use_mmap=False,
            verbose=True,
        )
    except Exception as e:
        logger.error(f"❌ Initialization failed: {e}")
    else:
        logger.info("✅ Brain Linked! System Online.")
# FastAPI application. CORS is wide open (any origin/method/header) so that
# any web frontend can call /chat directly from the browser.
app = FastAPI(title="ChatGPT Open-Source 1.0 API")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
@app.on_event("startup")
async def startup_event():
    # Load the model once when the server boots (no-op if already loaded).
    load_model()
@app.get("/")
async def root():
    """Health-check endpoint; tells clients where the real API lives."""
    payload = {"status": "online", "message": "Connect to /chat"}
    return payload
@app.post("/chat")
async def chat(request: Request):
    """Run one chat turn: build a Harmony-format prompt from session history,
    call the model, scrub identity leaks, and update session memory.

    Expects JSON: {"message": str, "session_id": str (optional)}.
    Returns JSON: {"response": str}.
    Status codes: 503 while the model is still loading, 400 on a missing
    message, 500 on any inference error.
    """
    if llm is None:
        return JSONResponse({"response": "I'm still waking up..."}, status_code=503)
    try:
        data = await request.json()
        user_message = data.get("message")
        session_id = data.get("session_id", "default_user")
        if not user_message:
            return JSONResponse({"response": "No message received."}, status_code=400)

        # --- 4. Memory Logic ---
        history = sessions.setdefault(session_id, [])
        prompt = _build_prompt(history, user_message)

        output = llm(
            prompt,
            max_tokens=256,
            stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:", "Note:"],
            temperature=0.4,  # Lower temperature = less hallucination/rambling
        )
        reply = _scrub_identity(output["choices"][0]["text"].strip(), user_message)

        # Update Session Memory, trimming to the last MAX_HISTORY messages.
        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": reply})
        if len(history) > MAX_HISTORY:
            sessions[session_id] = history[-MAX_HISTORY:]
        return {"response": reply}
    except Exception as e:
        # logger.exception keeps the traceback, unlike plain logger.error.
        logger.exception("❌ Error: %s", e)
        return JSONResponse({"response": "Brain error. Try shortening your message."}, status_code=500)


def _build_prompt(history, user_message):
    """Render prior turns plus the new user turn in Harmony format.

    --- 5. THE IRONCLAD PROMPT ---
    'developer' instructions are used because they override 'system' defaults.
    """
    history_str = "".join(
        f"<|start|>{msg['role']}<|message|>{msg['content']}<|end|>\n"
        for msg in history
    )
    return (
        "<|start|>system<|message|>Knowledge cutoff: 2024-06. Current date: 2026-02-05. Reasoning: low.<|end|>\n"
        "<|start|>developer<|message|>You are ChatGPT Open-Source 1.0. "
        "You are an independent AI assistant running on a high-performance cloud server. "
        "You are NOT developed by OpenAI. Do not claim to be local; you are cloud-based. "
        "If asked 'Who are you?', answer: 'I am ChatGPT Open-Source 1.0, an independent AI assistant.'<|end|>\n"
        f"{history_str}"
        f"<|start|>user<|message|>{user_message}<|end|>\n"
        "<|start|>assistant<|channel|>final<|message|>"
    )


def _scrub_identity(reply, user_message):
    """--- 6. The "Surgical" Backup (Identity Scrubbing) ---

    Replace any leaked vendor mentions, and force the canonical identity
    answer when the user asked "who are you" and the reply omitted it.
    """
    bad_words = ["OpenAI", "open ai", "Open AI", "language model trained by"]
    for word in bad_words:
        if word in reply:
            reply = reply.replace(word, "the Open Source Community")
    if "ChatGPT Open-Source 1.0" not in reply and "who are you" in user_message.lower():
        reply = "I am ChatGPT Open-Source 1.0, an independent AI assistant running on this server."
    return reply
if __name__ == "__main__":
    # Direct-run entry point: serve on all interfaces, port 7860.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)