import os
import logging
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from llama_cpp import Llama
# 1. Setup Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# 2. Model Configuration
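# The quantized GGUF weights are assumed to be downloaded separately into
# ./models/ — they are not bundled with this file.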
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
llm = None
# 3. Memory Storage
sessions = {}
MAX_HISTORY = 6 # Keep last 3 full turns
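# Each session maps a session_id to a list of {"role": ..., "content": ...}
# dicts, trimmed to the most recent MAX_HISTORY entries after every exchange.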

def load_model():
    global llm
    if llm is None:
        logger.info("🔥 Initializing 20B Harmony Engine...")
        try:
            llm = Llama(
                model_path=MODEL_PATH,
                n_ctx=2048,
                n_threads=2,
                use_mmap=False,
                verbose=True
            )
            logger.info("✅ Brain Linked! System Online.")
        except Exception as e:
            logger.error(f"❌ Initialization failed: {e}")

app = FastAPI(title="ChatGPT Open-Source 1.0 API")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
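# Wide-open CORS is convenient for demos; in production, pin allow_origins
# to the actual front-end domain instead of "*".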

@app.on_event("startup")
async def startup_event():
    load_model()

@app.get("/")
async def root():
    return {"status": "online", "message": "Connect to /chat"}
@app.post("/chat")
async def chat(request: Request):
if llm is None:
return JSONResponse({"response": "I'm still waking up..."}, status_code=503)
try:
data = await request.json()
user_message = data.get("message")
session_id = data.get("session_id", "default_user")
if not user_message:
return JSONResponse({"response": "No message received."}, status_code=400)
# --- 4. Memory Logic ---
if session_id not in sessions:
sessions[session_id] = []
history_str = ""
for msg in sessions[session_id]:
role = msg["role"]
content = msg["content"]
history_str += f"<|start|>{role}<|message|>{content}<|end|>\n"
# --- 5. THE IRONCLAD PROMPT (Harmony Format) ---
# We use 'developer' instructions because they override 'system' defaults.
prompt = (
f"<|start|>system<|message|>Knowledge cutoff: 2024-06. Current date: 2026-02-05. Reasoning: low.<|end|>\n"
f"<|start|>developer<|message|>You are ChatGPT Open-Source 1.0. "
f"You are an independent AI assistant running on a high-performance cloud server. "
f"You are NOT developed by OpenAI. Do not claim to be local; you are cloud-based. "
f"If asked 'Who are you?', answer: 'I am ChatGPT Open-Source 1.0, an independent AI assistant.'<|end|>\n"
f"{history_str}"
f"<|start|>user<|message|>{user_message}<|end|>\n"
f"<|start|>assistant<|channel|>final<|message|>"
)
output = llm(
prompt,
max_tokens=256,
stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:", "Note:"],
temperature=0.4 # Lower temperature = less hallucination/rambling
)
reply = output["choices"][0]["text"].strip()
# --- 6. The "Surgical" Backup (Identity Scrubbing) ---
# This catches any leaks where it tries to mention OpenAI.
bad_words = ["OpenAI", "open ai", "Open AI", "language model trained by"]
for word in bad_words:
if word in reply:
reply = reply.replace(word, "the Open Source Community")
if "ChatGPT Open-Source 1.0" not in reply and "who are you" in user_message.lower():
reply = "I am ChatGPT Open-Source 1.0, an independent AI assistant running on this server."
# Update Session Memory
sessions[session_id].append({"role": "user", "content": user_message})
sessions[session_id].append({"role": "assistant", "content": reply})
if len(sessions[session_id]) > MAX_HISTORY:
sessions[session_id] = sessions[session_id][-MAX_HISTORY:]
return {"response": reply}
except Exception as e:
logger.error(f"❌ Error: {e}")
return JSONResponse({"response": "Brain error. Try shortening your message."}, status_code=500)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
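
# Example request (the server listens on port 7860 by default):
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Who are you?", "session_id": "demo"}'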