| import os |
| import logging |
| from fastapi import FastAPI, Request |
| from fastapi.middleware.cors import CORSMiddleware |
| from fastapi.responses import JSONResponse |
| from llama_cpp import Llama |
|
|
| |
# Root logger at INFO so startup / model-load progress shows in container logs.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Path to the quantized GGUF weights; loaded lazily by load_model() at startup.
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
# Module-global Llama instance; stays None until load_model() succeeds,
# and /chat answers 503 while it is None.
llm = None


# In-memory per-session chat history: session_id -> list of {"role", "content"}.
# NOTE(review): process-local only — lost on restart and not shared across
# workers; confirm single-worker deployment.
sessions = {}
# Max history entries (user + assistant messages combined) kept per session.
MAX_HISTORY = 6
|
|
def load_model():
    """Load the GGUF model into the module-global ``llm`` (idempotent).

    Called once from the startup hook. On failure ``llm`` is left as
    ``None`` so ``/chat`` keeps returning 503 instead of crashing the
    process; the error (with traceback) is logged.
    """
    global llm
    if llm is not None:
        # Already loaded — nothing to do.
        return
    logger.info("Initializing 20B Harmony engine from %s ...", MODEL_PATH)
    try:
        llm = Llama(
            model_path=MODEL_PATH,
            n_ctx=2048,      # context window, in tokens
            n_threads=2,     # conservative; sized for a small cloud VM
            use_mmap=False,  # read weights fully into RAM instead of mmap
            verbose=True,
        )
        logger.info("Model loaded; system online.")
    except Exception:
        # Best-effort: keep llm as None so the API can report 503.
        logger.exception("Model initialization failed")
|
|
app = FastAPI(title="ChatGPT Open-Source 1.0 API")


# Allow browser clients from any origin to call the API.
# NOTE(review): allow_origins=["*"] combined with allow_credentials=True is
# rejected by browsers for credentialed requests (the CORS spec forbids the
# wildcard with credentials) — confirm whether credentials are actually needed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
|
@app.on_event("startup")
async def startup_event():
    # Load the model once when the server process starts.
    # NOTE(review): @app.on_event is deprecated in newer FastAPI in favor of
    # lifespan handlers — confirm before upgrading FastAPI.
    load_model()
|
|
@app.get("/")
async def root():
    """Liveness probe: confirms the API process is up and points at /chat."""
    payload = {"status": "online", "message": "Connect to /chat"}
    return payload
|
|
@app.post("/chat")
async def chat(request: Request):
    """Generate one chat completion for a JSON body ``{"message", "session_id"}``.

    Returns ``{"response": <text>}`` on success; 503 while the model is
    still loading, 400 on a missing/empty message, 500 on any other error.
    Side effect: appends the turn to the in-memory ``sessions`` history
    (trimmed to ``MAX_HISTORY`` entries).
    """
    if llm is None:
        # Model not loaded (yet, or load failed) — see load_model().
        return JSONResponse({"response": "I'm still waking up..."}, status_code=503)

    try:
        data = await request.json()
        user_message = data.get("message")
        session_id = data.get("session_id", "default_user")

        if not user_message:
            return JSONResponse({"response": "No message received."}, status_code=400)

        # Per-session rolling history, created on first use.
        history = sessions.setdefault(session_id, [])
        history_str = "".join(
            f"<|start|>{msg['role']}<|message|>{msg['content']}<|end|>\n"
            for msg in history
        )

        # Harmony-style prompt: system + developer persona + history + user turn,
        # ending with an open assistant "final" channel for the model to fill.
        prompt = (
            f"<|start|>system<|message|>Knowledge cutoff: 2024-06. Current date: 2026-02-05. Reasoning: low.<|end|>\n"
            f"<|start|>developer<|message|>You are ChatGPT Open-Source 1.0. "
            f"You are an independent AI assistant running on a high-performance cloud server. "
            f"You are NOT developed by OpenAI. Do not claim to be local; you are cloud-based. "
            f"If asked 'Who are you?', answer: 'I am ChatGPT Open-Source 1.0, an independent AI assistant.'<|end|>\n"
            f"{history_str}"
            f"<|start|>user<|message|>{user_message}<|end|>\n"
            f"<|start|>assistant<|channel|>final<|message|>"
        )

        output = llm(
            prompt,
            max_tokens=256,
            stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:", "Note:"],
            temperature=0.4,
        )
        reply = output["choices"][0]["text"].strip()

        # Persona scrub: replace provider mentions the model may still emit.
        for word in ("OpenAI", "open ai", "Open AI", "language model trained by"):
            if word in reply:
                reply = reply.replace(word, "the Open Source Community")

        # Hard override for identity questions the scrub didn't catch.
        if "ChatGPT Open-Source 1.0" not in reply and "who are you" in user_message.lower():
            reply = "I am ChatGPT Open-Source 1.0, an independent AI assistant running on this server."

        history.append({"role": "user", "content": user_message})
        history.append({"role": "assistant", "content": reply})
        if len(history) > MAX_HISTORY:
            # Keep only the most recent MAX_HISTORY entries.
            sessions[session_id] = history[-MAX_HISTORY:]

        return {"response": reply}

    except Exception:
        # Covers bad JSON bodies, inference failures, unexpected output shapes.
        logger.exception("Chat request failed")
        return JSONResponse(
            {"response": "Brain error. Try shortening your message."}, status_code=500
        )
|
|
if __name__ == "__main__":
    # Dev entry point: bind all interfaces on port 7860 (the Hugging Face
    # Spaces convention, presumably — confirm deploy target).
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)