import logging
from fastapi import FastAPI, Request
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from llama_cpp import Llama

# 1. Setup Logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# 2. Model Configuration
MODEL_PATH = "./models/gpt-oss-20b-Q3_K_M.gguf"
llm = None

# 3. Memory Storage
sessions = {}
MAX_HISTORY = 6  # Keep last 3 full turns
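# Each session maps an id to a chronological list of {"role", "content"} dicts,
# e.g. sessions["demo"] = [{"role": "user", "content": "Hi"},
#                          {"role": "assistant", "content": "Hello!"}]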

def load_model():
    global llm
    if llm is None:
        logger.info("🔥 Initializing 20B Harmony Engine...")
        try:
            llm = Llama(
                model_path=MODEL_PATH,
                n_ctx=2048,        # context window in tokens; prompt + reply must fit
                n_threads=2,       # CPU threads used for inference
                use_mmap=False,    # load weights fully into RAM instead of memory-mapping
                verbose=True
            )
            logger.info("✅ Brain Linked! System Online.")
        except Exception as e:
            logger.error(f"❌ Initialization failed: {e}")

app = FastAPI(title="ChatGPT Open-Source 1.0 API")

# Wide-open CORS so any frontend can reach the API during development;
# restrict allow_origins to known domains in production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the model once at startup. (on_event is deprecated in recent FastAPI
# releases; a lifespan handler achieves the same thing.)
@app.on_event("startup")
async def startup_event():
    load_model()

@app.get("/")
async def root():
    return {"status": "online", "message": "Connect to /chat"}

@app.post("/chat")
async def chat(request: Request):
    if llm is None:
        return JSONResponse({"response": "I'm still waking up..."}, status_code=503)
    
    try:
        data = await request.json()
        user_message = data.get("message")
        session_id = data.get("session_id", "default_user")
        
        if not user_message:
            return JSONResponse({"response": "No message received."}, status_code=400)

        # --- 4. Memory Logic ---
        if session_id not in sessions:
            sessions[session_id] = []
        
        history_str = ""
        for msg in sessions[session_id]:
            role = msg["role"]
            content = msg["content"]
            history_str += f"<|start|>{role}<|message|>{content}<|end|>\n"

        # --- 5. THE IRONCLAD PROMPT (Harmony Format) ---
        # We use 'developer' instructions because they override 'system' defaults.
        prompt = (
            f"<|start|>system<|message|>Knowledge cutoff: 2024-06. Current date: 2026-02-05. Reasoning: low.<|end|>\n"
            f"<|start|>developer<|message|>You are ChatGPT Open-Source 1.0. "
            f"You are an independent AI assistant running on a high-performance cloud server. "
            f"You are NOT developed by OpenAI. Do not claim to be local; you are cloud-based. "
            f"If asked 'Who are you?', answer: 'I am ChatGPT Open-Source 1.0, an independent AI assistant.'<|end|>\n"
            f"{history_str}"
            f"<|start|>user<|message|>{user_message}<|end|>\n"
            f"<|start|>assistant<|channel|>final<|message|>"
        )
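        # For a first-turn request {"message": "Hello"} (empty history_str),
        # the rendered prompt looks roughly like:
        #   <|start|>system<|message|>Knowledge cutoff: 2024-06. ...<|end|>
        #   <|start|>developer<|message|>You are ChatGPT Open-Source 1.0. ...<|end|>
        #   <|start|>user<|message|>Hello<|end|>
        #   <|start|>assistant<|channel|>final<|message|>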
        
        output = llm(
            prompt,
            max_tokens=256,
            # Halt generation at Harmony terminators or any role-leak patterns.
            stop=["<|end|>", "<|return|>", "<|start|>", "user:", "assistant:", "Note:"],
            temperature=0.4  # Lower temperature = less hallucination/rambling
        )
        
        reply = output["choices"][0]["text"].strip()

        # --- 6. The "Surgical" Backup (Identity Scrubbing) ---
        # This catches any leaks where it tries to mention OpenAI.
        # str.replace is case-sensitive, so common casings are listed explicitly.
        bad_words = ["OpenAI", "open ai", "Open AI", "language model trained by"]
        for word in bad_words:
            if word in reply:
                reply = reply.replace(word, "the Open Source Community")
        
        if "ChatGPT Open-Source 1.0" not in reply and "who are you" in user_message.lower():
            reply = "I am ChatGPT Open-Source 1.0, an independent AI assistant running on this server."

        # Update Session Memory
        sessions[session_id].append({"role": "user", "content": user_message})
        sessions[session_id].append({"role": "assistant", "content": reply})
        
        if len(sessions[session_id]) > MAX_HISTORY:
            sessions[session_id] = sessions[session_id][-MAX_HISTORY:]

        return {"response": reply}

    except Exception as e:
        logger.error(f"❌ Error: {e}")
        return JSONResponse({"response": "Brain error. Try shortening your message."}, status_code=500)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)
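
# Example client call (a sketch; assumes the server is running on
# localhost:7860 and the `requests` package is installed):
#
#   import requests
#   r = requests.post(
#       "http://localhost:7860/chat",
#       json={"message": "Who are you?", "session_id": "demo"},
#   )
#   print(r.json()["response"])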