MarneMorgan committed on
Commit
723d5e0
·
verified ·
1 Parent(s): c987534

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +520 -0
app.py ADDED
@@ -0,0 +1,520 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import time
3
+ import threading
4
+ import socket
5
+ import subprocess
6
+ import asyncio
7
+ import json
8
+ import uuid
9
+ import requests
10
+
11
+ from fastapi import FastAPI, Request, Header
12
+ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse
13
+
14
+ app = FastAPI()
15
+
16
+ # ----------------------------
17
+ # Config
18
+ # ----------------------------
19
# Shared secret that clients must send as "Authorization: Bearer <API_KEY>".
# NOTE(review): the fallback value is hard-coded in source, so anyone who can
# read this file can authenticate unless API_KEY is set in the environment.
API_KEY = os.getenv("API_KEY", "821274")
# Ollama model tag used for the pull, the warm-up and every generation request.
MODEL = os.getenv("MODEL", "llama3.2:1b")
# Base URL of the Ollama HTTP API that this app talks to.
OLLAMA_BASE = os.getenv("OLLAMA_BASE", "http://127.0.0.1:11434")

# System prompt sent with every /api/generate call (surrounding whitespace stripped).
SYSTEM_PROMPT = """
You are a helpful, friendly AI assistant.

Rules:
- If the user asks casual conversation, respond normally in plain text.
- If the user asks for code/scripts/automation or says “just do it” / “write something”, choose a reasonable task and output a complete useful script.
- Don’t refuse normal conversation.
- Only output code when appropriate.
- When you write code: output code first, then a short explanation.

Always be helpful. Never say you cannot fulfill a request unless it is unsafe.
""".strip()

# Free CPU: serialize requests to avoid overload / timeouts
# NOTE(review): the semaphore is created at import time, before any event loop
# is running — confirm this is safe on the Python version used for deployment.
GEN_SEM = asyncio.Semaphore(1)
38
+
39
+ # ----------------------------
40
+ # Ollama helpers
41
+ # ----------------------------
42
def is_port_open(host="127.0.0.1", port=11434) -> bool:
    """Return True when a TCP connection to ``host:port`` succeeds within 0.5 s.

    Any ``OSError`` (refused, unreachable, timed out) is reported as False.
    """
    reachable = False
    try:
        # The context manager closes the probe socket as soon as we know the answer.
        with socket.create_connection((host, port), timeout=0.5):
            reachable = True
    except OSError:
        reachable = False
    return reachable
48
+
49
def ollama_healthy() -> bool:
    """Return True when ``GET {OLLAMA_BASE}/api/tags`` answers with HTTP 200.

    The probe uses a 1.5 s timeout; any exception raised by the request is
    treated as "not healthy" rather than propagated.
    """
    tags_url = f"{OLLAMA_BASE}/api/tags"
    try:
        status = requests.get(tags_url, timeout=1.5).status_code
    except Exception:
        return False
    return status == 200
55
+
56
def ensure_ollama_running():
    """Spawn ``ollama serve`` unless something already listens on 127.0.0.1:11434.

    The child process is started fire-and-forget; callers are expected to poll
    for readiness themselves.
    """
    if is_port_open("127.0.0.1", 11434):
        # Port already reachable: nothing to start.
        return
    subprocess.Popen(["ollama", "serve"])
60
+
61
def wait_for_ollama(timeout_s=120) -> bool:
    """Poll Ollama's health once per second until it is ready or time runs out.

    Args:
        timeout_s: Maximum number of seconds to keep polling.

    Returns:
        True as soon as ``ollama_healthy()`` succeeds, False if the deadline
        passes first (including when ``timeout_s`` is zero or negative).
    """
    # Use the monotonic clock so wall-clock adjustments (NTP sync, manual
    # changes) can neither cut the wait short nor extend it.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        if ollama_healthy():
            return True
        time.sleep(1)
    return False
68
+
69
def pull_and_warm_model():
    """
    Best-effort boot task: start Ollama, pull MODEL, then warm it up once.

    Safe to fail (space still boots): every failure is printed and swallowed,
    nothing is raised to the caller and nothing is returned.
    """
    try:
        ensure_ollama_running()
        if not wait_for_ollama(120):
            print("Ollama not ready yet; skipping model pull.")
            return

        print(f"Pulling model: {MODEL}")
        # The pull endpoint streams NDJSON progress objects. Consume them
        # incrementally (instead of buffering the whole body) and look for an
        # "error" object, because a failed pull can still arrive with HTTP 200.
        with requests.post(
            f"{OLLAMA_BASE}/api/pull",
            json={"name": MODEL},
            stream=True,
            timeout=60 * 30,
        ) as r:
            if r.status_code != 200:
                print("Pull failed:", r.text[:2000])
                return
            for line in r.iter_lines():
                if not line:
                    continue
                try:
                    progress = json.loads(line)
                except ValueError:
                    # Ignore anything that is not a JSON progress line.
                    continue
                if isinstance(progress, dict) and progress.get("error"):
                    print("Pull failed:", str(progress["error"])[:2000])
                    return

        # Warmup: avoids first real user request being extra flaky/slow
        print("Warming up…")
        warm = requests.post(
            f"{OLLAMA_BASE}/api/generate",
            json={"model": MODEL, "system": SYSTEM_PROMPT, "prompt": "Say: ready.", "stream": False},
            timeout=180,
        )
        if warm.status_code != 200:
            print("Warmup failed:", warm.text[:2000])
            return
        print("Warmup done.")
    except Exception as e:
        # Deliberate catch-all: the boot task must never take the app down.
        print("Boot task error (non-fatal):", str(e))
96
+
97
+ threading.Thread(target=pull_and_warm_model, daemon=True).start()
98
+
99
def generate_with_recovery(prompt: str, attempts: int = 3):
    """Run a blocking, non-streaming Ollama generation with retries.

    Before each attempt Ollama's health is checked; if it is down the server
    is (re)started and given up to 60 s to come back.

    Args:
        prompt: Prompt text sent to ``/api/generate``.
        attempts: Maximum number of tries before giving up.

    Returns:
        ``(text, None)`` on success, or ``(fallback_message, error_string)``
        when every attempt failed. The error string is never empty on failure,
        so callers can rely on its truthiness.
    """
    last_err = None
    for i in range(1, attempts + 1):
        try:
            if not ollama_healthy():
                ensure_ollama_running()
                wait_for_ollama(60)

            r = requests.post(
                f"{OLLAMA_BASE}/api/generate",
                json={
                    "model": MODEL,
                    "system": SYSTEM_PROMPT,
                    "prompt": prompt,
                    "stream": False,
                },
                timeout=600,
            )
            r.raise_for_status()
            return r.json().get("response", ""), None
        except Exception as e:
            # Some exceptions stringify to "", which callers would mistake
            # for "no error"; fall back to the exception class name.
            last_err = str(e) or type(e).__name__
            # Back off 1 s, 2 s, 4 s... between tries, but do not stall the
            # caller after the final attempt, when nothing is left to retry.
            if i < attempts:
                time.sleep(min(2 ** (i - 1), 4))

    return (
        "⚠️ Backend hiccup while generating. Retrying usually works.\n\n"
        "Debug error:\n" + (last_err or "unknown"),
        last_err,
    )
129
+
130
def _message_text(content) -> str:
    """Flatten one OpenAI message ``content`` value into stripped plain text."""
    if content is None:
        return ""
    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        # OpenAI also allows a list of typed parts; keep only the textual ones.
        texts = []
        for part in content:
            if isinstance(part, str):
                texts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                texts.append(part["text"])
        return "\n".join(texts).strip()
    return str(content).strip()


def messages_to_prompt(messages):
    """
    Convert OpenAI-style messages into a single prompt string for Ollama /api/generate.

    Each message with non-empty text becomes ``"ROLE:\\n<text>"`` and the
    pieces are joined by blank lines. A missing or blank role defaults to
    ``USER``. Entries that are not dicts, and messages without text, are
    skipped instead of raising, because the input comes straight from the
    request body. Returns ``""`` when nothing usable is present.
    """
    parts = []
    for m in messages or []:
        if not isinstance(m, dict):
            continue
        role = str(m.get("role") or "user").strip().upper() or "USER"
        content = _message_text(m.get("content"))
        if content:
            parts.append(f"{role}:\n{content}")
    return "\n\n".join(parts).strip()
141
+
142
+ # ----------------------------
143
+ # Health
144
+ # ----------------------------
145
@app.get("/health")
def health():
    """Report whether Ollama currently answers, plus the configured model name."""
    healthy = ollama_healthy()
    return {"ok": healthy, "model": MODEL}
148
+
149
+ # ----------------------------
150
+ # OpenAI compatibility endpoints
151
+ # ----------------------------
152
@app.get("/v1/models")
def openai_models(authorization: str = Header(default="")):
    """List the single configured model in OpenAI ``/v1/models`` format.

    Auth is optional here: a missing Authorization header is accepted, but a
    header that is present and does not match the API key yields a 401.
    """
    expected = f"Bearer {API_KEY}"
    if authorization and authorization != expected:
        return JSONResponse({"error": {"message": "Invalid API key"}}, status_code=401)

    entry = {
        "id": MODEL,
        "object": "model",
        "created": int(time.time()),
        "owned_by": "private-ai",
    }
    return {"object": "list", "data": [entry]}
169
+
170
@app.post("/v1/chat/completions")
async def openai_chat_completions(request: Request, authorization: str = Header(default="")):
    """OpenAI-compatible, non-streaming chat completion backed by Ollama.

    Returns 401 for a wrong or missing bearer token and 400 when the body is
    not a JSON object. Otherwise it always returns an OpenAI-shaped
    ``chat.completion`` payload; backend failures are reported inside the
    assistant message text rather than as an HTTP error.
    """
    # OpenAI-style auth
    if authorization != f"Bearer {API_KEY}":
        return JSONResponse(
            {"error": {"message": "Invalid API key", "type": "auth_error"}},
            status_code=401
        )

    # A malformed body should be a client error, not an unhandled 500.
    try:
        body = await request.json()
    except ValueError:
        body = None
    if not isinstance(body, dict):
        return JSONResponse(
            {"error": {"message": "Request body must be a JSON object", "type": "invalid_request_error"}},
            status_code=400
        )

    model = body.get("model") or MODEL
    messages = body.get("messages") or []
    if not isinstance(messages, list):
        messages = []

    # Convert into a single prompt for Ollama generate
    prompt = messages_to_prompt(messages)
    if not prompt:
        prompt = "USER:\nHello"

    async with GEN_SEM:
        # generate_with_recovery blocks (HTTP call plus sleeps between
        # retries); run it in the default executor so the event loop keeps
        # serving other requests while this one waits.
        loop = asyncio.get_running_loop()
        text, err = await loop.run_in_executor(None, generate_with_recovery, prompt, 3)

    # If error, still return valid OpenAI shaped response
    if err:
        text += f"\n\n---\nBackend error:\n{err}"

    return {
        "id": f"chatcmpl-{uuid.uuid4().hex}",
        "object": "chat.completion",
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": text},
                "finish_reason": "stop"
            }
        ],
        # Token accounting is not implemented; zeros keep the shape valid.
        "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
    }
209
+
210
+ # ----------------------------
211
+ # UI (REAL HTML, not escaped)
212
+ # ----------------------------
213
@app.get("/", response_class=HTMLResponse)
def ui():
    """Serve the single-page chat UI.

    The page polls ``/health`` every 5 seconds, keeps the API key in browser
    ``localStorage`` and streams replies from ``/v1/chat/stream``. Braces in
    the CSS/JS are doubled because the markup is an f-string.

    The page scrolls ``<main>`` (the element with ``overflow-y:auto``) to
    follow new messages, and forgets a stored API key once the server rejects
    it so that the next send prompts for a new one.
    """
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>Private AI</title>
<style>
:root {{
--bg:#0b0f17;
--panel:rgba(255,255,255,.06);
--border:rgba(255,255,255,.10);
--text:rgba(255,255,255,.92);
--muted:rgba(255,255,255,.55);
--radius:18px;
--shadow:0 12px 40px rgba(0,0,0,.35);
}}
*{{box-sizing:border-box}}
body{{
margin:0;height:100vh;overflow:hidden;
font-family:ui-sans-serif,system-ui,-apple-system,Segoe UI,Roboto;
background:
radial-gradient(900px 600px at 20% 0%, rgba(59,130,246,.18), transparent 60%),
radial-gradient(800px 600px at 80% 0%, rgba(168,85,247,.16), transparent 60%),
var(--bg);
color:var(--text);
}}
.app{{display:grid;grid-template-rows:auto 1fr auto;height:100vh}}
header{{
padding:14px 18px;border-bottom:1px solid var(--border);
backdrop-filter:blur(14px);background:rgba(10,14,22,.6);
}}
header .inner{{max-width:980px;margin:0 auto;display:flex;align-items:center;justify-content:space-between}}
header h1{{font-size:14px;margin:0;font-weight:650}}
.status{{font-size:12px;color:var(--muted);display:flex;gap:8px;align-items:center}}
.dot{{width:8px;height:8px;border-radius:50%;background:#555;box-shadow:0 0 0 6px rgba(255,255,255,.06)}}
.dot.online{{background:#22c55e;box-shadow:0 0 0 6px rgba(34,197,94,.12)}}
.dot.busy{{background:#60a5fa;box-shadow:0 0 0 6px rgba(96,165,250,.14)}}
.dot.degraded{{background:#f59e0b;box-shadow:0 0 0 6px rgba(245,158,11,.14)}}
main{{overflow-y:auto;padding:20px 18px}}
.chat{{max-width:980px;margin:0 auto}}
.msg{{display:flex;gap:12px;margin:12px 0}}
.avatar{{width:36px;height:36px;border-radius:10px;background:var(--panel);border:1px solid var(--border);display:grid;place-items:center;font-size:13px}}
.bubble{{padding:12px 14px;border-radius:var(--radius);background:var(--panel);border:1px solid var(--border);box-shadow:var(--shadow);font-size:14px;line-height:1.45;white-space:pre-wrap;overflow-wrap:anywhere}}
.me .bubble{{background:linear-gradient(180deg,rgba(59,130,246,.22),rgba(255,255,255,.06));border-color:rgba(59,130,246,.25)}}
footer{{padding:14px 18px 18px;border-top:1px solid var(--border);backdrop-filter:blur(14px);background:rgba(10,14,22,.6)}}
.composer{{max-width:980px;margin:0 auto;display:grid;grid-template-columns:1fr auto;gap:10px}}
textarea{{resize:none;min-height:44px;max-height:180px;padding:12px 14px;border-radius:16px;border:1px solid var(--border);background:var(--panel);color:var(--text);font-size:14px;outline:none}}
button{{height:44px;padding:0 16px;border-radius:16px;border:1px solid var(--border);background:rgba(255,255,255,.08);color:var(--text);font-weight:650;cursor:pointer}}
button:disabled{{opacity:.6}}
.hint{{max-width:980px;margin:10px auto 0;color:rgba(255,255,255,.55);font-size:12px}}
kbd{{font-family:ui-monospace,SFMono-Regular,Menlo,Monaco,Consolas,"Liberation Mono","Courier New",monospace;font-size:12px;padding:2px 6px;border-radius:8px;background:rgba(255,255,255,.08);border:1px solid rgba(255,255,255,.12)}}
</style>
</head>
<body>
<div class="app">
<header>
<div class="inner">
<h1>Private AI</h1>
<div class="status">
<span class="dot" id="dot"></span>
<span id="status">Connecting…</span>
</div>
</div>
</header>

<main><div class="chat" id="chat"></div></main>

<footer>
<div class="composer">
<textarea id="input" placeholder="Message Private AI…"></textarea>
<button id="send">Send</button>
</div>
<div class="hint" id="hint"></div>
</footer>
</div>

<script>
const chat = document.getElementById("chat");
const scroller = document.querySelector("main");
const input = document.getElementById("input");
const sendBtn = document.getElementById("send");
const dot = document.getElementById("dot");
const statusEl = document.getElementById("status");
const hint = document.getElementById("hint");

let failCount = 0;

function scrollToBottom() {{
  // main is the scroll container; the .chat element itself never overflows.
  scroller.scrollTop = scroller.scrollHeight;
}}

function setStatus(mode, txt) {{
  dot.classList.remove("online","busy","degraded");
  if (mode === "online") dot.classList.add("online");
  if (mode === "busy") dot.classList.add("busy");
  if (mode === "degraded") dot.classList.add("degraded");
  statusEl.textContent = txt;
}}

function add(role, text) {{
  const el = document.createElement("div");
  el.className = "msg " + role;
  el.innerHTML = `
<div class="avatar">${{role === "me" ? "You" : "AI"}}</div>
<div class="bubble"></div>
`;
  el.querySelector(".bubble").textContent = text || "";
  chat.appendChild(el);
  scrollToBottom();
  return el;
}}

async function healthCheck() {{
  try {{
    const r = await fetch("/health", {{ cache: "no-store" }});
    const d = await r.json();
    if (!r.ok || !d.ok) throw new Error("unhealthy");
    failCount = 0;
    setStatus("online", "Online");
    return true;
  }} catch {{
    failCount++;
    setStatus("degraded", failCount >= 5 ? "Recovering…" : "Reconnecting…");
    return false;
  }}
}}

function getKey() {{
  let k = localStorage.getItem("API_KEY");
  if (!k) {{
    k = prompt("Enter API key:");
    if (k) localStorage.setItem("API_KEY", k);
  }}
  return k || "";
}}

async function sendStream(msg, bubbleEl) {{
  const key = getKey();
  const r = await fetch("/v1/chat/stream", {{
    method: "POST",
    headers: {{
      "Authorization": "Bearer " + key,
      "Content-Type": "application/json"
    }},
    body: JSON.stringify({{ prompt: msg }})
  }});

  if (!r.ok || !r.body) throw new Error("No stream");

  const reader = r.body.getReader();
  const decoder = new TextDecoder();
  let buffer = "";

  while (true) {{
    const {{ value, done }} = await reader.read();
    if (done) break;

    buffer += decoder.decode(value, {{ stream: true }});
    const parts = buffer.split("\\n\\n");
    buffer = parts.pop() || "";

    for (const part of parts) {{
      const lines = part.split("\\n");
      let event = "message";
      let dataLine = "";

      for (const line of lines) {{
        if (line.startsWith("event:")) event = line.slice(6).trim();
        if (line.startsWith("data:")) dataLine += line.slice(5).trim();
      }}

      if (!dataLine) continue;
      const payload = JSON.parse(dataLine);

      if (event === "error") {{
        const errText = String(payload.error || "Unknown error");
        bubbleEl.textContent += "\\n\\n---\\nError:\\n" + errText;
        // A rejected key would otherwise stay cached forever; drop it so the next send asks again.
        if (errText === "403: Invalid API key") localStorage.removeItem("API_KEY");
        setStatus("degraded", "Recovering…");
      }} else if (event === "done") {{
        setStatus("online", "Online");
      }} else {{
        bubbleEl.textContent += payload.delta || "";
      }}
      scrollToBottom();
    }}
  }}
}}

async function send() {{
  const msg = input.value.trim();
  if (!msg) return;

  input.value = "";
  add("me", msg);

  setStatus("busy", "Thinking…");
  const aiMsg = add("ai", "");
  const bubble = aiMsg.querySelector(".bubble");
  sendBtn.disabled = true;

  try {{
    await sendStream(msg, bubble);
  }} catch (e) {{
    bubble.textContent =
      "Temporary error. Try again in a moment.\\n\\nTip: verify your API key (stored in browser localStorage).";
    setStatus("degraded", "Reconnecting…");
  }} finally {{
    sendBtn.disabled = false;
    setTimeout(healthCheck, 800);
  }}
}}

sendBtn.onclick = send;
input.addEventListener("keydown", (e) => {{
  if (e.key === "Enter" && !e.shiftKey) {{
    e.preventDefault();
    send();
  }}
}});

hint.innerHTML = "Press <kbd>Enter</kbd> to send, <kbd>Shift</kbd>+<kbd>Enter</kbd> for newline.";
add("ai", "Hey 👋 I’m ready when you are.");
healthCheck();
setInterval(healthCheck, 5000);
</script>
</body>
</html>
"""
436
+
437
+ # ----------------------------
438
+ # Streaming SSE endpoint
439
+ # ----------------------------
440
@app.post("/v1/chat/stream")
async def chat_stream(request: Request):
    """Stream an Ollama generation to the browser as Server-Sent Events.

    Emitted events:
      * default event, ``data: {"delta": "..."}`` for each text fragment
      * ``event: error``, ``data: {"error": "..."}`` on failure
      * ``event: done``, ``data: {}`` always last

    Auth failures and empty prompts are also reported in-stream (HTTP 200),
    which is what the bundled UI expects. A body that is not a JSON object is
    treated like an empty prompt.
    """
    auth = request.headers.get("authorization", "")
    if auth != f"Bearer {API_KEY}":
        async def deny():
            yield "event: error\ndata: " + json.dumps({"error": "403: Invalid API key"}) + "\n\n"
            yield "event: done\ndata: {}\n\n"
        return StreamingResponse(deny(), media_type="text/event-stream")

    # Malformed or non-object bodies fall through to the empty-prompt reply
    # instead of surfacing as an unhandled 500.
    try:
        body = await request.json()
    except ValueError:
        body = None
    if not isinstance(body, dict):
        body = {}
    prompt = str(body.get("prompt") or "").strip()
    if not prompt:
        async def empty():
            yield "data: " + json.dumps({"delta": "Send a message and I’ll respond."}) + "\n\n"
            yield "event: done\ndata: {}\n\n"
        return StreamingResponse(empty(), media_type="text/event-stream")

    def open_stream():
        """Blocking: make sure Ollama is up, then start a streaming generate call."""
        if not ollama_healthy():
            ensure_ollama_running()
            wait_for_ollama(60)
        return requests.post(
            f"{OLLAMA_BASE}/api/generate",
            json={
                "model": MODEL,
                "system": SYSTEM_PROMPT,
                "prompt": prompt,
                "stream": True,
            },
            stream=True,
            timeout=600,
        )

    async def event_gen():
        # All blocking work (health wait, HTTP request, reading each line) is
        # pushed to the default executor so the event loop stays responsive.
        loop = asyncio.get_running_loop()
        exhausted = object()  # sentinel: next() must not raise StopIteration into a future
        r = None
        async with GEN_SEM:
            try:
                r = await loop.run_in_executor(None, open_stream)

                if r.status_code != 200:
                    detail = await loop.run_in_executor(None, lambda: r.text[:2000])
                    yield "event: error\ndata: " + json.dumps({"error": detail}) + "\n\n"
                    yield "event: done\ndata: {}\n\n"
                    return

                lines = r.iter_lines(decode_unicode=True)
                while True:
                    line = await loop.run_in_executor(None, next, lines, exhausted)
                    if line is exhausted:
                        break
                    if not line:
                        continue
                    try:
                        obj = json.loads(line)
                    except ValueError:
                        continue
                    if not isinstance(obj, dict):
                        continue

                    # Ollama reports mid-stream failures as {"error": ...};
                    # surface them instead of ending the reply silently.
                    if obj.get("error"):
                        yield "event: error\ndata: " + json.dumps({"error": str(obj["error"])[:2000]}) + "\n\n"
                        break

                    delta = obj.get("response", "")
                    if delta:
                        yield "data: " + json.dumps({"delta": delta}) + "\n\n"

                    if obj.get("done"):
                        break

                yield "event: done\ndata: {}\n\n"

            except Exception as e:
                yield "event: error\ndata: " + json.dumps({"error": str(e)}) + "\n\n"
                yield "event: done\ndata: {}\n\n"
            finally:
                # Release the upstream connection on every exit path,
                # including client disconnects that cancel this generator.
                if r is not None:
                    r.close()

    return StreamingResponse(event_gen(), media_type="text/event-stream")
503
+
504
+ # ----------------------------
505
+ # Non-stream fallback
506
+ # ----------------------------
507
@app.post("/v1/chat")
async def chat_api(request: Request):
    """Non-streaming chat fallback returning ``{"response": ..., "error": ...}``.

    Always answers with HTTP 200; auth and backend failures are reported in
    the ``error`` field. A body that is not a JSON object, or a missing/blank
    ``prompt``, yields the "send a message" reply.
    """
    auth = request.headers.get("authorization", "")
    if auth != f"Bearer {API_KEY}":
        return JSONResponse({"response": "", "error": "403: Invalid API key"}, status_code=200)

    # Treat malformed or non-object bodies as an empty prompt instead of
    # letting the parse error become an unhandled 500.
    try:
        body = await request.json()
    except ValueError:
        body = None
    if not isinstance(body, dict):
        body = {}
    prompt = str(body.get("prompt") or "").strip()
    if not prompt:
        return {"response": "Send a message and I’ll respond.", "error": None}

    async with GEN_SEM:
        # The generation call blocks (HTTP plus retry sleeps); run it in the
        # default executor so the event loop is not frozen meanwhile.
        loop = asyncio.get_running_loop()
        text, err = await loop.run_in_executor(None, generate_with_recovery, prompt, 3)
    return {"response": text, "error": err}