OverThinker / app.py
broadfield-dev's picture
Update app.py
0ec3e05 verified
Raw
History Blame Contribute Delete
25.8 kB
#!/usr/bin/env python3
"""
Overthinker - Local 4B Quantized Edition (Nemotron 3 Nano 4B)
Uses a local 4B model (NVIDIA Nemotron 3 Nano 4B) loaded in 4-bit quantization if supported,
otherwise falls back to BF16 (which fits easily on 24GB GPUs).
Handles mamba-ssm dependency gracefully by disabling use_mamba_kernels in config
to use transformers' native PyTorch fallback implementation when mamba-ssm is not available.
"""
import os
import re
import json
import uuid
import sqlite3
import torch
from pathlib import Path
from typing import Optional, Dict, List
from gradio import Server
from fastapi import HTTPException
from starlette.responses import HTMLResponse, PlainTextResponse, JSONResponse
from datasets import Dataset, concatenate_datasets, load_dataset
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig, AutoConfig
from bag import (
BASE_URL,
LLMS_TXT,
SITEMAP_XML,
ROBOTS_TXT,
OVERSEER_JSON,
VIDEO_PAGE_HTML,
README_MD
)
# ---------------------------------------------------------------------------
# Application Setup
# ---------------------------------------------------------------------------
# Application object from gradio; the route handlers below register on it
# through @app.get / @app.post, and the __main__ block calls app.launch().
app = Server()
# Port handed to app.launch() and printed in the startup banner.
PORT = 7860
# Directory holding one SQLite file per session (see get_db_path).
DATA_DIR = Path("data")
# Created at import time; exist_ok avoids an error when it is already there.
DATA_DIR.mkdir(exist_ok=True)
# ---------- Local Model Configuration ----------
MODEL_NAME = "nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16"
print("[Overthinker] Attempting to load Nemotron 3 Nano 4B with 4-bit quantization...")

# Load config and disable mamba kernels to avoid the mamba-ssm dependency.
print("[Overthinker] Loading model config...")
config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
# Disable mamba kernels to use transformers' native PyTorch fallback.
# This avoids needing the mamba-ssm and causal-conv1d packages.
if hasattr(config, 'use_mamba_kernels'):
    config.use_mamba_kernels = False
    print("[Overthinker] Disabled use_mamba_kernels - using PyTorch fallback for Mamba layers")
else:
    print("[Overthinker] Warning: Config does not have use_mamba_kernels attribute")

# The tokenizer does not depend on the quantization mode, so it is loaded once
# up front instead of once per loading attempt.
print("[Overthinker] Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Try 4-bit first; on any incompatibility fall back to BF16.
# The quantization config is built *inside* the try block: constructing it can
# itself fail in some transformers versions when bitsandbytes is unavailable
# (NOTE(review): version dependent), and that failure must also reach the
# BF16 fallback rather than abort the import.
bnb_config = None
loaded_quantized = False
try:
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )
    print("[Overthinker] Loading model with 4-bit quantization...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        config=config,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16
    )
    print(f"[Overthinker] Model loaded in 4-bit quantization on device: {model.device}")
    loaded_quantized = True
except Exception as e:
    # Deliberately broad: any failure of the quantized path (missing package,
    # unsupported layer type, CUDA issue) should trigger the unquantized load.
    print(f"[Overthinker] 4-bit quantization failed: {e}")
    print("[Overthinker] Falling back to BF16 (no quantization) - model is only ~8GB.")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        config=config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16
    )
    loaded_quantized = False
    print(f"[Overthinker] Model loaded in BF16 on device: {model.device}")

# Shared text-generation pipeline used by call_model(); the sampling settings
# here are defaults that call_model() also passes explicitly.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=2048,
    temperature=0.8,
    do_sample=True,
    top_p=0.9
)
# Hugging Face access token read from the environment; empty string when unset.
# /upload_trace refuses to run while this is empty.
HF_TOKEN = os.getenv('HF_TOKEN', '')
# Hub dataset repository that /upload_trace pushes to; overridable via the
# HF_DATASET_REPO environment variable.
HF_DATASET_REPO = os.getenv('HF_DATASET_REPO', 'build-small-hackathon/Overthinker-traces')
# ---------------------------------------------------------------------------
# Database Helpers (same as OVERTHINKER_FINAL)
# ---------------------------------------------------------------------------
def get_db_path(session_id: str) -> Path:
    """Return the SQLite file path for ``session_id`` inside ``DATA_DIR``.

    ``session_id`` arrives from request bodies, so it is untrusted. Every
    character outside ``[A-Za-z0-9._-]`` is replaced with ``_`` before the
    value is placed in the file name, so path separators or drive/stream
    markers cannot steer the path outside ``DATA_DIR``. Identifiers made only
    of the allowed characters (for example UUID strings) map to exactly the
    same file as before.
    """
    safe_id = re.sub(r'[^A-Za-z0-9._-]', '_', str(session_id))
    return DATA_DIR / f"session_{safe_id}.db"
def init_session(session_id: str):
    """Ensure the session database has its ``nodes`` table and a root node.

    The function is idempotent: the table is created only if missing and the
    placeholder root row is inserted only when no row with a NULL
    ``parent_id`` exists. Checking the schema instead of merely checking that
    the file exists means a database file left empty by an interrupted or
    failed earlier initialisation is repaired on the next call instead of
    staying unusable.

    The connection is always closed, including when a statement raises.
    """
    db_path = get_db_path(session_id)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS nodes (
                id TEXT PRIMARY KEY,
                parent_id TEXT,
                type TEXT NOT NULL,
                label TEXT NOT NULL,
                description TEXT DEFAULT '',
                emoji TEXT DEFAULT '🔹',
                tips TEXT DEFAULT '[]',
                order_index INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        root_exists = conn.execute(
            "SELECT 1 FROM nodes WHERE parent_id IS NULL LIMIT 1"
        ).fetchone()
        if root_exists is None:
            root_id = str(uuid.uuid4())
            conn.execute(
                "INSERT INTO nodes (id, parent_id, type, label, description, emoji) VALUES (?, ?, ?, ?, ?, ?)",
                (root_id, None, "root", "What decision do you want to explore?", "", "🌳")
            )
        conn.commit()
    finally:
        conn.close()
def get_node_db(session_id: str, node_id: str) -> Optional[Dict]:
    """Fetch one node row as a dict, with ``tips`` decoded from JSON.

    Returns ``None`` when the session database file does not exist or no row
    has the given id. A missing, empty or malformed ``tips`` column yields an
    empty list instead of an error.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return None
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM nodes WHERE id=?", (node_id,)).fetchone()
    finally:
        # Close even if the query raises (e.g. table missing).
        conn.close()
    if row is None:
        return None
    node = dict(row)
    try:
        node['tips'] = json.loads(node.get('tips') or '[]')
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text values.
        node['tips'] = []
    return node
def get_children_db(session_id: str, parent_id: str) -> List[Dict]:
    """Return the direct children of ``parent_id`` ordered by ``order_index``.

    Each row is returned as a dict with ``tips`` decoded from JSON; a missing,
    empty or malformed ``tips`` value becomes an empty list. Returns ``[]``
    when the session database file does not exist.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return []
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM nodes WHERE parent_id=? ORDER BY order_index",
            (parent_id,)
        ).fetchall()
    finally:
        # Close even if the query raises.
        conn.close()
    children = []
    for row in rows:
        child = dict(row)
        try:
            child['tips'] = json.loads(child.get('tips') or '[]')
        except (TypeError, ValueError):
            # json.JSONDecodeError is a ValueError subclass.
            child['tips'] = []
        children.append(child)
    return children
def add_node_db(session_id: str, parent_id: str, node_type: str, label: str,
                description: str = "", emoji: str = "🔹",
                tips: Optional[list] = None, order_index: int = 0) -> Dict:
    """Insert a new node under ``parent_id`` and return it as a dict.

    A fresh UUID is generated for the node id and ``tips`` is stored as a JSON
    string (``None`` is stored as ``[]``). The returned dict mirrors the
    inserted values, with ``tips`` as a Python object rather than JSON text.
    The connection is closed even when the insert fails.
    """
    node_id = str(uuid.uuid4())
    tips_value = tips or []
    db_path = get_db_path(session_id)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            "INSERT INTO nodes (id, parent_id, type, label, description, emoji, tips, order_index) VALUES (?,?,?,?,?,?,?,?)",
            (node_id, parent_id, node_type, label, description, emoji, json.dumps(tips_value), order_index)
        )
        conn.commit()
    finally:
        conn.close()
    return {
        "id": node_id,
        "parent_id": parent_id,
        "type": node_type,
        "label": label,
        "description": description,
        "emoji": emoji,
        "tips": tips_value,
        "order_index": order_index
    }
def update_root_db(session_id: str, label: str, description: str = ""):
    """Overwrite the label and description of the session's root node.

    The root is identified as the row whose ``parent_id`` is NULL. The
    connection is closed even when the update raises.
    """
    db_path = get_db_path(session_id)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            "UPDATE nodes SET label=?, description=? WHERE parent_id IS NULL",
            (label, description)
        )
        conn.commit()
    finally:
        conn.close()
def get_path_db(session_id: str, node_id: str) -> List[Dict]:
    """Return the chain of nodes from the root down to ``node_id``.

    The walk follows ``parent_id`` links upwards and stops at the first
    missing node or empty parent id; the collected nodes are returned in
    root-first order. An unknown ``node_id`` yields an empty list.

    Ids already visited are tracked so that corrupt data containing a cycle
    of parent links ends the walk instead of looping forever.
    """
    path = []
    seen = set()
    current_id = node_id
    while current_id and current_id not in seen:
        seen.add(current_id)
        node = get_node_db(session_id, current_id)
        if node is None:
            break
        path.append(node)
        current_id = node.get("parent_id")
    path.reverse()
    return path
def build_path_string(session_id: str, node_id: str) -> str:
    """Render the root-to-node path as ``[TYPE] label`` segments joined by arrows.

    Only nodes of type ``root``, ``input`` and ``outcome`` contribute a
    segment; any other type is skipped. The separator is a real right-arrow
    character (the previous literal was a mis-encoded rendering of it).
    """
    tag_for_type = {"root": "ROOT", "input": "INPUT", "outcome": "OUTCOME"}
    parts = []
    for n in get_path_db(session_id, node_id):
        node_type = n["type"]
        label = n["label"]
        if node_type in tag_for_type:
            parts.append(f"[{tag_for_type[node_type]}] {label}")
    return " → ".join(parts)
def get_root_node(session_id: str) -> Optional[Dict]:
    """Return the session's root node (the row with NULL ``parent_id``).

    Returns ``None`` when the database file or the root row is missing.
    ``tips`` is decoded from JSON; a missing, empty or malformed value becomes
    an empty list.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return None
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM nodes WHERE parent_id IS NULL LIMIT 1").fetchone()
    finally:
        # Close even if the query raises.
        conn.close()
    if row is None:
        return None
    root = dict(row)
    try:
        root['tips'] = json.loads(root.get('tips') or '[]')
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError subclass.
        root['tips'] = []
    return root
def get_all_node_ids(session_id: str) -> List[str]:
    """Return the ids of every node stored for the session.

    Returns ``[]`` when the session database file does not exist. The
    connection is closed even when the query raises.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return []
    conn = sqlite3.connect(str(db_path))
    try:
        rows = conn.execute("SELECT id FROM nodes").fetchall()
    finally:
        conn.close()
    return [r[0] for r in rows]
def build_tree_nested(session_id: str) -> Optional[Dict]:
    """Return the whole session tree as nested dicts, or ``None`` without a root.

    Every node dict is a copy of its database row with an added ``children``
    list holding its child subtrees in ``order_index`` order. Children are
    fetched with one query per node.
    """
    root = get_root_node(session_id)
    if not root:
        return None

    def build_tree(node):
        children = get_children_db(session_id, node['id'])
        node_copy = dict(node)
        # Defensive: decode tips if a caller handed us the raw JSON text.
        if isinstance(node_copy.get('tips'), str):
            try:
                node_copy['tips'] = json.loads(node_copy['tips'])
            except ValueError:
                # json.JSONDecodeError is a ValueError subclass.
                node_copy['tips'] = []
        node_copy['children'] = [build_tree(c) for c in children]
        return node_copy

    return build_tree(root)
# ---------------------------------------------------------------------------
# Prompt Builders (same as OVERTHINKER_FINAL)
# ---------------------------------------------------------------------------
def build_root_prompt(decision: str) -> str:
    """Compose the prompt asking the model for the root node of a tree.

    The decision text is embedded in double quotes, followed by the JSON
    shape (label, description, emoji, tips) the model is told to return.
    """
    prompt_lines = (
        'You are an AI that helps people explore decisions by generating decision trees.',
        'Generate a ROOT decision node for the following decision:',
        f'"{decision}"',
        'Return ONLY valid JSON with exactly this structure (no markdown, no backticks):',
        '{',
        '"label": "A concise label for this decision tree (3-6 words)",',
        '"description": "A 1-2 sentence description of this decision context",',
        '"emoji": "An emoji representing this decision",',
        '"tips": ["One actionable tip for approaching this decision"]',
        '}',
    )
    return "\n".join(prompt_lines)
def build_options_prompt(decision_label: str, decision_desc: str, count: int, path_context: str, comment: str = "") -> str:
    """Compose the prompt asking the model for ``count`` OPTION children.

    The parent label and description are always included; the root-to-node
    path and the user's comment are appended on their own lines only when
    they are non-empty. The prompt ends with the JSON shape to return.
    """
    # The description line optionally grows by one line per extra context item.
    description_block = f'Description: "{decision_desc}"'
    if path_context:
        description_block += f'\nFull path from root to this node: "{path_context}"'
    if comment:
        description_block += f'\nUser context: "{comment}"'

    json_shape = "\n".join([
        '{',
        '"children": [',
        '{',
        '"id": "child_1",',
        '"label": "Short option label (3-6 words)",',
        '"description": "1-2 sentence description",',
        '"emoji": "An emoji",',
        '"tips": ["One practical tip"]',
        '},',
        '...',
        ']',
        '}',
    ])

    return "\n".join([
        'You are an AI that helps explore decisions by generating decision tree branches.',
        f'Parent node: "{decision_label}"',
        description_block,
        f'Generate EXACTLY {count} child nodes that represent different OPTIONS or CHOICES the person could take.',
        'IMPORTANT: Frame each child as an OPTION or CHOICE, not as an outcome.',
        'Consider the full decision path above to ensure the options are contextually relevant.',
        'Return ONLY valid JSON with exactly this structure (no markdown, no backticks):',
        json_shape,
        'Ensure children have unique IDs like child_1, child_2, etc.',
    ])
def build_outcomes_prompt(decision_label: str, decision_desc: str, count: int, path_context: str, comment: str = "") -> str:
    """Compose the prompt asking the model for ``count`` OUTCOME children.

    Mirrors the options prompt but frames children as consequences and asks
    for a positive/neutral/negative mix. Path and comment lines appear only
    when the corresponding argument is non-empty.
    """
    extras = []
    if path_context:
        extras.append(f'\nFull path from root to this node: "{path_context}"')
    if comment:
        extras.append(f'\nUser context: "{comment}"')
    context_suffix = "".join(extras)

    header = (
        'You are an AI that helps explore decisions by generating decision tree branches.\n'
        f'Parent node: "{decision_label}"\n'
        f'Description: "{decision_desc}"{context_suffix}\n'
        f'Generate EXACTLY {count} child nodes that represent a DIVERSE RANGE of possible OUTCOMES. Include a MIX of positive, neutral, and negative outcomes.\n'
        'IMPORTANT: Frame each child as an OUTCOME or CONSEQUENCE, not as a choice someone makes.\n'
        'Consider the full decision path above to ensure the outcomes are contextually relevant.\n'
        'Return ONLY valid JSON with exactly this structure (no markdown, no backticks):\n'
    )
    json_shape = (
        '{\n'
        '"children": [\n'
        '{\n'
        '"id": "child_1",\n'
        '"label": "Short outcome label (3-6 words)",\n'
        '"description": "1-2 sentence description",\n'
        '"emoji": "An emoji",\n'
        '"tips": ["One practical tip"]\n'
        '},\n'
        '...\n'
        ']\n'
        '}\n'
    )
    footer = 'Ensure children have unique IDs. Make sure the first child is POSITIVE, the second is NEUTRAL, and the third is NEGATIVE.'
    return header + json_shape + footer
# ---------------------------------------------------------------------------
# AI Call (Local Model via pipeline)
# ---------------------------------------------------------------------------
def call_model(prompt: str, system_prompt: str = "You are a helpful assistant that generates decision trees.") -> Optional[str]:
    """Run one system+user chat turn through the local pipeline.

    Returns the ``content`` of the last message in the first generation, or
    ``None`` when anything in the call fails (the error is printed).
    """
    conversation = [
        dict(role="system", content=system_prompt),
        dict(role="user", content=prompt),
    ]
    try:
        generations = pipe(conversation, max_new_tokens=2048, temperature=0.8, do_sample=True)
        # The last entry of generated_text is read as the model's reply.
        final_message = generations[0]["generated_text"][-1]
        return final_message["content"]
    except Exception as err:
        # Best effort by design: callers treat None as "generation failed".
        print(f"[Local Model Error] {err}")
        return None
def parse_json_response(text: str) -> Optional[dict]:
    """Extract a JSON object from model output, or return ``None``.

    Steps:
      1. Strip Markdown code fences (with or without a ``json`` tag).
      2. Parse the span from the first ``{`` to the last ``}``.
      3. If that fails, decode just the first object starting at the first
         ``{``; this recovers replies where text containing braces follows a
         valid object.

    Only a ``dict`` is ever returned. Input that parses to another JSON type
    (list, number, string) yields ``None``, because callers use ``.get`` on
    the result. Parse failures are printed with up to 500 characters of the
    offending text.
    """
    if not text:
        return None
    cleaned = text.strip()
    cleaned = re.sub(r'```json\s*', '', cleaned)
    cleaned = re.sub(r'```\s*', '', cleaned)
    cleaned = cleaned.strip()

    start = cleaned.find('{')
    end = cleaned.rfind('}')
    candidate = cleaned[start:end + 1] if 0 <= start < end else cleaned

    try:
        parsed = json.loads(candidate)
    except json.JSONDecodeError as e:
        parse_error = e
    else:
        return parsed if isinstance(parsed, dict) else None

    if start >= 0:
        try:
            # raw_decode stops at the end of the first complete value and
            # ignores whatever follows it.
            first_object, _ = json.JSONDecoder().raw_decode(cleaned, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(first_object, dict):
                return first_object

    print(f"[JSON Parse Error] {parse_error}")
    print(f"[Raw text] {candidate[:500]}")
    return None
# ---------------------------------------------------------------------------
# Routes
# ---------------------------------------------------------------------------
@app.get("/")
async def index():
    """Serve ``templates/index.html`` from next to this file, or a 404 stub page."""
    base_dir = os.path.dirname(os.path.abspath(__file__))
    html_path = os.path.join(base_dir, "templates", "index.html")
    if not os.path.exists(html_path):
        return HTMLResponse(content="<h1>Overthinker</h1><p>index.html not found</p>", status_code=404)
    with open(html_path, "r", encoding="utf-8") as handle:
        markup = handle.read()
    return HTMLResponse(content=markup, status_code=200)
@app.post("/root")
async def create_root(request: dict):
    """Initialise a session (creating it if needed) and return its root node.

    A missing, null or empty ``session_id`` gets a freshly generated UUID.
    Previously only a *missing* key did, so an explicit null or empty value
    made every such client share one database file.

    Raises:
        HTTPException(500): the root node could not be read after init.
    """
    session_id = request.get('session_id') or str(uuid.uuid4())
    init_session(session_id)
    root = get_root_node(session_id)
    if root is None:
        raise HTTPException(status_code=500, detail="Could not initialize session.")
    return {"session_id": session_id, "node": root}
@app.post("/create_tree")
async def create_tree(request: dict):
    """Generate the root node for a decision and store it in the session.

    Request keys: ``decision`` (required, non-blank string) and
    ``session_id`` (optional; a UUID is generated when missing or empty).

    The model's reply is parsed as JSON; missing or null fields fall back to
    defaults derived from the decision text. All four root columns are
    written in a single UPDATE so the row is never left half-updated.

    Raises:
        HTTPException(400): ``decision`` is missing, blank or not a string.
        HTTPException(500): the model produced no usable JSON object.
    """
    session_id = request.get('session_id') or str(uuid.uuid4())
    decision = request.get('decision', '')
    if not isinstance(decision, str) or not decision.strip():
        raise HTTPException(status_code=400, detail="Decision text is required.")
    decision = decision.strip()

    init_session(session_id)
    prompt = build_root_prompt(decision)
    ai_response = call_model(prompt)
    parsed = parse_json_response(ai_response) if ai_response else None
    if not parsed or not isinstance(parsed, dict):
        raise HTTPException(status_code=500, detail="Failed to generate root node. Please check model availability.")

    # `or` (rather than a .get default) also covers explicit nulls and empty
    # strings from the model, which would otherwise violate NOT NULL columns.
    label = str(parsed.get('label') or f'Overthinking: {decision[:40]}')
    description = str(parsed.get('description') or f'You are overthinking: {decision}')
    emoji = str(parsed.get('emoji') or '🌳')
    tips = parsed.get('tips') or ['Start by exploring options.']
    if not isinstance(tips, list):
        tips = [str(tips)]

    db_path = get_db_path(session_id)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            "UPDATE nodes SET label=?, description=?, emoji=?, tips=? WHERE parent_id IS NULL",
            (label, description, emoji, json.dumps(tips))
        )
        conn.commit()
    finally:
        conn.close()

    root = get_root_node(session_id)
    return {'session_id': session_id, 'node': root}
@app.post("/get_node")
async def get_node_endpoint(request: dict):
    """Return a node, its direct children and its root-to-node path string.

    Raises:
        HTTPException(400): ``session_id`` or ``node_id`` is missing.
        HTTPException(404): the node does not exist in the session.
    """
    session_id, node_id = request.get('session_id'), request.get('node_id')
    if not (session_id and node_id):
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")

    # Note: this creates the session database when it does not exist yet.
    init_session(session_id)

    node = get_node_db(session_id, node_id)
    if node is None:
        raise HTTPException(status_code=404, detail="Node not found")

    payload = {'node': node}
    payload['children'] = get_children_db(session_id, node_id)
    payload['path_context'] = build_path_string(session_id, node_id)
    return payload
@app.post("/get_children")
async def get_children(request: dict):
    """Generate and store child nodes for a node, then return them.

    Request keys: ``session_id``, ``node_id`` (both required), ``count``
    (default 3), ``node_type`` (type of the parent as reported by the client,
    default ``'outcome'``) and ``comment`` (optional user context).

    The child type alternates: root/outcome parents get ``input`` (option)
    children, input parents get ``outcome`` children.

    Raises:
        HTTPException(400): missing ids or ``count`` is not a positive integer.
        HTTPException(404): the parent node does not exist.
        HTTPException(500): the model did not return a ``children`` list.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    node_type = request.get('node_type', 'outcome')
    comment = request.get('comment', '')
    if not session_id or not node_id:
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    try:
        count = int(request.get('count', 3))
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="count must be a positive integer") from None
    if count < 1:
        raise HTTPException(status_code=400, detail="count must be a positive integer")

    init_session(session_id)
    parent = get_node_db(session_id, node_id)
    if parent is None:
        raise HTTPException(status_code=404, detail="Node not found")

    path_context = build_path_string(session_id, node_id)
    next_type_map = {'root': 'input', 'input': 'outcome', 'outcome': 'input'}
    next_type = next_type_map.get(node_type, 'outcome')
    parent_label = parent.get('label', 'Unknown')
    parent_desc = parent.get('description', '')
    if next_type == 'input':
        prompt = build_options_prompt(parent_label, parent_desc, count, path_context, comment)
    else:
        prompt = build_outcomes_prompt(parent_label, parent_desc, count, path_context, comment)

    ai_response = call_model(prompt)
    parsed = parse_json_response(ai_response) if ai_response else None
    if not isinstance(parsed, dict) or not isinstance(parsed.get('children'), list):
        raise HTTPException(status_code=500, detail="Generation failed. Please try again.")

    # Query existing siblings once; labels added in this call are tracked in
    # the same set, which is what re-querying on every iteration achieved.
    existing = get_children_db(session_id, node_id)
    used_labels = {c['label'] for c in existing}
    # Continue numbering after existing siblings so ORDER BY order_index
    # stays unambiguous when children are generated more than once.
    base_index = len(existing)

    children = []
    for i, child in enumerate(parsed['children']):
        if not isinstance(child, dict):
            # Skip malformed entries instead of failing after partial inserts.
            continue
        label = str(child.get('label') or 'Unknown')
        description = str(child.get('description') or '')
        emoji = str(child.get('emoji') or '🔹')
        tips = child.get('tips') or [f'Consider this {next_type}.']
        if not isinstance(tips, list):
            tips = [str(tips)]
        if label in used_labels:
            label = f"{label} ({i+1})"
        used_labels.add(label)
        child_node = add_node_db(
            session_id, node_id, next_type, label, description, emoji, tips,
            order_index=base_index + len(children)
        )
        children.append(child_node)
    return {'children': children, 'next_type': next_type}
@app.post("/add_options")
async def add_options(request: dict):
    """Generate additional child nodes for a node and append them.

    Request keys: ``session_id``, ``node_id`` (both required), ``count``
    (default 3) and ``comment`` (optional user context). The child type is
    derived from the parent's stored type: root/outcome parents get ``input``
    children, input parents get ``outcome`` children.

    Raises:
        HTTPException(400): missing ids or ``count`` is not a positive integer.
        HTTPException(404): the parent node does not exist.
        HTTPException(500): the model did not return a ``children`` list.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    comment = request.get('comment', '')
    if not session_id or not node_id:
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    try:
        count = int(request.get('count', 3))
    except (TypeError, ValueError):
        raise HTTPException(status_code=400, detail="count must be a positive integer") from None
    if count < 1:
        raise HTTPException(status_code=400, detail="count must be a positive integer")

    init_session(session_id)
    parent = get_node_db(session_id, node_id)
    if parent is None:
        raise HTTPException(status_code=404, detail="Node not found")

    path_context = build_path_string(session_id, node_id)
    next_type_map = {'root': 'input', 'input': 'outcome', 'outcome': 'input'}
    next_type = next_type_map.get(parent.get('type', 'root'), 'outcome')
    parent_label = parent.get('label', 'Unknown')
    parent_desc = parent.get('description', '')
    if next_type == 'input':
        prompt = build_options_prompt(parent_label, parent_desc, count, path_context, comment)
    else:
        prompt = build_outcomes_prompt(parent_label, parent_desc, count, path_context, comment)

    ai_response = call_model(prompt)
    parsed = parse_json_response(ai_response) if ai_response else None
    if not isinstance(parsed, dict) or not isinstance(parsed.get('children'), list):
        raise HTTPException(status_code=500, detail="Failed to add options. Please try again.")

    # Query existing siblings once; labels added in this call are tracked in
    # the same set, which is what re-querying on every iteration achieved.
    existing = get_children_db(session_id, node_id)
    used_labels = {c['label'] for c in existing}
    # New nodes are appended after the existing ones. Restarting at 0 gave
    # them the same order_index values as earlier siblings, leaving their
    # relative order undefined.
    base_index = len(existing)

    children = []
    for i, child in enumerate(parsed['children']):
        if not isinstance(child, dict):
            # Skip malformed entries instead of failing after partial inserts.
            continue
        label = str(child.get('label') or 'Unknown')
        description = str(child.get('description') or '')
        emoji = str(child.get('emoji') or '🔹')
        tips = child.get('tips') or [f'Additional {next_type}.']
        if not isinstance(tips, list):
            tips = [str(tips)]
        if label in used_labels:
            label = f"{label} ({i+1})"
        used_labels.add(label)
        child_node = add_node_db(
            session_id, node_id, next_type, label, description, emoji, tips,
            order_index=base_index + len(children)
        )
        children.append(child_node)
    return {'children': children, 'next_type': next_type}
@app.post("/upload_trace")
async def upload_trace(request: dict):
    """Append the session's full tree to the configured Hub dataset.

    The tree is serialised to JSON and stored as one row
    (``session_id``, ``tree_json``, ``created_at``). Existing rows are loaded
    from the ``train`` split, the new row is concatenated, and the result is
    pushed back.

    The push replaces the dataset contents, so the "start from scratch" path
    is taken only when loading reports that the dataset or its data files are
    absent (a ``FileNotFoundError``). Any other failure - network error,
    schema mismatch during concatenation - now aborts with a 500 instead of
    overwriting the existing dataset with a single row.
    NOTE(review): assumes the installed ``datasets`` version signals a
    missing or empty dataset with a FileNotFoundError subclass - confirm.

    NOTE(review): the dataset is pushed with ``private=False``; uploaded
    decision trees become public.

    Raises:
        HTTPException(400): ``session_id`` is missing.
        HTTPException(404): the session has no tree.
        HTTPException(500): credentials are not configured or the upload failed.
    """
    session_id = request.get('session_id')
    if not session_id:
        raise HTTPException(status_code=400, detail="Missing session_id")
    if not HF_TOKEN or not HF_DATASET_REPO:
        raise HTTPException(status_code=500, detail="HF_TOKEN and HF_DATASET_REPO must be configured.")
    tree = build_tree_nested(session_id)
    if tree is None:
        raise HTTPException(status_code=404, detail="No tree found for this session.")
    try:
        row = {
            'session_id': session_id,
            'tree_json': json.dumps(tree),
            'created_at': str(tree.get('created_at', ''))
        }
        new_dataset = Dataset.from_pandas(pd.DataFrame([row]))
        try:
            existing_dataset = load_dataset(HF_DATASET_REPO, split='train', token=HF_TOKEN)
        except FileNotFoundError:
            # Nothing uploaded yet: the new row becomes the whole dataset.
            combined = new_dataset
        else:
            combined = concatenate_datasets([existing_dataset, new_dataset])
        combined.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=False)
        return {'status': 'success', 'message': 'Trace uploaded successfully!'}
    except Exception as e:
        print(f"[Upload Trace Error] {e}")
        raise HTTPException(status_code=500, detail=f"Failed to upload trace: {str(e)}")
@app.post("/export_json")
async def export_json(request: dict):
    """Return the session's whole decision tree as nested JSON.

    Delegates to ``build_tree_nested`` rather than carrying a second copy of
    the recursive tree builder, so both export paths produce the same shape.

    Raises:
        HTTPException(400): ``session_id`` is missing.
        HTTPException(404): the session has no root node.
    """
    session_id = request.get('session_id')
    if not session_id:
        raise HTTPException(status_code=400, detail="Missing session_id")
    full_tree = build_tree_nested(session_id)
    if full_tree is None:
        raise HTTPException(status_code=404, detail="No tree found")
    return full_tree
@app.post("/export_path_json")
async def export_path_json(request: dict):
    """Return the root-to-node path as a JSON list under the ``path`` key.

    Raises:
        HTTPException(400): ``session_id`` or ``node_id`` is missing.
    """
    session_id, node_id = request.get('session_id'), request.get('node_id')
    if not (session_id and node_id):
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    return {'path': get_path_db(session_id, node_id)}
@app.post("/export_path_md")
async def export_path_md(request: dict):
    """Render the root-to-node path as a Markdown outline (plain-text response).

    Each node becomes a bold label prefixed by a type emoji and indented by
    its depth, followed by optional quoted description and first-tip lines.
    The emoji and dash literals are real Unicode characters; the previous
    literals were mis-encoded and showed up as garbage in the export.

    Raises:
        HTTPException(400): ``session_id`` or ``node_id`` is missing.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    if not session_id or not node_id:
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    path = get_path_db(session_id, node_id)

    type_emoji = {'root': '🌳', 'input': '🧠', 'outcome': '📊'}
    chunks = ['# 🧠 Overthinker — Decision Path\n\n']
    for depth, node in enumerate(path):
        indent = ' ' * depth
        emoji = type_emoji.get(node.get('type', ''), '📌')
        chunks.append(f'{indent}{emoji} **{node.get("label", "")}**\n')
        if node.get('description'):
            chunks.append(f'{indent} > {node.get("description", "")}\n')
        tips = node.get('tips')
        # Only index into tips when it really is a non-empty sequence.
        if isinstance(tips, (list, tuple)) and tips:
            chunks.append(f'{indent} > 💡 {tips[0]}\n')
        chunks.append('\n')
    return PlainTextResponse(content=''.join(chunks), status_code=200)
@app.get("/llms.txt", response_class=PlainTextResponse)
async def get_llms_txt():
    """Serve the ``LLMS_TXT`` constant as plain text."""
    response = PlainTextResponse(LLMS_TXT)
    return response
@app.get("/readme.md", response_class=PlainTextResponse)
async def get_readme_md():
    """Serve the ``README_MD`` constant as plain text."""
    response = PlainTextResponse(README_MD)
    return response
@app.get("/sitemap.xml", response_class=HTMLResponse)
async def get_sitemap():
    """Serve the ``SITEMAP_XML`` constant with an ``application/xml`` media type."""
    sitemap_kwargs = {"content": SITEMAP_XML, "media_type": "application/xml"}
    return HTMLResponse(**sitemap_kwargs)
@app.get("/robots.txt", response_class=PlainTextResponse)
async def get_robots():
    """Serve the ``ROBOTS_TXT`` constant as plain text."""
    response = PlainTextResponse(ROBOTS_TXT)
    return response
@app.get("/overthinker.json", response_class=JSONResponse)
async def get_overthinker_json():
    """Serve the ``OVERSEER_JSON`` constant as an ``application/json`` response."""
    json_kwargs = {"content": OVERSEER_JSON, "media_type": "application/json"}
    return JSONResponse(**json_kwargs)
@app.get("/video", response_class=HTMLResponse)
async def get_video():
    """Serve the ``VIDEO_PAGE_HTML`` constant as an HTML page."""
    page = HTMLResponse(content=VIDEO_PAGE_HTML)
    return page
# ---------------------------------------------------------------------------
# Launch
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Startup banner. The emoji and dash literals are real Unicode characters;
    # the previous literals were mis-encoded and printed as garbage.
    print(f"🧠 Overthinker — Local 4B Quantized Edition on port {PORT}")
    print(f"🤖 Model: {MODEL_NAME}")
    print("🔋 Mamba kernels: Disabled (using PyTorch fallback - no mamba-ssm/causal-conv1d needed)")
    if loaded_quantized:
        print("💾 Quantization: 4-bit NF4 (BitsAndBytes)")
    else:
        print("💾 Quantization: None (BF16 fallback - fits in 16GB VRAM)")
    print(f"🌐 Open http://localhost:{PORT} in your browser")
    # /upload_trace rejects requests while either value is empty.
    if not HF_TOKEN or not HF_DATASET_REPO:
        print("⚠️ No HF_TOKEN or HF_DATASET_REPO set. Upload will fail.")
    app.launch(
        server_port=PORT,
        show_error=True,
        share=False
    )