Spaces:
Paused
Paused
| #!/usr/bin/env python3 | |
| """ | |
| Overthinker - Local 4B Quantized Edition (Nemotron 3 Nano 4B) | |
| Uses a local 4B model (NVIDIA Nemotron 3 Nano 4B) loaded in 4-bit quantization if supported, | |
| otherwise falls back to BF16 (which fits easily on 24GB GPUs). | |
| Handles mamba-ssm dependency gracefully by disabling use_mamba_kernels in config | |
| to use transformers' native PyTorch fallback implementation when mamba-ssm is not available. | |
| """ | |
| import os | |
| import re | |
| import json | |
| import uuid | |
| import sqlite3 | |
| import torch | |
| from pathlib import Path | |
| from typing import Optional, Dict, List | |
| from gradio import Server | |
| from fastapi import HTTPException | |
| from starlette.responses import HTMLResponse, PlainTextResponse, JSONResponse | |
| from datasets import Dataset, concatenate_datasets, load_dataset | |
| import pandas as pd | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig, AutoConfig | |
| from bag import ( | |
| BASE_URL, | |
| LLMS_TXT, | |
| SITEMAP_XML, | |
| ROBOTS_TXT, | |
| OVERSEER_JSON, | |
| VIDEO_PAGE_HTML, | |
| README_MD | |
| ) | |
| # --------------------------------------------------------------------------- | |
| # Application Setup | |
| # --------------------------------------------------------------------------- | |
app = Server()
PORT = 7860
DATA_DIR = Path("data")
DATA_DIR.mkdir(exist_ok=True)

# ---------- Local Model Configuration ----------
MODEL_NAME = "nvidia/NVIDIA-Nemotron-3-Nano-4B-BF16"
print("[Overthinker] Attempting to load Nemotron 3 Nano 4B with 4-bit quantization...")

# Load the config first so the Mamba kernel switch can be turned off before
# the model is instantiated (avoids needing mamba-ssm / causal-conv1d).
print("[Overthinker] Loading model config...")
config = AutoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)
if hasattr(config, 'use_mamba_kernels'):
    config.use_mamba_kernels = False
    print("[Overthinker] Disabled use_mamba_kernels - using PyTorch fallback for Mamba layers")
else:
    print("[Overthinker] Warning: Config does not have use_mamba_kernels attribute")

# Try 4-bit first; if that fails for any reason, fall back to plain BF16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# The tokenizer does not depend on quantization, so it is loaded exactly once
# and outside the try block: a tokenizer problem must not be reported as a
# "4-bit quantization failed" error, nor trigger the BF16 fallback.
print("[Overthinker] Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

loaded_quantized = False
try:
    print("[Overthinker] Loading model with 4-bit quantization...")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        config=config,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16
    )
except Exception as e:
    print(f"[Overthinker] 4-bit quantization failed: {e}")
    print("[Overthinker] Falling back to BF16 (no quantization) - model is only ~8GB.")
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        config=config,
        device_map="auto",
        trust_remote_code=True,
        torch_dtype=torch.bfloat16
    )
    print(f"[Overthinker] Model loaded in BF16 on device: {model.device}")
else:
    loaded_quantized = True
    print(f"[Overthinker] Model loaded in 4-bit quantization on device: {model.device}")

# Shared text-generation pipeline used by call_model().
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=2048,
    temperature=0.8,
    do_sample=True,
    top_p=0.9
)

# Hugging Face Hub credentials / target repo for trace uploads.
HF_TOKEN = os.getenv('HF_TOKEN', '')
HF_DATASET_REPO = os.getenv('HF_DATASET_REPO', 'build-small-hackathon/Overthinker-traces')
| # --------------------------------------------------------------------------- | |
| # Database Helpers (same as OVERTHINKER_FINAL) | |
| # --------------------------------------------------------------------------- | |
def get_db_path(session_id: str) -> Path:
    """Return the SQLite file path for a session.

    The result is ``DATA_DIR / "session_<session_id>.db"``. Nothing is created
    or checked on disk here.
    """
    # NOTE(review): session_id is interpolated verbatim and the route handlers
    # pass values taken from request payloads - confirm it is validated upstream.
    filename = f"session_{session_id}.db"
    return DATA_DIR / filename
def init_session(session_id: str):
    """Create the session database (``nodes`` table plus one root node).

    Returns immediately when the session's database file already exists.
    If initialisation fails part-way, the partially created file is removed
    so that a later call can retry instead of hitting the early return with
    an unusable database.
    """
    db_path = get_db_path(session_id)
    if db_path.exists():
        return
    initialised = False
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute("""
            CREATE TABLE nodes (
                id TEXT PRIMARY KEY,
                parent_id TEXT,
                type TEXT NOT NULL,
                label TEXT NOT NULL,
                description TEXT DEFAULT '',
                emoji TEXT DEFAULT '🔹',
                tips TEXT DEFAULT '[]',
                order_index INTEGER DEFAULT 0,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        """)
        root_id = str(uuid.uuid4())
        # The root is the only row with parent_id NULL; other helpers rely on that.
        conn.execute(
            "INSERT INTO nodes (id, parent_id, type, label, description, emoji) VALUES (?, ?, ?, ?, ?, ?)",
            (root_id, None, "root", "What decision do you want to explore?", "", "🌳")
        )
        conn.commit()
        initialised = True
    finally:
        # Always release the connection, even when a statement raised.
        conn.close()
        if not initialised and db_path.exists():
            db_path.unlink()
def get_node_db(session_id: str, node_id: str) -> Optional[Dict]:
    """Fetch one node row as a dict, with ``tips`` decoded from JSON.

    Returns ``None`` when the session database file does not exist or no row
    has the given id. A ``tips`` value that cannot be decoded becomes ``[]``.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return None
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM nodes WHERE id=?", (node_id,)).fetchone()
    finally:
        # Close even if the query raises so the handle is not leaked.
        conn.close()
    if row is None:
        return None
    result = dict(row)
    try:
        result['tips'] = json.loads(result.get('tips', '[]'))
    except (TypeError, ValueError):
        # TypeError: stored value is NULL; ValueError: malformed JSON text.
        result['tips'] = []
    return result
def get_children_db(session_id: str, parent_id: str) -> List[Dict]:
    """Return the direct children of ``parent_id`` ordered by ``order_index``.

    Each row is returned as a dict with ``tips`` decoded from JSON
    (undecodable values become ``[]``). Returns ``[]`` when the session
    database file does not exist.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return []
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        rows = conn.execute(
            "SELECT * FROM nodes WHERE parent_id=? ORDER BY order_index",
            (parent_id,)
        ).fetchall()
    finally:
        # Close even if the query raises so the handle is not leaked.
        conn.close()
    result = []
    for row in rows:
        d = dict(row)
        try:
            d['tips'] = json.loads(d.get('tips', '[]'))
        except (TypeError, ValueError):
            # TypeError: stored value is NULL; ValueError: malformed JSON text.
            d['tips'] = []
        result.append(d)
    return result
def add_node_db(session_id: str, parent_id: str, node_type: str, label: str,
                description: str = "", emoji: str = "🔹",
                tips: Optional[list] = None, order_index: int = 0) -> Dict:
    """Insert a new node under ``parent_id`` and return it as a dict.

    A fresh UUID4 string is generated for the node id and ``tips`` is stored
    as JSON text (``None`` is stored as an empty list). The returned dict
    mirrors the inserted columns, with ``tips`` as a list.
    """
    node_id = str(uuid.uuid4())
    tips_list = tips or []
    tips_json = json.dumps(tips_list)
    db_path = get_db_path(session_id)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            "INSERT INTO nodes (id, parent_id, type, label, description, emoji, tips, order_index) VALUES (?,?,?,?,?,?,?,?)",
            (node_id, parent_id, node_type, label, description, emoji, tips_json, order_index)
        )
        conn.commit()
    finally:
        # Release the connection even when the INSERT fails.
        conn.close()
    return {
        "id": node_id,
        "parent_id": parent_id,
        "type": node_type,
        "label": label,
        "description": description,
        "emoji": emoji,
        "tips": tips_list,
        "order_index": order_index
    }
def update_root_db(session_id: str, label: str, description: str = ""):
    """Overwrite the label and description of the session's root node.

    The root is addressed as the row whose ``parent_id`` is NULL.
    """
    db_path = get_db_path(session_id)
    conn = sqlite3.connect(str(db_path))
    try:
        conn.execute(
            "UPDATE nodes SET label=?, description=? WHERE parent_id IS NULL",
            (label, description)
        )
        conn.commit()
    finally:
        # Release the connection even when the UPDATE fails.
        conn.close()
def get_path_db(session_id: str, node_id: str) -> List[Dict]:
    """Return the chain of nodes from the root down to ``node_id``.

    Walks ``parent_id`` links upward starting at ``node_id`` and returns the
    visited nodes in root-first order. The walk stops at the first id that
    cannot be found, at a node without a parent, or when an id repeats.
    """
    path = []
    visited = set()
    current_id = node_id
    # The visited set guards against a corrupted parent chain that loops back
    # on itself, which would otherwise never terminate.
    while current_id and current_id not in visited:
        node = get_node_db(session_id, current_id)
        if node is None:
            break
        visited.add(current_id)
        path.append(node)
        current_id = node.get("parent_id")
    path.reverse()
    return path
def build_path_string(session_id: str, node_id: str) -> str:
    """Render the root-to-node path as a single line of tagged labels.

    Each node on the path becomes ``[ROOT] label``, ``[INPUT] label`` or
    ``[OUTCOME] label``; nodes of any other type are skipped. Segments are
    joined with an arrow. Returns ``""`` when the path is empty.
    """
    tags = {"root": "ROOT", "input": "INPUT", "outcome": "OUTCOME"}
    nodes = get_path_db(session_id, node_id)
    parts = [
        f"[{tags[n['type']]}] {n['label']}"
        for n in nodes
        if n["type"] in tags
    ]
    return " → ".join(parts)
def get_root_node(session_id: str) -> Optional[Dict]:
    """Return the session's root node (the row with NULL ``parent_id``).

    ``tips`` is decoded from JSON (undecodable values become ``[]``).
    Returns ``None`` when the database file or the root row is missing.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return None
    conn = sqlite3.connect(str(db_path))
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute("SELECT * FROM nodes WHERE parent_id IS NULL LIMIT 1").fetchone()
    finally:
        # Close even if the query raises so the handle is not leaked.
        conn.close()
    if row is None:
        return None
    result = dict(row)
    try:
        result['tips'] = json.loads(result.get('tips', '[]'))
    except (TypeError, ValueError):
        # TypeError: stored value is NULL; ValueError: malformed JSON text.
        result['tips'] = []
    return result
def get_all_node_ids(session_id: str) -> List[str]:
    """Return the ids of every node stored for the session.

    Returns ``[]`` when the session database file does not exist.
    """
    db_path = get_db_path(session_id)
    if not db_path.exists():
        return []
    conn = sqlite3.connect(str(db_path))
    try:
        rows = conn.execute("SELECT id FROM nodes").fetchall()
    finally:
        # Close even if the query raises so the handle is not leaked.
        conn.close()
    return [r[0] for r in rows]
def build_tree_nested(session_id: str) -> Optional[Dict]:
    """Return the whole session tree as nested dicts.

    Every node dict gets a ``children`` list holding its children in the
    order returned by ``get_children_db``. Returns ``None`` when the session
    has no root node.
    """
    root = get_root_node(session_id)
    if not root:
        return None

    def build_tree(node):
        children = get_children_db(session_id, node['id'])
        node_copy = dict(node)
        # Tips normally arrive already decoded; this only fires when the
        # value is still a string after the DB helpers processed it.
        if isinstance(node_copy.get('tips'), str):
            try:
                node_copy['tips'] = json.loads(node_copy['tips'])
            except (TypeError, ValueError):
                node_copy['tips'] = []
        node_copy['children'] = [build_tree(c) for c in children]
        return node_copy

    return build_tree(root)
| # --------------------------------------------------------------------------- | |
| # Prompt Builders (same as OVERTHINKER_FINAL) | |
| # --------------------------------------------------------------------------- | |
def build_root_prompt(decision: str) -> str:
    """Build the prompt asking the model for a root node describing ``decision``.

    The prompt quotes the decision text and spells out the JSON object shape
    (``label``, ``description``, ``emoji``, ``tips``) the model must return.
    """
    prompt_lines = (
        'You are an AI that helps people explore decisions by generating decision trees.',
        'Generate a ROOT decision node for the following decision:',
        f'"{decision}"',
        'Return ONLY valid JSON with exactly this structure (no markdown, no backticks):',
        '{',
        '"label": "A concise label for this decision tree (3-6 words)",',
        '"description": "A 1-2 sentence description of this decision context",',
        '"emoji": "An emoji representing this decision",',
        '"tips": ["One actionable tip for approaching this decision"]',
        '}',
    )
    return '\n'.join(prompt_lines)
def build_options_prompt(decision_label: str, decision_desc: str, count: int, path_context: str, comment: str = "") -> str:
    """Build the prompt asking the model for ``count`` OPTION child nodes.

    ``path_context`` and ``comment`` each add one extra context line after
    the description when they are non-empty. The prompt ends with the JSON
    shape (a ``children`` list) the model must return.
    """
    path_section = ''
    if path_context:
        path_section = f'\nFull path from root to this node: "{path_context}"'
    comment_section = ''
    if comment:
        comment_section = f'\nUser context: "{comment}"'
    prompt_lines = (
        'You are an AI that helps explore decisions by generating decision tree branches.',
        f'Parent node: "{decision_label}"',
        f'Description: "{decision_desc}"{path_section}{comment_section}',
        f'Generate EXACTLY {count} child nodes that represent different OPTIONS or CHOICES the person could take.',
        'IMPORTANT: Frame each child as an OPTION or CHOICE, not as an outcome.',
        'Consider the full decision path above to ensure the options are contextually relevant.',
        'Return ONLY valid JSON with exactly this structure (no markdown, no backticks):',
        '{',
        '"children": [',
        '{',
        '"id": "child_1",',
        '"label": "Short option label (3-6 words)",',
        '"description": "1-2 sentence description",',
        '"emoji": "An emoji",',
        '"tips": ["One practical tip"]',
        '},',
        '...',
        ']',
        '}',
        'Ensure children have unique IDs like child_1, child_2, etc.',
    )
    return '\n'.join(prompt_lines)
def build_outcomes_prompt(decision_label: str, decision_desc: str, count: int, path_context: str, comment: str = "") -> str:
    """Build the prompt asking the model for ``count`` OUTCOME child nodes.

    ``path_context`` and ``comment`` each add one extra context line after
    the description when they are non-empty. The closing sentiment-ordering
    instruction only mentions as many children as were requested, so a
    request for one or two outcomes never refers to a non-existent
    "third" child. For three or more the wording is unchanged.
    """
    path_section = f'\nFull path from root to this node: "{path_context}"' if path_context else ''
    comment_section = f'\nUser context: "{comment}"' if comment else ''

    # count is formatted verbatim above; only the ordering hint needs a number.
    try:
        requested = int(count)
    except (TypeError, ValueError):
        requested = 3
    if requested >= 3:
        ordering = ' Make sure the first child is POSITIVE, the second is NEUTRAL, and the third is NEGATIVE.'
    elif requested == 2:
        ordering = ' Make sure the first child is POSITIVE and the second is NEGATIVE.'
    else:
        ordering = ''

    prompt_lines = (
        'You are an AI that helps explore decisions by generating decision tree branches.',
        f'Parent node: "{decision_label}"',
        f'Description: "{decision_desc}"{path_section}{comment_section}',
        f'Generate EXACTLY {count} child nodes that represent a DIVERSE RANGE of possible OUTCOMES. Include a MIX of positive, neutral, and negative outcomes.',
        'IMPORTANT: Frame each child as an OUTCOME or CONSEQUENCE, not as a choice someone makes.',
        'Consider the full decision path above to ensure the outcomes are contextually relevant.',
        'Return ONLY valid JSON with exactly this structure (no markdown, no backticks):',
        '{',
        '"children": [',
        '{',
        '"id": "child_1",',
        '"label": "Short outcome label (3-6 words)",',
        '"description": "1-2 sentence description",',
        '"emoji": "An emoji",',
        '"tips": ["One practical tip"]',
        '},',
        '...',
        ']',
        '}',
        f'Ensure children have unique IDs.{ordering}',
    )
    return '\n'.join(prompt_lines)
| # --------------------------------------------------------------------------- | |
| # AI Call (Local Model via pipeline) | |
| # --------------------------------------------------------------------------- | |
def call_model(prompt: str, system_prompt: str = "You are a helpful assistant that generates decision trees.") -> Optional[str]:
    """Run one system+user exchange through the local pipeline.

    Returns the ``content`` of the last message in the first generation, or
    ``None`` (after printing the error) when anything in the call fails.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    try:
        generations = pipe(conversation, max_new_tokens=2048, temperature=0.8, do_sample=True)
        # The pipeline returns the full chat; the reply is its final message.
        reply = generations[0]["generated_text"][-1]
        return reply["content"]
    except Exception as e:
        print(f"[Local Model Error] {e}")
        return None
def parse_json_response(text: str) -> Optional[dict]:
    """Extract a JSON object from raw model output.

    Steps: drop everything up to the last ``</think>`` tag (if any), strip
    Markdown code fences, then parse the span from the first ``{`` to the
    last ``}``. Returns ``None`` for empty input, unparsable text, or JSON
    whose top-level value is not an object.
    """
    if not text:
        return None
    # Reasoning traces may contain braces of their own; keep only what follows
    # the last closing tag so they cannot widen the slice below.
    if '</think>' in text:
        text = text.rsplit('</think>', 1)[1]
    # Removes both the "```json" opener and bare "```" fences.
    text = re.sub(r'```(?:json)?\s*', '', text.strip()).strip()
    start = text.find('{')
    end = text.rfind('}')
    if start >= 0 and end > start:
        text = text[start:end+1]
    try:
        parsed = json.loads(text)
    except json.JSONDecodeError as e:
        print(f"[JSON Parse Error] {e}")
        print(f"[Raw text] {text[:500]}")
        return None
    # Callers use .get() on the result, so anything but an object is a failure.
    return parsed if isinstance(parsed, dict) else None
| # --------------------------------------------------------------------------- | |
| # Routes | |
| # --------------------------------------------------------------------------- | |
async def index():
    """Serve ``templates/index.html`` located next to this file.

    Responds with the file content (200) when it exists, otherwise with a
    small placeholder page and status 404.
    """
    html_path = Path(os.path.abspath(__file__)).parent / "templates" / "index.html"
    if not html_path.exists():
        return HTMLResponse(content="<h1>Overthinker</h1><p>index.html not found</p>", status_code=404)
    page = html_path.read_text(encoding="utf-8")
    return HTMLResponse(content=page, status_code=200)
async def create_root(request: dict):
    """Ensure a session exists and return its root node.

    Uses ``request['session_id']`` when the key is present, otherwise a new
    UUID4 string. Raises HTTP 500 when no root node can be read back.
    """
    if 'session_id' in request:
        session_id = request['session_id']
    else:
        session_id = str(uuid.uuid4())
    init_session(session_id)
    root_node = get_root_node(session_id)
    if root_node is not None:
        return {"session_id": session_id, "node": root_node}
    raise HTTPException(status_code=500, detail="Could not initialize session.")
async def create_tree(request: dict):
    """Generate the root node for a decision and store it in the session.

    Reads ``session_id`` (a new UUID4 is used when missing or empty) and
    ``decision`` from the request. Raises HTTP 400 when ``decision`` is not a
    non-empty string and HTTP 500 when the model output cannot be parsed into
    a JSON object. Missing or malformed fields in the model output fall back
    to defaults derived from the decision text.
    """
    # `or` (not a .get default) so an explicit null/empty id cannot map every
    # such caller onto the same database file.
    session_id = request.get('session_id') or str(uuid.uuid4())
    decision = request.get('decision', '')
    if not isinstance(decision, str) or not decision.strip():
        raise HTTPException(status_code=400, detail="Decision text is required.")
    init_session(session_id)

    # NOTE(review): call_model runs synchronously inside this coroutine.
    prompt = build_root_prompt(decision)
    ai_response = call_model(prompt)
    parsed = parse_json_response(ai_response) if ai_response else None
    if not isinstance(parsed, dict) or not parsed:
        raise HTTPException(status_code=500, detail="Failed to generate root node. Please check model availability.")

    # The model may return nulls or wrong types; label is NOT NULL in the schema.
    label = str(parsed.get('label') or f'Overthinking: {decision[:40]}')
    description = str(parsed.get('description') or f'You are overthinking: {decision}')
    emoji = str(parsed.get('emoji') or '🌳')
    tips = parsed.get('tips')
    if isinstance(tips, str) and tips:
        tips = [tips]
    elif not isinstance(tips, list):
        tips = ['Start by exploring options.']

    # One statement so the root is never left half-updated.
    conn = sqlite3.connect(str(get_db_path(session_id)))
    try:
        conn.execute(
            "UPDATE nodes SET label=?, description=?, emoji=?, tips=? WHERE parent_id IS NULL",
            (label, description, emoji, json.dumps(tips))
        )
        conn.commit()
    finally:
        conn.close()

    root = get_root_node(session_id)
    return {'session_id': session_id, 'node': root}
async def get_node_endpoint(request: dict):
    """Return a node together with its children and its path string.

    Raises HTTP 400 when ``session_id`` or ``node_id`` is missing and HTTP 404
    when the node does not exist. The session database is initialised first
    if it is not there yet.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    if not (session_id and node_id):
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    init_session(session_id)
    node = get_node_db(session_id, node_id)
    if node is None:
        raise HTTPException(status_code=404, detail="Node not found")
    payload = {'node': node}
    payload['children'] = get_children_db(session_id, node_id)
    payload['path_context'] = build_path_string(session_id, node_id)
    return payload
# Type of the children generated under a parent of the given type.
_NEXT_NODE_TYPE = {'root': 'input', 'input': 'outcome', 'outcome': 'input'}


def _coerce_child_count(raw_count, default: int = 3) -> int:
    """Return ``raw_count`` as a positive int, or ``default`` when unusable."""
    try:
        value = int(raw_count)
    except (TypeError, ValueError):
        return default
    return value if value >= 1 else default


def _generate_children(session_id, node_id, parent, next_type, count, comment,
                       default_tip, failure_detail):
    """Ask the model for children of ``parent``, store them, return the stored nodes.

    Raises HTTP 500 with ``failure_detail`` when the model output does not
    contain a ``children`` list. Entries that are not JSON objects are skipped.
    """
    path_context = build_path_string(session_id, node_id)
    parent_label = parent.get('label', 'Unknown')
    parent_desc = parent.get('description', '')
    build_prompt = build_options_prompt if next_type == 'input' else build_outcomes_prompt
    prompt = build_prompt(parent_label, parent_desc, count, path_context, comment)
    # NOTE(review): call_model runs synchronously inside the calling coroutine.
    ai_response = call_model(prompt)
    parsed = parse_json_response(ai_response) if ai_response else None
    if not isinstance(parsed, dict) or not isinstance(parsed.get('children'), list):
        raise HTTPException(status_code=500, detail=failure_detail)

    # Read the siblings once: they seed both label de-duplication and the
    # order_index offset, so new nodes sort after the existing ones.
    existing = get_children_db(session_id, node_id)
    taken_labels = {c['label'] for c in existing}
    first_order = len(existing)

    children = []
    for i, child in enumerate(parsed['children']):
        if not isinstance(child, dict):
            continue
        # `or` also covers explicit nulls; label is NOT NULL in the schema.
        label = str(child.get('label') or 'Unknown')
        if label in taken_labels:
            label = f"{label} ({i+1})"
        taken_labels.add(label)
        description = str(child.get('description') or '')
        emoji = str(child.get('emoji') or '🔹')
        tips = child.get('tips')
        if isinstance(tips, str) and tips:
            tips = [tips]
        elif not isinstance(tips, list):
            tips = [default_tip]
        children.append(add_node_db(
            session_id, node_id, next_type, label, description, emoji, tips,
            order_index=first_order + len(children)
        ))
    return children


async def get_children(request: dict):
    """Generate and store child nodes for ``node_id``.

    The child type follows ``node_type`` from the request; when that is
    absent, the stored type of the parent node is used. Raises HTTP 400 for a
    missing ``session_id``/``node_id``, 404 for an unknown node and 500 when
    generation fails. Returns ``{'children': [...], 'next_type': ...}``.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    count = _coerce_child_count(request.get('count', 3))
    comment = request.get('comment', '')
    if not session_id or not node_id:
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    init_session(session_id)
    parent = get_node_db(session_id, node_id)
    if parent is None:
        raise HTTPException(status_code=404, detail="Node not found")
    node_type = request.get('node_type') or parent.get('type') or 'outcome'
    next_type = _NEXT_NODE_TYPE.get(node_type, 'outcome')
    children = _generate_children(
        session_id, node_id, parent, next_type, count, comment,
        default_tip=f'Consider this {next_type}.',
        failure_detail="Generation failed. Please try again."
    )
    return {'children': children, 'next_type': next_type}


async def add_options(request: dict):
    """Generate and append further child nodes under ``node_id``.

    The child type is derived from the stored type of the parent node.
    Raises HTTP 400 for a missing ``session_id``/``node_id``, 404 for an
    unknown node and 500 when generation fails. Returns
    ``{'children': [...], 'next_type': ...}``.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    count = _coerce_child_count(request.get('count', 3))
    comment = request.get('comment', '')
    if not session_id or not node_id:
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    init_session(session_id)
    parent = get_node_db(session_id, node_id)
    if parent is None:
        raise HTTPException(status_code=404, detail="Node not found")
    next_type = _NEXT_NODE_TYPE.get(parent.get('type', 'root'), 'outcome')
    children = _generate_children(
        session_id, node_id, parent, next_type, count, comment,
        default_tip=f'Additional {next_type}.',
        failure_detail="Failed to add options. Please try again."
    )
    return {'children': children, 'next_type': next_type}
async def upload_trace(request: dict):
    """Append the session's tree to the Hub dataset ``HF_DATASET_REPO``.

    Raises HTTP 400 without a ``session_id``, 500 when the Hub settings are
    missing or the upload fails, and 404 when the session has no tree.
    The existing ``train`` split is loaded and the new row appended. Only a
    "not found" error from loading is treated as "first upload"; any other
    failure aborts so existing data is never overwritten with a single row.
    """
    session_id = request.get('session_id')
    if not session_id:
        raise HTTPException(status_code=400, detail="Missing session_id")
    if not HF_TOKEN or not HF_DATASET_REPO:
        raise HTTPException(status_code=500, detail="HF_TOKEN and HF_DATASET_REPO must be configured.")
    tree = build_tree_nested(session_id)
    if tree is None:
        raise HTTPException(status_code=404, detail="No tree found for this session.")
    try:
        row = {
            'session_id': session_id,
            'tree_json': json.dumps(tree),
            'created_at': str(tree.get('created_at', ''))
        }
        new_dataset = Dataset.from_pandas(pd.DataFrame([row]))
        try:
            existing_dataset = load_dataset(HF_DATASET_REPO, split='train', token=HF_TOKEN)
        except FileNotFoundError:
            # presumably covers the datasets library's "dataset / data files
            # not found" errors (FileNotFoundError subclasses) - verify against
            # the installed datasets version.
            combined = new_dataset
        else:
            combined = concatenate_datasets([existing_dataset, new_dataset])
        # NOTE(review): load-then-push is not atomic; concurrent uploads can
        # drop each other's rows.
        combined.push_to_hub(HF_DATASET_REPO, token=HF_TOKEN, private=False)
        return {'status': 'success', 'message': 'Trace uploaded successfully!'}
    except Exception as e:
        print(f"[Upload Trace Error] {e}")
        raise HTTPException(status_code=500, detail=f"Failed to upload trace: {str(e)}") from e
async def export_json(request: dict):
    """Return the session's whole tree as nested dicts.

    Raises HTTP 400 when ``session_id`` is missing and HTTP 404 when the
    session has no root node. Uses ``build_tree_nested`` rather than a second
    copy of the same recursive walk.
    """
    session_id = request.get('session_id')
    if not session_id:
        raise HTTPException(status_code=400, detail="Missing session_id")
    full_tree = build_tree_nested(session_id)
    if full_tree is None:
        raise HTTPException(status_code=404, detail="No tree found")
    return full_tree
async def export_path_json(request: dict):
    """Return the root-to-node path as ``{'path': [node, ...]}``.

    Raises HTTP 400 when ``session_id`` or ``node_id`` is missing.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    if not (session_id and node_id):
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    return {'path': get_path_db(session_id, node_id)}
async def export_path_md(request: dict):
    """Render the root-to-node path as Markdown text.

    Each node becomes an indented bold label, followed by its description and
    first tip when present. Raises HTTP 400 when ``session_id`` or
    ``node_id`` is missing.
    """
    session_id = request.get('session_id')
    node_id = request.get('node_id')
    if not session_id or not node_id:
        raise HTTPException(status_code=400, detail="Missing session_id or node_id")
    path = get_path_db(session_id, node_id)

    # NOTE(review): the 'π' glyphs (outcome and fallback) look like damaged
    # emoji whose intended characters could not be determined - confirm.
    type_emoji = {'root': '🌳', 'input': '🧠', 'outcome': 'π'}
    fallback_emoji = 'π'

    # Collect fragments and join once instead of growing a string in the loop.
    chunks = ['# 🧠 Overthinker — Decision Path\n\n']
    for depth, node in enumerate(path):
        indent = ' ' * depth
        emoji = type_emoji.get(node.get('type', ''), fallback_emoji)
        chunks.append(f'{indent}{emoji} **{node.get("label", "")}**\n')
        if node.get('description'):
            chunks.append(f'{indent} > {node.get("description", "")}\n')
        tips = node.get('tips')
        # Tips come from model output: accept a list or a bare string only.
        if isinstance(tips, str) and tips:
            first_tip = tips
        elif isinstance(tips, list) and tips:
            first_tip = tips[0]
        else:
            first_tip = None
        if first_tip is not None:
            chunks.append(f'{indent} > 💡 {first_tip}\n')
        chunks.append('\n')
    return PlainTextResponse(content=''.join(chunks), status_code=200)
async def get_llms_txt():
    """Serve the ``LLMS_TXT`` constant as plain text."""
    body = LLMS_TXT
    return PlainTextResponse(body)
async def get_readme_md():
    """Serve the ``README_MD`` constant as plain text."""
    body = README_MD
    return PlainTextResponse(body)
async def get_sitemap():
    """Serve the ``SITEMAP_XML`` constant with an ``application/xml`` media type."""
    body = SITEMAP_XML
    return HTMLResponse(content=body, media_type="application/xml")
async def get_robots():
    """Serve the ``ROBOTS_TXT`` constant as plain text."""
    body = ROBOTS_TXT
    return PlainTextResponse(body)
async def get_overthinker_json():
    """Serve the ``OVERSEER_JSON`` constant as a JSON response."""
    # NOTE(review): presumably OVERSEER_JSON is a dict/list rather than JSON
    # text (a str would be encoded a second time) - verify against `bag`.
    payload = OVERSEER_JSON
    return JSONResponse(content=payload, media_type="application/json")
async def get_video():
    """Serve the ``VIDEO_PAGE_HTML`` constant as an HTML page."""
    page = VIDEO_PAGE_HTML
    return HTMLResponse(content=page)
| # --------------------------------------------------------------------------- | |
| # Launch | |
| # --------------------------------------------------------------------------- | |
if __name__ == "__main__":
    # Startup banner: report what was actually loaded, then start the server.
    # NOTE(review): the bare 'π' glyphs below look like damaged emoji whose
    # intended characters could not be determined - confirm.
    print(f"🧠 Overthinker — Local 4B Quantized Edition on port {PORT}")
    print(f"🤖 Model: {MODEL_NAME}")
    # Only claim the kernels are disabled when the config switch is really off.
    if getattr(config, 'use_mamba_kernels', None) is False:
        print("π Mamba kernels: Disabled (using PyTorch fallback - no mamba-ssm/causal-conv1d needed)")
    else:
        print("π Mamba kernels: Not disabled (config has no use_mamba_kernels switch)")
    if loaded_quantized:
        print("💾 Quantization: 4-bit NF4 (BitsAndBytes)")
    else:
        print("💾 Quantization: None (BF16 fallback - fits in 16GB VRAM)")
    print(f"π Open http://localhost:{PORT} in your browser")
    if not HF_TOKEN or not HF_DATASET_REPO:
        print("⚠️ No HF_TOKEN or HF_DATASET_REPO set. Upload will fail.")
    app.launch(
        server_port=PORT,
        show_error=True,
        share=False
    )