import express from "express";
import cors from "cors";
import { readFileSync } from "fs";
import https from "https";
import { createHash, timingSafeEqual } from "crypto";

const app = express();
app.use(cors());
app.use(express.json({ limit: "50mb" }));

const PORT = 11434;
const HF_TOKEN = process.env.HF_TOKEN || process.env.OPENAI_API_KEY || "";
const PAYMENTS_URL = "https://scottzillasystems-scottzilla-payments.hf.space";
// Requests per client per UTC day allowed without an API key.
const FREE_TIER_DAILY_LIMIT = 25;

// ─── Internal agent keys (bypass rate limiting for our own Spaces) ───────────
// Agent Zero Spaces authenticate with HF_TOKEN — same token means internal.
const INTERNAL_ORIGINS = new Set([
  "scottzillasystems-agent-zero.hf.space",
  "scottzillasystems-agent-zero-pentesting.hf.space",
  "scottzillasystems-agent-zero-finops.hf.space",
  "scottzillasystems-agent-zero-adult-entertainment.hf.space",
]);

// ─── Response cache (LRU, 5-min TTL) ─────────────────────────────────────────
// A Map iterates in insertion order, so the first key is always the least
// recently used one as long as every hit and every write re-inserts its key.
const cache = new Map();
const CACHE_TTL = 300000; // 5 min
const CACHE_MAX = 200;

/**
 * Look up a cached value.
 *
 * @param {string} key - Cache key.
 * @returns {*} The cached value, or null when the key is absent or older than CACHE_TTL.
 */
function cacheGet(key) {
  const entry = cache.get(key);
  if (!entry) return null;
  if (Date.now() - entry.ts > CACHE_TTL) {
    cache.delete(key);
    return null;
  }
  // Re-insert to mark the entry as most recently used. The timestamp is kept,
  // so a hit does not extend the entry's lifetime.
  cache.delete(key);
  cache.set(key, entry);
  return entry.value;
}

/**
 * Store a value, evicting the least recently used entry when the cache is full.
 *
 * @param {string} key - Cache key.
 * @param {*} value - Value to store.
 */
function cacheSet(key, value) {
  // Drop any existing entry first so an overwrite neither evicts an unrelated
  // entry nor keeps its old position in the eviction order.
  cache.delete(key);
  if (cache.size >= CACHE_MAX) {
    const oldest = cache.keys().next().value;
    cache.delete(oldest);
  }
  cache.set(key, { value, ts: Date.now() });
}

// ─── Load model catalog ──────────────────────────────────────────────────────
const catalog = JSON.parse(readFileSync("models.json", "utf-8"));
const models = catalog.models;
const aliasMap = new Map(models.map((m) => [m.alias, m]));
const hfIdMap = new Map(models.map((m) => [m.hf_id, m]));

/**
 * Resolve a client-supplied model name to a catalog entry.
 *
 * Lookup order: exact alias (lower-cased, "scottzillasystems/" prefix removed),
 * exact hf_id, substring of hf_id, substring of name, exact capability.
 *
 * @param {*} name - Requested model name; a falsy value selects the first catalog entry.
 * @returns {object|null} The matching catalog entry, or null when nothing matches.
 */
function resolveModel(name) {
  if (!name) return models[0];
  // Request bodies are untrusted JSON: coerce so a numeric or object "model"
  // cannot throw on toLowerCase().
  const raw = String(name);
  const lower = raw.toLowerCase().replace(/^scottzillasystems\//, "");
  return (
    aliasMap.get(lower) ||
    hfIdMap.get(raw) ||
    models.find((m) => m.hf_id?.toLowerCase().includes(lower)) ||
    models.find((m) => m.name?.toLowerCase().includes(lower)) ||
    models.find((m) => m.capabilities?.some((c) => c === lower)) ||
    null
  );
}

/**
 * Flatten one message's `content` into plain text for keyword routing.
 *
 * @param {*} content - A string, an array of parts (strings or objects with a `text` field), or anything else.
 * @returns {string} The text found in the content; empty for null/undefined.
 */
function contentToText(content) {
  if (typeof content === "string") return content;
  if (Array.isArray(content)) {
    return content
      .map((part) => (typeof part === "string" ? part : part?.text || ""))
      .join(" ");
  }
  return content ? String(content) : "";
}

/**
 * Pick a model by scanning the conversation text for capability keywords.
 * Rules are tried in order and the first match wins.
 *
 * @param {Array<{content?: *}>} messages - Chat messages; a non-array is treated as empty.
 * @returns {object|null} Whatever resolveModel returns for the chosen alias.
 */
function routeByCapability(messages) {
  const list = Array.isArray(messages) ? messages : [];
  const text = list.map((m) => contentToText(m?.content)).join(" ").toLowerCase();

  if (/\b(image|picture|photo|draw|edit image|generate image)\b/.test(text)) {
    return resolveModel("qwen3-vl-8b-abliterated") || resolveModel("qwen3.5-9b");
  }
  if (/\b(code|python|javascript|function|debug|program|script|sql|api)\b/.test(text)) {
    return resolveModel("qwen3-coder-abliterated");
  }
  // The stems below carry \w* because a bare stem followed by \b can never
  // match the inflected word it was written for ("uncensored", "abliterated").
  if (/\b(uncensor\w*|abliterat\w*|jailbreak|unrestrict\w*|nsfw|explicit)\b/.test(text)) {
    return resolveModel("qwen3.6-27b-abliterated");
  }
  if (/\b(think|reason|math|logic|proof|step.by.step|analyze|complex)\b/.test(text)) {
    return resolveModel("qwen3.5-40b-uncensored");
  }
  if (/\b(creative|story|roleplay|write|fiction|narrative|poem|essay)\b/.test(text)) {
    return resolveModel("cydonia-24b");
  }
  // Same reason for vuln\w* ("vulnerability", "vulnerabilities").
  if (/\b(hack|exploit|vuln\w*|pentest|nmap|security|attack|scan)\b/.test(text)) {
    return resolveModel("qwen3.6-27b-abliterated");
  }
  return resolveModel("qwen3.5-9b");
}

// ─── Auth middleware with internal agent bypass ──────────────────────────────
// Free-tier counters keyed by "<client>:<YYYY-MM-DD>".
const freeUsageToday = new Map();

/**
 * Verify an API key against the payments service.
 *
 * Successful responses go through the shared response cache to avoid hammering
 * the payments server, so a result (including its `within_limit` flag) can be
 * up to CACHE_TTL old.
 *
 * @param {string} key - API key supplied by the client.
 * @returns {Promise<object|null>} The payments service's JSON payload, or null on a non-2xx response, timeout or network error.
 */
async function verifyApiKey(key) {
  const cacheKey = `auth:${key}`;
  const cached = cacheGet(cacheKey);
  if (cached) return cached;
  try {
    // The key is client-controlled: encode it so "/", "?", "#" or ".." cannot
    // change which payments endpoint is called.
    const resp = await fetch(`${PAYMENTS_URL}/api/keys/verify/${encodeURIComponent(key)}`, {
      signal: AbortSignal.timeout(5000),
    });
    if (!resp.ok) return null;
    const data = await resp.json();
    cacheSet(cacheKey, data);
    return data;
  } catch {
    return null;
  }
}

/**
 * Report one request to the payments service. Deliberately fire-and-forget:
 * the request is not awaited and failures are ignored so that usage accounting
 * can never fail or delay a client response.
 *
 * @param {string} key - API key the usage is recorded against.
 * @param {string} model - Alias of the model that served the request.
 * @returns {Promise<void>}
 */
async function recordUsage(key, model) {
  try {
    fetch(`${PAYMENTS_URL}/api/usage/record`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ api_key: key, model }),
      signal: AbortSignal.timeout(3000),
    }).catch(() => {});
  } catch {
    // Best effort: a synchronous failure while building the request is ignored too.
  }
}

/**
 * Extract the lower-cased hostname from a header value that may be a full URL
 * or a bare host, optionally with a port or a comma-separated list.
 *
 * @param {*} value - Raw header value.
 * @returns {string} The hostname of the first entry, or "" when it cannot be parsed.
 */
function hostnameOf(value) {
  const first = String(value || "").split(",")[0].trim().toLowerCase();
  if (!first) return "";
  try {
    return new URL(first.includes("://") ? first : `https://${first}`).hostname;
  } catch {
    return "";
  }
}

/**
 * Compare two secrets without leaking through timing how much of them matches.
 * Both sides are hashed first because timingSafeEqual requires equal lengths.
 *
 * @param {string} a - First secret.
 * @param {string} b - Second secret.
 * @returns {boolean} True when the two strings are identical.
 */
function secretsMatch(a, b) {
  const digestA = createHash("sha256").update(a).digest();
  const digestB = createHash("sha256").update(b).digest();
  return timingSafeEqual(digestA, digestB);
}

/**
 * Decide whether a request comes from one of our own Agent Zero Spaces.
 *
 * @param {object} req - Express request.
 * @returns {boolean} True when the Origin/Referer or X-Forwarded-Host hostname is an internal Space, or when the bearer token equals HF_TOKEN.
 */
function isInternalAgent(req) {
  // Exact hostname match: a substring test would also accept hosts such as
  // "<internal>.evil.com" or any URL that merely contains the name in its path.
  // NOTE(review): these headers are still client-controlled and can be forged
  // by any non-browser client — only the token check below is a real credential.
  const originHost = hostnameOf(req.headers.origin || req.headers.referer);
  const forwardedHost = hostnameOf(req.headers["x-forwarded-host"]);
  if (INTERNAL_ORIGINS.has(originHost) || INTERNAL_ORIGINS.has(forwardedHost)) return true;

  // Our own Spaces authenticate with HF_TOKEN. The length guard keeps an unset
  // or trivially short token from ever matching.
  const authHeader = req.headers.authorization || "";
  const key = authHeader.replace(/^Bearer\s+/i, "").trim();
  return HF_TOKEN.length > 10 && secretsMatch(key, HF_TOKEN);
}

/**
 * Express middleware that assigns `req.tier` and `req.allowedModels` (and
 * `req.apiKey` for verified keys), or answers 429 when a limit is exhausted.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Express continuation.
 */
async function authMiddleware(req, res, next) {
  // ─── INTERNAL AGENT BYPASS: our own Spaces get unlimited access ───────────
  if (isInternalAgent(req)) {
    req.tier = "internal";
    req.allowedModels = ["all"];
    return next();
  }

  const authHeader = req.headers.authorization || req.headers["x-api-key"] || "";
  const key = authHeader.replace(/^Bearer\s+/i, "").trim();

  // No key → free tier, metered per client per UTC day.
  if (!key || key === "sk-free-tier") {
    // NOTE(review): with Express's default "trust proxy" setting, req.ip is the
    // address of the immediate peer; behind a reverse proxy every client would
    // share one bucket — confirm the deployment.
    const ip = req.ip || req.headers["x-forwarded-for"] || "unknown";
    const today = new Date().toISOString().split("T")[0];
    const usageKey = `${ip}:${today}`;
    const used = freeUsageToday.get(usageKey) || 0;
    if (used >= FREE_TIER_DAILY_LIMIT) {
      return res.status(429).json({
        error: `Free tier limit reached (${FREE_TIER_DAILY_LIMIT}/day). Get an API key at ${PAYMENTS_URL}`,
        upgrade_url: PAYMENTS_URL,
      });
    }
    freeUsageToday.set(usageKey, used + 1);
    req.tier = "free";
    req.allowedModels = ["chatgpt-5", "text-embedding-3-small"];
    return next();
  }

  // Validate paid API key.
  const keyData = await verifyApiKey(key);
  if (!keyData || !keyData.valid) {
    // Payment server unreachable or key invalid → graceful degradation.
    // NOTE(review): this grants every model, unmetered, to ANY non-empty key,
    // so an arbitrary bearer value sidesteps the free-tier limit. Behavior is
    // kept as-is because clients may rely on it; consider metering this path.
    req.tier = "basic";
    req.allowedModels = ["all"];
    return next();
  }
  if (!keyData.within_limit) {
    return res.status(429).json({
      error: `Rate limit exceeded for ${keyData.tier_name} tier.`,
      usage: keyData.usage_today,
      limit: keyData.limit_today,
    });
  }
  req.tier = keyData.tier;
  req.apiKey = key;
  req.allowedModels = keyData.models || ["all"];
  next();
}

// ─── HF Router proxy with retry ──────────────────────────────────────────────
/**
 * Send a chat-completion request to router.huggingface.co.
 *
 * @param {string} routerModel - Model id forwarded as `model`.
 * @param {Array<object>} messages - Chat messages forwarded unchanged.
 * @param {boolean} [stream=false] - When true, resolve with the raw upstream response stream instead of parsed JSON.
 * @param {{max_tokens?: number, temperature?: number, top_p?: number}} [params={}] - Sampling overrides.
 * @returns {Promise<object>} Parsed JSON body, `{ error: <raw body> }` when the body is not JSON, or the upstream response stream when `stream` is true.
 * @throws {Error} When both the first attempt and the single retry fail (network error or 60 s timeout).
 */
async function hfChat(routerModel, messages, stream = false, params = {}) {
  const body = JSON.stringify({
    model: routerModel,
    messages,
    stream,
    max_tokens: params.max_tokens || 2048,
    temperature: params.temperature ?? 0.7,
    top_p: params.top_p || 0.95,
  });

  const attempt = () =>
    new Promise((resolve, reject) => {
      const req = https.request(
        {
          hostname: "router.huggingface.co",
          path: "/v1/chat/completions",
          method: "POST",
          headers: {
            "Content-Type": "application/json",
            Authorization: `Bearer ${HF_TOKEN}`,
            "Content-Length": Buffer.byteLength(body),
          },
          timeout: 60000,
        },
        (res) => {
          if (stream) return resolve(res);
          // Decode as UTF-8 at the stream level so a multi-byte character split
          // across two chunks is not corrupted by per-chunk string conversion.
          res.setEncoding("utf8");
          let data = "";
          res.on("data", (chunk) => {
            data += chunk;
          });
          res.on("end", () => {
            try {
              resolve(JSON.parse(data));
            } catch {
              resolve({ error: data });
            }
          });
          // Without this a connection dropped mid-body would never settle the promise.
          res.on("error", reject);
        },
      );
      req.on("error", reject);
      req.on("timeout", () => {
        req.destroy();
        reject(new Error("timeout"));
      });
      req.end(body);
    });

  // Single retry on failure, after a one-second pause.
  try {
    return await attempt();
  } catch {
    await new Promise((r) => setTimeout(r, 1000));
    return await attempt();
  }
}

// ─── HF Embeddings ───────────────────────────────────────────────────────────
/**
 * Request embeddings for one or more inputs from router.huggingface.co.
 *
 * @param {*} input - A single input or an array of inputs.
 * @returns {Promise<*>} The parsed JSON response, or null when the body is not valid JSON.
 * @throws {Error} On network error or after the 15 s timeout.
 */
async function hfEmbeddings(input) {
  const texts = Array.isArray(input) ? input : [input];
  const body = JSON.stringify({ inputs: texts });
  return new Promise((resolve, reject) => {
    const req = https.request(
      {
        hostname: "router.huggingface.co",
        path: "/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2",
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          Authorization: `Bearer ${HF_TOKEN}`,
          "Content-Length": Buffer.byteLength(body),
        },
        timeout: 15000,
      },
      (res) => {
        // Stream-level decoding keeps multi-byte characters intact across chunks.
        res.setEncoding("utf8");
        let data = "";
        res.on("data", (chunk) => {
          data += chunk;
        });
        res.on("end", () => {
          try {
            resolve(JSON.parse(data));
          } catch {
            resolve(null);
          }
        });
        // Settle the promise if the response stream fails mid-body.
        res.on("error", reject);
      },
    );
    req.on("error", reject);
    req.on("timeout", () => {
      req.destroy();
      reject(new Error("timeout"));
    });
    req.end(body);
  });
}

/**
 * Deterministic local embedding fallback (384-dim, unit length).
 *
 * Each character code adds 1.0 to bucket `code % 384`, and each adjacent pair
 * adds 0.5 to bucket `(code * 31 + nextCode) % 384`; the vector is then
 * L2-normalised. Empty input yields the all-zero vector.
 *
 * @param {*} text - Text to embed; falsy values are treated as "" and other non-strings are stringified.
 * @returns {number[]} A 384-element vector.
 */
function localEmbed(text) {
  const dim = 384;
  const embedding = new Array(dim).fill(0);
  // String() keeps a truthy non-string input from throwing on toLowerCase().
  const str = String(text || "").toLowerCase().trim();
  for (let i = 0; i < str.length; i++) {
    const c = str.charCodeAt(i);
    embedding[c % dim] += 1.0;
    if (i + 1 < str.length) {
      const bigram = (c * 31 + str.charCodeAt(i + 1)) % dim;
      embedding[bigram] += 0.5;
    }
  }
  // "|| 1" avoids dividing by zero for the all-zero vector.
  const norm = Math.sqrt(embedding.reduce((s, v) => s + v * v, 0)) || 1;
  return embedding.map((v) => v / norm);
}

// ─── Public endpoints ────────────────────────────────────────────────────────
// Service banner: version, model count and the list of endpoints.
app.get("/", (req, res) => {
  res.json({
    status: "operational",
    name: "Scottzilla Gateway",
    version: "6.1",
    models: models.length,
    uptime_s: Math.floor(process.uptime()),
    endpoints: {
      chat: "POST /v1/chat/completions",
      embeddings: "POST /v1/embeddings",
      models: "GET /v1/models",
      route: "GET /api/route?q=...",
      health: "GET /health",
    },
    free_tier: { limit: `${FREE_TIER_DAILY_LIMIT} req/day`, models: ["chatgpt-5"], no_key_needed: true },
    pricing: PAYMENTS_URL,
  });
});

// Liveness probe with basic runtime statistics.
app.get("/health", (req, res) => {
  res.json({
    status: "healthy",
    uptime: process.uptime(),
    models: models.length,
    cache_size: cache.size,
    timestamp: new Date().toISOString(),
  });
});
// Model listing in the /api/tags shape (name, size, digest, details).
app.get("/api/tags", (req, res) => {
  res.json({
    models: models.map((m) => ({
      name: m.alias,
      model: m.alias,
      modified_at: new Date().toISOString(),
      size: (m.size_gb || 0) * 1e9,
      digest: m.hf_id,
      details: {
        parent_model: m.hf_id,
        format: m.arch,
        family: m.arch,
        parameter_size: m.params || "unknown",
      },
    })),
  });
});

// Model listing in the OpenAI /v1/models shape, plus the embedding pseudo-model.
app.get("/v1/models", (req, res) => {
  const created = Math.floor(Date.now() / 1000);
  res.json({
    object: "list",
    data: [
      ...models.map((m) => ({
        id: m.alias,
        object: "model",
        created,
        owned_by: "ScottzillaSystems",
        hf_id: m.hf_id,
        capabilities: m.capabilities,
        params: m.params,
      })),
      {
        id: "text-embedding-3-small",
        object: "model",
        created,
        owned_by: "ScottzillaSystems",
        type: "embedding",
      },
    ],
  });
});

// Raw catalog as loaded from models.json.
app.get("/api/library", (req, res) => res.json(catalog));

// Dry-run of keyword routing: reports which model a query would be sent to.
app.get("/api/route", (req, res) => {
  const q = req.query.q || "";
  const pick = routeByCapability([{ role: "user", content: q }]);
  res.json({
    query: q,
    routed_to: pick ? { alias: pick.alias, name: pick.name, capabilities: pick.capabilities } : null,
  });
});

// Catalog entry for one model, looked up by `name` or `model`.
app.post("/api/show", (req, res) => {
  // req.body may be undefined when the request carried no JSON body.
  const entry = resolveModel(req.body?.name || req.body?.model);
  if (!entry) return res.status(404).json({ error: "model not found" });
  res.json({ details: { ...entry } });
});

// ─── Embeddings ──────────────────────────────────────────────────────────────
// Inputs whose serialized form is longer than this are served but not cached,
// so cache keys stay small without truncating (and thereby colliding) them.
const EMBED_CACHE_KEY_MAX = 4096;

/**
 * Turn one embedding input into the text handed to the local fallback embedder.
 *
 * @param {*} item - A string, or any JSON value (e.g. a token array).
 * @returns {string} The item itself when it is a string, otherwise its JSON form ("" if it has none).
 */
function embeddingText(item) {
  return typeof item === "string" ? item : JSON.stringify(item) ?? "";
}

app.post("/v1/embeddings", authMiddleware, async (req, res) => {
  try {
    const { input, model } = req.body ?? {};
    const texts = Array.isArray(input) ? input : [input];

    // Key on the complete request. A truncated key would hand one caller the
    // vectors of a different input that merely shares a prefix, and omitting
    // the model would echo back the wrong `model` label.
    const serialized = JSON.stringify([model ?? null, texts]);
    const cacheKey = serialized.length <= EMBED_CACHE_KEY_MAX ? `emb:${serialized}` : null;
    if (cacheKey) {
      const cached = cacheGet(cacheKey);
      if (cached) return res.json(cached);
    }

    let embeddings;
    try {
      const hfResult = await hfEmbeddings(texts);
      // Accept the upstream answer only when it has exactly one row per input;
      // an error payload or a differently shaped array falls through to local.
      if (Array.isArray(hfResult) && hfResult.length > 0 && hfResult.length === texts.length) {
        embeddings = hfResult;
      }
    } catch {
      // Upstream unavailable — fall back to the local embedder below.
    }
    if (!embeddings) {
      embeddings = texts.map((t) => localEmbed(embeddingText(t)));
    }

    const charCount = texts.join("").length;
    const response = {
      object: "list",
      data: embeddings.map((emb, i) => ({
        object: "embedding",
        index: i,
        // A nested row (array of vectors) is reduced to its first vector.
        embedding: Array.isArray(emb[0]) ? emb[0] : emb,
      })),
      model: model || "text-embedding-3-small",
      usage: { prompt_tokens: charCount, total_tokens: charCount },
    };
    if (cacheKey) cacheSet(cacheKey, response);
    res.json(response);
  } catch (err) {
    // Last resort: always answer with local embeddings. Inputs are stringified
    // first so a non-string input cannot throw again inside this handler and
    // leave the request without a response.
    const rawInput = req.body?.input;
    const texts = Array.isArray(rawInput) ? rawInput : [rawInput || ""];
    res.json({
      object: "list",
      data: texts.map((t, i) => ({
        object: "embedding",
        index: i,
        embedding: localEmbed(embeddingText(t || "")),
      })),
      model: "text-embedding-3-small",
      usage: { prompt_tokens: 0, total_tokens: 0 },
    });
  }
});

// ─── Chat endpoints ──────────────────────────────────────────────────────────
/**
 * Choose the catalog entry for a chat request: "auto" always routes by content;
 * any other name is resolved first and falls back to content routing.
 *
 * @param {*} modelName - Model name from the request body.
 * @param {Array<object>} messages - Chat messages used for content routing.
 * @returns {object|null} The selected catalog entry, or null when nothing matches.
 */
function selectModel(modelName, messages) {
  if (modelName === "auto") return routeByCapability(messages);
  return resolveModel(modelName) || routeByCapability(messages);
}

/**
 * Tell whether a free-tier request is asking for a model outside its allowance.
 *
 * @param {object} req - Express request carrying `tier` and `allowedModels`.
 * @param {object} entry - Selected catalog entry.
 * @returns {boolean} True when the request must be rejected.
 */
function isBlockedForTier(req, entry) {
  if (req.tier !== "free") return false;
  return !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all");
}

/**
 * Tell whether an upstream chat result actually contains a completion.
 *
 * @param {*} result - Parsed upstream response.
 * @returns {boolean} True when `choices` is a non-empty array.
 */
function upstreamSucceeded(result) {
  return Array.isArray(result?.choices) && result.choices.length > 0;
}

/**
 * Render an upstream `error` payload as a human-readable message.
 *
 * @param {*} error - String, `{ message }` object, or any other JSON value.
 * @returns {string} A message suitable for an error response.
 */
function describeUpstreamError(error) {
  if (typeof error === "string") return error;
  return error?.message || JSON.stringify(error);
}

// Non-streaming chat that answers with a single `message` object.
app.post("/api/chat", authMiddleware, async (req, res) => {
  try {
    const { model: modelName, messages, options } = req.body ?? {};
    // A malformed body is the client's error: answer 400 rather than letting
    // it surface later as an opaque 502.
    if (!Array.isArray(messages)) {
      return res.status(400).json({ error: "`messages` must be an array." });
    }
    const entry = selectModel(modelName, messages);
    if (!entry) return res.status(404).json({ error: `Model not found: ${modelName}` });
    if (isBlockedForTier(req, entry)) {
      return res.status(403).json({ error: `${entry.name} requires a paid plan.`, upgrade: PAYMENTS_URL });
    }

    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
    const result = await hfChat(routerModel, messages, false, options || {});
    const content = result?.choices?.[0]?.message?.content || "";
    // Only count calls that produced a completion against the caller's quota.
    if (req.apiKey && upstreamSucceeded(result)) recordUsage(req.apiKey, entry.alias);
    res.json({
      model: entry.alias,
      created_at: new Date().toISOString(),
      message: { role: "assistant", content },
      done: true,
    });
  } catch (err) {
    res.status(502).json({ error: `Gateway error: ${err.message}. Retrying may help.` });
  }
});

// Non-streaming single-prompt completion that answers with a `response` string.
app.post("/api/generate", authMiddleware, async (req, res) => {
  try {
    const { model: modelName, prompt, options } = req.body ?? {};
    const messages = [{ role: "user", content: prompt }];
    const entry = selectModel(modelName, messages);
    if (!entry) return res.status(404).json({ error: `Model not found: ${modelName}` });
    if (isBlockedForTier(req, entry)) {
      return res.status(403).json({ error: `${entry.name} requires a paid plan.`, upgrade: PAYMENTS_URL });
    }

    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
    const result = await hfChat(routerModel, messages, false, options || {});
    // Only count calls that produced a completion against the caller's quota.
    if (req.apiKey && upstreamSucceeded(result)) recordUsage(req.apiKey, entry.alias);
    res.json({
      model: entry.alias,
      created_at: new Date().toISOString(),
      response: result?.choices?.[0]?.message?.content || "",
      done: true,
    });
  } catch (err) {
    res.status(502).json({ error: err.message });
  }
});

// OpenAI-compatible chat completions, with optional server-sent-event streaming.
app.post("/v1/chat/completions", authMiddleware, async (req, res) => {
  try {
    const { model: modelName, messages, stream, temperature, max_tokens, top_p } = req.body ?? {};
    if (!Array.isArray(messages)) {
      return res.status(400).json({
        error: { message: "`messages` must be an array.", type: "invalid_request_error" },
      });
    }
    const entry = selectModel(modelName, messages);
    if (!entry) {
      return res.status(404).json({
        error: { message: `Model not found: ${modelName}`, type: "invalid_request_error" },
      });
    }
    if (isBlockedForTier(req, entry)) {
      return res.status(403).json({
        error: { message: `${entry.name} requires a paid plan.`, type: "insufficient_quota" },
      });
    }

    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
    const sampling = { temperature, max_tokens, top_p };

    if (stream) {
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache");
      res.setHeader("Connection", "keep-alive");
      try {
        const upstream = await hfChat(routerModel, messages, true, sampling);
        // Stop pulling from upstream once the client is gone; after a normal
        // finish the upstream has already ended and destroy() is a no-op.
        res.on("close", () => upstream.destroy());
        upstream.on("data", (chunk) => res.write(chunk));
        upstream.on("end", () => {
          if (req.apiKey) recordUsage(req.apiKey, entry.alias);
          res.end();
        });
        upstream.on("error", () => res.end());
      } catch (e) {
        // JSON.stringify escapes quotes and newlines in the message, so the
        // event payload is always valid JSON.
        res.write(`data: ${JSON.stringify({ error: e.message })}\n\n`);
        res.end();
      }
      return;
    }

    const result = await hfChat(routerModel, messages, false, sampling);
    const succeeded = upstreamSucceeded(result);
    // Surface upstream failures instead of disguising them as an empty,
    // successful completion.
    if (!succeeded && result?.error) {
      return res.status(502).json({
        error: { message: describeUpstreamError(result.error), type: "server_error" },
      });
    }
    if (req.apiKey && succeeded) recordUsage(req.apiKey, entry.alias);

    // Return clean OpenAI-compatible response.
    res.json({
      id: `chatcmpl-${Date.now()}`,
      object: "chat.completion",
      created: Math.floor(Date.now() / 1000),
      model: entry.alias,
      choices: result?.choices || [
        { index: 0, message: { role: "assistant", content: "" }, finish_reason: "stop" },
      ],
      usage: result?.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    });
  } catch (err) {
    res.status(502).json({ error: { message: err.message, type: "server_error" } });
  }
});

// ─── Cleanup ─────────────────────────────────────────────────────────────────
// Hourly: drop free-tier counters that belong to any day other than today (UTC).
setInterval(() => {
  const today = new Date().toISOString().split("T")[0];
  for (const [key] of freeUsageToday) {
    if (!key.endsWith(`:${today}`)) freeUsageToday.delete(key);
  }
}, 3600000);

// ─── Start ───────────────────────────────────────────────────────────────────
app.listen(PORT, "0.0.0.0", () => {
  console.log(`⚡ Scottzilla Gateway v6.1 | :${PORT} | ${models.length} models`);
  console.log(`   Internal agents: unlimited | Free tier: 25/day | Cache: active`);
});