// scottzilla-gateway / server.mjs
// ScottzillaSystems's picture
// UX v6.1: Internal agent bypass + response caching + graceful degradation
// 6d4cef9 verified
import { createHash, timingSafeEqual } from "crypto";
import { readFileSync } from "fs";
import https from "https";
import cors from "cors";
import express from "express";
// Express application. cors() is registered with no options, and JSON request
// bodies are parsed with a 50 MB size limit. Middleware order matters: both
// are installed before any route below is declared.
const app = express();
app.use(cors());
app.use(express.json({ limit: "50mb" }));
// TCP port passed to app.listen() at the bottom of the file.
// NOTE(review): 11434 is presumably chosen to match Ollama's default port,
// since the file also serves /api/tags, /api/chat and /api/generate — confirm.
const PORT = 11434;
// Bearer token sent on upstream Hugging Face requests and compared against
// incoming Authorization headers by isInternalAgent(). Falls back to
// OPENAI_API_KEY, then to an empty string when neither variable is set.
const HF_TOKEN = process.env.HF_TOKEN || process.env.OPENAI_API_KEY || "";
// Base URL of the payments service: used for key verification, usage
// recording, and as the upgrade link returned to clients.
const PAYMENTS_URL = "https://scottzillasystems-scottzilla-payments.hf.space";
// ─── Internal agent keys (bypass rate limiting for our own Spaces) ───────────
// Agent Zero Spaces authenticate with HF_TOKEN — same token means internal
// Hostnames of our own Agent Zero Spaces. isInternalAgent() tests the
// Origin/Referer and X-Forwarded-Host request headers against this set;
// a match gives the request the unrestricted "internal" tier.
const INTERNAL_ORIGINS = new Set([
"scottzillasystems-agent-zero.hf.space",
"scottzillasystems-agent-zero-pentesting.hf.space",
"scottzillasystems-agent-zero-finops.hf.space",
"scottzillasystems-agent-zero-adult-entertainment.hf.space",
]);
// ─── Response cache (LRU, 5-min TTL) ────────────────────────────────────────
// Backing store for the response cache. A Map iterates in insertion order, so
// the first key is always the least recently used one as long as every read
// and write re-inserts the key it touches (see cacheGet / cacheSet).
const cache = new Map();
const CACHE_TTL = 300000; // 5 min
const CACHE_MAX = 200;

/**
 * Look up a cached value.
 *
 * @param {string} key - Cache key.
 * @returns {*} The stored value, or null when the key is absent or its entry
 *   is older than CACHE_TTL (expired entries are removed on access).
 */
function cacheGet(key) {
  const entry = cache.get(key);
  if (!entry) return null;
  if (Date.now() - entry.ts > CACHE_TTL) {
    cache.delete(key);
    return null;
  }
  // Move the entry to the back of the insertion order so it counts as most
  // recently used. `ts` is left untouched: the TTL still runs from the write.
  cache.delete(key);
  cache.set(key, entry);
  return entry.value;
}

/**
 * Store a value, evicting least-recently-used entries once CACHE_MAX is reached.
 *
 * @param {string} key - Cache key.
 * @param {*} value - Value to store; its timestamp is set to now.
 */
function cacheSet(key, value) {
  // Remove any previous entry first: overwriting an existing key must neither
  // evict an unrelated entry nor keep the key's stale position in the order.
  cache.delete(key);
  while (cache.size >= CACHE_MAX) {
    const oldest = cache.keys().next().value;
    cache.delete(oldest);
  }
  cache.set(key, { value, ts: Date.now() });
}
// ─── Load model catalog ──────────────────────────────────────────────────────
// Parse the model catalog once at startup (path is relative to the working
// directory) and index its entries by alias and by Hugging Face id.
const catalog = JSON.parse(readFileSync("models.json", "utf-8"));
const { models } = catalog;
const aliasMap = new Map();
const hfIdMap = new Map();
for (const entry of models) {
  // Later duplicates overwrite earlier ones, as with new Map(pairs).
  aliasMap.set(entry.alias, entry);
  hfIdMap.set(entry.hf_id, entry);
}
/**
 * Resolve a client-supplied model name to a catalog entry.
 *
 * Lookup order: exact alias (lower-cased, with a leading "scottzillasystems/"
 * stripped), exact hf_id, substring of hf_id, substring of display name, then
 * exact capability tag.
 *
 * @param {*} name - Model name from the request body. Falsy values select the
 *   first catalog entry; non-string values are stringified rather than throwing.
 * @returns {object|null} The matching catalog entry, or null when nothing matches.
 */
function resolveModel(name) {
  if (!name) return models[0];
  // `name` comes straight from untrusted JSON, so it may be a number/object.
  const requested = typeof name === "string" ? name : String(name);
  const lower = requested.toLowerCase().replace(/^scottzillasystems\//, "");
  // Catalog entries are not guaranteed to carry every field; a missing field
  // must simply not match instead of raising a TypeError mid-search.
  const fieldIncludes = (value) => typeof value === "string" && value.toLowerCase().includes(lower);
  return (
    aliasMap.get(lower) ||
    hfIdMap.get(requested) ||
    models.find((m) => fieldIncludes(m.hf_id)) ||
    models.find((m) => fieldIncludes(m.name)) ||
    models.find((m) => Array.isArray(m.capabilities) && m.capabilities.some((c) => c === lower)) ||
    null
  );
}
/**
 * Pick a model for "auto" routing by keyword-matching the conversation text.
 *
 * Rules are tried in order and the first match wins; when nothing matches the
 * general-purpose model is returned.
 *
 * @param {Array<{content?: *}>} messages - Chat messages. A non-array value is
 *   treated as an empty conversation instead of throwing.
 * @returns {object|null} Whatever resolveModel() returns for the chosen alias.
 */
function routeByCapability(messages) {
  // Content is usually a string, but may also be an array of parts such as
  // { type: "text", text: "..." }; collect the text of those parts.
  const textOf = (message) => {
    const content = message?.content;
    if (typeof content === "string") return content;
    if (Array.isArray(content)) {
      return content
        .map((part) => (typeof part === "string" ? part : part?.text || ""))
        .join(" ");
    }
    return content ? String(content) : "";
  };
  const list = Array.isArray(messages) ? messages : [];
  const text = list.map(textOf).join(" ").toLowerCase();

  if (/\b(image|picture|photo|draw|edit image|generate image)\b/.test(text))
    return resolveModel("qwen3-vl-8b-abliterated") || resolveModel("qwen3.5-9b");
  if (/\b(code|python|javascript|function|debug|program|script|sql|api)\b/.test(text))
    return resolveModel("qwen3-coder-abliterated");
  // Word stems take a `\w*` suffix: with a bare trailing `\b`, "uncensored",
  // "abliterated" and "unrestricted" would never match their stems.
  if (/\b(uncensor\w*|abliterat\w*|jailbreak|unrestrict\w*|nsfw|explicit)\b/.test(text))
    return resolveModel("qwen3.6-27b-abliterated");
  if (/\b(think|reason|math|logic|proof|step.by.step|analyze|complex)\b/.test(text))
    return resolveModel("qwen3.5-40b-uncensored");
  if (/\b(creative|story|roleplay|write|fiction|narrative|poem|essay)\b/.test(text))
    return resolveModel("cydonia-24b");
  // Same stem handling for "vuln" (vulnerability, vulnerable, vulns, ...).
  if (/\b(hack|exploit|vuln\w*|pentest|nmap|security|attack|scan)\b/.test(text))
    return resolveModel("qwen3.6-27b-abliterated");
  return resolveModel("qwen3.5-9b");
}
// ─── Auth middleware with internal agent bypass ──────────────────────────────
// Free-tier request counters. authMiddleware keys entries as
// `${ip}:${YYYY-MM-DD}` and stores the number of requests made that day; the
// hourly cleanup timer at the bottom of the file drops keys for other days.
let freeUsageToday = new Map();
/**
 * Verify an API key against the payments service.
 *
 * Successful responses are stored through cacheSet() under `auth:<key>` so
 * repeated requests do not hit the payments service every time.
 * NOTE(review): the whole payload is cached, so fields such as `within_limit`
 * can be served stale for as long as the cache keeps the entry — confirm this
 * is acceptable.
 *
 * @param {string} key - API key presented by the client (untrusted).
 * @returns {Promise<object|null>} The verification payload, or null when the
 *   service answers with a non-OK status, times out (5 s) or is unreachable.
 */
async function verifyApiKey(key) {
  const cacheKey = `auth:${key}`;
  const cached = cacheGet(cacheKey);
  if (cached) return cached;
  try {
    // The key is attacker-controlled: encode it so characters such as "/",
    // "?" or "#" cannot redirect the request to another path or add a query.
    const url = `${PAYMENTS_URL}/api/keys/verify/${encodeURIComponent(key)}`;
    const resp = await fetch(url, { signal: AbortSignal.timeout(5000) });
    if (!resp.ok) return null;
    const data = await resp.json();
    cacheSet(cacheKey, data);
    return data;
  } catch {
    // Deliberate best-effort: callers treat null as "could not verify".
    return null;
  }
}
/**
 * Report one model call to the payments service, fire-and-forget.
 *
 * The request is not awaited; both a synchronous failure while building or
 * issuing it and a later rejection are ignored so usage reporting can never
 * fail the client's request.
 *
 * @param {string} key - API key the call is billed to.
 * @param {string} model - Alias of the model that served the call.
 * @returns {Promise<void>} Resolves immediately.
 */
async function recordUsage(key, model) {
  const ignore = () => {};
  try {
    const request = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ api_key: key, model }),
      signal: AbortSignal.timeout(3000),
    };
    const pending = fetch(`${PAYMENTS_URL}/api/usage/record`, request);
    pending.catch(ignore);
  } catch {
    // Best-effort only: nothing to do when the request could not be started.
  }
}
/**
 * Decide whether a request comes from one of our own Agent Zero Spaces.
 *
 * A request is internal when the hostname of its Origin (or, when Origin is
 * absent, Referer) or of its X-Forwarded-Host header is exactly one of
 * INTERNAL_ORIGINS, or when its Bearer token equals HF_TOKEN.
 *
 * NOTE(review): Origin, Referer and X-Forwarded-Host are supplied by the
 * client, so the header path is a convenience, not authentication — any
 * non-browser client can set them. Only the HF_TOKEN comparison is a real
 * credential check. The header path is kept for compatibility; consider
 * removing it once every Space sends the token.
 *
 * @param {{headers: object}} req - Incoming Express request.
 * @returns {boolean} True when the request should get the internal tier.
 */
function isInternalAgent(req) {
  // Reduce a header value (URL, bare host, host:port, or a comma-separated
  // list) to a lower-case hostname; "" when it cannot be parsed.
  const hostnameOf = (value) => {
    const first = String(value || "").split(",")[0].trim();
    if (!first) return "";
    try {
      return new URL(first.includes("://") ? first : `http://${first}`).hostname.toLowerCase();
    } catch {
      return "";
    }
  };

  // Exact hostname comparison: a substring test would also accept
  // "https://evil.example/?x=<internal-host>" or a look-alike hostname that
  // merely ends with an internal one.
  const origin = hostnameOf(req.headers.origin || req.headers.referer);
  const host = hostnameOf(req.headers["x-forwarded-host"]);
  if (INTERNAL_ORIGINS.has(origin) || INTERNAL_ORIGINS.has(host)) return true;

  // Our own Spaces authenticate with HF_TOKEN. Very short tokens (including
  // the empty default) are never accepted.
  const authHeader = req.headers.authorization || "";
  const key = authHeader.replace(/^Bearer\s+/i, "").trim();
  if (HF_TOKEN.length <= 10 || key.length === 0) return false;
  // Compare fixed-length digests in constant time so response timing does not
  // reveal how much of the token matched.
  const digest = (text) => createHash("sha256").update(text).digest();
  return timingSafeEqual(digest(key), digest(HF_TOKEN));
}
/**
 * Express middleware that classifies the caller and sets `req.tier`,
 * `req.allowedModels` and (for verified paid keys) `req.apiKey`.
 *
 * Outcomes:
 *  - internal agent (see isInternalAgent): tier "internal", all models;
 *  - no key or "sk-free-tier": tier "free", 25 requests per IP per UTC day,
 *    429 once the quota is used up;
 *  - key explicitly reported invalid by the payments service: 401;
 *  - key could not be verified (service unreachable or an answer without a
 *    truthy `valid`): graceful degradation to tier "basic", all models;
 *  - valid key over its limit: 429;
 *  - valid key: tier and model list taken from the verification payload.
 *
 * @param {object} req - Express request.
 * @param {object} res - Express response.
 * @param {Function} next - Continues to the route handler.
 */
async function authMiddleware(req, res, next) {
  const FREE_DAILY_LIMIT = 25;

  // Our own Spaces get unlimited access.
  if (isInternalAgent(req)) {
    req.tier = "internal";
    req.allowedModels = ["all"];
    return next();
  }

  const authHeader = req.headers.authorization || req.headers["x-api-key"] || "";
  const key = authHeader.replace(/^Bearer\s+/i, "").trim();

  // No key → free tier, counted per client IP and calendar day (UTC).
  if (!key || key === "sk-free-tier") {
    const ip = req.ip || req.headers["x-forwarded-for"] || "unknown";
    const today = new Date().toISOString().split("T")[0];
    const usageKey = `${ip}:${today}`;
    const used = freeUsageToday.get(usageKey) || 0;
    if (used >= FREE_DAILY_LIMIT) {
      return res.status(429).json({
        error: `Free tier limit reached (${FREE_DAILY_LIMIT}/day). Get an API key at ${PAYMENTS_URL}`,
        upgrade_url: PAYMENTS_URL,
      });
    }
    freeUsageToday.set(usageKey, used + 1);
    req.tier = "free";
    req.allowedModels = ["chatgpt-5", "text-embedding-3-small"];
    return next();
  }

  const keyData = await verifyApiKey(key);

  // The payments service answered and said the key is not valid. Degrading
  // here would hand every made-up key unlimited access to all models, which
  // defeats both the free-tier quota and the paid tiers.
  if (keyData && keyData.valid === false) {
    return res.status(401).json({
      error: `Invalid API key. Get an API key at ${PAYMENTS_URL}`,
      upgrade_url: PAYMENTS_URL,
    });
  }

  // Verification unavailable (null) or inconclusive → graceful degradation,
  // so an outage of the payments service does not take the gateway down.
  if (!keyData || !keyData.valid) {
    req.tier = "basic";
    req.allowedModels = ["all"];
    return next();
  }

  if (!keyData.within_limit) {
    return res.status(429).json({
      error: `Rate limit exceeded for ${keyData.tier_name} tier.`,
      usage: keyData.usage_today, limit: keyData.limit_today,
    });
  }

  req.tier = keyData.tier;
  req.apiKey = key;
  req.allowedModels = keyData.models || ["all"];
  next();
}
// ─── HF Router proxy with retry ─────────────────────────────────────────────
/**
 * Send a chat-completion request to the Hugging Face router, retrying once.
 *
 * @param {string} routerModel - Model id understood by the router.
 * @param {Array<object>} messages - Chat messages, forwarded unchanged.
 * @param {boolean} [stream=false] - When true the promise resolves with the
 *   raw upstream response stream (whatever its status code) for the caller to
 *   relay; otherwise the body is read and parsed.
 * @param {object} [params={}] - Optional `max_tokens` (default 2048),
 *   `temperature` (default 0.7; 0 is honoured) and `top_p` (default 0.95).
 * @returns {Promise<object>} Parsed JSON body, `{ error: <raw body> }` when the
 *   body is not JSON, or the response stream in streaming mode.
 * @throws {Error} When both attempts fail with a network error or timeout.
 */
async function hfChat(routerModel, messages, stream = false, params = {}) {
  const body = JSON.stringify({
    model: routerModel, messages, stream,
    max_tokens: params.max_tokens || 2048,
    temperature: params.temperature ?? 0.7,
    top_p: params.top_p || 0.95,
  });

  const attempt = () => new Promise((resolve, reject) => {
    const req = https.request({
      hostname: "router.huggingface.co", path: "/v1/chat/completions", method: "POST",
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${HF_TOKEN}`, "Content-Length": Buffer.byteLength(body) },
      timeout: 60000,
    }, (res) => {
      if (stream) return resolve(res);
      // Collect raw chunks and decode once at the end: decoding each chunk on
      // its own corrupts any multi-byte UTF-8 character that straddles a
      // chunk boundary.
      const chunks = [];
      res.on("data", (c) => chunks.push(typeof c === "string" ? Buffer.from(c) : c));
      // A connection reset mid-body would otherwise leave the promise pending.
      res.on("error", reject);
      res.on("end", () => {
        const data = Buffer.concat(chunks).toString("utf8");
        try { resolve(JSON.parse(data)); } catch { resolve({ error: data }); }
      });
    });
    req.on("error", reject);
    // The `timeout` option only emits an event; the request has to be torn
    // down explicitly.
    req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
    req.end(body);
  });

  // Single retry after one second; a second failure propagates to the caller.
  try {
    return await attempt();
  } catch {
    await new Promise((r) => setTimeout(r, 1000));
    return await attempt();
  }
}
// ─── HF Embeddings (uncached here; the /v1/embeddings handler caches responses) ───
/**
 * Request sentence embeddings from the Hugging Face router
 * (sentence-transformers/all-MiniLM-L6-v2 feature extraction).
 *
 * @param {string|string[]} input - One text or a list of texts.
 * @returns {Promise<*>} The parsed JSON body exactly as returned upstream
 *   (callers must check its shape; error payloads are returned too), or null
 *   when the body is not valid JSON.
 * @throws {Error} On network errors or after the 15 s timeout.
 */
async function hfEmbeddings(input) {
  const texts = Array.isArray(input) ? input : [input];
  const body = JSON.stringify({ inputs: texts });
  return new Promise((resolve, reject) => {
    const req = https.request({
      hostname: "router.huggingface.co",
      path: "/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2",
      method: "POST",
      headers: { "Content-Type": "application/json", Authorization: `Bearer ${HF_TOKEN}`, "Content-Length": Buffer.byteLength(body) },
      timeout: 15000,
    }, (res) => {
      // Decode once after the last chunk: per-chunk decoding corrupts
      // multi-byte UTF-8 characters split across chunk boundaries.
      const chunks = [];
      res.on("data", (c) => chunks.push(typeof c === "string" ? Buffer.from(c) : c));
      // Without this a connection reset mid-body would leave the promise pending.
      res.on("error", reject);
      res.on("end", () => {
        const data = Buffer.concat(chunks).toString("utf8");
        try { resolve(JSON.parse(data)); } catch { resolve(null); }
      });
    });
    req.on("error", reject);
    req.on("timeout", () => { req.destroy(); reject(new Error("timeout")); });
    req.end(body);
  });
}
// Deterministic local embedding fallback (384-dim, cosine-similarity compatible)
/**
 * Deterministic local embedding used when the upstream service is unavailable.
 *
 * The text is lower-cased and trimmed; every character adds 1.0 to the bucket
 * `charCode % 384` and every adjacent character pair adds 0.5 to the bucket
 * `(code * 31 + nextCode) % 384`. The vector is then L2-normalised.
 *
 * @param {*} text - Text to embed. Falsy values embed as the empty string;
 *   other non-string values are JSON-encoded first instead of throwing.
 * @returns {number[]} A 384-element vector: unit length, or all zeros for
 *   empty text.
 */
function localEmbed(text) {
  const dim = 384;
  const embedding = new Array(dim).fill(0);

  let raw = text || "";
  if (typeof raw !== "string") {
    // Callers pass request data through unchanged, so numbers or objects can
    // arrive here; stringify them rather than crash on `.toLowerCase()`.
    let encoded;
    try { encoded = JSON.stringify(raw); } catch { encoded = undefined; }
    raw = encoded ?? String(raw);
  }
  const str = raw.toLowerCase().trim();

  for (let i = 0; i < str.length; i++) {
    const c = str.charCodeAt(i);
    embedding[c % dim] += 1.0;
    if (i + 1 < str.length) {
      const bigram = (c * 31 + str.charCodeAt(i + 1)) % dim;
      embedding[bigram] += 0.5;
    }
  }

  // `|| 1` keeps the all-zero vector of an empty string from dividing by zero.
  const norm = Math.sqrt(embedding.reduce((s, v) => s + v * v, 0)) || 1;
  return embedding.map((v) => v / norm);
}
// ─── Public endpoints ────────────────────────────────────────────────────────
// Service banner: status, version, catalog size, uptime and an endpoint index.
app.get("/", (req, res) => {
  const endpoints = {
    chat: "POST /v1/chat/completions",
    embeddings: "POST /v1/embeddings",
    models: "GET /v1/models",
    route: "GET /api/route?q=...",
    health: "GET /health",
  };
  const freeTier = { limit: "25 req/day", models: ["chatgpt-5"], no_key_needed: true };
  const banner = {
    status: "operational",
    name: "Scottzilla Gateway",
    version: "6.1",
    models: models.length,
    uptime_s: Math.floor(process.uptime()),
    endpoints,
    free_tier: freeTier,
    pricing: PAYMENTS_URL,
  };
  res.json(banner);
});
// Liveness probe: process uptime, catalog size and current cache occupancy.
app.get("/health", (req, res) => {
  const report = {
    status: "healthy",
    uptime: process.uptime(),
    models: models.length,
    cache_size: cache.size,
    timestamp: new Date().toISOString(),
  };
  res.json(report);
});
// Model listing with one record per catalog entry (name, size, digest, details).
app.get("/api/tags", (req, res) => {
  const listing = [];
  for (const entry of models) {
    listing.push({
      name: entry.alias,
      model: entry.alias,
      modified_at: new Date().toISOString(),
      // The catalog stores sizes in GB; this field is reported in bytes.
      size: (entry.size_gb || 0) * 1e9,
      digest: entry.hf_id,
      details: {
        parent_model: entry.hf_id,
        format: entry.arch,
        family: entry.arch,
        parameter_size: entry.params || "unknown",
      },
    });
  }
  res.json({ models: listing });
});
// Model list in "object: list" form: every catalog entry plus the embedding model.
app.get("/v1/models", (req, res) => {
  const nowSeconds = () => Math.floor(Date.now() / 1000);
  const data = models.map((entry) => ({
    id: entry.alias,
    object: "model",
    created: nowSeconds(),
    owned_by: "ScottzillaSystems",
    hf_id: entry.hf_id,
    capabilities: entry.capabilities,
    params: entry.params,
  }));
  data.push({
    id: "text-embedding-3-small",
    object: "model",
    created: nowSeconds(),
    owned_by: "ScottzillaSystems",
    type: "embedding",
  });
  res.json({ object: "list", data });
});
// Returns the parsed model catalog unchanged.
app.get("/api/library", (req, res) => {
  res.json(catalog);
});
// Dry-run of the capability router: reports which model `?q=` would be sent to.
app.get("/api/route", (req, res) => {
  const query = req.query.q || "";
  const chosen = routeByCapability([{ role: "user", content: query }]);
  let routedTo = null;
  if (chosen) {
    const { alias, name, capabilities } = chosen;
    routedTo = { alias, name, capabilities };
  }
  res.json({ query, routed_to: routedTo });
});
// Returns a copy of the catalog entry for the model named in `name` or `model`.
app.post("/api/show", (req, res) => {
  const { name, model } = req.body;
  const entry = resolveModel(name || model);
  if (entry) {
    res.json({ details: Object.assign({}, entry) });
    return;
  }
  res.status(404).json({ error: "model not found" });
});
// ─── Embeddings ──────────────────────────────────────────────────────────────
// Embeddings endpoint. Tries the upstream embedding service first and falls
// back to the deterministic local embedding, so a well-formed request always
// gets vectors back. Responses are cached per (model, full input).
app.post("/v1/embeddings", authMiddleware, async (req, res) => {
  const DEFAULT_MODEL = "text-embedding-3-small";
  // Inputs come from untrusted JSON; anything that is not a string is
  // JSON-encoded before it reaches the local embedder.
  const asText = (t) => (typeof t === "string" ? t : (JSON.stringify(t) ?? ""));

  const { input, model } = req.body ?? {};
  if (input == null || (Array.isArray(input) && input.length === 0)) {
    return res.status(400).json({
      error: { message: "`input` is required.", type: "invalid_request_error" },
    });
  }
  const texts = Array.isArray(input) ? input : [input];

  try {
    // Key on a digest of the complete input list. A truncated or
    // separator-joined key lets different requests collide (same first 200
    // characters, or ["a|b"] vs ["a","b"]) and receive each other's vectors.
    const digest = createHash("sha256").update(JSON.stringify(texts)).digest("hex");
    const cacheKey = `emb:${model || DEFAULT_MODEL}:${digest}`;
    const cached = cacheGet(cacheKey);
    if (cached) return res.json(cached);

    let embeddings;
    try {
      const hfResult = await hfEmbeddings(texts);
      // Accept the upstream answer only when it has one item per input;
      // error payloads and malformed shapes fall through to the local path.
      if (Array.isArray(hfResult) && hfResult.length === texts.length) {
        embeddings = hfResult;
      }
    } catch {
      // Upstream unavailable: handled by the local fallback below.
    }
    if (!embeddings) {
      embeddings = texts.map((t) => localEmbed(asText(t)));
    }

    const charCount = texts.join("").length;
    const response = {
      object: "list",
      data: embeddings.map((emb, i) => ({
        object: "embedding", index: i,
        // A nested array is reduced to its first row.
        embedding: Array.isArray(emb[0]) ? emb[0] : emb,
      })),
      model: model || DEFAULT_MODEL,
      // Character counts stand in for token counts.
      usage: { prompt_tokens: charCount, total_tokens: charCount },
    };
    cacheSet(cacheKey, response);
    res.json(response);
  } catch (err) {
    // Last-resort fallback; it must not throw, so inputs are coerced to text.
    res.json({
      object: "list",
      data: texts.map((t, i) => ({ object: "embedding", index: i, embedding: localEmbed(asText(t)) })),
      model: "text-embedding-3-small",
      usage: { prompt_tokens: 0, total_tokens: 0 },
    });
  }
});
// ─── Chat endpoints ──────────────────────────────────────────────────────────
// Chat endpoint that answers in the `{ model, created_at, message, done }`
// shape. `model: "auto"` (or an unknown model name) is routed by keyword.
app.post("/api/chat", authMiddleware, async (req, res) => {
  try {
    const { model: modelName, messages, options } = req.body ?? {};
    // Reject malformed requests up front; otherwise the missing array only
    // surfaces as a TypeError reported to the client as a 502 gateway error.
    if (!Array.isArray(messages) || messages.length === 0) {
      return res.status(400).json({ error: "`messages` must be a non-empty array." });
    }

    const entry = modelName === "auto"
      ? routeByCapability(messages)
      : (resolveModel(modelName) || routeByCapability(messages));
    if (!entry) return res.status(404).json({ error: `Model not found: ${modelName}` });

    // NOTE(review): the allow-list is only enforced for the free tier;
    // `req.allowedModels` of paid keys is not checked here — confirm intent.
    if (req.tier === "free" && !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all")) {
      return res.status(403).json({ error: `${entry.name} requires a paid plan.`, upgrade: PAYMENTS_URL });
    }

    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
    const result = await hfChat(routerModel, messages, false, options || {});

    // An upstream error payload has no choices. Report it instead of
    // answering 200 with empty content, and do not bill the failed call.
    const choices = Array.isArray(result?.choices) ? result.choices : [];
    if (choices.length === 0 && result?.error) {
      const detail = typeof result.error === "string"
        ? result.error
        : (result.error.message || JSON.stringify(result.error));
      return res.status(502).json({ error: `Upstream error: ${detail}` });
    }

    const content = choices[0]?.message?.content || "";
    if (req.apiKey) recordUsage(req.apiKey, entry.alias);
    res.json({ model: entry.alias, created_at: new Date().toISOString(), message: { role: "assistant", content }, done: true });
  } catch (err) {
    res.status(502).json({ error: `Gateway error: ${err.message}. Retrying may help.` });
  }
});
// Single-prompt completion endpoint answering in the
// `{ model, created_at, response, done }` shape. The prompt is wrapped in a
// one-message conversation and sent through the same chat path.
app.post("/api/generate", authMiddleware, async (req, res) => {
  try {
    const { model: modelName, prompt, options } = req.body ?? {};
    // Without a string prompt the upstream request would carry a message with
    // no content and fail in a way the client cannot diagnose.
    if (typeof prompt !== "string") {
      return res.status(400).json({ error: "`prompt` must be a string." });
    }
    const messages = [{ role: "user", content: prompt }];

    const entry = modelName === "auto"
      ? routeByCapability(messages)
      : (resolveModel(modelName) || routeByCapability(messages));
    if (!entry) return res.status(404).json({ error: `Model not found: ${modelName}` });

    if (req.tier === "free" && !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all")) {
      return res.status(403).json({ error: `${entry.name} requires a paid plan.`, upgrade: PAYMENTS_URL });
    }

    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
    const result = await hfChat(routerModel, messages, false, options || {});

    // An upstream error payload has no choices. Report it instead of
    // answering 200 with an empty response, and do not bill the failed call.
    const choices = Array.isArray(result?.choices) ? result.choices : [];
    if (choices.length === 0 && result?.error) {
      const detail = typeof result.error === "string"
        ? result.error
        : (result.error.message || JSON.stringify(result.error));
      return res.status(502).json({ error: `Upstream error: ${detail}` });
    }

    if (req.apiKey) recordUsage(req.apiKey, entry.alias);
    res.json({ model: entry.alias, created_at: new Date().toISOString(), response: choices[0]?.message?.content || "", done: true });
  } catch (err) {
    res.status(502).json({ error: err.message });
  }
});
// Chat-completions endpoint answering in the `chat.completion` shape, with
// optional server-sent-event streaming relayed from the upstream router.
app.post("/v1/chat/completions", authMiddleware, async (req, res) => {
  try {
    const { model: modelName, messages, stream, temperature, max_tokens, top_p } = req.body ?? {};
    // Reject malformed requests up front; otherwise the missing array only
    // surfaces as a TypeError reported to the client as a 502 server error.
    if (!Array.isArray(messages) || messages.length === 0) {
      return res.status(400).json({
        error: { message: "`messages` must be a non-empty array.", type: "invalid_request_error" },
      });
    }

    const entry = modelName === "auto"
      ? routeByCapability(messages)
      : (resolveModel(modelName) || routeByCapability(messages));
    if (!entry) return res.status(404).json({ error: { message: `Model not found: ${modelName}`, type: "invalid_request_error" } });

    if (req.tier === "free" && !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all")) {
      return res.status(403).json({ error: { message: `${entry.name} requires a paid plan.`, type: "insufficient_quota" } });
    }

    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";

    if (stream) {
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache");
      res.setHeader("Connection", "keep-alive");
      try {
        const upstream = await hfChat(routerModel, messages, true, { temperature, max_tokens, top_p });
        upstream.on("end", () => {
          // Only bill streams the upstream actually served.
          if (req.apiKey && (upstream.statusCode ?? 200) < 400) recordUsage(req.apiKey, entry.alias);
        });
        upstream.on("error", () => res.end());
        // If the client goes away mid-stream, stop reading from upstream so
        // the connection is released instead of being drained to nowhere.
        res.on("close", () => { if (!upstream.complete) upstream.destroy(); });
        // pipe() relays chunks with backpressure and ends `res` when the
        // upstream ends.
        upstream.pipe(res);
      } catch (e) {
        // Serialise with JSON.stringify: interpolating the message into a
        // hand-written JSON string breaks as soon as it contains a quote.
        res.write(`data: ${JSON.stringify({ error: e.message })}\n\n`);
        res.end();
      }
      return;
    }

    const result = await hfChat(routerModel, messages, false, { temperature, max_tokens, top_p });

    // An upstream error payload has no choices. Report it instead of
    // answering 200 with an empty message, and do not bill the failed call.
    const hasChoices = Array.isArray(result?.choices) && result.choices.length > 0;
    if (!hasChoices && result?.error) {
      const detail = typeof result.error === "string"
        ? result.error
        : (result.error.message || JSON.stringify(result.error));
      return res.status(502).json({ error: { message: `Upstream error: ${detail}`, type: "server_error" } });
    }

    if (req.apiKey) recordUsage(req.apiKey, entry.alias);
    res.json({
      id: `chatcmpl-${Date.now()}`,
      object: "chat.completion",
      created: Math.floor(Date.now() / 1000),
      model: entry.alias,
      choices: result?.choices || [{ index: 0, message: { role: "assistant", content: "" }, finish_reason: "stop" }],
      usage: result?.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
    });
  } catch (err) {
    res.status(502).json({ error: { message: err.message, type: "server_error" } });
  }
});
// ─── Cleanup ─────────────────────────────────────────────────────────────────
// Hourly sweep: drop free-tier counters whose key does not end in today's
// (UTC) date, so the map only ever holds the current day.
setInterval(() => {
  const todaySuffix = `:${new Date().toISOString().split("T")[0]}`;
  for (const usageKey of freeUsageToday.keys()) {
    if (usageKey.endsWith(todaySuffix)) continue;
    freeUsageToday.delete(usageKey);
  }
}, 3600000);
// ─── Start ───────────────────────────────────────────────────────────────────
// Bind on all interfaces and print the startup banner once listening.
app.listen(PORT, "0.0.0.0", () => {
  const banner = [
    `⚡ Scottzilla Gateway v6.1 | :${PORT} | ${models.length} models`,
    ` Internal agents: unlimited | Free tier: 25/day | Cache: active`,
  ];
  for (const line of banner) console.log(line);
});