Spaces:

ScottzillaSystems
/

scottzilla-gateway

Running

App Files Files Community

scottzilla-gateway / server.mjs

ScottzillaSystems's picture

ScottzillaSystems

UX v6.1: Internal agent bypass + response caching + graceful degradation

6d4cef9 verified 1 day ago

history blame contribute delete

18.5 kB

	import express from "express";
	import cors from "cors";
	import { readFileSync } from "fs";
	import https from "https";

	// Express application using cors() with its default options and JSON request
	// bodies accepted up to 50 MB.
	const app = express();
	app.use(cors());
	app.use(express.json({ limit: "50mb" }));

	// Port handed to app.listen().
	const PORT = 11434;
	// Token sent as the Bearer credential to the Hugging Face router. HF_TOKEN takes
	// precedence over OPENAI_API_KEY; "" when neither variable is set.
	const HF_TOKEN = process.env.HF_TOKEN || process.env.OPENAI_API_KEY || "";
	// Base URL of the payments service (key verification, usage recording, upgrade links).
	const PAYMENTS_URL = "https://scottzillasystems-scottzilla-payments.hf.space";

	// ─── Internal agent hosts ────────────────────────────────────────────────────
	// Hostnames of our own Agent Zero Spaces. isInternalAgent() looks for these in
	// the Origin / Referer / X-Forwarded-Host request headers.
	const INTERNAL_ORIGINS = new Set(
	  [
	    "agent-zero",
	    "agent-zero-pentesting",
	    "agent-zero-finops",
	    "agent-zero-adult-entertainment",
	  ].map((space) => `scottzillasystems-${space}.hf.space`),
	);

	// ─── Response cache ──────────────────────────────────────────────────────────
	// One shared in-memory Map used through cacheGet()/cacheSet(). Entries are
	// stored as { value, ts } and expire CACHE_TTL milliseconds after being written;
	// the Map is kept below CACHE_MAX entries by cacheSet().
	const CACHE_MAX = 200;
	const CACHE_TTL = 5 * 60 * 1000; // 5 minutes, in milliseconds
	const cache = new Map();

	// Look up `key` in the shared cache.
	// Returns the stored value, or null when the key is absent or its entry is older
	// than CACHE_TTL ms (an expired entry is removed as a side effect).
	// A hit moves the entry to the newest position of the Map, so that evicting the
	// first key of the Map removes the least recently used entry, not merely the oldest.
	function cacheGet(key) {
	  const entry = cache.get(key);
	  if (!entry) return null;
	  if (Date.now() - entry.ts > CACHE_TTL) {
	    cache.delete(key);
	    return null;
	  }
	  // Refresh recency only; `ts` is untouched so the TTL still counts from the write.
	  cache.delete(key);
	  cache.set(key, entry);
	  return entry.value;
	}

	// Store `value` under `key`, stamped with the current time.
	// When the cache already holds CACHE_MAX entries, the first key of the Map
	// (the oldest position) is evicted to make room.
	function cacheSet(key, value) {
	  // Remove any previous entry for this key first: overwriting must not evict an
	  // unrelated entry, and re-inserting moves the key to the newest position.
	  cache.delete(key);
	  if (cache.size >= CACHE_MAX) {
	    const oldest = cache.keys().next().value;
	    cache.delete(oldest);
	  }
	  cache.set(key, { value, ts: Date.now() });
	}

	// ─── Load model catalog ──────────────────────────────────────────────────────
	// models.json is read once at startup (path relative to the working directory).
	// Two lookup tables are built from its `models` array: by alias and by hf_id.
	const catalog = JSON.parse(readFileSync("models.json", "utf-8"));
	const { models } = catalog;
	const aliasMap = new Map();
	const hfIdMap = new Map();
	for (const model of models) {
	  aliasMap.set(model.alias, model);
	  hfIdMap.set(model.hf_id, model);
	}

	// Resolve a client-supplied model name to a catalog entry.
	// - Falsy name: the first catalog entry.
	// - Non-string name (e.g. a number or object from a JSON body): null, instead
	//   of throwing on `.toLowerCase()`.
	// - Otherwise the name is lower-cased, a leading "scottzillasystems/" is removed,
	//   and the first hit wins in this order: exact alias, exact hf_id (original
	//   casing), hf_id substring, display-name substring, exact capability.
	// Returns null when nothing matches.
	function resolveModel(name) {
	  if (!name) return models[0];
	  if (typeof name !== "string") return null;
	  const lower = name.toLowerCase().replace(/^scottzillasystems\//, "");
	  return (
	    aliasMap.get(lower) ||
	    hfIdMap.get(name) ||
	    // `|| ""` keeps catalog entries that lack hf_id/name from throwing.
	    models.find((m) => (m.hf_id || "").toLowerCase().includes(lower)) ||
	    models.find((m) => (m.name || "").toLowerCase().includes(lower)) ||
	    models.find((m) => m.capabilities?.some((c) => c === lower)) ||
	    null
	  );
	}

	// Pick a model for a conversation by keyword-matching its text.
	// The text of every message is concatenated and lower-cased, then tested
	// against the rules below in order; the first matching rule decides. A rule
	// lists one or more aliases that are resolved in order through resolveModel();
	// when none resolves, null is returned. With no matching rule the default
	// alias "qwen3.5-9b" is resolved.
	// `messages` that is not an array is treated as an empty conversation. Message
	// content may be a string or an array of parts (strings or objects with a
	// string `text` field); other content contributes no text.
	function routeByCapability(messages) {
	  const partText = (part) =>
	    typeof part === "string" ? part : typeof part?.text === "string" ? part.text : "";
	  const messageText = (message) => {
	    const content = message?.content;
	    if (typeof content === "string") return content;
	    if (Array.isArray(content)) return content.map(partText).join(" ");
	    return "";
	  };
	  const text = (Array.isArray(messages) ? messages : [])
	    .map(messageText)
	    .join(" ")
	    .toLowerCase();

	  // Word stems (uncensor, abliterat, unrestrict, vuln) carry `\w*` so that the
	  // trailing `\b` still lets "uncensored", "vulnerability", ... match.
	  const rules = [
	    [/\b(image|picture|photo|draw|edit image|generate image)\b/, ["qwen3-vl-8b-abliterated", "qwen3.5-9b"]],
	    [/\b(code|python|javascript|function|debug|program|script|sql|api)\b/, ["qwen3-coder-abliterated"]],
	    [/\b(uncensor\w*|abliterat\w*|jailbreak|unrestrict\w*|nsfw|explicit)\b/, ["qwen3.6-27b-abliterated"]],
	    [/\b(think|reason|math|logic|proof|step.by.step|analyze|complex)\b/, ["qwen3.5-40b-uncensored"]],
	    [/\b(creative|story|roleplay|write|fiction|narrative|poem|essay)\b/, ["cydonia-24b"]],
	    [/\b(hack|exploit|vuln\w*|pentest|nmap|security|attack|scan)\b/, ["qwen3.6-27b-abliterated"]],
	  ];

	  for (const [pattern, aliases] of rules) {
	    if (!pattern.test(text)) continue;
	    for (const alias of aliases) {
	      const model = resolveModel(alias);
	      if (model) return model;
	    }
	    return null;
	  }
	  return resolveModel("qwen3.5-9b");
	}

	// ─── Auth middleware with internal agent bypass ──────────────────────────────
	// Free-tier request counters. authMiddleware stores the number of admitted
	// requests under the key `${ip}:${YYYY-MM-DD}`; the hourly cleanup interval
	// deletes keys that do not end with the current date.
	let freeUsageToday = new Map();

	// Ask the payments service whether `key` is a valid API key.
	// Returns the service's JSON answer, or null when the service replies with a
	// non-OK status, cannot be reached within 5 s, or sends an unparsable body.
	// Answers are kept in the shared response cache under `auth:<key>` so repeated
	// requests with the same key do not hit the payments service every time.
	async function verifyApiKey(key) {
	  const cacheKey = `auth:${key}`;
	  const cached = cacheGet(cacheKey);
	  if (cached) return cached;

	  try {
	    // The key comes straight from a request header: encode it so characters
	    // such as "/", "?" or "#" cannot alter the path or query of the request.
	    const url = `${PAYMENTS_URL}/api/keys/verify/${encodeURIComponent(key)}`;
	    const resp = await fetch(url, { signal: AbortSignal.timeout(5000) });
	    if (!resp.ok) return null;
	    const data = await resp.json();
	    cacheSet(cacheKey, data);
	    return data;
	  } catch {
	    // Deliberate: any failure means "could not verify"; the caller decides what to do.
	    return null;
	  }
	}

	// Report one request made with `key` against `model` to the payments service.
	// Fire-and-forget: the POST is started but never awaited, it is aborted after
	// 3 s, and every failure (synchronous or asynchronous) is ignored on purpose so
	// that usage accounting can never fail the request being served.
	async function recordUsage(key, model) {
	  try {
	    const request = {
	      method: "POST",
	      headers: { "Content-Type": "application/json" },
	      body: JSON.stringify({ api_key: key, model }),
	      signal: AbortSignal.timeout(3000),
	    };
	    const pending = fetch(`${PAYMENTS_URL}/api/usage/record`, request);
	    pending.catch(() => {});
	  } catch {}
	}

	// Decide whether a request comes from one of our own Agent Zero Spaces.
	// True when either
	//   - the hostname of the Origin header (or, without one, the Referer header),
	//     or any hostname listed in X-Forwarded-Host, is exactly one of
	//     INTERNAL_ORIGINS, or
	//   - the Authorization bearer token equals HF_TOKEN (and HF_TOKEN is longer
	//     than 10 characters, so an unset token never matches).
	// Hostnames are parsed and compared exactly; a URL that merely contains an
	// internal hostname somewhere (path, query, longer domain) does not match.
	// NOTE(review): these headers are supplied by the client, so the header check
	// is a convenience, not authentication; only the token comparison is a secret.
	function isInternalAgent(req) {
	  const hostnameOf = (value) => {
	    const raw = String(value || "").trim().toLowerCase();
	    if (!raw) return "";
	    try {
	      // X-Forwarded-Host carries a bare host[:port]; give it a scheme so URL can parse it.
	      return new URL(raw.includes("://") ? raw : `https://${raw}`).hostname;
	    } catch {
	      return "";
	    }
	  };

	  const candidates = [
	    hostnameOf(req.headers.origin || req.headers.referer),
	    ...String(req.headers["x-forwarded-host"] || "").split(",").map(hostnameOf),
	  ];
	  if (candidates.some((host) => host && INTERNAL_ORIGINS.has(host))) return true;

	  // Our own Spaces authenticate with the same HF token this gateway runs with.
	  const authHeader = req.headers.authorization || "";
	  const key = authHeader.replace(/^Bearer\s+/i, "").trim();
	  return HF_TOKEN.length > 10 && key === HF_TOKEN;
	}

	// Admit one free-tier request for the caller, or answer 429 once 25 requests
	// were counted today. Counters live in freeUsageToday under `${ip}:${YYYY-MM-DD}`.
	function admitFreeTier(req, res, next) {
	  const ip = req.ip || req.headers["x-forwarded-for"] || "unknown";
	  const today = new Date().toISOString().split("T")[0];
	  const usageKey = `${ip}:${today}`;
	  const used = freeUsageToday.get(usageKey) || 0;
	  if (used >= 25) {
	    return res.status(429).json({
	      error: "Free tier limit reached (25/day). Get an API key at " + PAYMENTS_URL,
	      upgrade_url: PAYMENTS_URL,
	    });
	  }
	  freeUsageToday.set(usageKey, used + 1);
	  req.tier = "free";
	  req.allowedModels = ["chatgpt-5", "text-embedding-3-small"];
	  return next();
	}

	// Express middleware that classifies the caller and sets req.tier and
	// req.allowedModels (plus req.apiKey for verified paid keys):
	//   - internal agent (see isInternalAgent)       → tier "internal", all models
	//   - no key, or the placeholder "sk-free-tier"  → free tier (25 requests/day per IP)
	//   - key could not be verified (verifyApiKey
	//     returned nothing, e.g. payments service
	//     unreachable)                               → tier "basic", all models
	//   - key explicitly reported as not valid       → free tier
	//   - valid key over its limit                   → 429
	//   - valid key within its limit                 → tier and models from the key data
	// The key is read from the Authorization header (Bearer) or from X-Api-Key.
	async function authMiddleware(req, res, next) {
	  // Our own Spaces get unlimited access.
	  if (isInternalAgent(req)) {
	    req.tier = "internal";
	    req.allowedModels = ["all"];
	    return next();
	  }

	  const authHeader = req.headers.authorization || req.headers["x-api-key"] || "";
	  const key = authHeader.replace(/^Bearer\s+/i, "").trim();

	  if (!key || key === "sk-free-tier") return admitFreeTier(req, res, next);

	  const keyData = await verifyApiKey(key);
	  if (!keyData) {
	    // Graceful degradation: verification itself failed, so do not punish the caller.
	    req.tier = "basic";
	    req.allowedModels = ["all"];
	    return next();
	  }
	  if (!keyData.valid) {
	    // A key the payments service rejects must not unlock more than sending no
	    // key at all; treat the caller as an anonymous free-tier user.
	    return admitFreeTier(req, res, next);
	  }
	  if (!keyData.within_limit) {
	    return res.status(429).json({
	      error: `Rate limit exceeded for ${keyData.tier_name} tier.`,
	      usage: keyData.usage_today,
	      limit: keyData.limit_today,
	    });
	  }

	  req.tier = keyData.tier;
	  req.apiKey = key;
	  req.allowedModels = keyData.models || ["all"];
	  return next();
	}

	// ─── HF Router proxy with retry ─────────────────────────────────────────────
	// POST a chat completion to router.huggingface.co/v1/chat/completions.
	//   routerModel  model id sent upstream
	//   messages     chat messages, forwarded as-is
	//   stream       when true, resolves with the raw upstream response stream as
	//                soon as the response headers arrive (the caller consumes it);
	//                otherwise resolves with the parsed JSON body, or
	//                { error: <raw body> } when the body is not valid JSON
	//   params       optional max_tokens (default 2048), temperature (default 0.7)
	//                and top_p (default 0.95)
	// A connection error, response error or 60 s socket timeout is retried once
	// after a one second pause; the error of the second attempt rejects the promise.
	async function hfChat(routerModel, messages, stream = false, params = {}) {
	  const body = JSON.stringify({
	    model: routerModel,
	    messages,
	    stream,
	    max_tokens: params.max_tokens || 2048,
	    temperature: params.temperature ?? 0.7,
	    top_p: params.top_p || 0.95,
	  });

	  const attempt = () =>
	    new Promise((resolve, reject) => {
	      const req = https.request(
	        {
	          hostname: "router.huggingface.co",
	          path: "/v1/chat/completions",
	          method: "POST",
	          headers: {
	            "Content-Type": "application/json",
	            Authorization: `Bearer ${HF_TOKEN}`,
	            "Content-Length": Buffer.byteLength(body),
	          },
	          timeout: 60000,
	        },
	        (res) => {
	          if (stream) return resolve(res);
	          // Decode as UTF-8 across chunk boundaries; concatenating raw Buffers
	          // chunk by chunk can split a multi-byte character in two.
	          res.setEncoding("utf8");
	          let data = "";
	          res.on("data", (chunk) => {
	            data += chunk;
	          });
	          // Without this an aborted response would leave the promise pending.
	          res.on("error", reject);
	          res.on("end", () => {
	            try {
	              resolve(JSON.parse(data));
	            } catch {
	              resolve({ error: data });
	            }
	          });
	        },
	      );
	      req.on("error", reject);
	      req.on("timeout", () => {
	        req.destroy();
	        reject(new Error("timeout"));
	      });
	      req.end(body);
	    });

	  // Single retry on failure
	  try {
	    return await attempt();
	  } catch {
	    await new Promise((r) => setTimeout(r, 1000));
	    return await attempt();
	  }
	}

	// ─── HF Embeddings ──────────────────────────────────────────────────────────
	// POST `input` (a single value or an array, always sent as an array under
	// `inputs`) to the all-MiniLM-L6-v2 feature-extraction pipeline on
	// router.huggingface.co. Resolves with the parsed JSON body, or null when the
	// body is not valid JSON. Rejects on a connection error or a 15 s socket timeout.
	async function hfEmbeddings(input) {
	  const payload = JSON.stringify({ inputs: Array.isArray(input) ? input : [input] });
	  const options = {
	    hostname: "router.huggingface.co",
	    path: "/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2",
	    method: "POST",
	    headers: {
	      "Content-Type": "application/json",
	      Authorization: `Bearer ${HF_TOKEN}`,
	      "Content-Length": Buffer.byteLength(payload),
	    },
	    timeout: 15000,
	  };

	  return new Promise((resolve, reject) => {
	    const onResponse = (res) => {
	      let raw = "";
	      res.on("data", (chunk) => {
	        raw += chunk;
	      });
	      res.on("end", () => {
	        let parsed = null;
	        try {
	          parsed = JSON.parse(raw);
	        } catch {
	          parsed = null;
	        }
	        resolve(parsed);
	      });
	    };

	    const req = https.request(options, onResponse);
	    req.on("error", reject);
	    req.on("timeout", () => {
	      req.destroy();
	      reject(new Error("timeout"));
	    });
	    req.end(payload);
	  });
	}

	// Deterministic local embedding fallback (384 dimensions, unit length).
	// The text is lower-cased and trimmed; every character adds 1.0 to the bucket
	// `charCode % 384` and every adjacent character pair adds 0.5 to the bucket
	// `(31 * first + second) % 384`. The vector is then divided by its Euclidean
	// norm (an all-zero vector is returned unchanged).
	// Falsy input yields the all-zero vector; truthy non-string input is converted
	// with String() instead of throwing.
	function localEmbed(text) {
	  const dim = 384;
	  const embedding = new Array(dim).fill(0);
	  const source = typeof text === "string" ? text : text ? String(text) : "";
	  const str = source.toLowerCase().trim();
	  for (let i = 0; i < str.length; i++) {
	    const c = str.charCodeAt(i);
	    embedding[c % dim] += 1.0;
	    if (i + 1 < str.length) {
	      const bigram = (c * 31 + str.charCodeAt(i + 1)) % dim;
	      embedding[bigram] += 0.5;
	    }
	  }
	  // `|| 1` avoids dividing by zero for empty input.
	  const norm = Math.sqrt(embedding.reduce((s, v) => s + v * v, 0)) || 1;
	  return embedding.map((v) => v / norm);
	}

	// ─── Public endpoints ────────────────────────────────────────────────────────
	// Service banner: status, version, number of catalog models, process uptime in
	// whole seconds, the list of endpoints, the free-tier terms and the pricing URL.
	app.get("/", (req, res) => {
	  const endpoints = {
	    chat: "POST /v1/chat/completions",
	    embeddings: "POST /v1/embeddings",
	    models: "GET /v1/models",
	    route: "GET /api/route?q=...",
	    health: "GET /health",
	  };
	  const freeTier = { limit: "25 req/day", models: ["chatgpt-5"], no_key_needed: true };

	  res.json({
	    status: "operational",
	    name: "Scottzilla Gateway",
	    version: "6.1",
	    models: models.length,
	    uptime_s: Math.floor(process.uptime()),
	    endpoints,
	    free_tier: freeTier,
	    pricing: PAYMENTS_URL,
	  });
	});

	// Liveness probe: process uptime in seconds, catalog size, number of entries in
	// the response cache and the current time.
	app.get("/health", (req, res) => {
	  const report = {
	    status: "healthy",
	    uptime: process.uptime(),
	    models: models.length,
	    cache_size: cache.size,
	    timestamp: new Date().toISOString(),
	  };
	  res.json(report);
	});

	// Catalog listing keyed by alias. size_gb is converted to bytes (0 when the
	// catalog has no size) and the parameter size defaults to "unknown". All
	// entries of one response share the same modified_at timestamp.
	app.get("/api/tags", (req, res) => {
	  const modifiedAt = new Date().toISOString();
	  res.json({
	    models: models.map((m) => ({
	      name: m.alias,
	      model: m.alias,
	      modified_at: modifiedAt,
	      size: (m.size_gb || 0) * 1e9,
	      digest: m.hf_id,
	      details: {
	        parent_model: m.hf_id,
	        format: m.arch,
	        family: m.arch,
	        parameter_size: m.params || "unknown",
	      },
	    })),
	  });
	});

	// Model list in the `{ object: "list", data: [...] }` shape: one card per
	// catalog model followed by a card for the embedding model.
	app.get("/v1/models", (req, res) => {
	  const nowSeconds = () => Math.floor(Date.now() / 1000);
	  const toCard = (m) => ({
	    id: m.alias,
	    object: "model",
	    created: nowSeconds(),
	    owned_by: "ScottzillaSystems",
	    hf_id: m.hf_id,
	    capabilities: m.capabilities,
	    params: m.params,
	  });

	  const data = models.map(toCard);
	  data.push({
	    id: "text-embedding-3-small",
	    object: "model",
	    created: nowSeconds(),
	    owned_by: "ScottzillaSystems",
	    type: "embedding",
	  });
	  res.json({ object: "list", data });
	});

	// Full model catalog exactly as parsed from models.json.
	app.get("/api/library", (req, res) => {
	  return res.json(catalog);
	});

	// Dry run of the keyword router: reports which model routeByCapability() picks
	// for the text in `?q=`. A repeated `q` parameter is joined with spaces; any
	// other non-string value is treated as empty text.
	app.get("/api/route", (req, res) => {
	  const raw = req.query.q;
	  const q = typeof raw === "string" ? raw : Array.isArray(raw) ? raw.join(" ") : "";
	  const pick = routeByCapability([{ role: "user", content: q }]);
	  res.json({
	    query: q,
	    routed_to: pick
	      ? { alias: pick.alias, name: pick.name, capabilities: pick.capabilities }
	      : null,
	  });
	});

	// Catalog entry for the model named in the body (`name`, or `model` as a
	// fallback), wrapped as { details }. 400 when the name is not a string, 404
	// when resolveModel() finds nothing.
	app.post("/api/show", (req, res) => {
	  // `?.` tolerates requests without a parsed JSON body.
	  const requested = req.body?.name || req.body?.model;
	  if (requested && typeof requested !== "string") {
	    return res.status(400).json({ error: "model name must be a string" });
	  }
	  const entry = resolveModel(requested);
	  if (!entry) return res.status(404).json({ error: "model not found" });
	  res.json({ details: { ...entry } });
	});

	// ─── Embeddings ──────────────────────────────────────────────────────────────
	// Embeddings endpoint. `input` is a single value or an array; non-string items
	// are JSON-encoded. Vectors come from hfEmbeddings(); when that call fails or
	// does not return exactly one vector per input, localEmbed() is used instead.
	// Only upstream results are cached, and only under a key that contains the
	// model name and the complete input, so two different requests can never
	// share a cache entry. `usage` reports character counts, not real token counts.
	app.post("/v1/embeddings", authMiddleware, async (req, res) => {
	  const { input, model } = req.body ?? {};
	  if (input === undefined || input === null) {
	    return res.status(400).json({
	      error: { message: "`input` is required", type: "invalid_request_error" },
	    });
	  }

	  const modelName = model || "text-embedding-3-small";
	  const texts = (Array.isArray(input) ? input : [input]).map((t) =>
	    typeof t === "string" ? t : JSON.stringify(t) ?? "",
	  );
	  const buildResponse = (vectors, tokens) => ({
	    object: "list",
	    data: vectors.map((emb, i) => ({
	      object: "embedding",
	      index: i,
	      // Some pipelines wrap each vector in an extra array; unwrap it.
	      embedding: Array.isArray(emb[0]) ? emb[0] : emb,
	    })),
	    model: modelName,
	    usage: { prompt_tokens: tokens, total_tokens: tokens },
	  });
	  const localVectors = () => texts.map((t) => localEmbed(t));

	  try {
	    // Very large inputs are not cached: the key would cost more than it saves.
	    const MAX_CACHE_KEY_LENGTH = 4096;
	    const fullKey = `emb:${modelName}:${JSON.stringify(texts)}`;
	    const cacheKey = fullKey.length <= MAX_CACHE_KEY_LENGTH ? fullKey : null;
	    if (cacheKey) {
	      const cached = cacheGet(cacheKey);
	      if (cached) return res.json(cached);
	    }

	    let embeddings = null;
	    try {
	      const hfResult = await hfEmbeddings(texts);
	      if (Array.isArray(hfResult) && hfResult.length === texts.length) {
	        embeddings = hfResult;
	      }
	    } catch {
	      // Upstream unavailable: fall through to the local embedding below.
	    }

	    const charCount = texts.join("").length;
	    if (embeddings) {
	      const response = buildResponse(embeddings, charCount);
	      if (cacheKey) cacheSet(cacheKey, response);
	      return res.json(response);
	    }
	    return res.json(buildResponse(localVectors(), charCount));
	  } catch (err) {
	    // Last resort (e.g. a malformed upstream vector): still answer with local embeddings.
	    if (res.headersSent) return;
	    res.json(buildResponse(localVectors(), 0));
	  }
	});

	// ─── Chat endpoints ──────────────────────────────────────────────────────────
	// Non-streaming chat endpoint answering with
	// { model, created_at, message: { role, content }, done }.
	// `model: "auto"` (or a name that cannot be resolved) is routed by keyword.
	// 400 for a malformed body, 403 when a free-tier caller asks for a model
	// outside its list, 502 when the upstream call fails or returns no choices.
	app.post("/api/chat", authMiddleware, async (req, res) => {
	  try {
	    const { model: modelName, messages, options } = req.body ?? {};
	    if (!Array.isArray(messages) || messages.length === 0) {
	      return res.status(400).json({ error: "`messages` must be a non-empty array" });
	    }
	    if (modelName != null && typeof modelName !== "string") {
	      return res.status(400).json({ error: "`model` must be a string" });
	    }

	    const entry = modelName === "auto"
	      ? routeByCapability(messages)
	      : (resolveModel(modelName) || routeByCapability(messages));
	    if (!entry) return res.status(404).json({ error: `Model not found: ${modelName}` });

	    if (req.tier === "free" && !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all")) {
	      return res.status(403).json({ error: `${entry.name} requires a paid plan.`, upgrade: PAYMENTS_URL });
	    }

	    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
	    const result = await hfChat(routerModel, messages, false, options || {});

	    // An upstream error body has no `choices`; report it instead of answering
	    // 200 with an empty message.
	    if (!Array.isArray(result?.choices)) {
	      const detail = typeof result?.error === "string"
	        ? result.error
	        : result?.error?.message ?? "upstream returned no choices";
	      return res.status(502).json({ error: `Gateway error: ${detail}` });
	    }
	    const content = result.choices[0]?.message?.content || "";

	    if (req.apiKey) recordUsage(req.apiKey, entry.alias);

	    res.json({
	      model: entry.alias,
	      created_at: new Date().toISOString(),
	      message: { role: "assistant", content },
	      done: true,
	    });
	  } catch (err) {
	    res.status(502).json({ error: `Gateway error: ${err.message}. Retrying may help.` });
	  }
	});

	// Single-prompt completion endpoint answering with
	// { model, created_at, response, done }. The prompt is sent upstream as one
	// user message. `model: "auto"` (or a name that cannot be resolved) is routed
	// by keyword.
	// 400 for a malformed body, 403 when a free-tier caller asks for a model
	// outside its list, 502 when the upstream call fails or returns no choices.
	app.post("/api/generate", authMiddleware, async (req, res) => {
	  try {
	    const { model: modelName, prompt, options } = req.body ?? {};
	    if (typeof prompt !== "string") {
	      return res.status(400).json({ error: "`prompt` must be a string" });
	    }
	    if (modelName != null && typeof modelName !== "string") {
	      return res.status(400).json({ error: "`model` must be a string" });
	    }

	    const messages = [{ role: "user", content: prompt }];
	    const entry = modelName === "auto"
	      ? routeByCapability(messages)
	      : (resolveModel(modelName) || routeByCapability(messages));
	    if (!entry) return res.status(404).json({ error: `Model not found: ${modelName}` });

	    if (req.tier === "free" && !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all")) {
	      return res.status(403).json({ error: `${entry.name} requires a paid plan.`, upgrade: PAYMENTS_URL });
	    }

	    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";
	    const result = await hfChat(routerModel, messages, false, options || {});

	    // An upstream error body has no `choices`; report it instead of answering
	    // 200 with an empty response.
	    if (!Array.isArray(result?.choices)) {
	      const detail = typeof result?.error === "string"
	        ? result.error
	        : result?.error?.message ?? "upstream returned no choices";
	      return res.status(502).json({ error: detail });
	    }

	    if (req.apiKey) recordUsage(req.apiKey, entry.alias);

	    res.json({
	      model: entry.alias,
	      created_at: new Date().toISOString(),
	      response: result.choices[0]?.message?.content || "",
	      done: true,
	    });
	  } catch (err) {
	    res.status(502).json({ error: err.message });
	  }
	});

	// Chat completions endpoint using the `chat.completion` response shape.
	// `model: "auto"` (or a name that cannot be resolved) is routed by keyword.
	// With `stream: true` the upstream response is relayed chunk by chunk;
	// otherwise one `chat.completion` object is returned.
	// 400 for a malformed body, 403 when a free-tier caller asks for a model
	// outside its list, 502 when the upstream call fails or returns no choices.
	app.post("/v1/chat/completions", authMiddleware, async (req, res) => {
	  try {
	    const { model: modelName, messages, stream, temperature, max_tokens, top_p } = req.body ?? {};
	    if (!Array.isArray(messages) || messages.length === 0) {
	      return res.status(400).json({ error: { message: "`messages` must be a non-empty array", type: "invalid_request_error" } });
	    }
	    if (modelName != null && typeof modelName !== "string") {
	      return res.status(400).json({ error: { message: "`model` must be a string", type: "invalid_request_error" } });
	    }

	    const entry = modelName === "auto"
	      ? routeByCapability(messages)
	      : (resolveModel(modelName) || routeByCapability(messages));
	    if (!entry) return res.status(404).json({ error: { message: `Model not found: ${modelName}`, type: "invalid_request_error" } });

	    if (req.tier === "free" && !req.allowedModels.includes(entry.alias) && !req.allowedModels.includes("all")) {
	      return res.status(403).json({ error: { message: `${entry.name} requires a paid plan.`, type: "insufficient_quota" } });
	    }

	    const routerModel = entry.router_model || "Qwen/Qwen3.5-9B";

	    if (stream) {
	      res.setHeader("Content-Type", "text/event-stream");
	      res.setHeader("Cache-Control", "no-cache");
	      res.setHeader("Connection", "keep-alive");

	      let upstream;
	      try {
	        upstream = await hfChat(routerModel, messages, true, { temperature, max_tokens, top_p });
	      } catch (e) {
	        // JSON.stringify keeps the event valid JSON whatever the message contains.
	        res.write(`data: ${JSON.stringify({ error: e.message })}\n\n`);
	        res.end();
	        return;
	      }

	      const failed = upstream.statusCode >= 400;
	      if (failed) {
	        // Upstream answered with an error: relay its status, content type and
	        // body instead of presenting the error body as an event stream.
	        res.status(upstream.statusCode);
	        res.setHeader("Content-Type", upstream.headers["content-type"] || "application/json");
	      }

	      // Stop reading from upstream once the client connection is gone.
	      res.on("close", () => upstream.destroy());
	      upstream.on("data", (chunk) => res.write(chunk));
	      upstream.on("end", () => {
	        if (!failed && req.apiKey) recordUsage(req.apiKey, entry.alias);
	        res.end();
	      });
	      upstream.on("error", () => res.end());
	      return;
	    }

	    const result = await hfChat(routerModel, messages, false, { temperature, max_tokens, top_p });

	    // An upstream error body has no `choices`; report it instead of answering
	    // 200 with an empty completion.
	    if (!Array.isArray(result?.choices)) {
	      const detail = typeof result?.error === "string"
	        ? result.error
	        : result?.error?.message ?? "upstream returned no choices";
	      return res.status(502).json({ error: { message: detail, type: "server_error" } });
	    }

	    if (req.apiKey) recordUsage(req.apiKey, entry.alias);

	    res.json({
	      id: `chatcmpl-${Date.now()}`,
	      object: "chat.completion",
	      created: Math.floor(Date.now() / 1000),
	      model: entry.alias,
	      choices: result.choices,
	      usage: result.usage || { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
	    });
	  } catch (err) {
	    res.status(502).json({ error: { message: err.message, type: "server_error" } });
	  }
	});

	// ─── Cleanup ─────────────────────────────────────────────────────────────────
	// Once an hour, remove every free-tier counter whose key does not end with
	// today's date (as given by toISOString, i.e. UTC).
	setInterval(function pruneStaleFreeUsage() {
	  const todaySuffix = `:${new Date().toISOString().split("T")[0]}`;
	  [...freeUsageToday.keys()]
	    .filter((usageKey) => !usageKey.endsWith(todaySuffix))
	    .forEach((usageKey) => freeUsageToday.delete(usageKey));
	}, 60 * 60 * 1000);

	// ─── Start ───────────────────────────────────────────────────────────────────
	// Listen on every interface and print a two-line startup banner.
	app.listen(PORT, "0.0.0.0", () => {
	  const banner = [
	    `⚡ Scottzilla Gateway v6.1 | :${PORT} | ${models.length} models`,
	    ` Internal agents: unlimited | Free tier: 25/day | Cache: active`,
	  ];
	  for (const line of banner) console.log(line);
	});