""" QuickSilver Pro Chat — Hugging Face Space. A zero-friction try-it demo for QuickSilver Pro. Anyone on HF can chat with DeepSeek V3 / R1 / Qwen 3.5 through our OpenAI-compatible endpoint, without creating an account first. The goal is top-of-funnel discoverability: the banner at the bottom sends them to quicksilverpro.io for their own key. Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly budget cap configured on the QSP side. In-process per-session rate-limit keeps casual spam from spiking the bill. Outbound links all carry `?ref=GHKN4L37` — the reserved REFERRAL_CODES entry earmarked for HF-sourced signups. Lets us attribute signup volume from this Space separate from other channels (Discord, Twitter, direct). """ from __future__ import annotations import os import time from collections import deque from typing import Iterable import gradio as gr from openai import OpenAI # ────────────────────────── Configuration ────────────────────────── QSP_KEY = os.environ.get("QSP_KEY", "").strip() QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1") # Attribution code for this Space — reserved from REFERRAL_CODES private pool # per growth/PROMO.md. Hardcoded here rather than env-configured because it # never changes (a single Space = a single attribution bucket). REF_CODE = "GHKN4L37" SIGNUP_URL = f"https://quicksilverpro.io/?ref={REF_CODE}" CLI_URL = "https://github.com/machinefi/qspro-cli" # Gradio Dropdown accepts (display_label, value) tuples; the callback # receives the value string, so we don't need to parse it back. MODELS = [ ("DeepSeek V3 — general-purpose, fast", "deepseek-v3"), ("DeepSeek R1 — reasoning, slower, deeper", "deepseek-r1"), ("Qwen 3.5-35B-A3B — 262K context, multilingual", "qwen3.5-35b"), ] DEFAULT_MODEL_VALUE = MODELS[0][1] DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant." # Per-session soft rate limit. Not a security boundary — the QSP-side budget # cap on the shared key is. This just keeps one noisy session from blowing # through the daily allowance in 90 seconds. RATE_WINDOW_SEC = 60 RATE_MAX_MSGS = 8 _session_buckets: dict[str, deque] = {} # Tracks which sessions have already received the first-response nudge, so # we only attach it once per session instead of on every assistant message. _session_nudged: set[str] = set() # Appended to the first assistant response per session. Markdown-safe. The # "---" horizontal rule visually separates the nudge from real model output # so users don't confuse it with generated content. NUDGE_MD = ( f"\n\n---\n\n" f"💡 *Liked this? [Get your own key]({SIGNUP_URL}) — $5 in free credits, " f"no card required. Or `pip install quicksilverpro` for the [CLI]({CLI_URL}).*" ) def _rate_limited(session_hash: str) -> bool: now = time.time() bucket = _session_buckets.setdefault(session_hash, deque()) while bucket and now - bucket[0] > RATE_WINDOW_SEC: bucket.popleft() if len(bucket) >= RATE_MAX_MSGS: return True bucket.append(now) return False # ────────────────────────── OpenAI client ────────────────────────── if not QSP_KEY: # Don't crash on import — let the UI render a clear error banner instead, # so the Space owner sees "QSP_KEY secret not set" rather than a 500. client = None else: client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY) def respond( message: str, history: list[tuple[str, str]], model: str, system_prompt: str, temperature: float, max_tokens: int, request: gr.Request | None = None, ) -> Iterable[str]: if client is None: yield ( "⚠️ Space misconfigured: `QSP_KEY` secret is not set. " "Owner: configure it in Settings → Variables and secrets." ) return session_hash = (request.session_hash if request else "anon") or "anon" if _rate_limited(session_hash): yield ( f"⏳ Rate limit reached ({RATE_MAX_MSGS} messages / " f"{RATE_WINDOW_SEC}s). Take a breath, then try again." ) return is_first_response = not (history or []) messages: list[dict[str, str]] = [] if system_prompt.strip(): messages.append({"role": "system", "content": system_prompt.strip()}) for user_msg, assistant_msg in history or []: if user_msg: messages.append({"role": "user", "content": user_msg}) if assistant_msg: messages.append({"role": "assistant", "content": assistant_msg}) messages.append({"role": "user", "content": message}) try: stream = client.chat.completions.create( model=model, messages=messages, temperature=float(temperature), max_tokens=int(max_tokens), stream=True, ) except Exception as e: yield f"❌ API error: {type(e).__name__}: {str(e)[:300]}" return accumulated = "" for chunk in stream: try: delta = chunk.choices[0].delta.content or "" except (AttributeError, IndexError): delta = "" if delta: accumulated += delta yield accumulated # Append the signup nudge to the first assistant response of the session # only — a persistent nudge on every turn would feel spammy. Guarded by a # set of session hashes so a fast re-click doesn't double-attach. if is_first_response and session_hash not in _session_nudged: _session_nudged.add(session_hash) yield accumulated + NUDGE_MD # ────────────────────────── UI ────────────────────────── HEADER_MD = f""" # ⚡ QuickSilver Pro Chat Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here. Running on [QuickSilver Pro]({SIGNUP_URL}) · Get your own key ($5 free credits): [{SIGNUP_URL.replace('https://', '')}]({SIGNUP_URL}) · CLI: `pip install quicksilverpro` """ FOOTER_MD = f""" --- Powered by QuickSilver Pro — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by MachineFi Labs. """ with gr.Blocks(title="QuickSilver Pro Chat") as demo: gr.Markdown(HEADER_MD) with gr.Row(): with gr.Column(scale=1): model_dropdown = gr.Dropdown( choices=MODELS, value=DEFAULT_MODEL_VALUE, label="Model", interactive=True, ) system_prompt = gr.Textbox( label="System prompt", value=DEFAULT_SYSTEM_PROMPT, lines=3, max_lines=8, ) temperature = gr.Slider( label="Temperature", minimum=0.0, maximum=2.0, step=0.1, value=0.7 ) max_tokens = gr.Slider( label="Max tokens", minimum=64, maximum=4096, step=64, value=1024 ) with gr.Column(scale=3): # Gradio 6.0 removed the submit_btn / retry_btn / undo_btn / clear_btn # args in favor of a more opinionated default layout; dropping them # keeps this compatible with both 5.x and 6.x. gr.ChatInterface( fn=respond, additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens], examples=[ ["Write a concise git commit message for: fixed off-by-one error in pagination"], ["Explain closures in JavaScript in 2 sentences"], ["What's the fastest sorting algorithm for 100k integers and why?"], ["Translate 'Hello, how are you?' into formal Japanese, Hindi, and Russian"], ], cache_examples=False, ) gr.Markdown(FOOTER_MD) if __name__ == "__main__": demo.queue(default_concurrency_limit=4, max_size=64).launch()