Spaces:
Running
Running
feat: cleaner model labels + ?ref attribution + first-response nudge
Browse files
app.py
CHANGED
|
@@ -9,6 +9,10 @@ banner at the bottom sends them to quicksilverpro.io for their own key.
|
|
| 9 |
Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
|
| 10 |
budget cap configured on the QSP side. In-process per-session rate-limit
|
| 11 |
keeps casual spam from spiking the bill.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
from __future__ import annotations
|
|
@@ -26,13 +30,21 @@ from openai import OpenAI
|
|
| 26 |
QSP_KEY = os.environ.get("QSP_KEY", "").strip()
|
| 27 |
QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
MODELS = [
|
| 30 |
-
("
|
| 31 |
-
("
|
| 32 |
-
("
|
| 33 |
]
|
| 34 |
-
|
| 35 |
-
DEFAULT_MODEL_LABEL = MODEL_CHOICES[0]
|
| 36 |
|
| 37 |
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
|
| 38 |
|
|
@@ -43,6 +55,18 @@ RATE_WINDOW_SEC = 60
|
|
| 43 |
RATE_MAX_MSGS = 8
|
| 44 |
|
| 45 |
_session_buckets: dict[str, deque] = {}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
|
| 48 |
def _rate_limited(session_hash: str) -> bool:
|
|
@@ -66,14 +90,10 @@ else:
|
|
| 66 |
client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)
|
| 67 |
|
| 68 |
|
| 69 |
-
def _parse_model_label(label: str) -> str:
|
| 70 |
-
return label.split(" — ", 1)[0]
|
| 71 |
-
|
| 72 |
-
|
| 73 |
def respond(
|
| 74 |
message: str,
|
| 75 |
history: list[tuple[str, str]],
|
| 76 |
-
|
| 77 |
system_prompt: str,
|
| 78 |
temperature: float,
|
| 79 |
max_tokens: int,
|
|
@@ -94,7 +114,8 @@ def respond(
|
|
| 94 |
)
|
| 95 |
return
|
| 96 |
|
| 97 |
-
|
|
|
|
| 98 |
messages: list[dict[str, str]] = []
|
| 99 |
if system_prompt.strip():
|
| 100 |
messages.append({"role": "system", "content": system_prompt.strip()})
|
|
@@ -127,31 +148,37 @@ def respond(
|
|
| 127 |
accumulated += delta
|
| 128 |
yield accumulated
|
| 129 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
# ────────────────────────── UI ──────────────────────────
|
| 132 |
|
| 133 |
-
HEADER_MD = """
|
| 134 |
# ⚡ QuickSilver Pro Chat
|
| 135 |
|
| 136 |
Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
|
| 137 |
|
| 138 |
-
<sub>Running on [QuickSilver Pro](
|
| 139 |
"""
|
| 140 |
|
| 141 |
-
FOOTER_MD = """
|
| 142 |
---
|
| 143 |
-
<sub>Powered by <a href="
|
| 144 |
"""
|
| 145 |
|
| 146 |
-
# theme moved to launch() in Gradio 6, dropped here to stay forward-compatible
|
| 147 |
with gr.Blocks(title="QuickSilver Pro Chat") as demo:
|
| 148 |
gr.Markdown(HEADER_MD)
|
| 149 |
|
| 150 |
with gr.Row():
|
| 151 |
with gr.Column(scale=1):
|
| 152 |
model_dropdown = gr.Dropdown(
|
| 153 |
-
choices=
|
| 154 |
-
value=
|
| 155 |
label="Model",
|
| 156 |
interactive=True,
|
| 157 |
)
|
|
@@ -168,9 +195,9 @@ with gr.Blocks(title="QuickSilver Pro Chat") as demo:
|
|
| 168 |
label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
|
| 169 |
)
|
| 170 |
with gr.Column(scale=3):
|
| 171 |
-
# Gradio 6.0 removed submit_btn / retry_btn / undo_btn / clear_btn
|
| 172 |
-
# in favor of a more opinionated default layout; dropping them
|
| 173 |
-
# keeps this compatible with both 5.x and 6.x.
|
| 174 |
gr.ChatInterface(
|
| 175 |
fn=respond,
|
| 176 |
additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],
|
|
|
|
| 9 |
Single-tenant QSP key (stored as the `QSP_KEY` Space secret) with a monthly
|
| 10 |
budget cap configured on the QSP side. In-process per-session rate-limit
|
| 11 |
keeps casual spam from spiking the bill.
|
| 12 |
+
|
| 13 |
+
Outbound links to quicksilverpro.io all carry `?ref=GHKN4L37` — the reserved REFERRAL_CODES entry
|
| 14 |
+
earmarked for HF-sourced signups. Lets us attribute signup volume from this
|
| 15 |
+
Space separately from other channels (Discord, Twitter, direct).
|
| 16 |
"""
|
| 17 |
|
| 18 |
from __future__ import annotations
|
|
|
|
| 30 |
QSP_KEY = os.environ.get("QSP_KEY", "").strip()
|
| 31 |
QSP_BASE = os.environ.get("QSP_BASE", "https://api.quicksilverpro.io/v1")
|
| 32 |
|
| 33 |
+
# Attribution code for this Space — reserved from the REFERRAL_CODES private pool
|
| 34 |
+
# per growth/PROMO.md. Hardcoded here rather than env-configured because it
|
| 35 |
+
# never changes (a single Space = a single attribution bucket).
|
| 36 |
+
REF_CODE = "GHKN4L37"
|
| 37 |
+
SIGNUP_URL = f"https://quicksilverpro.io/?ref={REF_CODE}"
|
| 38 |
+
CLI_URL = "https://github.com/machinefi/qspro-cli"
|
| 39 |
+
|
| 40 |
+
# Gradio Dropdown accepts (display_label, value) tuples; the callback
|
| 41 |
+
# receives the value string, so we don't need to parse it back.
|
| 42 |
MODELS = [
|
| 43 |
+
("DeepSeek V3 — general-purpose, fast", "deepseek-v3"),
|
| 44 |
+
("DeepSeek R1 — reasoning, slower, deeper", "deepseek-r1"),
|
| 45 |
+
("Qwen 3.5-35B-A3B — 262K context, multilingual", "qwen3.5-35b"),
|
| 46 |
]
|
| 47 |
+
DEFAULT_MODEL_VALUE = MODELS[0][1]
|
|
|
|
| 48 |
|
| 49 |
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant."
|
| 50 |
|
|
|
|
| 55 |
RATE_MAX_MSGS = 8
|
| 56 |
|
| 57 |
_session_buckets: dict[str, deque] = {}
|
| 58 |
+
# Tracks which sessions have already received the first-response nudge, so
|
| 59 |
+
# we only attach it once per session instead of on every assistant message.
|
| 60 |
+
_session_nudged: set[str] = set()
|
| 61 |
+
|
| 62 |
+
# Appended to the first assistant response per session. Markdown-safe. The
|
| 63 |
+
# "---" horizontal rule visually separates the nudge from real model output
|
| 64 |
+
# so users don't confuse it with generated content.
|
| 65 |
+
NUDGE_MD = (
|
| 66 |
+
f"\n\n---\n\n"
|
| 67 |
+
f"💡 *Liked this? [Get your own key]({SIGNUP_URL}) — $5 in free credits, "
|
| 68 |
+
f"no card required. Or `pip install quicksilverpro` for the [CLI]({CLI_URL}).*"
|
| 69 |
+
)
|
| 70 |
|
| 71 |
|
| 72 |
def _rate_limited(session_hash: str) -> bool:
|
|
|
|
| 90 |
client = OpenAI(base_url=QSP_BASE, api_key=QSP_KEY)
|
| 91 |
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def respond(
|
| 94 |
message: str,
|
| 95 |
history: list[tuple[str, str]],
|
| 96 |
+
model: str,
|
| 97 |
system_prompt: str,
|
| 98 |
temperature: float,
|
| 99 |
max_tokens: int,
|
|
|
|
| 114 |
)
|
| 115 |
return
|
| 116 |
|
| 117 |
+
is_first_response = not (history or [])
|
| 118 |
+
|
| 119 |
messages: list[dict[str, str]] = []
|
| 120 |
if system_prompt.strip():
|
| 121 |
messages.append({"role": "system", "content": system_prompt.strip()})
|
|
|
|
| 148 |
accumulated += delta
|
| 149 |
yield accumulated
|
| 150 |
|
| 151 |
+
# Append the signup nudge to the first assistant response of the session
|
| 152 |
+
# only — a persistent nudge on every turn would feel spammy. Guarded by a
|
| 153 |
+
# set of session hashes so a fast re-click doesn't double-attach.
|
| 154 |
+
if is_first_response and session_hash not in _session_nudged:
|
| 155 |
+
_session_nudged.add(session_hash)
|
| 156 |
+
yield accumulated + NUDGE_MD
|
| 157 |
+
|
| 158 |
|
| 159 |
# ────────────────────────── UI ──────────────────────────
|
| 160 |
|
| 161 |
+
HEADER_MD = f"""
|
| 162 |
# ⚡ QuickSilver Pro Chat
|
| 163 |
|
| 164 |
Try **DeepSeek V3 / R1** and **Qwen 3.5-35B-A3B** via an OpenAI-compatible API — no signup needed here.
|
| 165 |
|
| 166 |
+
<sub>Running on [QuickSilver Pro]({SIGNUP_URL}) · Get your own key ($5 free credits): [{SIGNUP_URL.replace('https://', '')}]({SIGNUP_URL}) · CLI: `pip install quicksilverpro`</sub>
|
| 167 |
"""
|
| 168 |
|
| 169 |
+
FOOTER_MD = f"""
|
| 170 |
---
|
| 171 |
+
<sub>Powered by <a href="{SIGNUP_URL}">QuickSilver Pro</a> — open-source LLM inference, OpenAI-compatible, ~20% below OpenRouter / Together / Fireworks. Built by <a href="{SIGNUP_URL}">MachineFi Labs</a>.</sub>
|
| 172 |
"""
|
| 173 |
|
|
|
|
| 174 |
with gr.Blocks(title="QuickSilver Pro Chat") as demo:
|
| 175 |
gr.Markdown(HEADER_MD)
|
| 176 |
|
| 177 |
with gr.Row():
|
| 178 |
with gr.Column(scale=1):
|
| 179 |
model_dropdown = gr.Dropdown(
|
| 180 |
+
choices=MODELS,
|
| 181 |
+
value=DEFAULT_MODEL_VALUE,
|
| 182 |
label="Model",
|
| 183 |
interactive=True,
|
| 184 |
)
|
|
|
|
| 195 |
label="Max tokens", minimum=64, maximum=4096, step=64, value=1024
|
| 196 |
)
|
| 197 |
with gr.Column(scale=3):
|
| 198 |
+
# Gradio 6.0 removed the submit_btn / retry_btn / undo_btn / clear_btn
|
| 199 |
+
# args in favor of a more opinionated default layout; dropping them
|
| 200 |
+
# keeps this compatible with both 5.x and 6.x.
|
| 201 |
gr.ChatInterface(
|
| 202 |
fn=respond,
|
| 203 |
additional_inputs=[model_dropdown, system_prompt, temperature, max_tokens],
|