"""Perplexity browser provider — routes generation through a local GraceKelly instance which drives the Perplexity Pro web UI via Playwright. Why this exists: - Project budget is $0. Direct Anthropic / OpenAI APIs are paid. - A live Perplexity Pro subscription exposes frontier models (Claude Sonnet 4.6 + reasoning, GPT-5.4, Gemini Pro, Kimi K2.5) at no incremental cost per request. - GraceKelly (D:\\GraceKelly) is an existing local FastAPI service that already handles Playwright session management, model selection, the "thinking" toggle, and prompt submit/parse against the Perplexity UI. We just hit `POST /api/v1/pipeline` with `{prompt, model}` and read the plain-text `answer` back. Latency is ~20-40s per call (browser path), so this provider is intended for evaluation runs and one-off probes, not for the interactive Streamlit chat surface. """ from __future__ import annotations import json import re import time from urllib import error as urlerror from urllib import request as urlrequest from nl_sql.llm.providers.base import ( GenerateRequest, GenerateResponse, ProviderError, ) # ANSI escape sequences (terminal colour / formatting codes). Perplexity's # web UI sometimes renders model output through a Markdown pipeline that # leaves these codes in the copy-back text — e.g. `[4m`/`[0m` (underline # on/off) around tool argument quotes. They break downstream JSON parsing. _ANSI_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\[[0-9;]+m") # Sonnet routed through Perplexity sometimes returns its answer as the # rendered JSON of the `generate_sql` output contract — a literal # `{"sql": "...", "rationale": "..."}` string instead of a bare SQL # statement. The downstream `_strip_to_sql` parser doesn't recognise this # shape and falls back to grabbing everything after the first `SELECT`, # trailing `","rationale":...` and all — which then 400s in sqlglot as # invalid SQL. Pre-unwrap the JSON here so the parser sees clean SQL. 
_SQL_JSON_HINT = re.compile(r'^\s*\{.*"sql"\s*:', re.DOTALL)

# Tolerant regex for the `"sql": "..."` key/value pair. Used as a fallback
# when `_SQL_JSON_HINT` confirms envelope shape but strict `json.loads`
# fails — typically because the Perplexity Markdown layer leaves literal
# newlines inside the SQL string (legal SQL, illegal JSON). The
# `\\.|[^"\\]` alternation captures `\"` / `\\` escape sequences as well
# as any non-quote / non-backslash character (newlines included, since
# they aren't in the negated class). Anchoring to envelope-shaped responses
# (via the gate above) prevents this from false-positiving on bare SQL
# that happens to contain `"sql":` as a literal substring.
_SQL_KV_RE = re.compile(r'"sql"\s*:\s*"((?:\\.|[^"\\])*)"')

# Single-pass decoder for JSON string escapes. Sequential `.replace()`
# would double-decode — e.g. `\\n` (three source characters `\`, `\`, `n`,
# which JSON parses to backslash + n) would first collapse `\\` → `\`,
# producing `\n`, and a subsequent `\n` → newline pass would corrupt it
# into a real newline. Regex substitution processes each escape exactly
# once, left to right.
#
# Group 1 captures a run of consecutive `\uXXXX` escapes (kept together so
# a UTF-16 surrogate pair can be combined into one code point); group 2
# captures the single character following any other backslash.
_JSON_ESCAPE_RE = re.compile(r"((?:\\u[0-9a-fA-F]{4})+)|\\(.)")
_JSON_ESCAPE_TABLE = {
    '"': '"',
    "\\": "\\",
    "/": "/",
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
}


def _replace_json_escape(match: re.Match[str]) -> str:
    """Return the decoded text for one `_JSON_ESCAPE_RE` match."""
    unicode_run, escaped_char = match.groups()
    if unicode_run is not None:
        # The run contains nothing but well-formed `\uXXXX` escapes, so it
        # is always a valid JSON string body; the JSON decoder also joins
        # surrogate pairs for us.
        return json.loads(f'"{unicode_run}"')
    # Unknown single-character escapes are left exactly as written.
    return _JSON_ESCAPE_TABLE.get(escaped_char, match.group(0))


def _decode_json_string_escapes(raw: str) -> str:
    """Decode JSON string escapes left-to-right in a single pass.

    Handles the two-character escapes listed in `_JSON_ESCAPE_TABLE` and
    `\\uXXXX` Unicode escapes (including surrogate pairs). Anything else —
    an unrecognised escape letter or a truncated `\\u` sequence — passes
    through unchanged rather than raising, because this is the tolerant
    fallback used when strict JSON parsing has already failed.

    Args:
        raw: The still-escaped contents of a JSON string (without the
            surrounding double quotes).

    Returns:
        The decoded string.
    """
    return _JSON_ESCAPE_RE.sub(_replace_json_escape, raw)


def _unwrap_sql_json(text: str) -> str:
    """If `text` is the JSON output-contract envelope, return just the SQL.

    Three strategies are tried in order once `_SQL_JSON_HINT` confirms the
    text starts with `{` and contains a `"sql":` key:

    1. Strict `json.loads` of the whole text.
    2. Strict `json.loads` of the text cut after its last `}` (tolerates
       trailing prose after the object).
    3. Regex extraction of the `"sql"` value followed by escape decoding
       (tolerates literal newlines inside the string).

    Args:
        text: The provider answer, already stripped of ANSI codes.

    Returns:
        The stripped SQL string when one can be extracted and is non-blank;
        otherwise `text` unchanged.
    """
    if not _SQL_JSON_HINT.match(text):
        return text

    obj = None
    try:
        obj = json.loads(text)
    except json.JSONDecodeError:
        # Tolerate trailing prose after the JSON object by snipping at the
        # last closing brace and retrying.
        last = text.rfind("}")
        if last != -1:
            try:
                obj = json.loads(text[: last + 1])
            except json.JSONDecodeError:
                obj = None

    if isinstance(obj, dict):
        sql = obj.get("sql")
        if isinstance(sql, str) and sql.strip():
            return sql.strip()

    # Regex fallback: strict JSON parsing fails when the SQL value contains
    # literal newlines (which the Perplexity Markdown layer routinely
    # inserts). Pull the SQL value out by structure. Safe because the
    # envelope shape is already confirmed by `_SQL_JSON_HINT.match` above.
    match = _SQL_KV_RE.search(text)
    if match:
        sql = _decode_json_string_escapes(match.group(1)).strip()
        if sql:
            return sql
    return text


def _parse_pipeline_body(raw_body: bytes) -> dict:
    """Decode a `/api/v1/pipeline` response body into a JSON object.

    Args:
        raw_body: The undecoded HTTP response body.

    Returns:
        The parsed JSON object.

    Raises:
        ProviderError: If the body is not valid UTF-8, is not valid JSON,
            or is valid JSON but not an object.
    """
    try:
        parsed = json.loads(raw_body.decode("utf-8"))
    except ValueError as exc:
        # `UnicodeDecodeError` and `json.JSONDecodeError` both subclass
        # `ValueError`; surface either as the provider's own error type.
        preview = raw_body[:400].decode("utf-8", errors="replace")
        raise ProviderError(
            f"GraceKelly /api/v1/pipeline returned a non-JSON body: {preview!r}"
        ) from exc
    if not isinstance(parsed, dict):
        raise ProviderError(
            "GraceKelly /api/v1/pipeline returned JSON of type "
            f"{type(parsed).__name__}; expected an object"
        )
    return parsed


class PerplexityProvider:
    """LLMProvider that proxies generate() calls to a local GraceKelly server.

    GraceKelly drives the Perplexity web UI via Playwright with a logged-in
    Chrome profile, so the model behind the scenes is whichever the caller
    picked in Perplexity's model menu (default `claude-sonnet-4-6` here,
    which corresponds to Claude Sonnet 4.6 with reasoning enabled).
    """

    name: str = "perplexity"

    def __init__(
        self,
        *,
        model: str = "claude-sonnet-4-6",
        base_url: str = "http://127.0.0.1:8011",
        timeout_seconds: float = 180.0,
    ) -> None:
        """Configure the provider.

        Args:
            model: Model identifier sent in the request payload.
            base_url: Root URL of the GraceKelly server; a trailing slash
                is removed.
            timeout_seconds: Timeout passed to `urlopen` for each request.

        Raises:
            ProviderError: If `model` is empty or whitespace-only.
        """
        if not model.strip():
            raise ProviderError("PerplexityProvider requires non-empty model")
        self.model = model
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout_seconds

    def generate(self, req: GenerateRequest) -> GenerateResponse:
        """Send one prompt to GraceKelly and return the cleaned answer.

        The system text (when present) is prepended to the prompt, separated
        by a blank line. The returned answer has ANSI codes removed and the
        JSON output-contract envelope unwrapped to bare SQL.

        Args:
            req: The generation request; `req.prompt` and `req.system` are
                read.

        Returns:
            A `GenerateResponse` whose token counts are word-count
            approximations and whose latency covers the HTTP round trip.

        Raises:
            ProviderError: On an HTTP error status, when the server is
                unreachable or times out, or when the response body is not
                a JSON object.
        """
        prompt = req.prompt
        if req.system:
            prompt = f"{req.system}\n\n{prompt}"

        payload = json.dumps(
            {
                "prompt": prompt,
                "model": self.model,
                "reliability_level": "quick",
                "multi_model": False,
                "dry_run": False,
            }
        ).encode("utf-8")
        http_request = urlrequest.Request(
            f"{self._base_url}/api/v1/pipeline",
            data=payload,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
            method="POST",
        )

        t0 = time.perf_counter()
        try:
            with urlrequest.urlopen(http_request, timeout=self._timeout) as response:
                raw_body = response.read()
        except urlerror.HTTPError as exc:
            # HTTPError subclasses URLError, so it must be handled first.
            raise ProviderError(
                f"GraceKelly /api/v1/pipeline returned {exc.code}: "
                f"{exc.read().decode('utf-8', errors='replace')[:400]}"
            ) from exc
        except (urlerror.URLError, TimeoutError) as exc:
            raise ProviderError(
                f"GraceKelly unreachable at {self._base_url}: {exc!r}. "
                "Start it with `python -m uvicorn gracekelly.main:create_app "
                "--factory --host 127.0.0.1 --port 8011` "
                "and set GRACEKELLY_EXECUTION_PROFILE=hybrid."
            ) from exc
        elapsed_ms = (time.perf_counter() - t0) * 1000.0

        parsed = _parse_pipeline_body(raw_body)
        answer = _ANSI_RE.sub("", str(parsed.get("answer") or ""))
        answer = _unwrap_sql_json(answer)

        # Perplexity browser path does not surface token counts. Use word
        # count as a coarse proxy so the eval reports show something
        # plausible without misrepresenting actual billing units.
        approx_in = max(1, len(prompt.split()))
        approx_out = max(1, len(answer.split()))
        return GenerateResponse(
            text=answer,
            model=str(parsed.get("model_id") or self.model),
            input_tokens=approx_in,
            output_tokens=approx_out,
            latency_ms=elapsed_ms,
        )