Spaces:

liovina
/

nl-sql

Sleeping

App Files Files Community

nl-sql / src /nl_sql /llm /providers /perplexity.py

liovina

Deploy NL_SQL HEAD to HF Space

942050b verified 23 days ago

raw

history blame contribute delete

8.19 kB

	"""Perplexity browser provider — routes generation through a local GraceKelly
	instance which drives the Perplexity Pro web UI via Playwright.

	Why this exists:
	- Project budget is $0. Direct Anthropic / OpenAI APIs are paid.
	- A live Perplexity Pro subscription exposes frontier models (Claude Sonnet
	4.6 + reasoning, GPT-5.4, Gemini Pro, Kimi K2.5) at no incremental cost
	per request.
	- GraceKelly (D:\\GraceKelly) is an existing local FastAPI service that
	already handles Playwright session management, model selection, the
	"thinking" toggle, and prompt submit/parse against the Perplexity UI.

	We just hit `POST /api/v1/pipeline` with `{prompt, model}` and read the
	plain-text `answer` back. Latency is ~20-40s per call (browser path), so
	this provider is intended for evaluation runs and one-off probes, not for
	the interactive Streamlit chat surface.
	"""

	from __future__ import annotations

	import json
	import re
	import time
	from urllib import error as urlerror
	from urllib import request as urlrequest

	from nl_sql.llm.providers.base import (
	GenerateRequest,
	GenerateResponse,
	ProviderError,
	)

	# ANSI escape sequences (terminal colour / formatting codes). Perplexity's
	# web UI sometimes renders model output through a Markdown pipeline that
	# leaves these codes in the copy-back text — e.g. `[4m`/`[0m` (underline
	# on/off) around tool argument quotes. They break downstream JSON parsing.
	#
	# Two alternatives are matched:
	#   1. a full CSI sequence: ESC, `[`, numeric/`;` parameters, final letter;
	#   2. the bare `[<params>m` form shown in the example above (presumably
	#      what remains when the ESC byte itself was lost in transit).
	# The `|` between them must be an unescaped alternation: written as `\|`
	# it would demand a literal pipe character and never match real output.
	_ANSI_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\[[0-9;]+m")

	# Sonnet routed through Perplexity sometimes returns its answer as the
	# rendered JSON of the `generate_sql` output contract — a literal
	# `{"sql": "...", "rationale": "..."}` string instead of a bare SQL
	# statement. The downstream `_strip_to_sql` parser doesn't recognise this
	# shape and falls back to grabbing everything after the first `SELECT`,
	# trailing `","rationale":...` and all — which then 400s in sqlglot as
	# invalid SQL. Pre-unwrap the JSON here so the parser sees clean SQL.
	#
	# Shape gate: optional leading whitespace, an opening brace, then a
	# `"sql":` key anywhere after it (DOTALL lets `.` span the newlines of a
	# pretty-printed object, and the key need not be the first one).
	_SQL_JSON_HINT = re.compile(r'^\s*\{.*?"sql"\s*:', re.DOTALL)

	# Tolerant regex for the `"sql": "..."` key/value pair. Used as a fallback
	# when `_SQL_JSON_HINT` confirms envelope shape but strict `json.loads`
	# fails — typically because the Perplexity Markdown layer leaves literal
	# newlines inside the SQL string (legal SQL, illegal JSON). The
	# `\\.|[^"\\]` alternation captures `\"` / `\\` escape sequences as well
	# as any non-quote / non-backslash character (newlines included, since
	# they aren't in the negated class). Whitespace around the colon is
	# optional. Anchoring to envelope-shaped responses (via the gate above)
	# prevents this from false-positiving on bare SQL that happens to contain
	# `"sql":` as a literal substring.
	_SQL_KV_RE = re.compile(r'"sql"\s*:\s*"((?:\\.|[^"\\])*)"')

	# Single-pass decoder for the JSON string escapes we actually see in
	# Perplexity output. Sequential `.replace()` would double-decode — e.g.,
	# `\\n` (three source bytes `\`, `\`, `n`, which JSON parses to `\n`,
	# two chars: backslash + n) would first collapse `\\` → `\`, producing
	# `\n` (two bytes), and a subsequent `\n` → newline pass would corrupt
	# it into a real newline. Regex-substitution processes each escape
	# exactly once.
	_JSON_ESCAPE_RE = re.compile(r"\\(.)")
	# Maps the character following a backslash to its decoded value. Escapes
	# missing from this table are left for the caller to pass through.
	_JSON_ESCAPE_TABLE = {
	    '"': '"',
	    "\\": "\\",
	    "/": "/",
	    "b": "\b",
	    "f": "\f",
	    "n": "\n",
	    "r": "\r",
	    "t": "\t",
	}


	def _decode_json_string_escapes(raw: str) -> str:
	    """Translate backslash escapes in `raw` in one left-to-right sweep.

	    Every backslash-plus-character pair found by `_JSON_ESCAPE_RE` is
	    looked up in `_JSON_ESCAPE_TABLE` and replaced by its decoded value.
	    Pairs with no table entry (e.g., `\\u` Unicode sequences) are emitted
	    verbatim: the real Perplexity payloads don't use them, and decoding
	    them properly would mean pulling a full JSON parser back in, which
	    this tolerant fallback exists to avoid.
	    """

	    def _translate(hit):
	        # group(1) is the character after the backslash; group(0) is the
	        # whole two-character escape, returned untouched when unknown.
	        escaped_char = hit.group(1)
	        if escaped_char in _JSON_ESCAPE_TABLE:
	            return _JSON_ESCAPE_TABLE[escaped_char]
	        return hit.group(0)

	    return _JSON_ESCAPE_RE.sub(_translate, raw)


	def _unwrap_sql_json(text: str) -> str:
	    """Return the SQL held in a JSON output-contract envelope, else `text`.

	    `text` is returned untouched unless `_SQL_JSON_HINT` recognises it as
	    envelope-shaped. For envelope-shaped input the SQL is looked for in
	    two stages: strict JSON parsing first, then a structural regex. If
	    neither yields a non-blank SQL string, `text` is returned as-is.
	    """
	    if _SQL_JSON_HINT.match(text) is None:
	        return text

	    # Stage 1: strict parse. Try the full text first; if that fails, retry
	    # on the prefix ending at the last `}` so trailing prose after the
	    # JSON object is tolerated.
	    attempts = [text]
	    closing = text.rfind("}")
	    if closing >= 0:
	        attempts.append(text[: closing + 1])

	    payload = None
	    for attempt in attempts:
	        try:
	            payload = json.loads(attempt)
	        except json.JSONDecodeError:
	            continue
	        break

	    if isinstance(payload, dict):
	        candidate = payload.get("sql")
	        if isinstance(candidate, str):
	            trimmed = candidate.strip()
	            if trimmed:
	                return trimmed

	    # Stage 2: strict parsing fails when the SQL value contains literal
	    # newlines (which the Perplexity Markdown layer routinely inserts), so
	    # pull the value out by structure instead. Safe because the envelope
	    # shape was already confirmed by the hint check at the top.
	    found = _SQL_KV_RE.search(text)
	    if found is not None:
	        recovered = _decode_json_string_escapes(found.group(1)).strip()
	        if recovered:
	            return recovered

	    return text


	class PerplexityProvider:
	    """LLMProvider that proxies generate() calls to a local GraceKelly server.

	    GraceKelly drives the Perplexity web UI via Playwright with a logged-in
	    Chrome profile, so the model behind the scenes is whichever the caller
	    picked in Perplexity's model menu (default `claude-sonnet-4-6` here,
	    which corresponds to Claude Sonnet 4.6 with reasoning enabled).

	    HTTP error statuses, an unreachable server, timeouts, and response
	    bodies that are not a JSON object are all reported as `ProviderError`.
	    """

	    name: str = "perplexity"

	    def __init__(
	        self,
	        *,
	        model: str = "claude-sonnet-4-6",
	        base_url: str = "http://127.0.0.1:8011",
	        timeout_seconds: float = 180.0,
	    ) -> None:
	        """Configure the provider; no network traffic happens here.

	        Args:
	            model: Model identifier sent in the `model` field of each
	                pipeline request. Must not be blank.
	            base_url: Root URL of the GraceKelly server; trailing slashes
	                are dropped so the endpoint path can be appended cleanly.
	            timeout_seconds: Timeout passed to `urlopen` for each request.

	        Raises:
	            ProviderError: If `model` is empty or whitespace-only.
	        """
	        if not model.strip():
	            raise ProviderError("PerplexityProvider requires non-empty model")
	        self.model = model
	        self._base_url = base_url.rstrip("/")
	        self._timeout = timeout_seconds

	    @staticmethod
	    def _parse_pipeline_body(body: str) -> dict[str, object]:
	        """Decode the pipeline response body, which must be a JSON object.

	        Raises:
	            ProviderError: If `body` is not valid JSON, or is valid JSON
	                whose top-level value is not an object. Surfacing these as
	                `ProviderError` keeps them consistent with the transport
	                failures raised by `generate`.
	        """
	        try:
	            parsed = json.loads(body)
	        except json.JSONDecodeError as exc:
	            raise ProviderError(
	                "GraceKelly /api/v1/pipeline returned a non-JSON body: "
	                f"{body[:400]!r}"
	            ) from exc
	        if not isinstance(parsed, dict):
	            raise ProviderError(
	                "GraceKelly /api/v1/pipeline returned JSON that is not an "
	                f"object: {body[:400]!r}"
	            )
	        return parsed

	    def generate(self, req: GenerateRequest) -> GenerateResponse:
	        """Send one prompt through GraceKelly and return the cleaned answer.

	        `req.system`, when truthy, is prepended to `req.prompt` with a blank
	        line in between, because the payload sent here has a single
	        `prompt` field. The `answer` field of the response has `_ANSI_RE`
	        matches removed and is then passed through `_unwrap_sql_json`.

	        Args:
	            req: The generation request; `prompt` and `system` are read.

	        Returns:
	            A `GenerateResponse` whose token counts are word-count
	            approximations and whose latency covers the HTTP round trip.

	        Raises:
	            ProviderError: On an HTTP error status, when the server cannot
	                be reached or times out, or when the response body is not
	                a JSON object.
	        """
	        prompt = req.prompt
	        if req.system:
	            prompt = f"{req.system}\n\n{prompt}"
	        payload = json.dumps(
	            {
	                "prompt": prompt,
	                "model": self.model,
	                "reliability_level": "quick",
	                "multi_model": False,
	                "dry_run": False,
	            }
	        ).encode("utf-8")

	        http_request = urlrequest.Request(
	            f"{self._base_url}/api/v1/pipeline",
	            data=payload,
	            headers={
	                "Content-Type": "application/json",
	                "Accept": "application/json",
	            },
	            method="POST",
	        )
	        t0 = time.perf_counter()
	        try:
	            with urlrequest.urlopen(http_request, timeout=self._timeout) as response:
	                body = response.read().decode("utf-8")
	        except urlerror.HTTPError as exc:
	            # HTTPError must be handled before URLError (its base class) so
	            # the status code and a body excerpt reach the error message.
	            raise ProviderError(
	                f"GraceKelly /api/v1/pipeline returned {exc.code}: "
	                f"{exc.read().decode('utf-8', errors='replace')[:400]}"
	            ) from exc
	        except (urlerror.URLError, TimeoutError) as exc:
	            raise ProviderError(
	                f"GraceKelly unreachable at {self._base_url}: {exc!r}. "
	                "Start it with `python -m uvicorn gracekelly.main:create_app "
	                "--factory --host 127.0.0.1 --port 8011` "
	                "and set GRACEKELLY_EXECUTION_PROFILE=hybrid."
	            ) from exc

	        # Latency is taken once the full body has been read, before parsing.
	        elapsed_ms = (time.perf_counter() - t0) * 1000.0
	        parsed = self._parse_pipeline_body(body)
	        # A missing or null `answer` becomes the empty string.
	        answer = _ANSI_RE.sub("", str(parsed.get("answer") or ""))
	        answer = _unwrap_sql_json(answer)
	        # Perplexity browser path does not surface token counts. Use word
	        # count as a coarse proxy so the eval reports show something
	        # plausible without misrepresenting actual billing units.
	        approx_in = max(1, len(prompt.split()))
	        approx_out = max(1, len(answer.split()))
	        return GenerateResponse(
	            text=answer,
	            model=str(parsed.get("model_id") or self.model),
	            input_tokens=approx_in,
	            output_tokens=approx_out,
	            latency_ms=elapsed_ms,
	        )