"""Perplexity browser provider — routes generation through a local GraceKelly
instance which drives the Perplexity Pro web UI via Playwright.

Why this exists:
- Project budget is $0. Direct Anthropic / OpenAI APIs are paid.
- A live Perplexity Pro subscription exposes frontier models (Claude Sonnet
  4.6 + reasoning, GPT-5.4, Gemini Pro, Kimi K2.5) at no incremental cost
  per request.
- GraceKelly (D:\\GraceKelly) is an existing local FastAPI service that
  already handles Playwright session management, model selection, the
  "thinking" toggle, and prompt submit/parse against the Perplexity UI.

We just hit `POST /api/v1/pipeline` with `{prompt, model}` and read the
plain-text `answer` back. Latency is ~20-40s per call (browser path), so
this provider is intended for evaluation runs and one-off probes, not for
the interactive Streamlit chat surface.
"""

from __future__ import annotations

import json
import re
import time
from urllib import error as urlerror
from urllib import request as urlrequest

from nl_sql.llm.providers.base import (
    GenerateRequest,
    GenerateResponse,
    ProviderError,
)

# ANSI escape sequences (terminal colour / formatting codes). Perplexity's
# web UI sometimes renders model output through a Markdown pipeline that
# leaves these codes in the copy-back text — e.g. `[4m`/`[0m` (underline
# on/off) around tool argument quotes. They break downstream JSON parsing.
#
# The pattern has two alternatives:
#   1. a sequence introduced by the ESC byte: `\x1b[`, optional digits and
#      semicolons, then one ASCII letter;
#   2. a bare `[<digits/semicolons>m` with no ESC byte in front — presumably
#      the residue left when the ESC byte was dropped upstream.
# NOTE(review): alternative 2 also removes any legitimate `[<digits>m` text
# from an answer — confirm that trade-off is acceptable.
_ANSI_RE = re.compile(r"\x1b\[[0-9;]*[A-Za-z]|\[[0-9;]+m")

# Sonnet routed through Perplexity sometimes returns its answer as the
# rendered JSON of the `generate_sql` output contract — a literal
# `{"sql": "...", "rationale": "..."}` string instead of a bare SQL
# statement. The downstream `_strip_to_sql` parser doesn't recognise this
# shape and falls back to grabbing everything after the first `SELECT`,
# trailing `","rationale":...` and all — which then 400s in sqlglot as
# invalid SQL. Pre-unwrap the JSON here so the parser sees clean SQL.
#
# This hint is a shape check only: optional leading whitespace, an opening
# brace, and a `"sql":` key anywhere later in the text (DOTALL lets `.` span
# newlines). It does not verify that the text is valid JSON.
_SQL_JSON_HINT = re.compile(r'^\s*\{.*"sql"\s*:', re.DOTALL)

# Tolerant regex for the `"sql": "..."` key/value pair; group 1 is the
# still-escaped string value. Used by `_unwrap_sql_json` as a fallback when
# `_SQL_JSON_HINT` confirms envelope shape but the envelope cannot be parsed
# as JSON — typically because the Perplexity Markdown layer leaves literal
# newlines inside the SQL string (legal SQL, illegal JSON).
#
# The `\\.|[^"\\]` alternation accepts either a backslash followed by one
# character (so `\"` and `\\` stay inside the value) or any single
# character that is neither a quote nor a backslash (newlines included,
# since they aren't in the negated class). Because `.` does not match a
# newline here, a backslash directly followed by a newline is not accepted.
# Gating on envelope-shaped responses (via the hint above) prevents this
# from false-positiving on bare SQL that happens to contain `"sql":` as a
# literal substring.
_SQL_KV_RE = re.compile(r'"sql"\s*:\s*"((?:\\.|[^"\\])*)"')

# Decoder for the JSON string escapes that actually show up in Perplexity
# output. It is one regex substitution rather than a chain of `.replace()`
# calls, because chained replacement decodes some input twice: the three
# characters backslash, backslash, `n` are JSON for "a literal backslash
# followed by n", yet collapsing the doubled backslash first leaves
# backslash + `n`, which a later newline replacement would then turn into
# a real newline. A substitution consumes every escape exactly once.
_JSON_ESCAPE_RE = re.compile(r"\\(.)")
_JSON_ESCAPE_TABLE = {
    '"': '"',
    "\\": "\\",
    "/": "/",
    "b": "\b",
    "f": "\f",
    "n": "\n",
    "r": "\r",
    "t": "\t",
}


def _decode_json_string_escapes(raw: str) -> str:
    """Translate JSON backslash escapes in `raw` in one left-to-right pass.

    Escapes listed in `_JSON_ESCAPE_TABLE` are replaced by the character
    they stand for. Anything else (for example the backslash-u Unicode
    form) is left exactly as written: the real Perplexity payloads don't
    use such escapes, and decoding them properly would mean pulling a real
    JSON parser back in, which defeats the point of this tolerant fallback.
    """

    def _translate(hit: re.Match[str]) -> str:
        escaped_char = hit.group(1)
        if escaped_char in _JSON_ESCAPE_TABLE:
            return _JSON_ESCAPE_TABLE[escaped_char]
        # Unknown escape: keep the backslash and its character untouched.
        return hit.group(0)

    return _JSON_ESCAPE_RE.sub(_translate, raw)


def _unwrap_sql_json(text: str) -> str:
    """Return the bare SQL when `text` is the JSON output-contract envelope.

    Text that does not look like an envelope (per `_SQL_JSON_HINT`) is
    returned unchanged. Otherwise the SQL is recovered in two stages:

    1. Decode the leading JSON object with a lenient decoder.
       `strict=False` accepts raw control characters — notably literal
       newlines — inside string values, and `raw_decode` stops at the end
       of the first JSON value, so anything after the object (including
       prose that itself contains braces) is ignored. Going through the
       real decoder also means every JSON escape, Unicode escapes
       included, is decoded correctly.
    2. If decoding fails, or the result has no usable `sql` string, pull
       the `"sql": "..."` pair out with `_SQL_KV_RE` and decode its
       escapes with `_decode_json_string_escapes`.

    Args:
        text: Raw answer text.

    Returns:
        The whitespace-stripped SQL string, or `text` unchanged when no
        non-empty SQL value can be recovered.
    """
    if not _SQL_JSON_HINT.match(text):
        return text

    try:
        # The hint guarantees the first non-whitespace character is `{`.
        # raw_decode does not skip leading whitespace, so start there.
        envelope, _end = json.JSONDecoder(strict=False).raw_decode(
            text, text.index("{")
        )
    except json.JSONDecodeError:
        envelope = None

    if isinstance(envelope, dict):
        sql = envelope.get("sql")
        if isinstance(sql, str) and sql.strip():
            return sql.strip()

    # Last resort for envelopes the lenient decoder still rejects (or whose
    # top-level object has no usable `sql`): extract the value by structure.
    # Safe because the envelope shape was confirmed by the hint above.
    match = _SQL_KV_RE.search(text)
    if match:
        sql = _decode_json_string_escapes(match.group(1)).strip()
        if sql:
            return sql
    return text


class PerplexityProvider:
    """LLMProvider that proxies generate() calls to a local GraceKelly server.

    GraceKelly drives the Perplexity web UI via Playwright with a logged-in
    Chrome profile, so the model behind the scenes is whichever the caller
    picked in Perplexity's model menu (default `claude-sonnet-4-6` here,
    which corresponds to Claude Sonnet 4.6 with reasoning enabled).

    HTTP error statuses, OS-level transport failures, and response bodies
    that are not a UTF-8 JSON object are all reported as `ProviderError`.
    """

    name: str = "perplexity"

    def __init__(
        self,
        *,
        model: str = "claude-sonnet-4-6",
        base_url: str = "http://127.0.0.1:8011",
        timeout_seconds: float = 180.0,
    ) -> None:
        """Configure the provider.

        Args:
            model: Model identifier sent to GraceKelly in the request body.
            base_url: Root URL of the GraceKelly server; trailing slashes
                are removed so the endpoint path can be appended directly.
            timeout_seconds: Timeout passed to `urlopen` for each request.

        Raises:
            ProviderError: If `model` is empty or only whitespace.
        """
        if not model.strip():
            raise ProviderError("PerplexityProvider requires non-empty model")
        self.model = model
        self._base_url = base_url.rstrip("/")
        self._timeout = timeout_seconds

    def generate(self, req: GenerateRequest) -> GenerateResponse:
        """Send one prompt to GraceKelly and return the cleaned answer.

        The system text (when present) is prepended to the prompt, separated
        by a blank line, because the request body carries a single `prompt`
        field. The returned answer has ANSI codes removed and, if it is the
        JSON output-contract envelope, is unwrapped to the bare SQL.

        Args:
            req: Request whose `prompt` and optional `system` text are sent.

        Returns:
            A `GenerateResponse` with the cleaned answer, the model id
            reported by the server (falling back to the configured model),
            word-count token approximations, and the HTTP round-trip
            latency in milliseconds.

        Raises:
            ProviderError: On an HTTP error status, a transport failure, or
                a response body that is not a UTF-8 encoded JSON object.
        """
        prompt = req.prompt
        if req.system:
            prompt = f"{req.system}\n\n{prompt}"
        payload = json.dumps(
            {
                "prompt": prompt,
                "model": self.model,
                "reliability_level": "quick",
                "multi_model": False,
                "dry_run": False,
            }
        ).encode("utf-8")

        http_request = urlrequest.Request(
            f"{self._base_url}/api/v1/pipeline",
            data=payload,
            headers={
                "Content-Type": "application/json",
                "Accept": "application/json",
            },
            method="POST",
        )
        t0 = time.perf_counter()
        try:
            with urlrequest.urlopen(http_request, timeout=self._timeout) as response:
                raw_body = response.read()
        except urlerror.HTTPError as exc:
            # Must precede the broader handler: HTTPError is a URLError.
            raise ProviderError(
                f"GraceKelly /api/v1/pipeline returned {exc.code}: "
                f"{exc.read().decode('utf-8', errors='replace')[:400]}"
            ) from exc
        except OSError as exc:
            # URLError and TimeoutError are both OSError subclasses. Catching
            # OSError additionally covers failures raised while reading the
            # body (connection resets, and socket timeouts on interpreters
            # where socket.timeout is not yet an alias of TimeoutError).
            raise ProviderError(
                f"GraceKelly unreachable at {self._base_url}: {exc!r}. "
                "Start it with `python -m uvicorn gracekelly.main:create_app "
                "--factory --host 127.0.0.1 --port 8011` "
                "and set GRACEKELLY_EXECUTION_PROFILE=hybrid."
            ) from exc

        elapsed_ms = (time.perf_counter() - t0) * 1000.0

        try:
            # UnicodeDecodeError and json.JSONDecodeError are both ValueError.
            parsed = json.loads(raw_body.decode("utf-8"))
        except ValueError as exc:
            raise ProviderError(
                "GraceKelly /api/v1/pipeline returned a body that is not "
                f"valid UTF-8 JSON: {raw_body[:400]!r}"
            ) from exc
        if not isinstance(parsed, dict):
            raise ProviderError(
                "GraceKelly /api/v1/pipeline returned JSON of type "
                f"{type(parsed).__name__}, expected an object"
            )

        answer = _ANSI_RE.sub("", str(parsed.get("answer") or ""))
        answer = _unwrap_sql_json(answer)
        # Perplexity browser path does not surface token counts. Use word
        # count as a coarse proxy so the eval reports show something
        # plausible without misrepresenting actual billing units.
        approx_in = max(1, len(prompt.split()))
        approx_out = max(1, len(answer.split()))
        return GenerateResponse(
            text=answer,
            model=str(parsed.get("model_id") or self.model),
            input_tokens=approx_in,
            output_tokens=approx_out,
            latency_ms=elapsed_ms,
        )