"""HTML builder — assembles the full ISP Handbook HTML document.

Uses Jinja2 templates for HTML generation. Data preparation logic is
preserved from the original string-concatenation approach. The output
is a self-contained HTML suitable for Playwright Chromium PDF export.
"""

from __future__ import annotations

import base64
import logging
import mimetypes
import os
import re
from pathlib import Path
from typing import Any

from jinja2 import Environment, FileSystemLoader, select_autoescape
from markupsafe import Markup

from app.core.config import get_settings
from app.core.fonts import font_face_css, select_font_family
from app.services.normalizer import normalize_section, normalize_university
from app.services.renderers import (
    fetch_image_data_uri,
    render_global_blocks,
    sort_toc,
    _extract_university_funding,
)
from app.services.utils import (
    format_money_figures,
    get_any,
    h,
    handbook_anchor,
    hb_slug,
    is_truthy,
    sort_sections_stable,
)

logger = logging.getLogger(__name__)

# Jinja2 environment — templates live alongside the app package
_TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates"


def _get_jinja_env() -> Environment:
    """Build the Jinja2 environment used to render the handbook templates.

    Templates are loaded from ``_TEMPLATES_DIR``. Autoescaping is selected
    for ``html`` templates, and both ``trim_blocks`` and ``lstrip_blocks``
    are switched on.
    """
    template_loader = FileSystemLoader(str(_TEMPLATES_DIR))
    escape_policy = select_autoescape(["html"])
    return Environment(
        loader=template_loader,
        autoescape=escape_policy,
        trim_blocks=True,
        lstrip_blocks=True,
    )


def _static_base_url() -> str:
    """Return absolute file:// URL to the static directory."""
    static_dir = Path(__file__).resolve().parent.parent / "static"
    return static_dir.as_uri()


def _unused_pdf_override_css(font_stack: str) -> str:
    """Legacy inline PDF override CSS — kept for reference only.
    All styling now lives in static/css/print.css for Chromium rendering.
    """
    return ""


# Maps a lower-cased global ``section_key`` to the CSS class used for that
# section. Keys that are absent here get a class derived from the key itself
# by the caller.
SECTION_CLASS_MAP = dict(
    (
        ("overview", "sec-overview"),
        ("how_the_program_works", "sec-how"),
        ("qualification_requirements", "sec-qualification"),
        ("enrolment_steps", "sec-steps"),
        ("withdrawal_refund_policy", "sec-policy"),
        ("refund_guidelines", "sec-refund"),
        ("program_contributions", "sec-contributions"),
        ("program_features_breakdown", "sec-breakdown"),
        ("funding_options_available", "sec-funding"),
        ("summary_of_universities", "sec-summary"),
        ("summary_of_universities_cosigner", "sec-summary-cosigner"),
    )
)

# Lower-cased global section keys that are tested for membership when a
# section's ``page_break`` flag is computed.
PAGE_BREAK_KEYS = set(
    (
        "overview",
        "how_the_program_works",
        "qualification_requirements",
        "enrolment_steps",
        "withdrawal_refund_policy",
        "refund_guidelines",
        "program_contributions",
        "program_features_breakdown",
        "funding_options_available",
        "summary_of_universities",
        "summary_of_universities_cosigner",
    )
)


def _collect_program_option_inconsistencies(value: Any, path: str, hits: list[str]) -> None:
    """Collect paths where only REGULAR or PRIME appears."""
    if isinstance(value, dict):
        for k, v in value.items():
            _collect_program_option_inconsistencies(v, f"{path}.{k}" if path else str(k), hits)
        return
    if isinstance(value, list):
        for i, v in enumerate(value):
            _collect_program_option_inconsistencies(v, f"{path}[{i}]", hits)
        return
    if value is None:
        return

    text = str(value)
    has_regular = bool(re.search(r"\bREGULAR\b", text, flags=re.IGNORECASE))
    has_prime = bool(re.search(r"\bPRIME\b", text, flags=re.IGNORECASE))
    if has_regular ^ has_prime:
        hits.append(path)


def _prepare_university_data(
    uni_raw: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool,
    debug: bool,
    stats: dict[str, Any],
) -> dict[str, Any]:
    """Prepare a single university's template data.

    Extracts overview, campus image, benefits, programs, and extra sections
    from the raw sections list. This moves the logic that was in
    render_university_section into a data-preparation step so that the
    Jinja2 template handles the HTML.
    """
    uni_name = uni_raw["name"]
    sections = uni_raw.get("sections", [])
    is_first = uni_raw.get("_is_first", False)

    stats["universities"] = stats.get("universities", 0) + 1

    # Build section map; merge duplicate "programs"
    sec_map: dict[str, dict] = {}
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k:
            continue
        if k == "programs" and k in sec_map:
            existing = sec_map["programs"].get("section_json", {})
            incoming = s.get("section_json", {})
            if not isinstance(existing, dict):
                existing = {}
            if not isinstance(incoming, dict):
                incoming = {}
            a = existing.get("programs", [])
            b = incoming.get("programs", [])
            if not isinstance(a, list):
                a = []
            if not isinstance(b, list):
                b = []
            existing["programs"] = a + b
            sec_map["programs"]["section_json"] = existing
            continue
        sec_map[k] = s

    # Campus image
    # Disable university campus-image embedding in the generation path.
    # Large per-school images were the main source of handbook timeouts in Space.
    img_section = sec_map.get("campus_image") or sec_map.get("image")
    campus_image = ""
    campus_caption = ""
    if img_section:
        j = img_section.get("section_json", {})
        if isinstance(j, dict):
            campus_caption = str(j.get("caption", "")).strip()
            stats["images_placeholder"] = stats.get("images_placeholder", 0) + 1

    # Overview and website
    resolved_website = (uni_raw.get("website") or "").strip()
    overview_data = None

    if "overview" in sec_map:
        overview_json = sec_map["overview"].get("section_json", {})
        if not isinstance(overview_json, dict):
            overview_json = {}

        site_from_overview = get_any(
            overview_json,
            ["university_website", "university_website_url", "website", "site", "url", "homepage", "web_url"],
        )
        if not resolved_website and site_from_overview:
            resolved_website = site_from_overview

        overview_data = {
            "founded": get_any(overview_json, ["founded", "Founded"]),
            "total_students": get_any(overview_json, ["total_students", "Total Students"]),
            "undergraduates": get_any(overview_json, ["undergraduates", "Undergraduate Students", "undergraduate_students"]),
            "postgraduates": get_any(overview_json, ["postgraduate_students", "Postgraduate Students"]),
            "acceptance_rate": get_any(overview_json, ["acceptance_rate", "Acceptance Rate"]),
            "location": get_any(overview_json, ["location", "Location"]),
            "tuition": format_money_figures(str(get_any(overview_json, [
                "tuition_out_of_state_yearly",
                "Yearly Out of State Tuition Fees",
                "Yearly Out-of-State Tuition Fees",
                "Yearly Tuition Fees",
                "Yearly Out-of-State Tuition Fees:",
            ]) or "")) or None,
        }

    if resolved_website:
        stats["university_links"] = stats.get("university_links", 0) + 1
        stats["website_rows"] = stats.get("website_rows", 0) + 1

    # Benefits
        # Benefits + Funding
    benefits = []
    funding_heading = "Funding Available"
    funding_items: list[str] = []

    if "benefits" in sec_map:
        j = sec_map["benefits"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}

        raw_benefits = j.get("benefits", [])
        if isinstance(raw_benefits, list):
            benefits = [str(b).strip() for b in raw_benefits if str(b).strip()]
        else:
            benefits = []

        funding_heading, funding_items = _extract_university_funding(
            j,
            {
                "school_category": uni_raw.get("school_category"),
                "status": "in" if is_truthy(uni_raw.get("is_active", True)) else "out",
            },
        )

    # Programs
    programs = None
    if "programs" in sec_map:
        j = sec_map["programs"].get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        programs_raw = j.get("programs", [])
        if not isinstance(programs_raw, list):
            programs_raw = []

        if not include_inactive_programs:
            programs_raw = [
                p for p in programs_raw
                if isinstance(p, dict) and is_truthy(
                    p.get("program_active", p.get("is_active", p.get("active", 1)))
                )
            ]

        programs = []
        seen_names = set()
        for p in programs_raw:
            if not isinstance(p, dict):
                continue
            program_name = str(p.get("program_name", "")).strip()
            # Deduplicate by lowercase program name
            key = program_name.lower()
            if key in seen_names:
                continue
            seen_names.add(key)
            link = str(p.get("program_link", "")).strip()
            if not link and isinstance(p.get("program_links"), dict):
                link = str(p["program_links"].get("web_link", "")).strip()

            programs.append({
                "name": program_name,
                "link": link,
                "designation": str(p.get("designation", "")),
                "entrance": str(p.get("entrance_exam", p.get("entrance_examination", ""))),
            })

    # Extra sections
    skip_keys = {"campus_image", "image", "overview", "benefits", "programs"}
    extra_sections = []
    for s in sections:
        if not isinstance(s, dict):
            continue
        k = str(s.get("section_key", ""))
        if not k or k in skip_keys:
            continue
        title = str(s.get("section_title", ""))
        j = s.get("section_json", {})
        if not isinstance(j, dict):
            j = {}
        rendered = render_global_blocks(k, title, j, debug)
        extra_sections.append({"rendered_html": Markup(rendered)})

    classes = ["uni"]
    if not is_first:
        classes.append("page-break")

    return {
        "name": uni_name,
        "anchor": uni_raw.get("anchor"),
        "sort_order": uni_raw.get("sort_order"),
        "website": resolved_website,
        "classes": classes,
        "overview": overview_data,
        "campus_image": campus_image,
        "campus_caption": campus_caption,
               "benefits": benefits,
        "funding_heading": funding_heading,
        "funding_items": funding_items,
        "programs": programs,
        "extra_sections": extra_sections,
    }


# Explicit university display order; universities not listed here are placed
# after these, sorted by tier, then name, then id.
_UNIVERSITY_ORDER = (
    "Indiana University of Pennsylvania",
    "Missouri State University",
    "University of Louisville",
    "University of Delaware",
    "Grand Valley State University",
    "Quinnipiac University",
    "William Jessup University",
    "Wilkes University",
    "University of South Dakota",
    "California Baptist University",
    "Illinois State University",
    "Virginia Commonwealth University",
    "Rutgers University-Camden",
    "University of Oklahoma",
    "Saint Louis University",
    "University of Alabama at Birmingham",
    "Oregon State University",
    "Rochester Institute of Technology",
    "Lewis University",
    "Texas State University",
    "Drew University",
    "University of Missouri- Saint Louis",
    "Montana State University",
    "Oklahoma City University",
    "University of Dayton",
    "Webster University",
    "Rockhurst University",
)
_UNIVERSITY_ORDER_INDEX = {
    name.lower().strip(): idx for idx, name in enumerate(_UNIVERSITY_ORDER)
}


def _text_or_blank(value: Any) -> str:
    """Return ``str(value)``, mapping ``None`` to ``""`` instead of ``"None"``."""
    return "" if value is None else str(value)


def _coerce_sort_order(value: Any) -> "int | None":
    """Return ``value`` as an int when it looks like an integer, else ``None``.

    Accepts the same inputs as the previous inline check (optional leading
    ``-`` followed by digits) but never raises on look-alikes such as
    ``"--5"`` or non-ASCII digit characters.
    """
    if value is None:
        return None
    if not str(value).lstrip("-").isdigit():
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def _local_file_uri(path: Any) -> str:
    """Return a ``file://`` URI for ``path`` if it is an existing file, else ``""``."""
    if not path:
        return ""
    path_text = str(path)
    if not os.path.isfile(path_text):
        return ""
    # ``as_uri`` rejects relative paths, so anchor the path first.
    return Path(path_text).absolute().as_uri()


def _local_file_data_uri(path: Any) -> str:
    """Return ``path`` inlined as a base64 ``data:`` URI, or ``""`` if not a file."""
    if not path:
        return ""
    path_text = str(path)
    if not os.path.isfile(path_text):
        return ""
    mime = mimetypes.guess_type(path_text)[0] or "image/jpeg"
    with open(path_text, "rb") as fh:
        encoded = base64.b64encode(fh.read()).decode("ascii")
    return f"data:{mime};base64,{encoded}"


def _university_sort_key(uni: dict[str, Any]) -> tuple:
    """Sort key: explicitly ordered universities first, then tier/name/id."""
    name_lower = (uni.get("name") or "").lower().strip()
    explicit = _UNIVERSITY_ORDER_INDEX.get(name_lower)
    if explicit is not None:
        return (0, explicit, 0)
    tier = uni.get("tier")
    rank = tier if isinstance(tier, int) else 99
    return (1, rank, name_lower, uni.get("id", 0))


def build_handbook_html(
    globals_data: list[dict[str, Any]],
    by_uni: dict[int, dict[str, Any]],
    images: dict[str, Any],
    allow_remote: bool,
    include_inactive_programs: bool = False,
    debug: bool = False,
) -> str:
    """Build the full handbook HTML document using Jinja2 templates.

    Args:
        globals_data: Global section dicts; ``section_key``,
            ``section_title``, ``section_json`` and ``sort_order`` are read.
        by_uni: University dicts keyed by id. Entries that are not dicts or
            whose ``is_active`` is not truthy are skipped.
        images: Local file paths under ``coverImage``, ``tocImage``,
            ``headerImage``, ``labelImage`` and a list under ``bottomPages``.
            Paths that are not existing files are ignored.
        allow_remote: Forwarded to the per-university preparation functions.
        include_inactive_programs: Forwarded to the per-university
            preparation functions.
        debug: Forwarded to the section renderers and to the template.

    Returns:
        The rendered ``handbook.html`` template as a string.

    Raises:
        RuntimeError: If any of the required global sections
            (``table_of_contents``, ``overview``, ``how_the_program_works``)
            is missing.
    """
    env = _get_jinja_env()
    template = env.get_template("handbook.html")

    font_meta = select_font_family()
    font_css = font_face_css(font_meta)

    # Base URL for static assets (CSS, images, etc.)
    base_url = _static_base_url()

    stats: dict[str, Any] = {
        "universities": 0,
        "images_embedded": 0,
        "images_placeholder": 0,
        "program_links_total": 0,
        "program_missing_links_total": 0,
        "missing_program_links": {},
        "university_links": 0,
        "website_rows": 0,
        "program_option_warnings": [],
    }

    # ── Cover / TOC images are referenced by file URI ──
    cover_image = _local_file_uri(images.get("coverImage", ""))
    toc_image = _local_file_uri(images.get("tocImage", ""))

    # ── Header / label images repeat on every page, so they are inlined ──
    header_image = _local_file_data_uri(images.get("headerImage", ""))
    label_image = _local_file_data_uri(images.get("labelImage", ""))
    if not label_image:
        # Fallback to remote URL when local file is unavailable.
        # NOTE(review): this fallback is used regardless of ``allow_remote``
        # and points at what looks like a dev/testing host — confirm intent.
        label_image = "https://finsapdev.qhtestingserver.com/MODEL_APIS/handbook/images/label.jpeg"

    # ── Prepare active universities ──
    active_universities: list[dict[str, Any]] = []
    for uid, uni in by_uni.items():
        if not isinstance(uni, dict):
            continue
        if not is_truthy(uni.get("is_active", True)):
            continue
        raw_name = uni.get("university_name")
        name = f"University #{uid}" if raw_name is None else str(raw_name)
        raw_sections = uni.get("sections")
        active_universities.append({
            "id": int(uid),
            "anchor": handbook_anchor("uni", name, int(uid)),
            "name": name,
            "sections": raw_sections if isinstance(raw_sections, list) else [],
            # Null values must become "" — str(None) would leak a literal
            # "None" into the website row and the tier heading.
            "website": _text_or_blank(uni.get("website")),
            "sort_order": _coerce_sort_order(uni.get("sort_order")),
            "school_category": _text_or_blank(uni.get("school_category")).strip(),
            "tier": uni.get("tier"),
            "tier_label": _text_or_blank(uni.get("tier_label")).strip(),
        })
    active_universities.sort(key=_university_sort_key)

    # ── Normalise globals ──
    globals_data = sort_sections_stable(globals_data)

    required_keys = [
        "table_of_contents",
        "overview",
        "how_the_program_works",
    ]
    existing_keys = {str(g.get("section_key", "")).lower() for g in globals_data if isinstance(g, dict)}
    missing = [k for k in required_keys if k not in existing_keys]
    if missing:
        msg = f"Handbook required sections missing: {','.join(missing)}"
        logger.error(msg)
        raise RuntimeError(msg)

    general_sections: list[dict[str, Any]] = []
    toc_sort_order = None
    toc_title = "Table of Contents"

    for idx, g in enumerate(globals_data):
        if not isinstance(g, dict):
            continue
        key_raw = str(g.get("section_key", ""))
        key = key_raw.lower()
        sort_order = _coerce_sort_order(g.get("sort_order"))

        # The first TOC section only supplies the title/position; it is not
        # rendered as a general section.
        if key == "table_of_contents" and toc_sort_order is None:
            toc_sort_order = sort_order if sort_order is not None else (idx + 1)
            toc_title = str(g.get("section_title", "Table of Contents"))
            continue

        section_hits: list[str] = []
        _collect_program_option_inconsistencies(
            g.get("section_json", {}),
            f"global.{key_raw}",
            section_hits,
        )
        for hit in section_hits:
            if hit not in stats["program_option_warnings"]:
                stats["program_option_warnings"].append(hit)

        anchor = handbook_anchor("g", str(g.get("section_title", g.get("section_key", "section"))), idx)
        general_sections.append({
            "anchor": anchor,
            "data": g,
            "sort_order": sort_order,
        })

    # ── Build TOC items ──
    toc_items: list[dict[str, Any]] = []
    for gs in general_sections:
        # Prefer the JSON-level title (display-ready) over the DB section_title
        gs_json = gs["data"].get("section_json", {})
        json_title = gs_json.get("title") if isinstance(gs_json, dict) else None
        if isinstance(json_title, str) and json_title.strip():
            title = json_title.strip()
        else:
            title = str(gs["data"].get("section_title", gs["data"].get("section_key", "Section")))
        toc_items.append({
            "title": title,
            "target": "#" + gs["anchor"],
            "level": 0,
            "bold": True,
            "sort": gs["sort_order"],
        })

    for u in active_universities:
        toc_items.append({
            "title": u["name"],
            "target": "#" + u["anchor"],
            "level": 1,
            "bold": False,
            "sort": u.get("sort_order"),
        })

    # ── Prepare sorted TOC items for template ──
    sorted_toc = sort_toc(list(toc_items))
    toc_items_sorted = []
    for e in sorted_toc:
        if not isinstance(e, dict):
            continue
        title = str(e.get("title", "")).strip()
        if not title:
            continue
        level = max(0, min(3, int(e.get("level", 0))))
        bold = bool(e.get("bold", False))
        upper = bool(e.get("upper", False))
        if level == 0:
            # Top-level entries are always bold and upper-cased.
            bold = True
            upper = True
        display_title = title.upper() if upper else title
        page = str(e.get("page", "")).strip()

        toc_items_sorted.append({
            "title": title,
            "display_title": display_title,
            "target": str(e.get("target", e.get("anchor", ""))).strip(),
            "level": level,
            "bold": bold,
            "upper": upper,
            "page": page,
        })

    # ── Prepare general sections with rendered HTML and typed blocks ──
    template_sections = []
    for gs in general_sections:
        data = gs["data"]
        key_lower = str(data.get("section_key", "")).lower()

        sec_class = SECTION_CLASS_MAP.get(key_lower)
        if sec_class is None:
            sec_class = "sec-" + re.sub(r"[^a-z0-9]+", "-", key_lower)

        section_json = data.get("section_json", {})
        if not isinstance(section_json, dict):
            section_json = {}

        # Typed blocks for the new rendering path
        blocks = normalize_section(
            str(data.get("section_key", "")),
            str(data.get("section_title", "")),
            section_json,
            debug=debug,
        )

        # Legacy HTML fallback
        section_html = render_global_blocks(
            str(data.get("section_key", "")),
            str(data.get("section_title", "")),
            section_json,
            debug,
        )

        if not section_html.strip() and not blocks:
            logger.warning(
                "Empty section render key=%s sort_order=%s",
                data.get("section_key"),
                data.get("sort_order"),
            )

        template_sections.append({
            "anchor": gs["anchor"],
            "data": data,
            "page_break": key_lower in PAGE_BREAK_KEYS,
            "sec_class": sec_class,
            "blocks": blocks,
            "rendered_html": Markup(section_html),
        })

    # ── Prepare university data for templates (both old + new paths) ──
    university_template_data = []
    university_block_data = []
    # Tier labels already emitted, so each label starts a group only once.
    seen_tier_labels: set[str] = set()

    for idx, uni_raw in enumerate(active_universities):
        uni_raw["_is_first"] = (idx == 0)

        # Mark the first university carrying a not-yet-seen tier label as the
        # start of that tier group.
        current_tier_label = str(uni_raw.get("tier_label", "")).strip()
        if current_tier_label and current_tier_label not in seen_tier_labels:
            seen_tier_labels.add(current_tier_label)
            uni_raw["_tier_group_start"] = True
            uni_raw["_tier_group_label"] = f"{current_tier_label} Schools"

        uni_hits: list[str] = []
        _collect_program_option_inconsistencies(
            uni_raw.get("sections", []),
            f"university.{uni_raw.get('name', idx)}",
            uni_hits,
        )
        for hit in uni_hits:
            if hit not in stats["program_option_warnings"]:
                stats["program_option_warnings"].append(hit)

        # Legacy path
        uni_data = _prepare_university_data(
            uni_raw, allow_remote, include_inactive_programs, debug, stats,
        )
        # Carry tier metadata to template data
        uni_data["tier"] = uni_raw.get("tier")
        uni_data["tier_label"] = uni_raw.get("tier_label", "")
        uni_data["tier_group_start"] = uni_raw.get("_tier_group_start", False)
        uni_data["tier_group_label"] = uni_raw.get("_tier_group_label", "")
        university_template_data.append(uni_data)

        # New block path
        uni_block = normalize_university(
            uni_raw, allow_remote, include_inactive_programs, debug, stats,
        )
        university_block_data.append(uni_block)

    # ── Bottom pages ──
    bottom_pages_urls = []
    raw_bottom = images.get("bottomPages", [])
    if isinstance(raw_bottom, list):
        for img_path in raw_bottom:
            page_uri = _local_file_uri(img_path)
            if page_uri:
                bottom_pages_urls.append(page_uri)

    # ── Render template ──
    if stats["program_option_warnings"]:
        logger.warning(
            "Program option consistency warnings (missing REGULAR or PRIME pair): %s",
            stats["program_option_warnings"],
        )

    return template.render(
        font_css=Markup(font_css),
        base_url=base_url,
        extra_css="",
        header_image=header_image,
        label_image=label_image,
        cover_image=cover_image,
        toc_image=toc_image,
        toc_items=toc_items,
        toc_items_sorted=toc_items_sorted,
        toc_title=toc_title,
        toc_sort_order=toc_sort_order,
        general_sections=template_sections,
        summary_block=None,
        universities=university_template_data,
        university_blocks=university_block_data,
        bottom_pages=bottom_pages_urls,
        debug=debug,
        stats=stats,
    )