Verdiola committed on
Commit
73ec2a2
·
verified ·
1 Parent(s): 5fe40ea

add smaller model

Browse files
Files changed (1) hide show
  1. app/main.py +762 -24
app/main.py CHANGED
@@ -1,8 +1,9 @@
1
- import json, os, re, uuid, subprocess, sys, time, traceback, threading
2
  from io import BytesIO
3
  from collections import deque
4
  from pathlib import Path
5
- from typing import Optional, Tuple
 
6
 
7
  from fastapi import FastAPI, HTTPException, Response
8
  from fastapi.middleware.cors import CORSMiddleware
@@ -22,6 +23,7 @@ from google.genai import types
22
  API_KEY = os.getenv("GEMINI_API_KEY", "")
23
  # Switch to 2.5 Flash as requested
24
  MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
 
25
  PORT = int(os.getenv("PORT", "7860"))
26
 
27
  client = genai.Client(api_key=API_KEY) if API_KEY else None
@@ -79,6 +81,7 @@ class RateLimiter:
79
  self.acquire()
80
 
81
  limiter = RateLimiter(10)
 
82
 
83
  def gemini_call(*, system: str, contents):
84
  """Wrapper to: enforce RPM and standardize text extraction."""
@@ -92,6 +95,20 @@ def gemini_call(*, system: str, contents):
92
  )
93
  return getattr(resp, "text", str(resp))
94
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # ---------------- prompts ----------------
96
  SYSTEM_PROMPT = """You are a Manim CE (0.19.x) code generator/refiner.
97
  Return ONLY valid Python code (no backticks, no prose).
@@ -124,6 +141,161 @@ class AutoScene(Scene):
124
  self.wait(0.75)
125
  """
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  # ---------- NEW: carry full CLI error back to the refiner ----------
128
  class RenderError(Exception):
129
  def __init__(self, log: str):
@@ -169,7 +341,295 @@ def _preflight_sanitize(code: str) -> str:
169
  c = re.sub(r",\s*\)", ")", c)
170
  return c
171
 
172
- def _run_manim(scene_code: str, run_id: Optional[str] = None) -> Tuple[bytes, Optional[Path]]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
  """Render MP4 (fast) and also save a steady-state PNG (last frame)."""
174
  run_id = run_id or str(uuid.uuid4())[:8]
175
  work = RUNS / run_id; work.mkdir(parents=True, exist_ok=True)
@@ -183,9 +643,11 @@ def _run_manim(scene_code: str, run_id: Optional[str] = None) -> Tuple[bytes, Op
183
  env = os.environ.copy()
184
  env["PYTHONPATH"] = str(work)
185
 
 
 
186
  # 1) Render video
187
  cmd_video = [
188
- "manim", "-ql", "--disable_caching",
189
  "--media_dir", str(media),
190
  "-o", f"{run_id}.mp4",
191
  str(scene_path), "AutoScene",
@@ -215,7 +677,7 @@ def _run_manim(scene_code: str, run_id: Optional[str] = None) -> Tuple[bytes, Op
215
  # 2) Save last frame PNG (leverages our CAPTURE_POINT rule)
216
  png_path = None
217
  cmd_png = [
218
- "manim", "-ql", "--disable_caching", "-s", # -s saves the last frame as an image
219
  "--media_dir", str(media),
220
  str(scene_path), "AutoScene",
221
  ]
@@ -247,12 +709,30 @@ def _upload_image_to_gemini(png_path: Path):
247
  return file_ref
248
 
249
 
250
- def llm_generate_manim_code(prompt: str, previous_code: Optional[str] = None) -> str:
 
 
 
 
251
  """First-pass generation (capture-aware)."""
252
  if not client:
253
  return DEFAULT_SCENE
254
  try:
255
  contents = f"Create AutoScene for: {prompt}\nRemember the CAPTURE POLICY and Common API constraints."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
256
  response_text = gemini_call(system=SYSTEM_PROMPT, contents=contents)
257
  code = _clean_code(response_text)
258
  if "class AutoScene" not in code:
@@ -263,7 +743,12 @@ def llm_generate_manim_code(prompt: str, previous_code: Optional[str] = None) ->
263
  traceback.print_exc()
264
  return previous_code or DEFAULT_SCENE
265
 
266
- def llm_refine_from_error(previous_code: str, error_message: str, original_user_prompt: str) -> str:
 
 
 
 
 
267
  """When Manim fails; send the *real* CLI log/trace to Gemini."""
268
  if not client:
269
  return previous_code or DEFAULT_SCENE
@@ -286,8 +771,26 @@ Requirements:
286
  - Keep the CAPTURE POLICY and ensure # CAPTURE_POINT is at the final steady layout.
287
  - Scan for nonexistent methods (e.g., `.to_center`) or invalid kwargs (e.g., `vertex=` on RightAngle) and replace with valid Manim CE 0.19 API.
288
  - Prefer `.center()`/`.move_to(ORIGIN)`, and `.move_to()`, `.align_to()`, `.to_edge()`, `.next_to()` for layout.
 
 
 
289
  - Return ONLY the corrected Python code (no backticks).
290
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  response_text = gemini_call(system=SYSTEM_PROMPT, contents=user_prompt)
292
  code = _clean_code(response_text)
293
  if "class AutoScene" not in code:
@@ -298,7 +801,12 @@ Requirements:
298
  traceback.print_exc()
299
  return previous_code or DEFAULT_SCENE
300
 
301
- def llm_visual_refine_from_image(original_user_prompt: str, previous_code: str, png_path: Optional[Path]) -> str:
 
 
 
 
 
302
  """
303
  Use the screenshot to request layout/legibility/placement fixes.
304
  Includes the original prompt and current code, and asks for minimal edits.
@@ -324,8 +832,25 @@ Tasks (optimize for readability and visual quality without changing the math mea
324
  - Keep animation semantics as-is unless they're obviously broken.
325
  - Keep exactly one class AutoScene(Scene).
326
  - Preserve the CAPTURE POLICY and place `# CAPTURE_POINT` at the final steady layout with self.wait(0.75) and NO outro after that.
 
 
 
327
  Return ONLY the revised Python code (no backticks).
328
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
329
 
330
  response_text = gemini_call(system=SYSTEM_PROMPT, contents=[file_ref, visual_prompt])
331
  code = _clean_code(response_text)
@@ -337,18 +862,24 @@ Return ONLY the revised Python code (no backticks).
337
  traceback.print_exc()
338
  return previous_code
339
 
340
- def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine: bool = True) -> bytes:
 
 
 
 
 
341
  """
342
  Generate → render; on error, refine up to N times from Manim traceback → re-render.
343
  If first render succeeds and do_visual_refine==True, run an image-based refinement
344
  using the saved steady-state PNG, then re-render. Fallback to the best successful MP4.
345
  """
346
  # 1) initial generation (capture-aware)
347
- code = llm_generate_manim_code(user_prompt)
 
348
 
349
  # 2) render attempt
350
  try:
351
- mp4_bytes, png_path = _run_manim(code, run_id="iter0")
352
  except RenderError as e:
353
  print("Render failed (iter0), attempting error-based refinement...", file=sys.stderr)
354
  if max_error_refines <= 0:
@@ -357,9 +888,14 @@ def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine:
357
  last_err = e.log or ""
358
  while attempts < max_error_refines:
359
  attempts += 1
360
- refined = llm_refine_from_error(previous_code=code, error_message=last_err, original_user_prompt=user_prompt)
 
 
 
 
 
361
  try:
362
- mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}")
363
  code = refined
364
  break
365
  except RenderError as e2:
@@ -376,9 +912,14 @@ def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine:
376
  last_err = traceback.format_exc()
377
  while attempts < max_error_refines:
378
  attempts += 1
379
- refined = llm_refine_from_error(previous_code=code, error_message=last_err, original_user_prompt=user_prompt)
 
 
 
 
 
380
  try:
381
- mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}")
382
  code = refined
383
  break
384
  except Exception:
@@ -388,10 +929,15 @@ def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine:
388
 
389
  # 3) optional visual refinement loop
390
  if do_visual_refine and png_path and png_path.exists():
391
- refined2 = llm_visual_refine_from_image(original_user_prompt=user_prompt, previous_code=code, png_path=png_path)
 
 
 
 
 
392
  if refined2.strip() != code.strip():
393
  try:
394
- mp4_bytes2, _ = _run_manim(refined2, run_id="iter2")
395
  return mp4_bytes2
396
  except Exception:
397
  print("Visual refine render failed; returning best known render.", file=sys.stderr)
@@ -399,9 +945,153 @@ def refine_loop(user_prompt: str, max_error_refines: int = 3, do_visual_refine:
399
 
400
  return mp4_bytes
401
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  # ---------------- API ----------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
403
  class PromptIn(BaseModel):
404
  prompt: str
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
405
 
406
  class EmailIn(BaseModel):
407
  email: str
@@ -424,19 +1114,15 @@ def health():
424
  return {"ok": True, "model": MODEL, "has_gemini": bool(API_KEY)}
425
 
426
  @app.post("/generate-code")
427
- def generate_code(inp: PromptIn):
428
  """Return ONLY the generated Manim Python code (no rendering)."""
429
- if not inp.prompt or not inp.prompt.strip():
430
- raise HTTPException(400, "Missing prompt")
431
- code = llm_generate_manim_code(inp.prompt.strip())
432
  return {"code": code}
433
 
434
  @app.post("/generate-and-render")
435
  def generate_and_render(inp: PromptIn):
436
- if not inp.prompt or not inp.prompt.strip():
437
- raise HTTPException(400, "Missing prompt")
438
  try:
439
- mp4 = refine_loop(inp.prompt.strip(), max_error_refines=3, do_visual_refine=True)
440
  except Exception:
441
  raise HTTPException(500, "Failed to produce video after refinement")
442
  return Response(
@@ -445,6 +1131,58 @@ def generate_and_render(inp: PromptIn):
445
  headers={"Content-Disposition": 'inline; filename="result.mp4"'}
446
  )
447
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
448
  @app.post("/store-email")
449
  def store_email(email: EmailIn):
450
  """Store the provided email address in the configured Hugging Face dataset."""
 
1
+ import json, os, re, uuid, subprocess, sys, time, traceback, threading, base64
2
  from io import BytesIO
3
  from collections import deque
4
  from pathlib import Path
5
+ from typing import Optional, Tuple, List, Dict, Any
6
+ from dataclasses import dataclass, field
7
 
8
  from fastapi import FastAPI, HTTPException, Response
9
  from fastapi.middleware.cors import CORSMiddleware
 
23
  API_KEY = os.getenv("GEMINI_API_KEY", "")
24
  # Switch to 2.5 Flash as requested
25
  MODEL = os.getenv("GEMINI_MODEL", "gemini-2.5-flash")
26
+ SMALL_MODEL = os.getenv("GEMINI_SMALL_MODEL") or MODEL
27
  PORT = int(os.getenv("PORT", "7860"))
28
 
29
  client = genai.Client(api_key=API_KEY) if API_KEY else None
 
81
  self.acquire()
82
 
83
  limiter = RateLimiter(10)
84
+ storyboard_limiter = RateLimiter(30)
85
 
86
  def gemini_call(*, system: str, contents):
87
  """Wrapper to: enforce RPM and standardize text extraction."""
 
95
  )
96
  return getattr(resp, "text", str(resp))
97
 
98
+
99
def gemini_small_call(*, system: str, contents: str) -> str:
    """Call the lighter Gemini model used by the storyboard assistant.

    Enforces the storyboard rate limit before each request. Returns the
    response's ``text`` attribute, falling back to ``str(resp)`` when the
    SDK object exposes no ``text``.

    Raises:
        RuntimeError: when no Gemini client is configured (missing API key).
    """
    if not client:
        raise RuntimeError("Gemini client is not configured")
    storyboard_limiter.acquire()
    response = client.models.generate_content(
        model=SMALL_MODEL or MODEL,
        config=types.GenerateContentConfig(system_instruction=system),
        contents=contents,
    )
    return getattr(response, "text", str(response))
111
+
112
  # ---------------- prompts ----------------
113
  SYSTEM_PROMPT = """You are a Manim CE (0.19.x) code generator/refiner.
114
  Return ONLY valid Python code (no backticks, no prose).
 
141
  self.wait(0.75)
142
  """
143
 
144
+ STORYBOARD_SYSTEM_PROMPT = """You are MathFrames' storyboard director.
145
+ You interview educators, refine their ideas, and maintain a structured shot list for a short Manim video.
146
+
147
+ Always respond with a single JSON object matching this schema exactly:
148
+ {
149
+ "reply": "<short conversational answer for the user>",
150
+ "plan": {
151
+ "concept": "<core idea you are visualizing>",
152
+ "notes": "<optional reminders or staging notes>",
153
+ "scenes": [
154
+ {
155
+ "title": "Scene 1: Setup",
156
+ "objective": "<what this scene accomplishes>",
157
+ "steps": ["<bullet-level action>", "..."]
158
+ }
159
+ ]
160
+ },
161
+ "questions": ["<optional clarification question>", "..."]
162
+ }
163
+
164
+ Rules:
165
+ - Keep scene titles in the format: "Scene N: Subtitle".
166
+ - Each scene must list 1-5 clear, imperative steps or beats (use educational language, no code).
167
+ - Reflect any user-provided edits exactly.
168
+ - If the user supplies a plan JSON, treat it as the source of truth and improve it gently.
169
+ - Ask for clarification only when needed; otherwise leave the questions array empty.
170
+ - Never include Markdown fences, prose outside JSON, or code snippets.
171
+
172
+ # Professional editor guidance (use to drive the conversation naturally):
173
+ - Confirm the concept/topic and any subtopics that should appear.
174
+ - Capture the learning goal: what must the viewer understand by the end?
175
+ - Clarify how deep the explanation should go (introductory vs. detailed walk-through).
176
+ - Ask about any specific visuals, references, or prior scenes the user wants included.
177
+ - Check whether there's an existing script or outline to honor.
178
+ - Note any stylistic tone or audience expectations (e.g., middle school vs. college).
179
+ """
180
+
181
+ STORYBOARD_CONFIRM_SYSTEM_PROMPT = """You are MathFrames' storyboard director.
182
+ The user has finalized their plan. Craft the final handoff for the rendering model.
183
+
184
+ Return a JSON object:
185
+ {
186
+ "reply": "<brief confirmation for the user>",
187
+ "render_prompt": "<single paragraph prompt for the Manim code generator>",
188
+ "plan": { ... same structure as provided ... }
189
+ }
190
+
191
+ Guidelines:
192
+ - Keep render_prompt concise but fully descriptive. Mention each scene's purpose and key visuals.
193
+ - Respect the provided storyboard plan exactly—do not invent new scenes or steps.
194
+ - Include relevant settings (style, length, audience, resolution) when supplied.
195
+ - Do not add Markdown or code; respond with JSON only.
196
+ """
197
+
198
+ MAX_STORYBOARD_SCENES = 6
199
+
200
+
201
class ScenePayload(BaseModel):
    """One storyboard scene: a title, an objective, and bullet-level steps.

    NOTE(review): uses the pydantic v1-style ``@validator`` API — confirm the
    deployed pydantic version supports it.
    """

    # Server-generated id (see _generate_scene_id); None until assigned.
    id: Optional[str] = None
    title: str
    objective: Optional[str] = ""
    steps: List[str]

    @validator("title", pre=True)
    def _clean_title(cls, value: Any) -> str:
        # Trim whitespace; any empty/falsy title collapses to the literal "Scene".
        if isinstance(value, str):
            value = value.strip()
        if not value:
            return "Scene"
        return value

    @validator("steps", pre=True)
    def _coerce_steps(cls, value: Any) -> List[str]:
        """Accept a string, list of strings, or nested lists; return flat steps.

        Strings are split on newlines; bullet markers (•, -, tabs, spaces) are
        stripped from both ends. Always returns at least one step.
        """
        collected: List[str] = []
        if isinstance(value, str):
            # A single blob: treat each line as a candidate step.
            candidates = value.replace("\r", "").split("\n")
            collected.extend(candidates)
        elif isinstance(value, (list, tuple)):
            for item in value:
                if isinstance(item, str):
                    # List entries may themselves contain newlines.
                    collected.extend(item.replace("\r", "").split("\n"))
                elif isinstance(item, (list, tuple)):
                    # One level of nesting: keep only the string leaves.
                    for sub in item:
                        if isinstance(sub, str):
                            collected.append(sub)
        cleaned: List[str] = []
        for step in collected:
            # Strip leading/trailing bullet decoration left by the model/UI.
            step = str(step).strip(" •\t-")
            if step:
                cleaned.append(step)
        # Never return an empty list — downstream code assumes >= 1 step.
        return cleaned or ["Outline the key idea for this scene."]
235
+
236
+
237
class PlanPayload(BaseModel):
    """Full storyboard plan: the concept, its scenes, and free-form notes.

    NOTE(review): uses the pydantic v1-style ``@validator`` API — confirm the
    deployed pydantic version supports it.
    """

    concept: str
    scenes: List[ScenePayload]
    notes: Optional[str] = ""

    @validator("concept", pre=True)
    def _clean_concept(cls, value: Any) -> str:
        # Trim whitespace; empty/falsy concepts fall back to a placeholder.
        if isinstance(value, str):
            value = value.strip()
        return value or "Untitled Concept"

    @validator("scenes", pre=True)
    def _ensure_scenes(cls, value: Any) -> List[Any]:
        # Coerce any non-list input (None, dict, scalar) to an empty list so
        # per-scene validation never sees a wrong container type.
        if isinstance(value, (list, tuple)):
            return list(value)
        return []
253
+
254
+
255
class StoryboardChatIn(BaseModel):
    """Request body for a storyboard chat turn (message and/or plan edits)."""

    # Omit to start a new session; include to continue an existing one.
    session_id: Optional[str] = None
    message: Optional[str] = ""
    # Client-side edited plan; treated as source of truth when present.
    plan: Optional[PlanPayload] = None
    settings: Optional[Dict[str, Any]] = None

    @validator("message", pre=True, always=True)
    def _default_message(cls, value: Any) -> str:
        # Normalize None to "" and coerce non-strings so handlers can call
        # str methods without guards.
        if value is None:
            return ""
        return str(value)

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Dict[str, Any]:
        # Anything that is not a dict becomes an empty settings mapping.
        if isinstance(value, dict):
            return value
        return {}
272
+
273
+
274
class StoryboardConfirmIn(BaseModel):
    """Request body for finalizing a storyboard plan (plan is required)."""

    session_id: Optional[str] = None
    plan: PlanPayload
    settings: Optional[Dict[str, Any]] = None

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Dict[str, Any]:
        # Anything that is not a dict becomes an empty settings mapping.
        if isinstance(value, dict):
            return value
        return {}
284
+
285
+
286
@dataclass
class PlanSession:
    """Mutable server-side state for one storyboard conversation."""

    # Unique id handed back to the client so it can resume the session.
    session_id: str
    # Chat transcript; entries are dicts with "role"/"content" keys
    # (see _format_conversation).
    messages: List[Dict[str, Any]] = field(default_factory=list)
    # Latest sanitized storyboard plan; None until first sanitize.
    plan: Optional[PlanPayload] = None
    # Production settings (audience/style/length/resolution) merged per request.
    settings: Dict[str, Any] = field(default_factory=dict)
    # Epoch-second timestamps; updated_at drives age-based pruning.
    created_at: float = field(default_factory=time.time)
    updated_at: float = field(default_factory=time.time)
294
+
295
+
296
# In-memory registry of active storyboard sessions, keyed by session id.
# NOTE(review): process-local only — sessions vanish on restart and are not
# shared across workers; confirm a single-process deployment.
PLAN_SESSIONS: Dict[str, PlanSession] = {}
# Guards all reads/writes of PLAN_SESSIONS across request threads.
PLAN_LOCK = threading.Lock()
298
+
299
  # ---------- NEW: carry full CLI error back to the refiner ----------
300
  class RenderError(Exception):
301
  def __init__(self, log: str):
 
341
  c = re.sub(r",\s*\)", ")", c)
342
  return c
343
 
344
+
345
+ def _extract_json_dict(raw: str) -> Dict[str, Any]:
346
+ """Best-effort JSON extraction from the LLM response."""
347
+ if not raw:
348
+ raise ValueError("Empty response from model")
349
+ stripped = raw.strip()
350
+ if stripped.startswith("```"):
351
+ stripped = re.sub(r"^```(?:json)?", "", stripped, flags=re.IGNORECASE).strip()
352
+ stripped = re.sub(r"```$", "", stripped).strip()
353
+ try:
354
+ return json.loads(stripped)
355
+ except json.JSONDecodeError:
356
+ match = re.search(r"\{.*\}", stripped, flags=re.DOTALL)
357
+ if match:
358
+ candidate = match.group(0)
359
+ try:
360
+ return json.loads(candidate)
361
+ except json.JSONDecodeError:
362
+ pass
363
+ raise ValueError("Model did not return valid JSON")
364
+
365
+
366
+ def _generate_scene_id(index: int) -> str:
367
+ return f"scene-{index}-{uuid.uuid4().hex[:6]}"
368
+
369
+
370
+ def _normalize_scene_title(index: int, title: str) -> str:
371
+ title = title.strip()
372
+ if not title:
373
+ return f"Scene {index}: Beat"
374
+ prefix = f"Scene {index}"
375
+ if not title.lower().startswith("scene"):
376
+ return f"{prefix}: {title}"
377
+ parts = title.split(":", 1)
378
+ if len(parts) == 2:
379
+ return f"{prefix}: {parts[1].strip()}"
380
+ return f"{prefix}: {title.split(maxsplit=1)[-1]}"
381
+
382
+
383
def _sanitize_plan(plan: Optional[PlanPayload], *, concept_hint: str = "Untitled Concept") -> PlanPayload:
    """Return a plan that is always safe to render and echo back to the UI.

    Guarantees: a non-empty concept, at most MAX_STORYBOARD_SCENES scenes,
    every scene has an id, a "Scene N: ..." title, a non-empty objective and
    at least one step. When *plan* is falsy, a single default "Setup" scene
    is fabricated from *concept_hint*.
    """
    if not plan:
        # No plan yet — seed the session with a one-scene starter storyboard.
        default_scene = ScenePayload(
            id=_generate_scene_id(1),
            title="Scene 1: Setup",
            objective=f"Introduce {concept_hint}",
            steps=[
                f"Display the title \"{concept_hint}\"",
                "Provide quick context for the viewer",
                "Highlight the main question to explore",
            ],
        )
        return PlanPayload(concept=concept_hint, notes="", scenes=[default_scene])

    concept = plan.concept.strip() or concept_hint or "Untitled Concept"
    sanitized_scenes: List[ScenePayload] = []
    # Cap the scene count and renumber titles sequentially from 1.
    for idx, scene in enumerate(plan.scenes[:MAX_STORYBOARD_SCENES], start=1):
        steps = [str(step).strip() for step in scene.steps if step and str(step).strip()]
        if not steps:
            # Every scene must carry at least one actionable step.
            steps = [f"Explain the next idea for {concept}."]
        title = _normalize_scene_title(idx, scene.title or f"Scene {idx}")
        objective = (scene.objective or "").strip()
        sanitized_scenes.append(
            ScenePayload(
                # Preserve existing ids so the UI can track scenes across edits.
                id=scene.id or _generate_scene_id(idx),
                title=title,
                objective=objective or f"Advance the story about {concept}.",
                steps=steps,
            )
        )

    if not sanitized_scenes:
        # All scenes were filtered out (or none supplied) — fabricate one.
        sanitized_scenes.append(
            ScenePayload(
                id=_generate_scene_id(1),
                title="Scene 1: Setup",
                objective=f"Introduce {concept}",
                steps=[
                    f"Present the main idea \"{concept}\"",
                    "Explain why it matters to the viewer",
                ],
            )
        )

    notes = (plan.notes or "").strip()
    return PlanPayload(concept=concept, notes=notes, scenes=sanitized_scenes)
429
+
430
+
431
def _plan_to_public_dict(plan: PlanPayload) -> Dict[str, Any]:
    """Serialize the plan (recursively, scenes included) for JSON responses."""
    # pydantic's .dict() recurses into nested ScenePayload models, unlike dict(plan).
    return plan.dict()
433
+
434
+
435
+ def _format_conversation(messages: List[Dict[str, Any]], limit: int = 8) -> str:
436
+ if not messages:
437
+ return "None yet."
438
+ recent = messages[-limit:]
439
+ lines = []
440
+ for msg in recent:
441
+ role = msg.get("role", "assistant").title()
442
+ content = str(msg.get("content", "")).strip()
443
+ lines.append(f"{role}: {content}")
444
+ return "\n".join(lines)
445
+
446
+
447
+ def _audience_label(value: Optional[str]) -> Optional[str]:
448
+ mapping = {
449
+ "ms": "middle school students",
450
+ "hs": "high school students",
451
+ "ug": "undergraduate students",
452
+ }
453
+ return mapping.get(str(value).lower()) if value else None
454
+
455
+
456
+ def _style_label(value: Optional[str]) -> Optional[str]:
457
+ mapping = {
458
+ "minimal": "minimal visuals (focus on narration and a few key elements)",
459
+ "steps": "step-by-step exposition with clear transitions",
460
+ "geometry": "geometry-focused visuals that highlight shapes and spatial relationships",
461
+ }
462
+ return mapping.get(str(value).lower()) if value else None
463
+
464
+
465
+ def _length_label(value: Optional[str]) -> Optional[str]:
466
+ mapping = {
467
+ "short": "short (~30–45s)",
468
+ "medium": "medium (~60–90s)",
469
+ }
470
+ return mapping.get(str(value).lower()) if value else None
471
+
472
+
473
+ def _quality_from_settings(settings: Optional[Dict[str, Any]]) -> str:
474
+ if not settings:
475
+ return "medium"
476
+ resolution = str(settings.get("resolution", "")).lower()
477
+ if resolution == "480p":
478
+ return "low"
479
+ if resolution == "1080p":
480
+ return "high"
481
+ return "medium"
482
+
483
+
484
+ def _quality_flag(quality: str) -> str:
485
+ return {
486
+ "low": "-ql",
487
+ "medium": "-qm",
488
+ "high": "-qh",
489
+ }.get(quality, "-qm")
490
+
491
+
492
+ def _compose_default_render_prompt(plan: PlanPayload, settings: Dict[str, Any], conversation: List[Dict[str, Any]]) -> str:
493
+ lines = [
494
+ f"Create a concise Manim CE 0.19 scene illustrating the concept \"{plan.concept}\".",
495
+ "Structure the animation around these storyboard scenes:",
496
+ ]
497
+ for scene in plan.scenes:
498
+ lines.append(f"- {scene.title} ({scene.objective})")
499
+ for step in scene.steps:
500
+ lines.append(f" • {step}")
501
+ if plan.notes:
502
+ lines.append(f"Production notes: {plan.notes}")
503
+ if settings:
504
+ audience_text = _audience_label(settings.get("audience"))
505
+ style_text = _style_label(settings.get("style"))
506
+ length_text = _length_label(settings.get("length"))
507
+ lines.append("Production settings to honor:")
508
+ if audience_text:
509
+ lines.append(f"- Tailor explanations for {audience_text} (language, pacing, assumptions).")
510
+ if style_text:
511
+ lines.append(f"- Presentation style: {style_text}.")
512
+ if length_text:
513
+ lines.append(f"- Keep total runtime {length_text}.")
514
+ resolution = settings.get("resolution")
515
+ if resolution:
516
+ lines.append(f"- Render for {resolution} output (frame layout should read well at that resolution).")
517
+ if conversation:
518
+ lines.append("Incorporate the important constraints already discussed with the user.")
519
+ lines.append("Follow the CAPTURE policy: include # CAPTURE_POINT just before the final self.wait(0.75).")
520
+ return "\n".join(lines)
521
+
522
+
523
def _prune_plan_sessions(max_sessions: int = 200, max_age_seconds: int = 3600) -> None:
    """Evict storyboard sessions: oldest beyond *max_sessions*, then stale ones.

    NOTE(review): this acquires PLAN_LOCK, which is a non-reentrant
    threading.Lock — calling it while already holding PLAN_LOCK would
    deadlock; confirm no caller does so.
    """
    now = time.time()
    with PLAN_LOCK:
        # Size cap first: drop the least-recently-updated overflow.
        if len(PLAN_SESSIONS) > max_sessions:
            sorted_items = sorted(PLAN_SESSIONS.items(), key=lambda item: item[1].updated_at)
            for session_id, _ in sorted_items[: len(PLAN_SESSIONS) - max_sessions]:
                PLAN_SESSIONS.pop(session_id, None)
        # Then age-based eviction; iterate over a snapshot since we mutate.
        for session_id, session in list(PLAN_SESSIONS.items()):
            if now - session.updated_at > max_age_seconds:
                PLAN_SESSIONS.pop(session_id, None)
533
+
534
+
535
def _get_or_create_session(session_id: Optional[str], settings: Optional[Dict[str, Any]] = None) -> PlanSession:
    """Fetch an existing storyboard session or create (and register) a new one.

    Args:
        session_id: client-supplied id; None or unknown ids create a session.
        settings: optional settings dict merged into the session's settings.

    Returns:
        The live PlanSession for this id.
    """
    with PLAN_LOCK:
        if session_id and session_id in PLAN_SESSIONS:
            session = PLAN_SESSIONS[session_id]
            if settings:
                session.settings.update(settings)
            return session
        new_id = session_id or uuid.uuid4().hex
        session = PlanSession(session_id=new_id)
        if settings:
            session.settings.update(settings)
        PLAN_SESSIONS[new_id] = session
    # Prune AFTER releasing the lock: _prune_plan_sessions acquires PLAN_LOCK
    # itself, and threading.Lock is not reentrant, so invoking it while still
    # holding the lock would self-deadlock on every new-session creation.
    _prune_plan_sessions()
    return session
549
+
550
+
551
def _storyboard_model_reply(session: PlanSession, user_message: str) -> Tuple[str, PlanPayload, List[str]]:
    """Run one chat turn of the storyboard assistant against the small model.

    Sanitizes the session's plan, sends plan + settings + recent history to
    the model, parses its JSON reply, and stores the (re-sanitized) updated
    plan back on the session.

    Returns:
        (reply_text, updated_plan, clarification_questions). Falls back to a
        canned reply and the existing plan when the model output is unusable.
    """
    concept_hint = session.plan.concept if session.plan else (user_message.strip() or "Untitled Concept")
    session.plan = _sanitize_plan(session.plan, concept_hint=concept_hint)
    session.updated_at = time.time()
    plan_json = json.dumps(_plan_to_public_dict(session.plan), indent=2)
    settings_json = json.dumps(session.settings or {}, indent=2)
    history_text = _format_conversation(session.messages)
    latest_message = user_message.strip() or "User adjusted the storyboard without additional text."
    contents = f"""You are refining a math animation storyboard with the user.
Current storyboard plan JSON:
{plan_json}

Session settings:
{settings_json}

Conversation so far:
{history_text}

Update the plan if needed and craft your reply (JSON only). Latest user message:
{latest_message}
"""
    raw_response = gemini_small_call(system=STORYBOARD_SYSTEM_PROMPT, contents=contents)
    try:
        parsed = _extract_json_dict(raw_response)
    except Exception as exc:
        # Non-JSON model output: log and proceed with the existing plan.
        print("Storyboard model JSON parse failed:", exc, file=sys.stderr)
        parsed = {}

    reply_text = str(parsed.get("reply") or "").strip() or "Understood—updating the storyboard."
    plan_data = parsed.get("plan")
    new_plan = session.plan
    if isinstance(plan_data, dict):
        try:
            new_plan = PlanPayload(**plan_data)
        except Exception as exc:
            # Invalid plan payload: keep the previous plan.
            print("Unable to parse plan from storyboard model:", exc, file=sys.stderr)
    # Re-sanitize whatever plan we ended up with before storing it.
    session.plan = _sanitize_plan(new_plan, concept_hint=session.plan.concept if session.plan else concept_hint)
    questions_field = parsed.get("questions") or []
    # Keep only non-empty string-able questions.
    questions = [str(q).strip() for q in questions_field if isinstance(q, (str, int)) and str(q).strip()]
    session.updated_at = time.time()
    return reply_text, session.plan, questions
592
+
593
+
594
def _storyboard_model_confirm(session: PlanSession) -> Tuple[str, PlanPayload, str]:
    """Finalize an approved storyboard and produce the renderer handoff.

    Sends the sanitized plan, settings, and conversation summary to the small
    model and expects {"reply", "render_prompt", "plan"} JSON back.

    Returns:
        (reply_text, final_plan, render_prompt). When the model omits or
        garbles the render prompt, a deterministic one is composed locally.
    """
    session.plan = _sanitize_plan(session.plan, concept_hint=session.plan.concept if session.plan else "Untitled Concept")
    plan_json = json.dumps(_plan_to_public_dict(session.plan), indent=2)
    settings_json = json.dumps(session.settings or {}, indent=2)
    history_text = _format_conversation(session.messages)
    contents = f"""The user has approved this storyboard plan:
{plan_json}

Session settings:
{settings_json}

Conversation summary:
{history_text}

Produce the confirmation JSON only (no Markdown)."""
    raw_response = gemini_small_call(system=STORYBOARD_CONFIRM_SYSTEM_PROMPT, contents=contents)
    try:
        parsed = _extract_json_dict(raw_response)
    except Exception as exc:
        # Non-JSON model output: log and fall back to local composition below.
        print("Storyboard confirm JSON parse failed:", exc, file=sys.stderr)
        parsed = {}

    reply_text = str(parsed.get("reply") or "").strip() or "Great! Locking the storyboard and preparing the renderer."
    plan_data = parsed.get("plan")
    final_plan = session.plan
    if isinstance(plan_data, dict):
        try:
            final_plan = PlanPayload(**plan_data)
        except Exception as exc:
            # Invalid returned plan: keep the session's sanitized plan.
            print("Unable to parse confirmed plan:", exc, file=sys.stderr)
    final_plan = _sanitize_plan(final_plan, concept_hint=final_plan.concept if final_plan else session.plan.concept)
    render_prompt = str(parsed.get("render_prompt") or "").strip()
    if not render_prompt:
        # Model gave no usable prompt — build one from the plan ourselves.
        render_prompt = _compose_default_render_prompt(final_plan, session.settings, session.messages)
    session.plan = final_plan
    session.updated_at = time.time()
    return reply_text, final_plan, render_prompt
631
+
632
+ def _run_manim(scene_code: str, run_id: Optional[str] = None, quality: str = "medium") -> Tuple[bytes, Optional[Path]]:
633
  """Render MP4 (fast) and also save a steady-state PNG (last frame)."""
634
  run_id = run_id or str(uuid.uuid4())[:8]
635
  work = RUNS / run_id; work.mkdir(parents=True, exist_ok=True)
 
643
  env = os.environ.copy()
644
  env["PYTHONPATH"] = str(work)
645
 
646
+ quality_flag = _quality_flag(quality)
647
+
648
  # 1) Render video
649
  cmd_video = [
650
+ "manim", quality_flag, "--disable_caching",
651
  "--media_dir", str(media),
652
  "-o", f"{run_id}.mp4",
653
  str(scene_path), "AutoScene",
 
677
  # 2) Save last frame PNG (leverages our CAPTURE_POINT rule)
678
  png_path = None
679
  cmd_png = [
680
+ "manim", quality_flag, "--disable_caching", "-s", # -s saves the last frame as an image
681
  "--media_dir", str(media),
682
  str(scene_path), "AutoScene",
683
  ]
 
709
  return file_ref
710
 
711
 
712
+ def llm_generate_manim_code(
713
+ prompt: str,
714
+ settings: Optional[Dict[str, Any]] = None,
715
+ previous_code: Optional[str] = None,
716
+ ) -> str:
717
  """First-pass generation (capture-aware)."""
718
  if not client:
719
  return DEFAULT_SCENE
720
  try:
721
  contents = f"Create AutoScene for: {prompt}\nRemember the CAPTURE POLICY and Common API constraints."
722
+ if settings:
723
+ audience_text = _audience_label(settings.get("audience"))
724
+ style_text = _style_label(settings.get("style"))
725
+ length_text = _length_label(settings.get("length"))
726
+ contents += "\nProduction settings to respect:"
727
+ if audience_text:
728
+ contents += f"\n- Tailor explanations for {audience_text}."
729
+ if style_text:
730
+ contents += f"\n- Style: {style_text}."
731
+ if length_text:
732
+ contents += f"\n- Target runtime: {length_text}."
733
+ resolution = settings.get("resolution")
734
+ if resolution:
735
+ contents += f"\n- Design visuals that read clearly at {resolution}."
736
  response_text = gemini_call(system=SYSTEM_PROMPT, contents=contents)
737
  code = _clean_code(response_text)
738
  if "class AutoScene" not in code:
 
743
  traceback.print_exc()
744
  return previous_code or DEFAULT_SCENE
745
 
746
+ def llm_refine_from_error(
747
+ previous_code: str,
748
+ error_message: str,
749
+ original_user_prompt: str,
750
+ settings: Optional[Dict[str, Any]] = None,
751
+ ) -> str:
752
  """When Manim fails; send the *real* CLI log/trace to Gemini."""
753
  if not client:
754
  return previous_code or DEFAULT_SCENE
 
771
  - Keep the CAPTURE POLICY and ensure # CAPTURE_POINT is at the final steady layout.
772
  - Scan for nonexistent methods (e.g., `.to_center`) or invalid kwargs (e.g., `vertex=` on RightAngle) and replace with valid Manim CE 0.19 API.
773
  - Prefer `.center()`/`.move_to(ORIGIN)`, and `.move_to()`, `.align_to()`, `.to_edge()`, `.next_to()` for layout.
774
+ - Apply the smallest change necessary to resolve the failure; do not overhaul structure, pacing, or stylistic choices the user made.
775
+ - Preserve all existing text content (titles, labels, strings) unless it directly causes the error.
776
+ - Do not alter functional math/logic that already works; only touch the problematic lines needed for a successful render.
777
  - Return ONLY the corrected Python code (no backticks).
778
  """
779
+ if settings:
780
+ audience_text = _audience_label(settings.get("audience"))
781
+ style_text = _style_label(settings.get("style"))
782
+ length_text = _length_label(settings.get("length"))
783
+ extra = "\nProduction targets to preserve:"
784
+ if audience_text:
785
+ extra += f"\n- Audience: {audience_text}."
786
+ if style_text:
787
+ extra += f"\n- Style: {style_text}."
788
+ if length_text:
789
+ extra += f"\n- Runtime goal: {length_text}."
790
+ resolution = settings.get("resolution")
791
+ if resolution:
792
+ extra += f"\n- Ensure layout reads clearly at {resolution}."
793
+ user_prompt += extra
794
  response_text = gemini_call(system=SYSTEM_PROMPT, contents=user_prompt)
795
  code = _clean_code(response_text)
796
  if "class AutoScene" not in code:
 
801
  traceback.print_exc()
802
  return previous_code or DEFAULT_SCENE
803
 
804
+ def llm_visual_refine_from_image(
805
+ original_user_prompt: str,
806
+ previous_code: str,
807
+ png_path: Optional[Path],
808
+ settings: Optional[Dict[str, Any]] = None,
809
+ ) -> str:
810
  """
811
  Use the screenshot to request layout/legibility/placement fixes.
812
  Includes the original prompt and current code, and asks for minimal edits.
 
832
  - Keep animation semantics as-is unless they're obviously broken.
833
  - Keep exactly one class AutoScene(Scene).
834
  - Preserve the CAPTURE POLICY and place `# CAPTURE_POINT` at the final steady layout with self.wait(0.75) and NO outro after that.
835
+ - Make the minimal adjustments needed to fix readability; do not rework the overall composition or pacing beyond what the user already authored.
836
+ - Preserve all text labels, titles, and strings as written unless they directly cause overlap/legibility issues.
837
+ - Avoid rewriting functioning math/logic—only adjust positioning, styling, or other elements required to fix the visual defect.
838
  Return ONLY the revised Python code (no backticks).
839
  """
840
+ if settings:
841
+ audience_text = _audience_label(settings.get("audience"))
842
+ style_text = _style_label(settings.get("style"))
843
+ length_text = _length_label(settings.get("length"))
844
+ visual_prompt += "\nKeep these production settings in mind:"
845
+ if audience_text:
846
+ visual_prompt += f"\n- Audience: {audience_text}."
847
+ if style_text:
848
+ visual_prompt += f"\n- Style: {style_text}."
849
+ if length_text:
850
+ visual_prompt += f"\n- Runtime target: {length_text}."
851
+ resolution = settings.get("resolution")
852
+ if resolution:
853
+ visual_prompt += f"\n- Layout should stay readable at {resolution}."
854
 
855
  response_text = gemini_call(system=SYSTEM_PROMPT, contents=[file_ref, visual_prompt])
856
  code = _clean_code(response_text)
 
862
  traceback.print_exc()
863
  return previous_code
864
 
865
+ def refine_loop(
866
+ user_prompt: str,
867
+ settings: Optional[Dict[str, Any]] = None,
868
+ max_error_refines: int = 3,
869
+ do_visual_refine: bool = True,
870
+ ) -> bytes:
871
  """
872
  Generate → render; on error, refine up to N times from Manim traceback → re-render.
873
  If first render succeeds and do_visual_refine==True, run an image-based refinement
874
  using the saved steady-state PNG, then re-render. Fallback to the best successful MP4.
875
  """
876
  # 1) initial generation (capture-aware)
877
+ code = llm_generate_manim_code(user_prompt, settings=settings)
878
+ quality = _quality_from_settings(settings)
879
 
880
  # 2) render attempt
881
  try:
882
+ mp4_bytes, png_path = _run_manim(code, run_id="iter0", quality=quality)
883
  except RenderError as e:
884
  print("Render failed (iter0), attempting error-based refinement...", file=sys.stderr)
885
  if max_error_refines <= 0:
 
888
  last_err = e.log or ""
889
  while attempts < max_error_refines:
890
  attempts += 1
891
+ refined = llm_refine_from_error(
892
+ previous_code=code,
893
+ error_message=last_err,
894
+ original_user_prompt=user_prompt,
895
+ settings=settings,
896
+ )
897
  try:
898
+ mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}", quality=quality)
899
  code = refined
900
  break
901
  except RenderError as e2:
 
912
  last_err = traceback.format_exc()
913
  while attempts < max_error_refines:
914
  attempts += 1
915
+ refined = llm_refine_from_error(
916
+ previous_code=code,
917
+ error_message=last_err,
918
+ original_user_prompt=user_prompt,
919
+ settings=settings,
920
+ )
921
  try:
922
+ mp4_bytes, png_path = _run_manim(refined, run_id=f"iter_err_{attempts}", quality=quality)
923
  code = refined
924
  break
925
  except Exception:
 
929
 
930
  # 3) optional visual refinement loop
931
  if do_visual_refine and png_path and png_path.exists():
932
+ refined2 = llm_visual_refine_from_image(
933
+ original_user_prompt=user_prompt,
934
+ previous_code=code,
935
+ png_path=png_path,
936
+ settings=settings,
937
+ )
938
  if refined2.strip() != code.strip():
939
  try:
940
+ mp4_bytes2, _ = _run_manim(refined2, run_id="iter2", quality=quality)
941
  return mp4_bytes2
942
  except Exception:
943
  print("Visual refine render failed; returning best known render.", file=sys.stderr)
 
945
 
946
  return mp4_bytes
947
 
948
+
949
def _auto_fix_render(
    user_prompt: str,
    code: str,
    settings: Optional[Dict[str, Any]],
    initial_log: str,
    max_attempts: int = 3,
) -> Tuple[Optional[str], Optional[bytes], str]:
    """Try to repair user-supplied Manim code by looping LLM error-refinement.

    Returns ``(fixed_code, mp4_bytes, "")`` on the first attempt that renders,
    or ``(None, None, last_error_log)`` when every attempt fails or no Gemini
    client is configured.
    """
    if not client:
        # Without a Gemini client there is nothing we can do automatically.
        return None, None, initial_log

    render_quality = _quality_from_settings(settings)
    current_code = code
    current_log = initial_log

    for idx in range(max_attempts):
        candidate = llm_refine_from_error(
            previous_code=current_code,
            error_message=current_log,
            original_user_prompt=user_prompt,
            settings=settings,
        )
        # If the model returns the same code, further rounds are pointless.
        if candidate.strip() == current_code.strip():
            break
        current_code = candidate
        try:
            video_bytes, _ = _run_manim(
                current_code,
                run_id=f"manual_fix_{idx}",
                quality=render_quality,
            )
        except RenderError as err:
            # Keep the most recent useful log for the next refinement round.
            current_log = err.log or current_log
        else:
            return current_code, video_bytes, ""

    return None, None, current_log
982
+
983
  # ---------------- API ----------------
984
@app.post("/storyboard/chat")
def storyboard_chat(inp: StoryboardChatIn):
    """Advance a storyboard-planning conversation by one user turn."""
    if not client:
        raise HTTPException(500, "Gemini client is not configured")
    if not inp.message.strip() and not inp.plan:
        raise HTTPException(400, "Message or plan updates are required.")

    session = _get_or_create_session(inp.session_id, inp.settings or {})
    if inp.settings:
        session.settings.update(inp.settings)

    # Apply client-side plan edits, but never fail the request on a bad plan.
    if inp.plan:
        try:
            session.plan = _sanitize_plan(inp.plan, concept_hint=inp.plan.concept)
        except Exception as exc:
            print("Failed to apply user-supplied plan:", exc, file=sys.stderr)

    user_message = inp.message.strip()
    session.messages.append({
        "role": "user",
        "content": user_message if user_message else "[Plan updated without additional message]",
    })

    try:
        reply_text, plan_model, questions = _storyboard_model_reply(session, user_message)
    except Exception as exc:
        print("Storyboard chat error:", exc, file=sys.stderr)
        raise HTTPException(500, "Storyboard assistant failed to respond")

    session.messages.append({"role": "assistant", "content": reply_text})
    return {
        "session_id": session.session_id,
        "reply": reply_text,
        "plan": plan_model.dict(),
        "questions": questions,
        "settings": session.settings,
    }
1021
+
1022
+
1023
@app.post("/storyboard/confirm")
def storyboard_confirm(inp: StoryboardConfirmIn):
    """Lock in the storyboard plan and produce the final render prompt.

    Returns the assistant's confirmation reply, the sanitized plan, and a
    render prompt (LLM-composed, with a templated fallback on failure).

    Raises:
        HTTPException 500: Gemini client missing.
        HTTPException 400: the submitted plan fails sanitization.
    """
    if not client:
        raise HTTPException(500, "Gemini client is not configured")

    session = _get_or_create_session(inp.session_id, inp.settings or {})
    if inp.settings:
        session.settings.update(inp.settings)

    # A malformed plan is a client error: surface it as a 400 rather than
    # letting the exception escape as an opaque 500 (mirrors the guarded
    # _sanitize_plan call in /storyboard/chat).
    try:
        session.plan = _sanitize_plan(inp.plan, concept_hint=inp.plan.concept)
    except Exception as exc:
        print("Failed to sanitize confirmed plan:", exc, file=sys.stderr)
        raise HTTPException(400, "The submitted storyboard plan could not be validated.")
    session.messages.append({"role": "user", "content": "[User confirmed the storyboard plan]"})

    try:
        reply_text, final_plan, render_prompt = _storyboard_model_confirm(session)
    except Exception as exc:
        # Degrade gracefully: keep the confirmed plan and build a templated prompt.
        print("Storyboard confirm error:", exc, file=sys.stderr)
        final_plan = session.plan
        render_prompt = _compose_default_render_prompt(final_plan, session.settings, session.messages)
        reply_text = "Plan confirmed. Falling back to a templated prompt."

    session.messages.append({"role": "assistant", "content": reply_text})
    return {
        "session_id": session.session_id,
        "reply": reply_text,
        "render_prompt": render_prompt,
        "plan": final_plan.dict(),
        "settings": session.settings,
    }
1051
+
1052
+
1053
class PromptIn(BaseModel):
    """Request body carrying a user prompt plus optional production settings."""

    prompt: str
    settings: Optional[Dict[str, Any]] = None

    @validator("prompt")
    def _validate_prompt(cls, value: str) -> str:
        """Reject blank prompts and normalize surrounding whitespace."""
        stripped = value.strip() if value else ""
        if not stripped:
            raise ValueError("Prompt cannot be empty")
        return stripped

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Optional[Dict[str, Any]]:
        """Silently discard anything that is not a plain dict."""
        return value if isinstance(value, dict) else None
1068
+
1069
+
1070
class GenerateCodeIn(PromptIn):
    """Payload for /generate-code — identical shape to PromptIn."""
1072
+
1073
+
1074
class RenderCodeIn(BaseModel):
    """Request body for /render-code: user-edited Manim code to render."""

    code: str
    prompt: Optional[str] = ""
    settings: Optional[Dict[str, Any]] = None
    auto_fix: bool = False

    @validator("code")
    def _validate_code(cls, value: str) -> str:
        """Reject blank code but return it untouched (whitespace preserved)."""
        stripped = value.strip() if value else ""
        if not stripped:
            raise ValueError("Code cannot be empty")
        return value

    @validator("prompt", pre=True, always=True)
    def _sanitize_prompt(cls, value: Any) -> str:
        """Coerce any incoming value (including None) to a stripped string."""
        return str(value or "").strip()

    @validator("settings", pre=True, always=True)
    def _sanitize_settings(cls, value: Any) -> Optional[Dict[str, Any]]:
        """Silently discard anything that is not a plain dict."""
        return value if isinstance(value, dict) else None
1095
 
1096
  class EmailIn(BaseModel):
1097
  email: str
 
1114
  return {"ok": True, "model": MODEL, "has_gemini": bool(API_KEY)}
1115
 
1116
@app.post("/generate-code")
def generate_code(inp: GenerateCodeIn):
    """Return ONLY the generated Manim Python code (no rendering)."""
    return {"code": llm_generate_manim_code(inp.prompt, settings=inp.settings)}
1121
 
1122
  @app.post("/generate-and-render")
1123
  def generate_and_render(inp: PromptIn):
 
 
1124
  try:
1125
+ mp4 = refine_loop(inp.prompt, settings=inp.settings, max_error_refines=3, do_visual_refine=True)
1126
  except Exception:
1127
  raise HTTPException(500, "Failed to produce video after refinement")
1128
  return Response(
 
1131
  headers={"Content-Disposition": 'inline; filename="result.mp4"'}
1132
  )
1133
 
1134
+
1135
@app.post("/render-code")
def render_code(inp: RenderCodeIn):
    """Render user-supplied Manim code; optionally auto-fix it on failure.

    On success returns the MP4 bytes directly. On a RenderError: without
    ``auto_fix`` a 400 is raised (the client may retry with auto_fix=True);
    with ``auto_fix`` the LLM attempts minimal repairs and, if a repair
    renders, a JSON payload with the fixed code and base64-encoded video is
    returned. Any other failure maps to a 500.
    """
    # Defensive local import: the module-level import line does not bring in
    # base64, which would make the auto-fix success path raise NameError.
    import base64

    quality = _quality_from_settings(inp.settings)
    try:
        mp4_bytes, _ = _run_manim(inp.code, run_id="manual", quality=quality)
        return Response(
            content=mp4_bytes,
            media_type="video/mp4",
            headers={"Content-Disposition": 'inline; filename="result.mp4"'}
        )
    except RenderError as exc:
        log = exc.log or ""
        if not inp.auto_fix:
            raise HTTPException(
                status_code=400,
                detail={
                    "error": "Render failed",
                    "message": "Render failed. Attempting automatic fix...",
                    # Include the tail of the Manim log so the client can
                    # surface the actual failure (previously omitted).
                    "log": log[-6000:],
                },
            )
        fixed_code, fixed_video, final_log = _auto_fix_render(
            user_prompt=inp.prompt or "User-edited Manim code",
            code=inp.code,
            settings=inp.settings,
            initial_log=log,
        )
        if fixed_code and fixed_video:
            payload = {
                "auto_fixed": True,
                "message": "Your code triggered a Manim error, so I applied the smallest possible fix (keeping your edits) and reran the render.",
                "code": fixed_code,
                "video_base64": base64.b64encode(fixed_video).decode("utf-8"),
                "video_mime_type": "video/mp4",
                "files": [
                    {"filename": "scene.py", "contents": fixed_code}
                ],
                "meta": {"resolution": inp.settings.get("resolution") if inp.settings else None},
                "log_tail": (log or "")[-600:]
            }
            return Response(
                content=json.dumps(payload),
                media_type="application/json",
            )
        detail_log = (final_log or log)[-6000:]
        raise HTTPException(
            status_code=400,
            detail={"error": "Render failed", "log": detail_log, "code": inp.code},
        )
    except Exception as exc:
        raise HTTPException(status_code=500, detail={"error": "Unexpected render failure", "log": str(exc)})
1185
+
1186
  @app.post("/store-email")
1187
  def store_email(email: EmailIn):
1188
  """Store the provided email address in the configured Hugging Face dataset."""