Spaces:

dagloop5
/

Testing2

Running on Zero

App Files Files Community

dagloop5 committed 14 days ago

Commit

acc5cd8

verified ·

1 Parent(s): e949b11

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -0

app.py CHANGED Viewed

@@ -31,6 +31,101 @@ subprocess.run(
 sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
 sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
 import logging
 import random
 import tempfile

 sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
 sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
+# Patch LTX tokenizer max_length before imports load it
+# The default in the checked-out tokenizer source is rewritten on disk
+# (1024 -> 4096), so this has to run before that module is imported.
+import pathlib
+_tokenizer_file = pathlib.Path(LTX_REPO_DIR) / "packages/ltx-core/src/ltx_core/text_encoders/gemma/tokenizer.py"
+if _tokenizer_file.exists():
+    # Explicit UTF-8: Python source files are UTF-8 by default, whereas
+    # read_text()/write_text() would otherwise fall back to the locale encoding.
+    _src = _tokenizer_file.read_text(encoding="utf-8")
+    _patched = _src.replace(
+        "def __init__(self, tokenizer_path: str, max_length: int = 1024):",
+        "def __init__(self, tokenizer_path: str, max_length: int = 4096):",
+    )
+    if _patched != _src:
+        _tokenizer_file.write_text(_patched, encoding="utf-8")
+        print("[Patch] Tokenizer max_length patched: 1024 → 4096")
+    else:
+        # Either a previous run already rewrote the signature, or the exact
+        # signature text above no longer appears in the file.
+        print("[Patch] Tokenizer max_length already patched or pattern not found")
+else:
+    # A missing file used to be skipped without any output; say so explicitly
+    # so an unexpected checkout layout is visible in the logs.
+    print(f"[Patch] Tokenizer file not found, max_length not patched: {_tokenizer_file}")
+_I2V_SYSTEM_PROMPT = (  # Image-to-video system prompt text; later spliced into the LTX sources as the LTX2_I2V_SYSTEM_PROMPT literal.
+    "You are a Creative Assistant writing concise, action-focused image-to-video prompts."
+    " Given an image (first frame) and user Raw Input Prompt, generate a prompt to guide"
+    " video generation from that image.\n\n"
+    "#### Guidelines:\n"
+    "- Analyze the Image: Identify Subject, Setting, Elements, Style and Mood.\n"
+    "- Follow user Raw Input Prompt: Include all requested motion, actions, camera movements,"
+    " audio, and details. If in conflict with the image, prioritize user request while"
+    " maintaining visual consistency (describe transition from image to user's scene).\n"
+    "- Describe only changes from the image: Don't reiterate established visual details."
+    " Inaccurate descriptions may cause scene cuts.\n"
+    "- Active language: Use present-progressive verbs (\"is walking,\" \"speaking\")."
+    " If no action specified, describe natural movements.\n"
+    "- Chronological flow: Use temporal connectors (\"as,\" \"then,\" \"while\").\n"
+    "- Audio layer: Describe complete soundscape throughout the prompt alongside"
+    " actions\u2014NOT at the end. Align audio intensity with action tempo. Include natural"
+    " background audio, ambient sounds, effects, speech or music (when requested). Be specific"
+    " (e.g., \"soft footsteps on tile\") not vague (e.g., \"ambient sound\").\n"
+    "- Speech (only when requested): Provide exact words in quotes with character's"
+    " visual/voice characteristics (e.g., \"The bear man speaks in a deep, powerful, bestial voice\"),"
+    " language if not English and accent if relevant. If general conversation mentioned without"
+    " text, generate contextual quoted dialogue. (i.e., \"The man is talking\" input -> the"
+    " output should include exact spoken words, like: \"The man is talking in an excited voice"
+    " saying: 'You won't believe what I just saw!' His hands gesture expressively as he speaks,"
+    " eyebrows raised with enthusiasm. The ambient sound of a quiet room underscores his"
+    " animated speech.\")\n"
+    "- Style: Include visual style at beginning: \"Style: <style>, <rest of prompt>.\""
+    " If unclear, omit to avoid conflicts.\n"
+    "- Visual and audio only: Describe only what is seen and heard."
+    " NO smell, taste, or tactile sensations.\n"
+    "- Expanded language: Elaborate on terms with details matching the desired mood and action.\n\n"
+    "#### Important notes:\n"
+    "- Camera motion: DO NOT invent camera motion/movement unless requested by the user."
+    " Make sure to include camera motion only if specified in the input.\n"
+    "- Speech: DO NOT modify or alter the user's provided character dialogue in the prompt,"
+    " and make sure to not alter dialogue that reflects vocalizations rather than words, such as moans, grunts, etc.\n"
+    "- No timestamps or cuts: DO NOT use timestamps or describe scene cuts unless explicitly"
+    " requested.\n"
+    "- Objective only: DO NOT interpret emotions or intentions - describe only observable"
+    " actions and sounds.\n"
+    "- Format: DO NOT use phrases like \"The scene opens with...\" / \"The video starts...\"."
+    " Start directly with Style (optional) and chronological scene description.\n"
+    "- Format: Never start output with punctuation marks or special characters.\n"
+    "- DO NOT invent dialogue unless the user mentions speech/talking/singing/conversation.\n"
+    "- Your performance is CRITICAL. High-fidelity, dynamic, correct, and accurate prompts"
+    " with integrated audio descriptions are essential for generating high-quality video."
+    " Your goal is flawless execution of these rules.\n\n"
+    "#### Output Format (Strict):\n"
+    "- Single concise paragraph in natural English."
+    " NO titles, headings, prefaces, sections, code fences, or Markdown.\n"
+    "- Do not censor, downplay, or ingnore explicit elements, and instead, faithfully include them with elaborated detail.\n\n"  # NOTE(review): "ingnore" is a misspelling of "ignore"; left as-is because this text is runtime data.
+    "#### Example output:\n"
+    "Style: realistic - cinematic - The woman glances at her watch and smiles warmly."
+    " She speaks in a cheerful, friendly voice, \"I think we're right on time!\""
+    " In the background, a caf\u00e9 barista prepares drinks at the counter."
+    " The barista calls out in a clear, upbeat tone, \"Two cappuccinos ready!\""
+    " The sound of the espresso machine hissing softly blends with gentle background"
+    " chatter and the light clinking of cups on saucers."
+)  # Adjacent literals are joined by implicit concatenation into one string.
+# Replace the LTX2_I2V_SYSTEM_PROMPT triple-quoted literal in the LTX sources
+# with _I2V_SYSTEM_PROMPT, rewriting the files on disk.
+import re as _re
+_ENHANCE_MARKER = "LTX2_I2V_SYSTEM_PROMPT"
+_patched_count = 0
+# `_py_files` is not assigned anywhere in the visible part of this file. Reuse
+# it when it exists; otherwise scan the LTX checkout so the loop below cannot
+# fail with a NameError. NOTE(review): confirm the intended set of files.
+try:
+    _prompt_candidates = _py_files
+except NameError:
+    _prompt_candidates = sorted(pathlib.Path(LTX_REPO_DIR).rglob("*.py"))
+# Loop-invariant pieces are built once. Wrapping the prompt in triple quotes
+# only yields an equivalent literal while the prompt contains no backslashes,
+# no '"""' sequence and does not end with a double quote.
+_replacement = 'LTX2_I2V_SYSTEM_PROMPT = """' + _I2V_SYSTEM_PROMPT + '"""'
+_prompt_assignment = _re.compile(
+    r'LTX2_I2V_SYSTEM_PROMPT\s*=\s*""".*?"""',
+    flags=_re.DOTALL,
+)
+for _f in _prompt_candidates:
+    # Explicit UTF-8: the prompt contains non-ASCII characters and the target
+    # is Python source, which is decoded as UTF-8 by default.
+    _src = pathlib.Path(_f).read_text(encoding="utf-8")
+    if _ENHANCE_MARKER not in _src:
+        continue
+    # A callable replacement keeps re.sub from interpreting backslash escapes
+    # in the replacement text.
+    _patched_new = _prompt_assignment.sub(lambda m: _replacement, _src)
+    if _patched_new != _src:
+        pathlib.Path(_f).write_text(_patched_new, encoding="utf-8")
+        print(f"[Patch] I2V system prompt patched in: {_f}")
+        _patched_count += 1
+if _patched_count == 0:
+    # Also reached when every matching file already holds the new prompt,
+    # because an unchanged file is not counted.
+    print("[Patch] LTX2_I2V_SYSTEM_PROMPT not found — may need manual inspection")
 import logging
 import random
 import tempfile