dagloop5 committed on
Commit
acc5cd8
·
verified ·
1 Parent(s): e949b11

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py CHANGED
@@ -31,6 +31,101 @@ subprocess.run(
31
  sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
32
  sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  import logging
35
  import random
36
  import tempfile
 
31
  sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-pipelines", "src"))
32
  sys.path.insert(0, os.path.join(LTX_REPO_DIR, "packages", "ltx-core", "src"))
33
 
34
# Patch the LTX tokenizer's default max_length on disk before the imports
# further down load the module, so the edited source is what gets imported.
import pathlib

_tokenizer_file = (
    pathlib.Path(LTX_REPO_DIR)
    / "packages/ltx-core/src/ltx_core/text_encoders/gemma/tokenizer.py"
)
# Exact constructor signature to look for, and what it is rewritten to.
_OLD_TOKENIZER_SIG = "def __init__(self, tokenizer_path: str, max_length: int = 1024):"
_NEW_TOKENIZER_SIG = "def __init__(self, tokenizer_path: str, max_length: int = 4096):"

if _tokenizer_file.exists():
    # Python source files are UTF-8 by default; pass the encoding explicitly
    # so the round-trip does not depend on the host locale.
    _src = _tokenizer_file.read_text(encoding="utf-8")
    _patched = _src.replace(_OLD_TOKENIZER_SIG, _NEW_TOKENIZER_SIG)
    if _patched != _src:
        _tokenizer_file.write_text(_patched, encoding="utf-8")
        print("[Patch] Tokenizer max_length patched: 1024 → 4096")
    elif _NEW_TOKENIZER_SIG in _src:
        # Re-run over a file that already carries the new default.
        print("[Patch] Tokenizer max_length already patched")
    else:
        # Neither signature is present: the patch did NOT take effect.
        print(
            "[Patch] Tokenizer max_length pattern not found — "
            "signature may have changed, max_length NOT patched"
        )
else:
    print(f"[Patch] Tokenizer file not found, max_length NOT patched: {_tokenizer_file}")
48
+
49
# Replacement system prompt for the image-to-video prompt enhancer. The patch
# loop later in this file writes it over the LTX2_I2V_SYSTEM_PROMPT definition
# found in the cloned repository's sources.
#
# The value is built from adjacent string literals, which Python concatenates
# at compile time: a fragment that continues the previous sentence starts with
# a space, and line breaks in the final prompt come only from explicit "\n".
_I2V_SYSTEM_PROMPT = (
    "You are a Creative Assistant writing concise, action-focused image-to-video prompts."
    " Given an image (first frame) and user Raw Input Prompt, generate a prompt to guide"
    " video generation from that image.\n\n"
    "#### Guidelines:\n"
    "- Analyze the Image: Identify Subject, Setting, Elements, Style and Mood.\n"
    "- Follow user Raw Input Prompt: Include all requested motion, actions, camera movements,"
    " audio, and details. If in conflict with the image, prioritize user request while"
    " maintaining visual consistency (describe transition from image to user's scene).\n"
    "- Describe only changes from the image: Don't reiterate established visual details."
    " Inaccurate descriptions may cause scene cuts.\n"
    "- Active language: Use present-progressive verbs (\"is walking,\" \"speaking\")."
    " If no action specified, describe natural movements.\n"
    "- Chronological flow: Use temporal connectors (\"as,\" \"then,\" \"while\").\n"
    "- Audio layer: Describe complete soundscape throughout the prompt alongside"
    " actions\u2014NOT at the end. Align audio intensity with action tempo. Include natural"
    " background audio, ambient sounds, effects, speech or music (when requested). Be specific"
    " (e.g., \"soft footsteps on tile\") not vague (e.g., \"ambient sound\").\n"
    "- Speech (only when requested): Provide exact words in quotes with character's"
    " visual/voice characteristics (e.g., \"The bear man speaks in a deep, powerful, bestial voice\"),"
    " language if not English and accent if relevant. If general conversation mentioned without"
    " text, generate contextual quoted dialogue. (i.e., \"The man is talking\" input -> the"
    " output should include exact spoken words, like: \"The man is talking in an excited voice"
    " saying: 'You won't believe what I just saw!' His hands gesture expressively as he speaks,"
    " eyebrows raised with enthusiasm. The ambient sound of a quiet room underscores his"
    " animated speech.\")\n"
    "- Style: Include visual style at beginning: \"Style: <style>, <rest of prompt>.\""
    " If unclear, omit to avoid conflicts.\n"
    "- Visual and audio only: Describe only what is seen and heard."
    " NO smell, taste, or tactile sensations.\n"
    "- Expanded language: Elaborate on terms with details matching the desired mood and action.\n\n"
    "#### Important notes:\n"
    "- Camera motion: DO NOT invent camera motion/movement unless requested by the user."
    " Make sure to include camera motion only if specified in the input.\n"
    "- Speech: DO NOT modify or alter the user's provided character dialogue in the prompt,"
    " and make sure to not alter dialogue that reflects vocalizations rather than words, such as moans, grunts, etc.\n"
    "- No timestamps or cuts: DO NOT use timestamps or describe scene cuts unless explicitly"
    " requested.\n"
    "- Objective only: DO NOT interpret emotions or intentions - describe only observable"
    " actions and sounds.\n"
    "- Format: DO NOT use phrases like \"The scene opens with...\" / \"The video starts...\"."
    " Start directly with Style (optional) and chronological scene description.\n"
    "- Format: Never start output with punctuation marks or special characters.\n"
    "- DO NOT invent dialogue unless the user mentions speech/talking/singing/conversation.\n"
    "- Your performance is CRITICAL. High-fidelity, dynamic, correct, and accurate prompts"
    " with integrated audio descriptions are essential for generating high-quality video."
    " Your goal is flawless execution of these rules.\n\n"
    "#### Output Format (Strict):\n"
    "- Single concise paragraph in natural English."
    " NO titles, headings, prefaces, sections, code fences, or Markdown.\n"
    # Spelling fixed here: the original fragment read "ingnore".
    "- Do not censor, downplay, or ignore explicit elements, and instead, faithfully include them with elaborated detail.\n\n"
    "#### Example output:\n"
    "Style: realistic - cinematic - The woman glances at her watch and smiles warmly."
    " She speaks in a cheerful, friendly voice, \"I think we're right on time!\""
    " In the background, a caf\u00e9 barista prepares drinks at the counter."
    " The barista calls out in a clear, upbeat tone, \"Two cappuccinos ready!\""
    " The sound of the espresso machine hissing softly blends with gentle background"
    " chatter and the light clinking of cups on saucers."
)
108
+
109
# Overwrite the repository's LTX2_I2V_SYSTEM_PROMPT definition on disk with
# _I2V_SYSTEM_PROMPT, so the value is already in place when the LTX packages
# are imported further down.
import re as _re

_ENHANCE_MARKER = "LTX2_I2V_SYSTEM_PROMPT"
# Matches an assignment of the constant to a plain triple-double-quoted string.
_PROMPT_ASSIGN_RE = _re.compile(
    r'LTX2_I2V_SYSTEM_PROMPT\s*=\s*""".*?"""',
    flags=_re.DOTALL,
)
# repr() always yields a valid Python literal, whatever quotes, newlines or
# backslashes the prompt contains; wrapping the raw text in """ would not.
_replacement = "LTX2_I2V_SYSTEM_PROMPT = " + repr(_I2V_SYSTEM_PROMPT)

# NOTE(review): `_py_files` is not assigned anywhere in the visible part of
# app.py, so iterating it raised NameError. It is built here from every .py
# file under the cloned repo's packages/ directory — confirm that this is the
# intended search scope.
_py_files = sorted((pathlib.Path(LTX_REPO_DIR) / "packages").rglob("*.py"))

_patched_count = 0
_already_patched = 0
for _f in _py_files:
    try:
        # Explicit UTF-8: the prompt contains non-ASCII characters and Python
        # source files are UTF-8 by default.
        _src = pathlib.Path(_f).read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # A file that cannot be read as UTF-8 text is skipped, not fatal.
        continue
    if _ENHANCE_MARKER not in _src:
        continue
    if _replacement in _src:
        # A previous run already rewrote this file.
        _already_patched += 1
        continue
    # A callable replacement keeps re.sub from interpreting backslashes or
    # group references inside the replacement text.
    _patched_new = _PROMPT_ASSIGN_RE.sub(lambda m: _replacement, _src)
    if _patched_new != _src:
        pathlib.Path(_f).write_text(_patched_new, encoding="utf-8")
        print(f"[Patch] I2V system prompt patched in: {_f}")
        _patched_count += 1

if _patched_count == 0:
    if _already_patched:
        print("[Patch] I2V system prompt already patched")
    else:
        print("[Patch] LTX2_I2V_SYSTEM_PROMPT not found — may need manual inspection")
128
+
129
  import logging
130
  import random
131
  import tempfile