Anshu13 committed on
Commit
39d368e
·
verified ·
1 Parent(s): e10ab97

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -37
app.py CHANGED
@@ -4,57 +4,34 @@ from PIL import Image
4
  import whisper
5
  from transformers import AutoProcessor, AutoModelForImageTextToText
6
 
7
- processor = AutoProcessor.from_pretrained("deepseek-community/Janus-Pro-1B",trust_remote_code=True)
8
- model = AutoModelForImageTextToText.from_pretrained("deepseek-community/Janus-Pro-1B",trust_remote_code=True)
 
9
  whisper_model = whisper.load_model("base")
10
 
11
  def build_instruction(user_text):
12
- return f"""
13
- You are a professional AI prompt engineer.
14
-
15
- Convert the input into a highly detailed AI generation prompt.
16
-
17
- Include:
18
- - Subject
19
- - Environment
20
- - Summary
21
-
22
- Make it visually rich and optimized for all existing AI models.
23
- The Prompt should e detailed prompt about words ranging from 50 to 70.
24
 
25
- Input: {user_text}
26
-
27
- Return only the final prompt.
28
- """
29
  def text_to_prompt(user_text):
30
  instruction = build_instruction(user_text)
31
-
32
- inputs = processor(
33
- text=instruction,
34
- return_tensors="pt"
35
- ).to(model.device)
36
 
37
  input_len = inputs.input_ids.shape[1]
38
- output = model.generate(**inputs, max_new_tokens=150)
39
- generated_tokens = output[0][input_len:]
40
-
41
- return processor.decode(generated_tokens, skip_special_tokens=True)
42
 
43
  def image_text_to_prompt(image_path, user_text):
44
  image = Image.open(image_path)
45
  instruction = build_instruction(user_text)
46
-
47
- inputs = processor(
48
- images=image,
49
- text=instruction,
50
- return_tensors="pt"
51
- ).to(model.device)
52
-
53
  input_len = inputs.input_ids.shape[1]
 
54
  output = model.generate(**inputs, max_new_tokens=150)
55
- generated_tokens = output[0][input_len:]
56
-
57
- return processor.decode(generated_tokens, skip_special_tokens=True)
58
 
59
  def audio_to_prompt(audio_path):
60
  result = whisper_model.transcribe(audio_path)
 
4
  import whisper
5
  from transformers import AutoProcessor, AutoModelForImageTextToText
6
 
7
+
8
+ processor = AutoProcessor.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
9
+ model = AutoModelForImageTextToText.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
10
  whisper_model = whisper.load_model("base")
11
 
12
  def build_instruction(user_text):
13
+ return f"You are a professional AI prompt engineer. Convert the input into a highly detailed AI generation prompt. Include: Subject, Environment, Summary. Input: {user_text}\nReturn only the final prompt."
 
 
 
 
 
 
 
 
 
 
 
14
 
 
 
 
 
15
  def text_to_prompt(user_text):
16
  instruction = build_instruction(user_text)
17
+ inputs = processor(text=instruction, return_tensors="pt")
 
 
 
 
18
 
19
  input_len = inputs.input_ids.shape[1]
20
+
21
+ output = model.generate(**inputs, max_new_tokens=150)
22
+
23
+ return processor.decode(output[0][input_len:], skip_special_tokens=True)
24
 
25
  def image_text_to_prompt(image_path, user_text):
26
  image = Image.open(image_path)
27
  instruction = build_instruction(user_text)
28
+ inputs = processor(images=image, text=instruction, return_tensors="pt")
29
+
 
 
 
 
 
30
  input_len = inputs.input_ids.shape[1]
31
+
32
  output = model.generate(**inputs, max_new_tokens=150)
33
+
34
+ return processor.decode(output[0][input_len:], skip_special_tokens=True)
 
35
 
36
  def audio_to_prompt(audio_path):
37
  result = whisper_model.transcribe(audio_path)