import torch
import gradio as gr
from PIL import Image
import whisper
from transformers import AutoProcessor, AutoModelForImageTextToText
# One-time model setup, executed at import.
# Load the Janus-Pro multimodal processor and model from the Hugging Face Hub.
# NOTE(review): trust_remote_code=True executes code shipped with the model repo —
# acceptable only because the repo is explicitly trusted here.
processor = AutoProcessor.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
model = AutoModelForImageTextToText.from_pretrained("deepseek-community/Janus-Pro-1B", trust_remote_code=True)
# Prefer GPU when available; inference inputs are moved to this same device below.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
# Whisper "base" checkpoint used to transcribe uploaded audio before prompting.
whisper_model = whisper.load_model("base")
def build_instruction(user_text):
    """Embed *user_text* into the fixed prompt-engineering instruction template."""
    preamble = (
        "You are a professional AI prompt engineer. Convert the input into a "
        "highly detailed AI generation prompt. Include: Subject, Environment, Summary."
    )
    return f"{preamble} Input: {user_text}\nReturn only the final prompt."
def text_to_prompt(user_text):
    """Convert a plain-text idea into a detailed AI generation prompt.

    Args:
        user_text: The user's raw idea or description.

    Returns:
        The model's generated prompt text, with the instruction prefix and
        special tokens stripped.
    """
    instruction = build_instruction(user_text)
    inputs = processor(text=instruction, return_tensors="pt").to(device)
    # Remember the prompt length so only the newly generated tokens are decoded.
    input_len = inputs.input_ids.shape[1]
    # Inference only: no_grad skips autograd bookkeeping, reducing memory use.
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=200)
    return processor.decode(output[0][input_len:], skip_special_tokens=True).strip()
def image_text_to_prompt(image_path, user_text):
    """Convert an image (plus optional guiding text) into a generation prompt.

    Args:
        image_path: Filesystem path to the uploaded image; must not be None.
        user_text: Optional guidance; defaults to a generic description request.

    Returns:
        The model's generated prompt text with special tokens stripped.

    Raises:
        ValueError: If no image was provided (surfaced to the user as an
            "Error: ..." message by generate_prompt_ui's handler).
    """
    # Gradio passes None when nothing was uploaded; fail with a clear message
    # instead of the opaque error Image.open(None) would raise.
    if image_path is None:
        raise ValueError("Please upload an image for 'Image + Text' input.")
    if not user_text:
        user_text = "Describe this image in detail."
    image = Image.open(image_path).convert("RGB")
    instruction = build_instruction(user_text)
    inputs = processor(images=[image], text=instruction, return_tensors="pt").to(device)
    # Remember the prompt length so only the newly generated tokens are decoded.
    input_len = inputs.input_ids.shape[1]
    # Inference only: no_grad skips autograd bookkeeping, reducing memory use.
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=200)
    return processor.decode(output[0][input_len:], skip_special_tokens=True).strip()
def audio_to_prompt(audio_path):
    """Transcribe an audio file with Whisper, then turn the transcript into a generation prompt."""
    transcript = whisper_model.transcribe(audio_path)["text"]
    return text_to_prompt(transcript)
def generate_prompt_ui(input_type, text, image, audio):
    """Dispatch the UI inputs to the handler matching *input_type*.

    Args:
        input_type: One of "Text", "Image + Text", or "Audio".
        text: Contents of the text box (used by the first two modes).
        image: Filepath of the uploaded image, or None.
        audio: Filepath of the uploaded audio, or None.

    Returns:
        The generated prompt, or an "Error: ..." string on any failure —
        Gradio shows whatever is returned in the output textbox.
    """
    try:
        if input_type == "Text":
            return text_to_prompt(text)
        elif input_type == "Image + Text":
            return image_text_to_prompt(image, text)
        elif input_type == "Audio":
            return audio_to_prompt(audio)
        # Previously an unrecognized type fell through and silently returned
        # None (blank output box); report it explicitly instead.
        return f"Error: unsupported input type: {input_type}"
    except Exception as e:  # UI boundary: report the failure, don't crash the app
        return f"Error: {str(e)}"
# Gradio UI setup.
# NOTE: component creation order inside gr.Blocks determines on-page layout.
with gr.Blocks() as app:
    gr.Markdown("# 🧠 Janus-Pro Prompt Generator")
    # Mode selector drives which input widgets are visible (see toggle below).
    input_type = gr.Radio(["Text", "Image + Text", "Audio"], label="Select Input Type", value="Text")
    text_input = gr.Textbox(label="Enter your idea")
    # Image/audio inputs start hidden; "filepath" makes Gradio hand the
    # handlers a path string rather than an in-memory object.
    image_input = gr.Image(type="filepath", label="Upload Image", visible=False)
    audio_input = gr.Audio(type="filepath", label="Upload Audio", visible=False)
    output = gr.Textbox(label="Generated Prompt")
    btn = gr.Button("Generate 🚀")
    def toggle(choice):
        # Return one visibility update per component, in the same order as the
        # outputs list passed to input_type.change below.
        return (
            gr.update(visible=(choice != "Audio")),
            gr.update(visible=(choice == "Image + Text")),
            gr.update(visible=(choice == "Audio"))
        )
    input_type.change(toggle, input_type, [text_input, image_input, audio_input])
    btn.click(generate_prompt_ui, [input_type, text_input, image_input, audio_input], output)
app.launch()