| |
| import streamlit as st |
| from transformers import BlipProcessor, BlipForConditionalGeneration |
| from PIL import Image |
| import requests |
| from io import BytesIO |
| from deep_translator import GoogleTranslator |
| import torch |
|
|
| |
# Framed-picture emoji (U+1F5BC + variation selector). The literal in this
# file had been mis-encoded into Thai characters, which is not a valid emoji
# for the page icon; escape sequences keep it safe from re-encoding.
_PAGE_ICON = "\U0001F5BC\uFE0F"

# Browser-tab title and favicon for the app.
st.set_page_config(page_title="AI Image Caption Generator", page_icon=_PAGE_ICON)

# Page heading and a one-line description of what the app does.
st.title(f"{_PAGE_ICON} AI Image Caption Generator")
st.write(
    "Upload an image or paste a Google Image URL to get multiple captions "
    "generated in your preferred language!"
)
|
|
| |
# Display name shown in the language picker -> language code handed to the
# translator as its target.
languages = {
    "English": "en",
    "Urdu": "ur",
    "Hindi": "hi",
    "French": "fr",
    "Spanish": "es",
    "Arabic": "ar",
}
|
|
| |
# Ask where the image comes from. Only the widget for the chosen method is
# rendered; the variable for the other method keeps its None default.
input_methods = ("Upload from Computer", "Paste Image URL")
upload_option = st.radio("Choose Image Input Method:", input_methods)

uploaded_file = None
image_url = None

if upload_option != input_methods[0]:
    image_url = st.text_input(
        "Paste Image URL (must be direct link ending with .jpg/.png/.jpeg)"
    )
else:
    uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

# Caption output language, selected by its display name (a key of `languages`).
selected_language = st.selectbox("๐ Choose Output Language", list(languages))
|
|
| |
# Turn whichever input the user supplied into an RGB PIL image. `image`
# stays None when nothing was provided or loading failed; the code that
# follows uses that to decide whether to generate captions.
image = None

if uploaded_file:
    try:
        image = Image.open(uploaded_file).convert("RGB")
    except OSError as e:
        # PIL reports unreadable / non-image data as OSError
        # (UnidentifiedImageError is a subclass); show it instead of crashing.
        st.error(f"Error reading uploaded image: {e}")
elif image_url:
    try:
        # Without a timeout a stalled host would block this script run forever.
        response = requests.get(image_url, timeout=10)
        if response.status_code == 200:
            image = Image.open(BytesIO(response.content)).convert("RGB")
        else:
            st.error("Failed to fetch image. Please check the URL.")
    except Exception as e:
        # UI boundary: any network or decode failure (bad URL, timeout,
        # non-image body) is reported to the user rather than raised.
        st.error(f"Error fetching image: {e}")
|
|
@st.cache_resource(show_spinner=False)
def _load_captioner():
    """Load the BLIP processor and model once and reuse them across reruns.

    Streamlit re-executes the whole script on every widget interaction, so
    calling ``from_pretrained`` inline reloads the weights each time. Caching
    the pair as a resource keeps a single copy per server process.

    Returns:
        A ``(processor, model)`` tuple for
        ``"Salesforce/blip-image-captioning-base"``.
    """
    checkpoint = "Salesforce/blip-image-captioning-base"
    return (
        BlipProcessor.from_pretrained(checkpoint),
        BlipForConditionalGeneration.from_pretrained(checkpoint),
    )


# NOTE(review): some emoji in the UI strings below appear mis-encoded in this
# file; only the hourglass could be recovered unambiguously. Confirm the
# intended glyphs for the others before changing them.
if image is not None:
    st.image(image, caption="Selected Image", width=300)

    with st.spinner("Generating captions... please wait \u23f3"):
        resized_image = image.resize((384, 384))

        processor, model = _load_captioner()
        inputs = processor(resized_image, return_tensors="pt")

        # Beam search over 5 beams, keeping 3 sequences as alternative captions.
        out = model.generate(
            **inputs,
            num_beams=5,
            num_return_sequences=3,
            max_length=50,
            early_stopping=True,
        )

        captions = [processor.decode(o, skip_special_tokens=True) for o in out]

    st.success("๐ Captions Generated Successfully!")
    st.subheader(f"Here are the captions in {selected_language}:")

    for idx, cap in enumerate(captions, start=1):
        try:
            translated_caption = GoogleTranslator(
                source="auto", target=languages[selected_language]
            ).translate(cap)
        except Exception:
            # Best effort: if translation fails for any reason, fall back to
            # the untranslated caption and say so.
            translated_caption = f"(Translation Error: Showing English) {cap}"

        st.text_area(f"โ๏ธ Caption {idx}", value=translated_caption, height=80)

    st.caption("Tip: You can edit, copy, or download captions easily! โ๏ธ")
else:
    st.info("Please upload an image or paste a valid URL to proceed.")
|
|