Dua Rajper committed on
Commit
09c368a
·
verified ·
1 Parent(s): fd833df

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -0
app.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import easyocr
4
+ from transformers import pipeline, AutoTokenizer, AutoModel
5
+
6
+ # Load CLIP model
7
@st.cache_resource
def load_clip_model():
    """Load the CLIP checkpoint objects, cached by ``st.cache_resource``.

    Returns:
        A ``(pipe, tokenizer, model)`` tuple: the feature-extraction
        pipeline, the tokenizer and the model, all created from the same
        checkpoint name.
    """
    checkpoint = "fxmarty/clip-vision-model-tiny"
    feature_pipe = pipeline("feature-extraction", model=checkpoint)
    clip_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    clip_model = AutoModel.from_pretrained(checkpoint)
    return feature_pipe, clip_tokenizer, clip_model


# Instantiate at import time; the cache decorator lets later script reruns
# reuse the same objects instead of loading the checkpoint again.
pipe, tokenizer, model = load_clip_model()
15
+
16
+ # Initialize OCR
17
@st.cache_resource
def load_ocr():
    """Build the English EasyOCR reader, cached by ``st.cache_resource``."""
    languages = ['en']
    ocr_reader = easyocr.Reader(languages)
    return ocr_reader


# Single shared OCR reader for the whole app.
reader = load_ocr()
22
+
23
+ # Streamlit App
24
# Page header.
st.title("🖼️ Multimodal AI Assistant")
st.write("Upload an image and ask a question about it!")

# Upload image
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])

if uploaded_file is not None:
    # Display Image
    image = Image.open(uploaded_file)
    st.image(image, caption="Uploaded Image", use_column_width=True)

    # Extract text using OCR
    with st.spinner("Extracting text from image..."):
        # EasyOCR's readtext() accepts a path/URL string, raw bytes or a
        # numpy array -- not a file-like object -- so hand it the upload's
        # raw bytes. getvalue() returns the whole buffer regardless of the
        # read position Image.open() left behind.
        image_bytes = uploaded_file.getvalue()
        extracted_text = reader.readtext(image_bytes, detail=0)

    st.write("### 📝 Extracted Text:", extracted_text)

    # User asks a question
    user_question = st.text_input("🤖 Ask a question about the image:")

    if user_question:
        with st.spinner("Analyzing image and question..."):
            inputs = tokenizer(user_question, return_tensors="pt")
            # The model output is not consumed yet; the message shown
            # below is a fixed placeholder.
            outputs = model(**inputs)

        st.write("### 🏆 AI Response:")
        st.write("CLIP Model Processed the Input! (Further improvements coming soon)")