Dua Rajper committed on
Commit
6828d65
·
verified ·
1 Parent(s): 66544f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -1,50 +1,58 @@
1
  import streamlit as st
2
  from PIL import Image
3
- import easyocr
4
  import torch
5
- from transformers import CLIPProcessor, CLIPModel, pipeline
 
6
 
7
- # Load CLIP Model & Processor
8
  @st.cache_resource
9
  def load_clip_model():
10
- model = CLIPModel.from_pretrained("fxmarty/clip-vision-model-tiny")
 
 
 
11
  processor = CLIPProcessor.from_pretrained("fxmarty/clip-vision-model-tiny")
12
  return model, processor
13
 
14
  model, processor = load_clip_model()
15
 
16
- # Initialize OCR
17
  @st.cache_resource
18
  def load_ocr():
19
  return easyocr.Reader(['en'])
20
 
21
  reader = load_ocr()
22
 
23
- # Streamlit App
 
24
  st.title("πŸ–ΌοΈ Multimodal AI Assistant")
25
  st.write("Upload an image and ask a question about it!")
26
 
27
- # Upload image
28
- uploaded_file = st.file_uploader("Upload an image", type=["jpg", "png", "jpeg"])
29
 
30
  if uploaded_file is not None:
31
  # Display Image
32
  image = Image.open(uploaded_file)
33
  st.image(image, caption="Uploaded Image", use_column_width=True)
34
 
35
- # Extract text using OCR
36
- with st.spinner("Extracting text from image..."):
37
  extracted_text = reader.readtext(uploaded_file, detail=0)
38
-
39
- st.write("### πŸ“ Extracted Text:", extracted_text)
40
-
41
- # User asks a question
 
 
 
 
42
  user_question = st.text_input("πŸ€– Ask a question about the image:")
43
 
44
  if user_question:
45
- with st.spinner("Analyzing image and question..."):
46
  inputs = processor(text=[user_question], images=image, return_tensors="pt")
47
  outputs = model.get_image_features(**inputs)
48
 
49
  st.write("### πŸ† AI Response:")
50
- st.write("CLIP Model Processed the Image! (Further improvements coming soon)")
 
1
  import streamlit as st
2
  from PIL import Image
 
3
  import torch
4
+ import easyocr
5
+ from transformers import CLIPProcessor, CLIPModel
6
 
7
+ # ---- Load CLIP Model ---- #
8
  @st.cache_resource
9
  def load_clip_model():
10
+ model = CLIPModel.from_pretrained(
11
+ "fxmarty/clip-vision-model-tiny",
12
+ ignore_mismatched_sizes=True # Fix model size mismatch
13
+ )
14
  processor = CLIPProcessor.from_pretrained("fxmarty/clip-vision-model-tiny")
15
  return model, processor
16
 
17
  model, processor = load_clip_model()
18
 
19
+ # ---- Load OCR (EasyOCR) ---- #
20
  @st.cache_resource
21
  def load_ocr():
22
  return easyocr.Reader(['en'])
23
 
24
  reader = load_ocr()
25
 
26
+ # ---- Streamlit UI ---- #
27
+ st.set_page_config(page_title="Multimodal AI Assistant", layout="wide")
28
  st.title("πŸ–ΌοΈ Multimodal AI Assistant")
29
  st.write("Upload an image and ask a question about it!")
30
 
31
+ # ---- Upload Image ---- #
32
+ uploaded_file = st.file_uploader("πŸ“€ Upload an image", type=["jpg", "png", "jpeg"])
33
 
34
  if uploaded_file is not None:
35
  # Display Image
36
  image = Image.open(uploaded_file)
37
  st.image(image, caption="Uploaded Image", use_column_width=True)
38
 
39
+ # Extract Text using OCR
40
+ with st.spinner("πŸ” Extracting text from image..."):
41
  extracted_text = reader.readtext(uploaded_file, detail=0)
42
+
43
+ st.write("### πŸ“ Extracted Text:")
44
+ if extracted_text:
45
+ st.success(extracted_text)
46
+ else:
47
+ st.warning("No readable text found in the image.")
48
+
49
+ # ---- Ask a Question About the Image ---- #
50
  user_question = st.text_input("πŸ€– Ask a question about the image:")
51
 
52
  if user_question:
53
+ with st.spinner("πŸ” Analyzing image and generating response..."):
54
  inputs = processor(text=[user_question], images=image, return_tensors="pt")
55
  outputs = model.get_image_features(**inputs)
56
 
57
  st.write("### πŸ† AI Response:")
58
+ st.write("CLIP Model has processed the image! (Further improvements coming soon)")