Spaces:
Sleeping
Sleeping
Update doc_preprocessing.py
Browse files - doc_preprocessing.py +2 -2
doc_preprocessing.py
CHANGED
|
@@ -6,8 +6,8 @@ import streamlit as st
|
|
| 6 |
import numpy as np
|
| 7 |
import os
|
| 8 |
|
| 9 |
-
# emb_model_ = "intfloat/multilingual-e5-large-instruct"
|
| 10 |
emb_model = "intfloat/multilingual-e5-base"
|
|
|
|
| 11 |
def extract_text(file):
|
| 12 |
text = ""
|
| 13 |
# Check if the input is a file path (string) or a file-like object
|
|
@@ -69,7 +69,7 @@ def get_embeddings(texts)-> np.ndarray:
|
|
| 69 |
st.error(f"Error generating embeddings: {e}")
|
| 70 |
return np.array([])
|
| 71 |
|
| 72 |
-
@st.
|
| 73 |
def process_files(files):
|
| 74 |
all_chunks = []
|
| 75 |
all_embeddings = []
|
|
|
|
| 6 |
import numpy as np
|
| 7 |
import os
|
| 8 |
|
|
|
|
| 9 |
emb_model = "intfloat/multilingual-e5-base"
|
| 10 |
+
|
| 11 |
def extract_text(file):
|
| 12 |
text = ""
|
| 13 |
# Check if the input is a file path (string) or a file-like object
|
|
|
|
| 69 |
st.error(f"Error generating embeddings: {e}")
|
| 70 |
return np.array([])
|
| 71 |
|
| 72 |
+
@st.cache_resource
|
| 73 |
def process_files(files):
|
| 74 |
all_chunks = []
|
| 75 |
all_embeddings = []
|