FlashCode-Lab committed on
Commit
c19023b
·
verified ·
1 Parent(s): 3b72c67

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -0
app.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_community.document_loaders import PyPDFLoader, TextLoader, Docx2txtLoader
6
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
7
+ from langchain.chains import RetrievalQA
8
+
9
# 1. Connect to the hosted LLM; the access token is read from the HF_TOKEN
#    environment variable (None when the variable is not set).
hf_token = os.getenv("HF_TOKEN")
llm = HuggingFaceEndpoint(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    huggingfacehub_api_token=hf_token,
)

# 2. Embedding model (turns text into vectors the retriever can compare).
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
17
+
18
def process_files(files):
    """Load the uploaded documents, split them into chunks and index them.

    Args:
        files: Iterable of uploads. Each item is either a filesystem path
            (``str`` / ``os.PathLike``) or an object that exposes its path
            through a ``.name`` attribute.

    Returns:
        A FAISS vector store built from the chunked documents.

    Raises:
        ValueError: If no text could be extracted from any of the uploads.
    """
    all_docs = []
    for file in files:
        # Uploads may arrive as plain paths or as wrapper objects carrying
        # the path in ``.name``; accept both.
        if isinstance(file, (str, os.PathLike)):
            path = os.fspath(file)
        else:
            path = file.name

        # Pick the loader from the file extension. The comparison is
        # case-insensitive so "REPORT.PDF" is not sent to the text loader.
        lowered = path.lower()
        if lowered.endswith(".pdf"):
            loader = PyPDFLoader(path)
        elif lowered.endswith(".docx"):
            loader = Docx2txtLoader(path)
        else:
            loader = TextLoader(path)
        all_docs.extend(loader.load())

    # 3. Split into overlapping chunks so each piece stays small enough to
    #    be embedded and passed to the model.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=600, chunk_overlap=100)
    split_docs = text_splitter.split_documents(all_docs)

    # Fail with a clear message instead of letting the vector store choke on
    # an empty document list (e.g. scanned PDFs with no text layer).
    if not split_docs:
        raise ValueError("No text could be extracted from the uploaded files.")

    # 4. Store the chunks in an in-memory vector index.
    vectorstore = FAISS.from_documents(split_docs, embeddings)
    return vectorstore
37
+
38
# 5. Question-answering handler

# Most recently built index, kept as one (cache_key, vectorstore) tuple so it
# is always replaced by a single assignment.
_last_index = (None, None)


def predict(message, history, file_output):
    """Answer ``message`` using the uploaded documents as the knowledge base.

    Args:
        message: The user's question.
        history: Chat history supplied by the chat UI; not used here.
        file_output: The uploaded files, or an empty/None value when nothing
            has been uploaded yet.

    Returns:
        The answer text, or a prompt asking the user to upload documents
        when ``file_output`` is empty.
    """
    global _last_index

    if not file_output:
        return "请先上传 PDF/Word/TXT 文档,我才能开启私有大脑模式。"

    # Re-loading, re-splitting and re-embedding every document on each chat
    # turn is expensive, so reuse the index while the set of uploads is the
    # same as on the previous call.
    # NOTE(review): assumes an upload path always refers to the same content
    # for the lifetime of the process - confirm for the Gradio version in use.
    cache_key = tuple(
        os.fspath(f) if isinstance(f, (str, os.PathLike)) else str(getattr(f, "name", f))
        for f in file_output
    )
    cached_key, vectorstore = _last_index
    if cached_key != cache_key:
        vectorstore = process_files(file_output)
        _last_index = (cache_key, vectorstore)

    # Build the retrieval chain over the index and run the question through it.
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
    response = qa_chain.invoke(message)
    return response["result"]
49
+
50
# 6. User interface: a chat window plus a multi-file upload box whose value
#    is passed to predict() as its extra argument.
upload_box = gr.File(file_count="multiple", label="上传私有文档 (.pdf, .docx, .txt)")

demo = gr.ChatInterface(
    fn=predict,
    additional_inputs=[upload_box],
    title="我的全能私有大脑",
    description="上传文件后,我可以基于文档内容回答任何专业问题。",
)

demo.launch()