FlashCode-Lab commited on
Commit
4971129
·
verified ·
1 Parent(s): 4fb210a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -21
app.py CHANGED
@@ -4,34 +4,60 @@ from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.document_loaders import TextLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
- # 修改这里:使用更稳定的导入路径
8
- from langchain.chains.retrieval_qa.base import RetrievalQA
9
 
10
- # 1. 引擎初始化
 
11
  llm = HuggingFaceEndpoint(
12
  repo_id="Qwen/Qwen2.5-7B-Instruct",
13
- huggingfacehub_api_token=os.getenv("HF_TOKEN")
 
14
  )
15
 
16
- # 2. 知识库加载
17
- if not os.path.exists("knowledge.txt"):
18
- with open("knowledge.txt", "w", encoding="utf-8") as f:
19
- f.write("这里是你的私有大脑知识库。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- loader = TextLoader("knowledge.txt", encoding="utf-8")
22
- docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(loader.load())
23
- embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
24
- vectorstore = FAISS.from_documents(docs, embeddings)
25
-
26
- # 3. 构建问答链
27
- qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vectorstore.as_retriever())
28
 
29
- # 4. 界面逻辑
30
- def chat(msg, history):
31
  try:
32
- res = qa_chain.invoke({"query": msg})
33
- return res["result"]
 
34
  except Exception as e:
35
- return f"运行出错:{str(e)}"
 
 
 
 
 
 
 
 
 
36
 
37
- gr.ChatInterface(chat).launch()
 
 
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.document_loaders import TextLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
+ from langchain.chains import RetrievalQA
 
8
 
9
# 1. Connect to the Qwen 2.5 instruct model on the HF Inference endpoint.
# The API token is read from the HF_TOKEN secret configured for this Space.
_ENDPOINT_KWARGS = dict(
    repo_id="Qwen/Qwen2.5-7B-Instruct",
    huggingfacehub_api_token=os.getenv("HF_TOKEN"),
    timeout=300,  # generous timeout: cold endpoints can take minutes to warm up
)
llm = HuggingFaceEndpoint(**_ENDPOINT_KWARGS)
16
 
17
# 2. Build the private knowledge base as a FAISS vector store.
def init_knowledge_base():
    """Load knowledge.txt, chunk it, embed the chunks, and return a FAISS store.

    Creates a seed knowledge.txt on first launch so TextLoader never fails.
    Returns the ready-to-query FAISS vector store.
    """
    kb_path = "knowledge.txt"
    # Seed the file on first run; subsequent runs read the user's own content.
    if not os.path.exists(kb_path):
        with open(kb_path, "w", encoding="utf-8") as seed:
            seed.write("大脑初始化成功。请在 knowledge.txt 文件中输入你的私有知识。")

    # Read the raw text and split it into overlapping chunks so retrieval
    # can surface focused passages instead of the whole document.
    raw_docs = TextLoader(kb_path, encoding="utf-8").load()
    splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=100)
    chunks = splitter.split_documents(raw_docs)

    # Vectorize with a Chinese-optimized embedding model and index via FAISS.
    embedder = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
    return FAISS.from_documents(chunks, embedder)
36
 
37
# Load the knowledge base once at startup, then wire it into a RetrievalQA
# chain that feeds the top-3 most similar chunks to the LLM per query.
vectorstore = init_knowledge_base()
_top3_retriever = vectorstore.as_retriever(search_kwargs={"k": 3})
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=_top3_retriever)
 
43
 
44
# 3. Chat callback wired into gr.ChatInterface.
def predict(message, history):
    """Answer `message` via the RetrievalQA chain (retrieve, then generate).

    `history` is unused but required by the ChatInterface callback signature.
    Any failure is returned as a user-facing string instead of crashing the UI.
    """
    try:
        # Search knowledge.txt first, then let the LLM compose the answer.
        return qa_chain.invoke({"query": message})["result"]
    except Exception as exc:  # boundary handler: surface the error in the chat
        return f"大脑思考时遇到了一点问题,请检查 Token 或网络。详情: {str(exc)}"
52
+
53
# 4. Front-end: a Gradio chat UI bound to the predict callback.
_UI_KWARGS = {
    "title": "我的全能私有大脑",
    "description": "我已经读取了你的知识库。你可以问我关于你自己的事,也可以和我聊任何话题。",
    "examples": ["你是谁?", "解释一下知识库里的核心内容", "帮我写一个 Python 脚本"],
    "theme": "soft",
}
demo = gr.ChatInterface(predict, **_UI_KWARGS)

if __name__ == "__main__":
    demo.launch()