FlashCode-Lab committed on
Commit
da656c9
·
verified ·
1 Parent(s): f026bba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -41
app.py CHANGED
@@ -4,60 +4,38 @@ from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.document_loaders import TextLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
- from langchain.chains import RetrievalQA
 
8
 
9
- # 1. 链接顶级大脑 Qwen 2.5
10
- # 系统会自动读取你刚才设置的 Secret: HF_TOKEN
11
  llm = HuggingFaceEndpoint(
12
  repo_id="Qwen/Qwen2.5-7B-Instruct",
13
- huggingfacehub_api_token=os.getenv("HF_TOKEN"),
14
- timeout=300
15
  )
16
 
17
- # 2. 初始化与加载私有知识库
18
- def init_knowledge_base():
19
- # 如果没有 knowledge.txt,先创建一个初始模版
20
- if not os.path.exists("knowledge.txt"):
21
- with open("knowledge.txt", "w", encoding="utf-8") as f:
22
- f.write("大脑初始化成功。请在 knowledge.txt 文件中输入你的私有知识。")
23
-
24
- # 读取知识库
25
- loader = TextLoader("knowledge.txt", encoding="utf-8")
26
- documents = loader.load()
27
-
28
- # 文本切分:让 AI 更好找重点
29
- text_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=100)
30
- docs = text_splitter.split_documents(documents)
31
-
32
- # 向量化处理:将文字坐标化 (使用中文优化模型)
33
- embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
34
- vectorstore = FAISS.from_documents(docs, embeddings)
35
- return vectorstore
36
 
37
- # 启动时先加载一次知识库
38
- vectorstore = init_knowledge_base()
 
 
 
 
 
39
  qa_chain = RetrievalQA.from_chain_type(
40
  llm=llm,
41
  retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
42
  )
43
 
44
- # 3. 聊天交互逻辑
45
- def predict(message, history):
46
  try:
47
- # AI 会先搜索 knowledge.txt,再结合大模型给出回答
48
  response = qa_chain.invoke({"query": message})
49
  return response["result"]
50
  except Exception as e:
51
- return f"大脑思考时遇到了一点问题,请检查 Token 或网络详情: {str(e)}"
52
-
53
- # 4. 搭建前端界面
54
- demo = gr.ChatInterface(
55
- predict,
56
- title="我的全能私有大脑",
57
- description="我已经读取了你的知识库。你可以问我关于你自己的事,也可以和我聊任何话题。",
58
- examples=["你是谁?", "解释一下知识库里的核心内容", "帮我写一个 Python 脚本"],
59
- theme="soft"
60
- )
61
 
62
- if __name__ == "__main__":
63
- demo.launch()
 
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.document_loaders import TextLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
+ # 核心修正:使用新的导入路径
8
+ from langchain.chains.retrieval_qa.base import RetrievalQA
9
 
10
+ # 1. 初始化引擎
 
11
  llm = HuggingFaceEndpoint(
12
  repo_id="Qwen/Qwen2.5-7B-Instruct",
13
+ huggingfacehub_api_token=os.getenv("HF_TOKEN")
 
14
  )
15
 
16
+ # 2. 检查并加载知识库
17
+ if not os.path.exists("knowledge.txt"):
18
+ with open("knowledge.txt", "w", encoding="utf-8") as f:
19
+ f.write("私有大脑已上线。")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ loader = TextLoader("knowledge.txt", encoding="utf-8")
22
+ docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(loader.load())
23
+ # 使用中文优化的向量模型
24
+ embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
25
+ vectorstore = FAISS.from_documents(docs, embeddings)
26
+
27
+ # 3. 创建问答链条
28
  qa_chain = RetrievalQA.from_chain_type(
29
  llm=llm,
30
  retriever=vectorstore.as_retriever(search_kwargs={"k": 3})
31
  )
32
 
33
+ # 4. 聊天函数
34
+ def chat(message, history):
35
  try:
 
36
  response = qa_chain.invoke({"query": message})
37
  return response["result"]
38
  except Exception as e:
39
+ return f"大脑响应异常,请检查 Token。错误原因: {str(e)}"
 
 
 
 
 
 
 
 
 
40
 
41
+ gr.ChatInterface(chat, title="全能私有大脑 v2.0").launch()