FlashCode-Lab committed on
Commit
ddd0536
·
verified ·
1 Parent(s): 2901b24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -34
app.py CHANGED
@@ -1,59 +1,64 @@
1
  import os
2
  import gradio as gr
3
- from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
 
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.document_loaders import TextLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
- from langchain.chains import RetrievalQA
8
 
9
- # 1. 配置大模型 - 换一种更稳健连接方式
10
- # 我们明确指定使用异步/同步通用的传输协议
11
- llm = HuggingFaceEndpoint(
12
- repo_id="Qwen/Qwen2.5-7B-Instruct",
13
- huggingfacehub_api_token=os.getenv("HF_TOKEN"),
14
- timeout=300,
15
- task="text-generation" # 明确任务类型
16
  )
17
 
18
- # 2. 知识库加载逻辑
19
- def load_kb():
20
  if not os.path.exists("knowledge.txt"):
21
  with open("knowledge.txt", "w", encoding="utf-8") as f:
22
- f.write("私有大脑知识库就绪。")
23
 
24
  loader = TextLoader("knowledge.txt", encoding="utf-8")
25
- docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(loader.load())
 
 
26
 
27
- # 使用中文优化的 Embedding 模型
28
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
29
- vectorstore = FAISS.from_documents(docs, embeddings)
30
- return vectorstore
31
 
32
- # 初始化问答链
33
- vs = load_kb()
34
- # 注意:这里我们使用最新的 invoke 接口
35
- qa_chain = RetrievalQA.from_chain_type(
36
- llm=llm,
37
- retriever=vs.as_retriever(search_kwargs={"k": 3})
38
- )
39
 
40
- # 3. 聊天处理函数
41
  def chat_fn(message, history):
42
  try:
43
- # 使用 invoke 替代旧直接调用,解决 InferenceClient 兼容性
44
- result = qa_chain.invoke({"query": message})
45
- return result["result"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  except Exception as e:
47
- # 如果 Token 权限有问题,给出清晰提示
48
- if "401" in str(e):
49
- return "错误:Token 无效或权限不足,请检查 Settings 里的 HF_TOKEN。"
50
- return f"大脑响应异常:{str(e)}"
51
 
52
- # 4. 构建前端界面
53
  demo = gr.ChatInterface(
54
  chat_fn,
55
- title="全能私有大脑 v2.5",
56
- description="针对最新 API 进行了深度优化,现在可以正常调取知识库了。"
57
  )
58
 
59
  if __name__ == "__main__":
 
1
  import os
2
  import gradio as gr
3
+ from huggingface_hub import InferenceClient
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_community.document_loaders import TextLoader
7
  from langchain_text_splitters import CharacterTextSplitter
 
8
 
9
# 1. Official Hugging Face inference client — used directly, bypassing the
#    LangChain endpoint wrapper that proved incompatible with this Space.
# NOTE(review): assumes the HF_TOKEN secret is configured; os.getenv returns
# None otherwise and requests would run unauthenticated — confirm in Settings.
_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

client = InferenceClient(model=_MODEL_ID, token=os.getenv("HF_TOKEN"))
14
 
15
# 2. Knowledge-base loading and vectorisation.
def init_vector_db(kb_path="knowledge.txt", seed_text="私有大脑已上线。"):
    """Build a FAISS vector store from a local knowledge text file.

    Creates ``kb_path`` with ``seed_text`` if it does not exist yet, splits
    the file into overlapping chunks, embeds them with a lightweight
    Chinese embedding model, and returns the populated FAISS index.

    Args:
        kb_path: Path of the UTF-8 knowledge file (created on demand).
            Defaults to "knowledge.txt" for backward compatibility.
        seed_text: Content written to a freshly created knowledge file.

    Returns:
        A FAISS vector store ready for similarity search.
    """
    # Bootstrap an empty knowledge base so TextLoader never fails on a
    # missing file (e.g. on the very first start of the Space).
    if not os.path.exists(kb_path):
        with open(kb_path, "w", encoding="utf-8") as f:
            f.write(seed_text)

    loader = TextLoader(kb_path, encoding="utf-8")
    # Overlapping chunks preserve sentence context so retrieved passages
    # are not cut mid-thought.
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(loader.load())

    # Lightweight Chinese-optimised embedding model.
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
    return FAISS.from_documents(docs, embeddings)


# Build the vector store once at start-up; reused by every chat request.
vector_db = init_vector_db()
 
 
 
 
 
 
31
 
32
# 3. Core chat logic.
def chat_fn(message, history):
    """Answer *message* grounded in the local knowledge base.

    The *history* argument is required by gr.ChatInterface but is not
    used: every turn is answered independently from retrieved context.
    """
    try:
        # Step 1: retrieve the three most relevant knowledge-base chunks.
        hits = vector_db.similarity_search(message, k=3)
        context = "\n".join(hit.page_content for hit in hits)

        # Step 2: assemble the grounded prompt.
        prompt = f"你是全能私有大脑。请参考以下已知信息回答用户问题。\n\n已知信息:\n{context}\n\n问题:{message}\n回答:"

        # Step 3: stream the completion and collect the pieces.
        stream = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
            stream=True,
        )
        pieces = []
        for event in stream:
            delta = event.choices[0].delta.content
            if delta:
                pieces.append(delta)
        return "".join(pieces)

    except Exception as e:
        # Top-level UI boundary: surface the error to the user instead of
        # crashing the Gradio app.
        return f"大脑响应异常,请尝试在 Settings 中 Factory Restart。错误详情: {str(e)}"
 
 
 
56
 
57
# 4. UI definition: a single chat panel wired to chat_fn.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="全能私有大脑 v3.0 (终极稳定版)",
    description="已彻底解决 InferenceClient 兼容性问题。现在可以流畅调取私有知识库了。",
)
63
 
64
  if __name__ == "__main__":