FlashCode-Lab committed on
Commit
ddd0536
·
verified ·
1 Parent(s): 2901b24

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -34
app.py CHANGED
@@ -1,59 +1,64 @@
1
  import os
2
  import gradio as gr
3
- from langchain_huggingface import HuggingFaceEndpoint, HuggingFaceEmbeddings
 
4
  from langchain_community.vectorstores import FAISS
5
  from langchain_community.document_loaders import TextLoader
6
  from langchain_text_splitters import CharacterTextSplitter
7
- from langchain.chains import RetrievalQA
8
 
9
- # 1. 配置大模型 - 换一种更稳健连接方式
10
- # 我们明确指定使用异步/同步通用的传输协议
11
- llm = HuggingFaceEndpoint(
12
- repo_id="Qwen/Qwen2.5-7B-Instruct",
13
- huggingfacehub_api_token=os.getenv("HF_TOKEN"),
14
- timeout=300,
15
- task="text-generation" # 明确任务类型
16
  )
17
 
18
- # 2. 知识库加载逻辑
19
- def load_kb():
20
  if not os.path.exists("knowledge.txt"):
21
  with open("knowledge.txt", "w", encoding="utf-8") as f:
22
- f.write("私有大脑知识库就绪。")
23
 
24
  loader = TextLoader("knowledge.txt", encoding="utf-8")
25
- docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_documents(loader.load())
 
 
26
 
27
- # 使用中文优化的 Embedding 模型
28
  embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
29
- vectorstore = FAISS.from_documents(docs, embeddings)
30
- return vectorstore
31
 
32
- # 初始化问答链
33
- vs = load_kb()
34
- # 注意:这里我们使用最新的 invoke 接口
35
- qa_chain = RetrievalQA.from_chain_type(
36
- llm=llm,
37
- retriever=vs.as_retriever(search_kwargs={"k": 3})
38
- )
39
 
40
- # 3. 聊天处理函数
41
  def chat_fn(message, history):
42
  try:
43
- # 使用 invoke 替代旧直接调用,解决 InferenceClient 兼容性
44
- result = qa_chain.invoke({"query": message})
45
- return result["result"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  except Exception as e:
47
- # 如果 Token 权限有问题,给出清晰提示
48
- if "401" in str(e):
49
- return "错误:Token 无效或权限不足,请检查 Settings 里的 HF_TOKEN。"
50
- return f"大脑响应异常:{str(e)}"
51
 
52
- # 4. 构建前端界面
53
  demo = gr.ChatInterface(
54
  chat_fn,
55
- title="全能私有大脑 v2.5",
56
- description="针对最新 API 进行了深度优化,现在可以正常调取知识库了。"
57
  )
58
 
59
  if __name__ == "__main__":
 
1
  import os
2
  import gradio as gr
3
+ from huggingface_hub import InferenceClient
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain_community.vectorstores import FAISS
6
  from langchain_community.document_loaders import TextLoader
7
  from langchain_text_splitters import CharacterTextSplitter
 
8
 
9
# 1. Official Hugging Face inference client — used directly, bypassing the
#    LangChain endpoint wrapper that proved incompatible with this Space.
# NOTE(review): assumes the HF_TOKEN secret is configured; os.getenv returns
# None otherwise and requests would run unauthenticated — confirm in Settings.
_MODEL_ID = "Qwen/Qwen2.5-7B-Instruct"

client = InferenceClient(model=_MODEL_ID, token=os.getenv("HF_TOKEN"))
14
 
15
# 2. Knowledge-base loading and vectorisation.
def init_vector_db(kb_path="knowledge.txt", seed_text="私有大脑已上线。"):
    """Build a FAISS vector store from a local knowledge text file.

    Creates ``kb_path`` with ``seed_text`` if it does not exist yet, splits
    the file into overlapping chunks, embeds them with a lightweight
    Chinese embedding model, and returns the populated FAISS index.

    Args:
        kb_path: Path of the UTF-8 knowledge file (created on demand).
            Defaults to "knowledge.txt" for backward compatibility.
        seed_text: Content written to a freshly created knowledge file.

    Returns:
        A FAISS vector store ready for similarity search.
    """
    # Bootstrap an empty knowledge base so TextLoader never fails on a
    # missing file (e.g. on the very first start of the Space).
    if not os.path.exists(kb_path):
        with open(kb_path, "w", encoding="utf-8") as f:
            f.write(seed_text)

    loader = TextLoader(kb_path, encoding="utf-8")
    # Overlapping chunks preserve sentence context so retrieved passages
    # are not cut mid-thought.
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    docs = text_splitter.split_documents(loader.load())

    # Lightweight Chinese-optimised embedding model.
    embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
    return FAISS.from_documents(docs, embeddings)


# Build the vector store once at start-up; reused by every chat request.
vector_db = init_vector_db()
 
 
 
 
 
 
31
 
32
# 3. Core chat logic.
def chat_fn(message, history):
    """Answer *message* grounded in the local knowledge base.

    The *history* argument is required by gr.ChatInterface but is not
    used: every turn is answered independently from retrieved context.
    """
    try:
        # Step 1: retrieve the three most relevant knowledge-base chunks.
        hits = vector_db.similarity_search(message, k=3)
        context = "\n".join(hit.page_content for hit in hits)

        # Step 2: assemble the grounded prompt.
        prompt = f"你是全能私有大脑。请参考以下已知信息回答用户问题。\n\n已知信息:\n{context}\n\n问题:{message}\n回答:"

        # Step 3: stream the completion and collect the pieces.
        stream = client.chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=500,
            stream=True,
        )
        pieces = []
        for event in stream:
            delta = event.choices[0].delta.content
            if delta:
                pieces.append(delta)
        return "".join(pieces)

    except Exception as e:
        # Top-level UI boundary: surface the error to the user instead of
        # crashing the Gradio app.
        return f"大脑响应异常,请尝试在 Settings 中 Factory Restart。错误详情: {str(e)}"
 
 
 
56
 
57
# 4. UI definition: a single chat panel wired to chat_fn.
demo = gr.ChatInterface(
    fn=chat_fn,
    title="全能私有大脑 v3.0 (终极稳定版)",
    description="已彻底解决 InferenceClient 兼容性问题。现在可以流畅调取私有知识库了。",
)
63
 
64
  if __name__ == "__main__":