FlashCode-Lab committed on
Commit
6307063
·
verified ·
1 Parent(s): ddd0536

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -55
app.py CHANGED
@@ -1,65 +1,52 @@
1
- import os
2
  import gradio as gr
3
  from huggingface_hub import InferenceClient
4
- from langchain_huggingface import HuggingFaceEmbeddings
5
- from langchain_community.vectorstores import FAISS
6
- from langchain_community.document_loaders import TextLoader
7
- from langchain_text_splitters import CharacterTextSplitter
8
 
9
- # 1. 初始化官方推理客户端 (直接绕过 LangChain 不兼容的 Endpoint)
10
- client = InferenceClient(
11
- model="Qwen/Qwen2.5-7B-Instruct",
12
- token=os.getenv("HF_TOKEN")
13
- )
14
 
15
- # 2. 知识库加载与向量化
16
- def init_vector_db():
17
- if not os.path.exists("knowledge.txt"):
18
- with open("knowledge.txt", "w", encoding="utf-8") as f:
19
- f.write("私有大脑已上线。")
20
-
21
- loader = TextLoader("knowledge.txt", encoding="utf-8")
22
- # 按照语义切分,防止回答断章取义
23
- text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
24
- docs = text_splitter.split_documents(loader.load())
25
-
26
- # 使用轻量级中文向量模型
27
- embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-zh-v1.5")
28
- return FAISS.from_documents(docs, embeddings)
29
 
30
- vector_db = init_vector_db()
31
 
32
- # 3. 核心聊天逻辑
33
- def chat_fn(message, history):
34
- try:
35
- # 第一步:在知识库中寻找最相关的片段
36
- docs = vector_db.similarity_search(message, k=3)
37
- context = "\n".join([doc.page_content for doc in docs])
38
-
39
- # 第二步:构建提示词(Prompt)
40
- prompt = f"你是全能私有大脑。请参考以下已知信息回答用户问题。\n\n已知信息:\n{context}\n\n问题:{message}\n回答:"
41
-
42
- # 第三步:使用官方最新方法进行推理
43
- response = ""
44
- for token in client.chat_completion(
45
- messages=[{"role": "user", "content": prompt}],
46
- max_tokens=500,
47
- stream=True
48
- ):
49
- token_str = token.choices[0].delta.content
50
- if token_str:
51
- response += token_str
52
- return response
 
 
 
 
 
 
53
 
54
- except Exception as e:
55
- return f"大脑响应异常,请尝试在 Settings 中 Factory Restart。错误详情: {str(e)}"
 
 
 
56
 
57
- # 4. 界面设计
58
- demo = gr.ChatInterface(
59
- chat_fn,
60
- title="全能私有大脑 v3.0 (终极稳定版)",
61
- description="已彻底解决 InferenceClient 兼容性问题。现在可以流畅调取私有知识库了。"
62
- )
63
 
64
  if __name__ == "__main__":
65
- demo.launch()
 
 
1
  import gradio as gr
2
  from huggingface_hub import InferenceClient
 
 
 
 
3
 
4
+ # Initialize the inference client. NOTE(review): "your-model-id" looks like a placeholder — confirm the real model id before deploying.
5
+ client = InferenceClient("your-model-id")
 
 
 
6
 
7
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Stream a chat completion for ``message``, yielding the growing reply.

    Args:
        message: The latest user message.
        history: Earlier turns as ``[user_text, assistant_text]`` pairs.
            Empty or ``None`` halves of a pair are skipped; ``None`` history
            is treated as no history.
        system_message: Content of the leading system message.
        max_tokens: Forwarded to ``client.chat_completion``.
        temperature: Forwarded to ``client.chat_completion``.
        top_p: Forwarded to ``client.chat_completion``.

    Yields:
        The accumulated assistant reply each time a streamed chunk
        contributes text.
    """
    messages = [{"role": "system", "content": system_message}]
    for turn in history or []:
        if turn[0]:
            messages.append({"role": "user", "content": turn[0]})
        if turn[1]:
            messages.append({"role": "assistant", "content": turn[1]})

    messages.append({"role": "user", "content": message})

    response = ""
    # The loop variable is deliberately not called `message`: reusing that
    # name would shadow the user-message parameter.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        # Some streamed chunks (e.g. the role-only or final chunk) carry no
        # text; `response += None` would raise TypeError.
        if not token:
            continue
        response += token
        yield response
27
+
28
def _stream_into_chat(message, history, system_message, max_tokens, temperature, top_p):
    """Adapt ``respond``'s streamed text into the history list a Chatbot renders.

    ``respond`` yields the reply as a plain string, but the output component
    is a ``gr.Chatbot``, which expects the full list of turns. Each partial
    reply is therefore appended to the existing history as a new
    ``[user, assistant]`` pair.
    """
    past_turns = list(history or [])
    for partial in respond(message, past_turns, system_message, max_tokens, temperature, top_p):
        yield past_turns + [[message, partial]]


# Two-column UI layout: controls on the left, chat on the right.
with gr.Blocks(theme=gr.themes.Soft(), css=".gradio-container {background-color: #0b0f19;}") as demo:
    gr.Markdown("# 全能私有大脑 v4.0 (Ultra Stable)")

    with gr.Row():
        # Left column: system prompt and sampling controls.
        with gr.Column(scale=1):
            system_input = gr.Textbox(value="你是一个资深安全审计专家...", label="系统指令")
            with gr.Accordion("高级参数设置", open=False):
                temp = gr.Slider(minimum=0.1, maximum=1.0, value=0.7, step=0.1, label="Temperature")
                tokens = gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max Tokens")
                # respond() requires top_p; the original wiring never supplied it.
                top_p = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")

            # Live preview area (e.g. for code execution output or a vulnerability topology).
            render_box = gr.HTML("✨ 实时安全分析渲染就绪...")

        # Right column: chat window and input box.
        with gr.Column(scale=2):
            # NOTE(review): the avatar URL looks like a placeholder — confirm before deploying.
            chat = gr.Chatbot(height=600, show_copy_button=True, avatar_images=(None, "https://path-to-your-icon.png"))
            msg = gr.Textbox(placeholder="输入指令进行分析...", container=False)
            clear = gr.ClearButton([msg, chat])

    # Input order must match the handler's parameters:
    # (message, history, system_message, max_tokens, temperature, top_p).
    msg.submit(_stream_into_chat, [msg, chat, system_input, tokens, temp, top_p], [chat])

if __name__ == "__main__":
    demo.launch()