import os

import gradio as gr
import langchain
import langchain_community
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain_community.llms import LlamaCpp
from llama_cpp import Llama
# Local path the GGUF weights are loaded from by LlamaCpp below.
space_model_path = "./model/llama-3.2-1b-instruct-q8_0.gguf"
# Hugging Face repository id and file name of the quantized model.
model_path = "hugging-quants/Llama-3.2-1B-Instruct-Q8_0-GGUF"
file_name = "llama-3.2-1b-instruct-q8_0.gguf"

# Llama.from_pretrained downloads the file into ./model but also builds a
# complete Llama instance, which is thrown away here because LlamaCpp loads
# the weights itself. Skip the call when the file is already on disk so the
# model is not loaded twice on every start.
if not os.path.exists(space_model_path):
    Llama.from_pretrained(
        repo_id=model_path,
        filename=file_name,
        local_dir="./model",
    )

# System prompt placed at the start of every conversation sent to the model.
system_message = "You are a helpful assistant who acts like a pirate."

# LangChain wrapper around the local model; stream_response streams from it.
llm = LlamaCpp(
    model_path=space_model_path,
    temperature=0.8,
    max_tokens=250,
    top_p=0.6,
    verbose=True,
)
def _history_text(content):
    """Return the plain text of one history entry, or None if it has none."""
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): some Gradio versions appear to deliver content as a
        # list of parts such as {"type": "text", "text": ...}; this is a
        # best-effort extraction of the text parts - confirm against the
        # installed Gradio version.
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "".join(texts) if texts else None
    return None


def _history_to_messages(history):
    """Convert Gradio chat history into LangChain Human/AI messages.

    Accepts both the legacy ``[user, assistant]`` pair format and the
    ``{"role": ..., "content": ...}`` dict format. Entries without usable
    text (``None``, files, unknown roles) are skipped.
    """
    converted = []
    for turn in history or []:
        if isinstance(turn, dict):
            text = _history_text(turn.get("content"))
            if text is None:
                continue
            role = turn.get("role")
            if role == "user":
                converted.append(HumanMessage(content=text))
            elif role == "assistant":
                converted.append(AIMessage(content=text))
            continue

        # Legacy pair format; a malformed entry still raises on unpacking,
        # exactly as it did before.
        human, ai = turn
        human_text = _history_text(human)
        if human_text is not None:
            converted.append(HumanMessage(content=human_text))
        ai_text = _history_text(ai)
        if ai_text is not None:
            converted.append(AIMessage(content=ai_text))
    return converted


def stream_response(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Used as the callback of the ``gr.ChatInterface`` defined in this file.

    Args:
        message: The newest user message. If ``None``, nothing is yielded.
        history: Earlier turns of the conversation, either as
            ``[user, assistant]`` pairs or as role/content dicts.

    Yields:
        The reply accumulated so far, once per chunk produced by
        ``llm.stream``.
    """
    print(f"Input: {message}. History: {history}\n")

    # The system prompt always comes first, then the earlier turns.
    prompt_messages = [SystemMessage(content=system_message)]
    prompt_messages.extend(_history_to_messages(history))

    if message is None:
        return

    prompt_messages.append(HumanMessage(content=message))
    partial_message = ""
    for chunk in llm.stream(prompt_messages):
        # Chunks are concatenated as strings; each yield is the full reply so far.
        partial_message += chunk
        yield partial_message
# Input box handed to the chat interface.
chat_textbox = gr.Textbox(
    placeholder="Send to the LLM...",
    container=False,
    autoscroll=True,
    scale=7,
)

# Chat UI whose replies come from stream_response.
demo_interface = gr.ChatInterface(fn=stream_response, textbox=chat_textbox)

# Start the app with share=False and debug=True.
demo_interface.launch(share=False, debug=True)