# BankBot-AI / ollama_integration.py
# mohsin-devs's picture
# update
# 1652384
import os
import requests
import json
import time
# Auto-detect backend once, at import time: Groq if an API key is set,
# otherwise the local Ollama server.
# The value is stripped so that a blank or newline-padded environment variable
# is not mistaken for a usable key (which would route every request to Groq
# and make each one fail).
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "").strip()
USE_GROQ = bool(GROQ_API_KEY)
# Lower-case terms that is_banking_query() looks for in a user's message to
# decide whether the message is banking-related.
BANKING_KEYWORDS = [
    # Accounts, products and money movement
    "account", "loan", "card", "balance", "transfer", "bank", "interest",
    "emi", "credit", "debit", "kyc", "upi", "cheque", "deposit", "fd", "rd",
    "branch", "ifsc", "transaction", "payment", "savings", "checking",
    "mortgage", "investment", "fintech", "wallet", "rate", "rates",
    # Support and contact requests
    "support", "customer", "care", "help", "contact", "helpline", "number",
    "call",
    # Paperwork and onboarding
    "document", "required", "identity", "proof", "open",
]
# System prompt template sent as the first ("system") message of every chat
# request built in this module.
# The only placeholder is {language}; the request builders fill it with
# SYSTEM_PROMPT.format(language=...). Because str.format is used, any literal
# curly braces added to this text must be doubled ({{ and }}).
SYSTEM_PROMPT = """You are BankBot, a professional banking assistant for Central Bank.
You ONLY answer banking-related questions. If the question is not related to banking, politely refuse.
Never answer questions about politics, sports, entertainment, coding, or personal advice.
CORE GUIDELINES:
1. ALWAYS communicate in {language}.
2. CONTEXT AWARENESS: Use the provided chat history to understand follow-up questions. For example, if the user asks "What is the interest rate?" and then "for home loan", you must understand they are asking about home loan interest rates.
3. CLARIFYING QUESTIONS: If a user's query is ambiguous (e.g., "how much?"), ask for missing details (e.g., "How much for what service? Balance check or loan EMI?").
4. CALCULATIONS: Perform financial calculations (EMI, Interest, Eligibility) if information is provided.
5. DOCUMENT ANALYSIS: If text from a PDF statement is provided, summarize it or answer specific questions about it.
6. PROFESSIONALISM: Maintain a helpful, formal, and secure tone."""
def is_banking_query(user_input, keywords=None):
    """Return True when the message mentions at least one banking keyword.

    Matching is case-insensitive. Keywords of four or more characters match
    anywhere in the text, so inflected and compound forms such as "loans",
    "banking" or "repayment" are still recognised. Keywords of three characters
    or fewer (e.g. "fd", "rd", "emi", "upi") must appear as a standalone word,
    optionally with a plural "s"; plain substring matching on such short terms
    wrongly accepted unrelated words like "keyboard" ("rd"), "stupid" ("upi")
    or "academic" ("emi").

    Args:
        user_input: The raw user message. None or an empty string yields False.
        keywords: Optional iterable of keywords to look for. Defaults to the
            module-level BANKING_KEYWORDS list.

    Returns:
        bool: True if any keyword matches, otherwise False.
    """
    import re

    if not user_input:
        return False
    if keywords is None:
        keywords = BANKING_KEYWORDS

    text = str(user_input).lower()
    # Alphanumeric runs only, so punctuation and underscores act as separators.
    words = set(re.findall(r"[^\W_]+", text))

    for keyword in keywords:
        term = keyword.lower()
        if len(term) <= 3:
            # Short terms are too ambiguous for substring matching.
            if term in words or term + "s" in words:
                return True
        elif term in text:
            return True
    return False
def get_active_backend():
    """Name the backend chosen at import time.

    Returns:
        str: "groq" when USE_GROQ is true, "ollama" otherwise.
    """
    if USE_GROQ:
        return "groq"
    return "ollama"
# ─── Groq AI Functions ────────────────────────────────────────────────────────
def get_groq_response(prompt, history=None, model="llama-3.3-70b-versatile", language="English"):
    """Request one complete (non-streamed) reply from the Groq chat API.

    Args:
        prompt: The user's current message.
        history: Optional list of earlier messages as dicts with "role" and
            "content" keys. Only the last ten are sent; entries with a missing
            or empty role/content are skipped.
        model: Groq model identifier.
        language: Language name substituted into the system prompt.

    Returns:
        The reply text, or None if anything fails (the error is printed).
    """
    try:
        # Imported inside the try so a missing groq package is reported like
        # any other failure instead of breaking module import.
        from groq import Groq

        groq_client = Groq(api_key=GROQ_API_KEY)

        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT.format(language=language)}
        ]
        recent_turns = history[-10:] if history else []
        for turn in recent_turns:
            role = turn.get("role")
            text = turn.get("content")
            if role and text:
                conversation.append({"role": role, "content": text})
        conversation.append({"role": "user", "content": prompt})

        completion = groq_client.chat.completions.create(
            model=model,
            messages=conversation,
            temperature=0.1,
            max_tokens=1000,
        )
        return completion.choices[0].message.content
    except Exception as e:
        print(f"Groq Error: {e}")
        return None
def stream_groq_response(prompt, history=None, model="llama-3.3-70b-versatile", language="English"):
    """Stream a reply from the Groq chat API piece by piece.

    Args:
        prompt: The user's current message.
        history: Optional list of earlier messages as dicts with "role" and
            "content" keys. Only the last ten are sent; entries with a missing
            or empty role/content are skipped.
        model: Groq model identifier.
        language: Language name substituted into the system prompt.

    Yields:
        Non-empty text fragments in arrival order. If anything fails, the
        error is printed and a single None is yielded as a failure marker.
    """
    try:
        # Imported inside the try so a missing groq package is reported like
        # any other failure instead of breaking module import.
        from groq import Groq

        groq_client = Groq(api_key=GROQ_API_KEY)

        conversation = [
            {"role": "system", "content": SYSTEM_PROMPT.format(language=language)}
        ]
        recent_turns = history[-10:] if history else []
        for turn in recent_turns:
            role = turn.get("role")
            text = turn.get("content")
            if role and text:
                conversation.append({"role": role, "content": text})
        conversation.append({"role": "user", "content": prompt})

        events = groq_client.chat.completions.create(
            model=model,
            messages=conversation,
            temperature=0.1,
            max_tokens=1000,
            stream=True,
        )
        for event in events:
            fragment = event.choices[0].delta.content
            # Skip chunks that carry no text (None or empty string).
            if not fragment:
                continue
            yield fragment
    except Exception as e:
        print(f"Groq Stream Error: {e}")
        yield None
# ─── Ollama Functions ─────────────────────────────────────────────────────────
def get_ollama_response(prompt, history=None, model="llama3:latest", language="English"):
    """Fetch one complete (non-streamed) reply from a local Ollama instance.

    Args:
        prompt: The user's current message.
        history: Optional list of earlier messages as dicts with "role" and
            "content" keys. Only the last ten are sent; entries with a missing
            or empty role/content are skipped.
        model: Ollama model tag. When the default "llama3:latest" fails, the
            request is retried once with the tag "llama3".
        language: Language name substituted into the system prompt.

    Returns:
        The reply text ("" if the response carries no message content), or
        None if the request fails (the error is printed).
    """
    url = "http://127.0.0.1:11434/api/chat"
    sys_prompt = SYSTEM_PROMPT.format(language=language)
    messages = [{"role": "system", "content": sys_prompt}]
    if history:
        for msg in history[-10:]:
            if msg.get("role") and msg.get("content"):
                messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": model,
        "messages": messages,
        "stream": False,
        "options": {"temperature": 0.1, "top_p": 0.9, "num_predict": 1000},
    }
    try:
        response = requests.post(url, json=payload, timeout=90)
        response.raise_for_status()
        data = response.json()
        return data.get("message", {}).get("content", "")
    except Exception as e:
        print(f"Ollama Error: {e}")
        if model == "llama3:latest":
            # Retry once under the alternate tag. The language must be passed
            # along, otherwise the retry silently falls back to English.
            return get_ollama_response(prompt, history, model="llama3", language=language)
        return None
def stream_ollama_response(prompt, history=None, model="llama3:latest", language="English"):
    """Stream a reply from a local Ollama instance chunk by chunk.

    Args:
        prompt: The user's current message.
        history: Optional list of earlier messages as dicts with "role" and
            "content" keys. Only the last ten are sent; entries with a missing
            or empty role/content are skipped.
        model: Ollama model tag. When the default "llama3:latest" fails before
            any text has been produced, the request is retried once with the
            tag "llama3".
        language: Language name substituted into the system prompt.

    Yields:
        Text fragments in arrival order. On failure the error is printed and,
        unless the retry described above applies, a single None is yielded as
        a failure marker.
    """
    url = "http://127.0.0.1:11434/api/chat"
    sys_prompt = SYSTEM_PROMPT.format(language=language)
    messages = [{"role": "system", "content": sys_prompt}]
    if history:
        for msg in history[-10:]:
            if msg.get("role") and msg.get("content"):
                messages.append({"role": msg["role"], "content": msg["content"]})
    messages.append({"role": "user", "content": prompt})
    payload = {
        "model": model,
        "messages": messages,
        "stream": True,
        "options": {"temperature": 0.1, "top_p": 0.9, "num_predict": 1000},
    }
    # Tracks whether the consumer has already received visible text, so a
    # mid-stream failure does not restart the answer and duplicate output.
    emitted = False
    try:
        # The context manager closes the streamed connection even when the
        # loop exits early or the consumer abandons the generator.
        with requests.post(url, json=payload, timeout=90, stream=True) as response:
            response.raise_for_status()
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                if 'message' in chunk and 'content' in chunk['message']:
                    content = chunk['message']['content']
                    if content:
                        emitted = True
                    yield content
                if chunk.get('done'):
                    break
    except Exception as e:
        print(f"Ollama Stream Error: {e}")
        if model == "llama3:latest" and not emitted:
            # Retry once under the alternate tag, keeping the requested language.
            yield from stream_ollama_response(prompt, history, model="llama3", language=language)
        else:
            yield None
# ─── Unified Wrapper Functions ────────────────────────────────────────────────
def get_ai_response(prompt, history=None, language="English"):
    """Return a complete reply from whichever backend is active.

    Delegates to get_groq_response when USE_GROQ is true and to
    get_ollama_response otherwise, using each backend's default model.
    """
    backend = get_groq_response if USE_GROQ else get_ollama_response
    return backend(prompt, history, language=language)
def stream_ai_response(prompt, history=None, language="English"):
    """Yield a streamed reply from whichever backend is active.

    Delegates to stream_groq_response when USE_GROQ is true and to
    stream_ollama_response otherwise, using each backend's default model.
    """
    streamer = stream_groq_response if USE_GROQ else stream_ollama_response
    yield from streamer(prompt, history, language=language)
def check_ollama_connection():
    """Report whether an AI backend is reachable.

    When Groq is the active backend (USE_GROQ is true) no probe is made and
    True is returned. Otherwise the local Ollama server's root URL is
    requested with a two-second timeout.

    Returns:
        bool: True if Groq is active or Ollama answered with HTTP 200; False
        if Ollama answered with another status or the request failed.
    """
    if USE_GROQ:
        return True
    try:
        response = requests.get("http://127.0.0.1:11434/", timeout=2)
    except requests.RequestException:
        # Connection refused, timeout, etc. A bare "except:" here would also
        # swallow KeyboardInterrupt and SystemExit.
        return False
    return response.status_code == 200