|
|
from langgraph.graph import StateGraph, START, END |
|
|
from typing import TypedDict, Annotated |
|
|
from langchain_groq import ChatGroq |
|
|
from langchain_openai import OpenAIEmbeddings, ChatOpenAI |
|
|
from langchain_core.messages import BaseMessage, HumanMessage, SystemMessage |
|
|
from langgraph.graph.message import add_messages |
|
|
from langchain_core.tools import tool |
|
|
from dotenv import load_dotenv |
|
|
from langgraph.checkpoint.memory import MemorySaver |
|
|
import os |
|
|
from langchain_community.vectorstores import FAISS |
|
|
from langchain_community.tools.tavily_search import TavilySearchResults |
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
FAISS_DB_PATH = "vectorstore/db_faiss" |
|
|
embeddings = OpenAIEmbeddings(model='text-embedding-3-small') |
|
|
|
|
|
|
|
|
db = None |
|
|
|
|
|
def reload_vector_store(): |
|
|
""" |
|
|
Reloads the FAISS index from disk. |
|
|
Call this function after a new file is ingested. |
|
|
""" |
|
|
global db |
|
|
if os.path.exists(FAISS_DB_PATH): |
|
|
print(f"Loading FAISS from {FAISS_DB_PATH}...") |
|
|
try: |
|
|
db = FAISS.load_local( |
|
|
FAISS_DB_PATH, |
|
|
embeddings, |
|
|
allow_dangerous_deserialization=True |
|
|
) |
|
|
print("Vector store loaded successfully.") |
|
|
except Exception as e: |
|
|
print(f"Error loading vector store: {e}") |
|
|
db = None |
|
|
else: |
|
|
print("Warning: No Vector DB found. Please run ingestion first.") |
|
|
db = None |
|
|
|
|
|
|
|
|
reload_vector_store() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Ragbot_State(TypedDict): |
|
|
query : str |
|
|
context : list[str] |
|
|
metadata : list[dict] |
|
|
RAG : bool |
|
|
web_search : bool |
|
|
model_name : str |
|
|
web_context : str |
|
|
response : Annotated[list[BaseMessage], add_messages] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
llm_kimi2 = ChatGroq(model='moonshotai/kimi-k2-instruct-0905', streaming=True, temperature=0.4) |
|
|
llm_gpt = ChatOpenAI(model='gpt-4.1-nano', streaming=True, temperature=0.2) |
|
|
llm_gpt_oss = ChatGroq(model='openai/gpt-oss-120b', streaming=True, temperature=0.3) |
|
|
llm_lamma4 = ChatGroq(model='meta-llama/llama-4-scout-17b-16e-instruct', streaming=True, temperature=0.5) |
|
|
llm_qwen3 = ChatGroq(model='qwen/qwen3-32b', streaming=True, temperature=0.5) |
|
|
|
|
|
def get_llm(model_name: str): |
|
|
if model_name == "kimi2": |
|
|
return llm_kimi2 |
|
|
elif model_name == "gpt": |
|
|
return llm_gpt |
|
|
elif model_name == "gpt_oss": |
|
|
return llm_gpt_oss |
|
|
elif model_name == "lamma4": |
|
|
return llm_lamma4 |
|
|
elif model_name == "qwen3": |
|
|
return llm_qwen3 |
|
|
else: |
|
|
return llm_gpt |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@tool |
|
|
def tavily_search(query: str) -> dict: |
|
|
""" |
|
|
Perform a real-time web search using Tavily. |
|
|
""" |
|
|
try: |
|
|
search = TavilySearchResults(max_results=2) |
|
|
results = search.run(query) |
|
|
return {"query": query, "results": results} |
|
|
except Exception as e: |
|
|
return {"error": str(e)} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_web_context(state: Ragbot_State): |
|
|
user_query = state["query"] |
|
|
|
|
|
enriched_query = f""" |
|
|
Fetch the latest, accurate, and up-to-date information about: |
|
|
{user_query} |
|
|
|
|
|
Focus on: |
|
|
- recent news |
|
|
- official announcements |
|
|
- verified sources |
|
|
- factual data |
|
|
""" |
|
|
|
|
|
web_result = tavily_search.run(enriched_query) |
|
|
|
|
|
return { |
|
|
"web_context": str(web_result) |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@tool |
|
|
def faiss_search(query: str) -> str: |
|
|
"""Search the FAISS vectorstore and return relevant documents.""" |
|
|
|
|
|
if db is None: |
|
|
return "No documents have been uploaded yet.", [] |
|
|
|
|
|
try: |
|
|
results = db.similarity_search(query, k=3) |
|
|
context = "\n\n".join([doc.page_content for doc in results]) |
|
|
metadata = [doc.metadata for doc in results] |
|
|
return context, metadata |
|
|
except Exception as e: |
|
|
return f"Error searching vector store: {str(e)}", [] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def router(state: Ragbot_State): |
|
|
if state["RAG"]: |
|
|
return "fetch_context" |
|
|
|
|
|
if state["web_search"]: |
|
|
return "fetch_web_context" |
|
|
|
|
|
return "chat" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def fetch_context(state: Ragbot_State): |
|
|
query = state["query"] |
|
|
context, metadata = faiss_search.invoke({"query": query}) |
|
|
return {"context": [context], "metadata": [metadata]} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SYSTEM_PROMPT = SystemMessage( |
|
|
content=""" |
|
|
You are **Cortex AI**, an advanced multi-capability conversational and reasoning assistant created by Junaid. |
|
|
|
|
|
Cortex AI is designed to be: |
|
|
- Highly intelligent, reliable, and context-aware |
|
|
- Capable of natural human-like conversation as well as deep technical reasoning |
|
|
- Adaptive across multiple domains including AI, Machine Learning, Data Science, Software Engineering, and general knowledge |
|
|
|
|
|
You represent a next-generation AI system with the ability to: |
|
|
- Engage in friendly, natural, and professional conversations |
|
|
- Answer questions using your own knowledge when no external context is required |
|
|
- Leverage provided context accurately when available |
|
|
- Dynamically utilize web search when real-time or up-to-date information is needed |
|
|
- Utilize retrieval-based knowledge (RAG) when document or database context is provided |
|
|
- Seamlessly switch between casual chat, technical explanation, and problem-solving modes |
|
|
|
|
|
About your creator: |
|
|
You were built by **Junaid**, an AI & Machine Learning engineer and student specializing in Data Science, Machine Learning, Deep Learning, NLP, Computer Vision, and AI-driven systems. You reflect his focus on practical, production-ready AI solutions and high engineering standards. |
|
|
|
|
|
Core Behavioral Guidelines: |
|
|
|
|
|
1. **Accuracy First** |
|
|
- Always prioritize correctness over speed or verbosity. |
|
|
- If information is uncertain, incomplete, or unavailable, clearly state that. |
|
|
- Never hallucinate or fabricate facts. |
|
|
|
|
|
2. **Context-Aware Intelligence** |
|
|
- If relevant context is provided, treat it as the primary source of truth. |
|
|
- If context is not relevant or not provided, rely on your general knowledge. |
|
|
- Do not mix unrelated context into answers. |
|
|
|
|
|
3. **Adaptive Intelligence** |
|
|
- If web search is enabled, use it for real-time, current, or dynamic information. |
|
|
- If retrieval (RAG) is enabled, use it for document-based or knowledge-base questions. |
|
|
- If neither is enabled or required, respond directly using your internal knowledge. |
|
|
|
|
|
4. **Natural & Professional Communication** |
|
|
- Maintain a clear, human-like, and engaging conversational tone. |
|
|
- Be concise where possible, detailed where necessary. |
|
|
- Avoid robotic, overly verbose, or overly casual language. |
|
|
|
|
|
5. **Multi-Tasking Excellence** |
|
|
- Handle technical explanations, coding help, architectural guidance, reasoning tasks, and casual conversation equally well. |
|
|
- Break down complex concepts into simple, understandable explanations when needed. |
|
|
|
|
|
6. **No Internal Exposure** |
|
|
- Never mention internal implementation details such as embeddings, vector stores, pipelines, system architecture, or model orchestration. |
|
|
- Focus only on delivering the best possible user-facing response. |
|
|
|
|
|
7. **User-Centric Approach** |
|
|
- Be helpful, supportive, and solution-oriented. |
|
|
- Proactively guide the user when appropriate. |
|
|
- Align responses with the user’s skill level and intent. |
|
|
|
|
|
You are not just a chatbot. |
|
|
You are **Cortex AI** — a powerful, intelligent, and reliable AI assistant built to deliver high-quality, real-world value. |
|
|
""" |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def chat(state:Ragbot_State): |
|
|
query = state['query'] |
|
|
context = state['context'] |
|
|
metadata = state['metadata'] |
|
|
web_context = state['web_context'] |
|
|
model_name = state.get('model_name', 'gpt') |
|
|
|
|
|
history = state.get("response", []) |
|
|
|
|
|
|
|
|
prompt = f""" |
|
|
You are an expert assistant designed to answer user questions using multiple information sources. |
|
|
|
|
|
Source Priority Rules (STRICT): |
|
|
1. **Conversation History**: Check if the answer was provided in previous messages (e.g., user's name, previous topics). |
|
|
2. If the provided Context contains the answer, use ONLY the Context. |
|
|
3. If the Context does not contain the answer and Web Context is available, use the Web Context. |
|
|
4. If neither Context nor Web Context contains the answer, use your general knowledge. |
|
|
5. Do NOT invent or hallucinate facts. |
|
|
6. If the answer cannot be determined, clearly say so. |
|
|
|
|
|
User Question: |
|
|
{query} |
|
|
|
|
|
Retrieved Context (Vector Database): |
|
|
{context} |
|
|
|
|
|
Metadata: |
|
|
{metadata} |
|
|
|
|
|
Web Context (Real-time Search): |
|
|
{web_context} |
|
|
|
|
|
Final Answer: |
|
|
""" |
|
|
|
|
|
selected_llm = get_llm(model_name) |
|
|
messages = [SYSTEM_PROMPT] + history + [HumanMessage(content=prompt)] |
|
|
response = selected_llm.invoke(messages) |
|
|
return { |
|
|
'response': [ |
|
|
HumanMessage(content=query), |
|
|
response |
|
|
] |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
memory = MemorySaver() |
|
|
graph = StateGraph(Ragbot_State) |
|
|
|
|
|
graph.add_node("fetch_context", fetch_context) |
|
|
graph.add_node("fetch_web_context", fetch_web_context) |
|
|
graph.add_node("chat", chat) |
|
|
|
|
|
graph.add_conditional_edges( |
|
|
START, |
|
|
router, |
|
|
{ |
|
|
"fetch_context": "fetch_context", |
|
|
"fetch_web_context": "fetch_web_context", |
|
|
"chat": "chat" |
|
|
} |
|
|
) |
|
|
|
|
|
graph.add_edge("fetch_context", "chat") |
|
|
graph.add_edge("fetch_web_context", "chat") |
|
|
graph.add_edge("chat", END) |
|
|
|
|
|
app = graph.compile(checkpointer=memory) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def ask_bot(query: str, use_rag: bool = False, use_web: bool = False, thread_id: str = "1"): |
|
|
config = {"configurable": {"thread_id": thread_id}} |
|
|
inputs = { |
|
|
"query": query, |
|
|
"RAG": use_rag, |
|
|
"web_search": use_web, |
|
|
"context": [], |
|
|
"metadata": [], |
|
|
"web_context": "", |
|
|
} |
|
|
|
|
|
result = app.invoke(inputs, config=config) |
|
|
last_message = result['response'][-1] |
|
|
|
|
|
return last_message.content |
|
|
|
|
|
|
|
|
"""print("--- Conversation 1 ---") |
|
|
# User says hello and gives name |
|
|
response = ask_bot("Hi, my name is Junaid", thread_id="session_A") |
|
|
print(f"Bot: {response}") |
|
|
|
|
|
# User asks for name (RAG and Web are OFF) |
|
|
response = ask_bot("What is my name?", thread_id="session_A") |
|
|
print(f"Bot: {response}")""" |