# SyedHasanCronosPMC's picture
# Update app.py
# f41e964 verified
"""
CronosPMC HR Enterprise Chatbot — Hugging Face Space deployment
================================================================
A production RAG chatbot grounded in 4 CronosPMC HR policy PDFs.
- LlamaIndex for retrieval
- Pinecone for the vector store (built once, reused across sessions)
- Gradio for the branded web UI
- OpenAI gpt-4o-mini + text-embedding-3-large
Required HF Space secrets:
OPENAI_API_KEY — OpenAI API key
PINECONE_API_KEY — Pinecone API key (free tier OK)
"""
import os
import time
import logging
import gradio as gr
from llama_index.core import (
Settings, VectorStoreIndex, SimpleDirectoryReader, StorageContext,
)
from llama_index.readers.file import PDFReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec
# Root logger: INFO and above, each line stamped with time and level.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    level=logging.INFO,
)
# Named logger used by the rest of this module.
log = logging.getLogger("cronospmc")
# ----------------------------------------------------------------------
# 1) Configuration — read secrets from HF Space environment
# ----------------------------------------------------------------------
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

# Fail fast at import time with an actionable message when a secret is
# missing or empty, rather than failing later inside an API client.
if not OPENAI_API_KEY:
    raise RuntimeError("Missing OPENAI_API_KEY — add it under Space Settings → Secrets.")
if not PINECONE_API_KEY:
    raise RuntimeError("Missing PINECONE_API_KEY — add it under Space Settings → Secrets.")

os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY  # LlamaIndex reads from env

# Pinecone index parameters.
INDEX_NAME = "cronospmc-hr"
# NOTE(review): presumably the output width of the embedding model set in
# the LlamaIndex settings section — confirm whenever that model changes.
DIMENSION = 3072
METRIC = "cosine"

# Local paths, relative to the working directory the app is started from.
DATA_DIR = "data"
LOGO_PATH = "assets/cronospmc_logo.png"
AVATAR_PATH = "assets/cronospmc_avatar.png"
# ----------------------------------------------------------------------
# 2) LlamaIndex global settings
# ----------------------------------------------------------------------
# Chat model and embedding model are assigned on the shared Settings object.
Settings.llm = OpenAI(
    temperature=0.2,
    model="gpt-4o-mini",
)
Settings.embed_model = OpenAIEmbedding(
    model="text-embedding-3-large",
)
# Chunking parameters: size first, then overlap (same order as before).
Settings.chunk_size, Settings.chunk_overlap = 600, 200
# ----------------------------------------------------------------------
# 3) System prompt — the bot's persona and guardrails
# ----------------------------------------------------------------------
# Passed to the chat engine as its system prompt. The text is sent to the
# model verbatim, so the dashes must be real Unicode characters, not
# mis-decoded byte sequences.
SYSTEM_PROMPT = """You are the CronosPMC HR Assistant, an internal chatbot for CronosPMC — a Dubai-based construction project management consulting and professional training practice serving GCC clients. Answer questions exclusively using the indexed CronosPMC policy documents (Employee Handbook, Leave Policy, Remote Work Policy, HR FAQ).
SCOPE — WHAT YOU ANSWER
Leave (annual, sick, compassionate, maternity/paternity, unpaid); working hours, attendance, core collaboration hours, Ramadan adjustments; remote and hybrid work eligibility, home office setup, travel rules; performance, probation, PIPs, disciplinary process; code of conduct, confidentiality, IP, conflict of interest; data protection and acceptable use of AI tools; harassment and grievance reporting channels; learning, training, and development entitlements; equipment, expenses, TOIL.
SCOPE — WHAT YOU DECLINE
Decline politely and redirect for: topics unrelated to CronosPMC HR; binding legal interpretation of UAE labour law (refer to HR and qualified counsel); individual case decisions ("Will my leave be approved?"); other employees' records, salary, or performance; visa/immigration/PRO matters; payroll, tax, or financial advice; medical advice.
SENSITIVE TOPICS
For harassment, discrimination, retaliation, bullying, safety concerns, or mental-health distress: acknowledge with warmth, state that the concern can be raised confidentially, provide reporting channels (their manager, HR at Info@CronosPMC.com, or leadership), emphasise that retaliation is prohibited, and do NOT investigate, judge, or predict outcomes.
WHEN UNSURE
If the answer is not clearly covered by the indexed documents, say so and direct the user to email Info@CronosPMC.com for support.
TONE & FORMAT
Warm, professional, concise. Default to a short paragraph (2–5 sentences). Use a short bullet list only when listing steps, entitlements, or eligibility criteria. Plain English, no legalese. If the user writes in Arabic, respond in Arabic. Never promise outcomes ("your leave will be approved") — describe the process. Cite the policy section where helpful (e.g., "Leave Policy, Section 2"). Always close with a clear next step when action is needed.
DISCLAIMER (when asked about legal weight)
These policies are internal templates for guidance. They do not override your employment contract or active UAE labour law. For binding answers, refer to HR and qualified counsel.
"""
# ----------------------------------------------------------------------
# 4) Initialize Pinecone — reuse existing index, build only if absent
# ----------------------------------------------------------------------
log.info("Connecting to Pinecone...")
pc = Pinecone(api_key=PINECONE_API_KEY)
existing_indexes = [idx["name"] for idx in pc.list_indexes()]
log.info(f"Existing Pinecone indexes: {existing_indexes}")

index_already_existed = INDEX_NAME in existing_indexes
if index_already_existed:
    log.info(f"Pinecone index '{INDEX_NAME}' already exists — reusing.")
else:
    log.info(f"Creating Pinecone index '{INDEX_NAME}' (dim={DIMENSION}, metric={METRIC})...")
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric=METRIC,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

# One handle serves both branches; previously the existing-index path
# built it twice.
pinecone_index = pc.Index(INDEX_NAME)

if index_already_existed:
    # An index that exists but holds no vectors still has to be populated.
    stats = pinecone_index.describe_index_stats()
    needs_population = stats.get("total_vector_count", 0) == 0
else:
    # A freshly created index is treated as empty without querying stats.
    needs_population = True

vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
# ----------------------------------------------------------------------
# 5) Build the index if it's empty, otherwise reconnect
# ----------------------------------------------------------------------
if not needs_population:
    # Vectors are already stored in Pinecone: attach to them.
    log.info("Reconnecting to populated Pinecone index...")
    index = VectorStoreIndex.from_vector_store(vector_store)
else:
    # Read every PDF under DATA_DIR and index it into the vector store.
    log.info(f"Loading PDFs from /{DATA_DIR}/...")
    pdf_directory_reader = SimpleDirectoryReader(
        input_dir=DATA_DIR,
        required_exts=[".pdf"],
        file_extractor={".pdf": PDFReader()},
    )
    documents = pdf_directory_reader.load_data()
    if not documents:
        raise RuntimeError(f"No PDFs found in /{DATA_DIR}/.")
    log.info(f"Loaded {len(documents)} document chunks. Indexing into Pinecone...")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
    )
    # Fixed pause before the first query, as announced in the log line.
    log.info("Waiting 15s for Pinecone to make vectors queryable...")
    time.sleep(15)

# Report the vector count whichever branch ran.
stats = pinecone_index.describe_index_stats()
log.info(f"Vector count: {stats.get('total_vector_count', 0)}")
# ----------------------------------------------------------------------
# 6) Chat engine — multi-turn memory + system prompt
# ----------------------------------------------------------------------
# Options are collected first so the engine construction reads as one call.
_chat_engine_options = {
    "chat_mode": "condense_plus_context",
    "system_prompt": SYSTEM_PROMPT,
    "verbose": False,
}
chat_engine = index.as_chat_engine(**_chat_engine_options)
log.info("Chat engine ready.")
# ----------------------------------------------------------------------
# 7) Gradio UI — branded with the CronosPMC logo and palette
# ----------------------------------------------------------------------
import base64
def _img_to_data_url(path):
    """Embed a local image as a data URL so it works inside HTML blocks.

    Args:
        path: Filesystem path of the image file to embed.

    Returns:
        A ``data:<mime>;base64,<payload>`` string. The MIME type is guessed
        from the file extension; when the extension is unknown or does not
        map to an image type, ``image/png`` is used (the value that was
        previously hard-coded), so existing ``.png`` callers are unaffected.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    import mimetypes  # local import keeps this block self-contained

    mime_type, _encoding = mimetypes.guess_type(str(path))
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/png"
    with open(path, "rb") as image_file:
        payload = base64.b64encode(image_file.read()).decode("ascii")
    return f"data:{mime_type};base64,{payload}"
# Encode the logo once at import time; the header HTML embeds this value
# as the <img> src.
LOGO_DATA_URL = _img_to_data_url(LOGO_PATH)
# Brand palettes, declared separately so the theme call stays readable.
_primary_palette = gr.themes.Color(
    c50="#E8EEF7",
    c100="#C5D2E5",
    c200="#9EB2D0",
    c300="#7691BB",
    c400="#4F71A6",
    c500="#13315C",
    c600="#0F2A4F",
    c700="#0B2545",
    c800="#08203C",
    c900="#061830",
    c950="#03101F",
)
_secondary_palette = gr.themes.Color(
    c50="#FEE7EC",
    c100="#FCC3CD",
    c200="#F89BAB",
    c300="#F37287",
    c400="#EE506A",
    c500="#E63950",
    c600="#CC2C42",
    c700="#A82236",
    c800="#84192A",
    c900="#601220",
    c950="#3A0915",
)
_font_stack = [gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"]

# Per-variable overrides applied on top of the Soft base theme.
_theme_overrides = {
    "body_background_fill": "#F7F8FB",
    "block_background_fill": "white",
    "block_border_width": "1px",
    "block_radius": "14px",
    "block_shadow": "0 1px 3px rgba(11, 37, 69, 0.06), 0 4px 16px rgba(11, 37, 69, 0.04)",
    "button_primary_background_fill": "#0B2545",
    "button_primary_background_fill_hover": "#13315C",
    "button_primary_text_color": "white",
    "input_background_fill": "white",
    "input_border_color": "#D9DEE8",
    "input_border_color_focus": "#0B2545",
    "input_radius": "10px",
}

_base_theme = gr.themes.Soft(
    primary_hue=_primary_palette,
    secondary_hue=_secondary_palette,
    neutral_hue="slate",
    font=_font_stack,
)
cronospmc_theme = _base_theme.set(**_theme_overrides)
# Stylesheet passed to gr.Blocks(css=...). It styles the #cpmc-header and
# #cpmc-footer HTML blocks, caps the container width, and hides the
# <footer> element.
custom_css = """
.gradio-container { max-width: 980px !important; margin: 0 auto !important; }
#cpmc-header {
background: linear-gradient(135deg, #0B2545 0%, #13315C 70%, #1A3D6B 100%);
color: white; padding: 26px 32px; border-radius: 16px;
margin-bottom: 18px;
box-shadow: 0 8px 24px rgba(11, 37, 69, 0.18);
position: relative; overflow: hidden;
}
#cpmc-header::after {
content: ""; position: absolute; top: -40%; right: -10%;
width: 320px; height: 320px;
background: radial-gradient(circle, rgba(230, 57, 80, 0.12) 0%, transparent 65%);
pointer-events: none;
}
#cpmc-header .logo-row {
display: flex; align-items: center; gap: 14px; margin-bottom: 4px;
position: relative; z-index: 1;
}
#cpmc-header img.logo {
height: 56px; width: auto;
filter: drop-shadow(0 2px 4px rgba(0, 0, 0, 0.25));
}
#cpmc-header .title-block h1 {
color: white !important; font-size: 22px !important; font-weight: 600 !important;
margin: 0 !important; letter-spacing: -0.2px; line-height: 1.2;
}
#cpmc-header .title-block .tagline {
color: #E63950 !important; font-size: 12px; font-weight: 600;
letter-spacing: 1.5px; text-transform: uppercase; margin-top: 2px;
}
#cpmc-header .subtitle {
color: #C5D2E5 !important; font-size: 14px; margin: 8px 0 0 0;
position: relative; z-index: 1;
}
#cpmc-header .badges {
margin-top: 14px; display: flex; gap: 8px; flex-wrap: wrap;
position: relative; z-index: 1;
}
#cpmc-header .badge {
background: rgba(255, 255, 255, 0.12);
border: 1px solid rgba(255, 255, 255, 0.22);
color: white; padding: 4px 11px; border-radius: 999px;
font-size: 12px; font-weight: 500;
}
#cpmc-header .badge-live { background: #10B981; border-color: #10B981; }
#cpmc-header .badge-live::before { content: "● "; color: #D1FAE5; }
#cpmc-header .badge-brand {
background: rgba(230, 57, 80, 0.18);
border-color: rgba(230, 57, 80, 0.4);
}
#cpmc-footer {
margin-top: 18px; padding: 16px 20px;
background: #F1F4F9; border-radius: 12px;
border: 1px solid #E5E9F0; font-size: 13px;
color: #4F5D75; line-height: 1.55;
}
#cpmc-footer strong { color: #0B2545; }
#cpmc-footer a { color: #E63950; font-weight: 500; text-decoration: none; }
#cpmc-footer a:hover { text-decoration: underline; }
#cpmc-footer .footer-brand {
display: inline-flex; align-items: center; gap: 6px;
color: #0B2545; font-weight: 600;
}
#cpmc-footer .footer-brand .accent { color: #E63950; }
footer { display: none !important; }
"""
def chat_with_hrbot(message, history):
    """Answer one chat turn using the shared chat engine.

    ``chat_engine`` is a single module-level object, so its internal
    conversation memory is shared by everyone using the app. Relying on
    that memory (and calling ``reset()`` whenever a new session starts)
    mixes different visitors' conversations and wipes an ongoing
    conversation when someone else opens the page. Instead, the prior
    turns of *this* session, as supplied by Gradio, are handed to the
    engine on every call.

    Args:
        message: The user's latest message text.
        history: Prior turns of the current session. With
            ``type="messages"`` this is expected to be a list of dicts
            carrying ``role`` and ``content`` keys; empty or ``None`` on
            the first turn.

    Returns:
        The assistant's reply as a string, or a user-facing apology if the
        engine raised.
    """
    # Local import: the name is needed only here and the package is
    # already a dependency of this file.
    from llama_index.core.llms import ChatMessage

    # Keep plain-text user/assistant turns only; anything else (for example
    # non-string content) is skipped rather than guessed at.
    session_turns = [
        ChatMessage(role=turn["role"], content=turn["content"])
        for turn in (history or [])
        if isinstance(turn, dict)
        and turn.get("role") in ("user", "assistant")
        and isinstance(turn.get("content"), str)
    ]
    try:
        # An empty list on the first turn starts the conversation clean,
        # which is what the former reset() call was for.
        response = chat_engine.chat(message, chat_history=session_turns)
        return str(response)
    except Exception as e:
        # Top-level boundary for the UI callback: log the traceback and
        # show a friendly message instead of surfacing a raw error.
        log.exception("Chat error")
        return f"⚠️ Sorry, something went wrong: {e}\n\nPlease try again or email Info@CronosPMC.com."
# Page layout: branded header, chat interface, then the about/disclaimer
# footer. The HTML bodies are kept flush-left inside their string literals
# so their text matches what was there before, apart from the repaired
# punctuation and emoji.
with gr.Blocks(theme=cronospmc_theme, css=custom_css, title="CronosPMC HR Assistant") as demo:
    gr.HTML(f"""
<div id="cpmc-header">
<div class="logo-row">
<img class="logo" src="{LOGO_DATA_URL}" alt="CronosPMC" />
<div class="title-block">
<h1>HR Enterprise Assistant</h1>
<div class="tagline">Project Solution Experts · Dubai</div>
</div>
</div>
<p class="subtitle">Your AI-powered guide to leave, remote work, conduct, and everything in the CronosPMC handbook.</p>
<div class="badges">
<span class="badge badge-live">Online</span>
<span class="badge">GPT-4o-mini + Pinecone RAG</span>
<span class="badge badge-brand">📍 Dubai · GST</span>
<span class="badge">🔒 Internal use only</span>
</div>
</div>
""")
    gr.ChatInterface(
        fn=chat_with_hrbot,
        type="messages",
        chatbot=gr.Chatbot(
            height=460,
            type="messages",
            avatar_images=(None, AVATAR_PATH),
            show_copy_button=True,
        ),
        textbox=gr.Textbox(
            placeholder="Ask about leave, remote work, working hours, conduct, training…",
            container=False,
        ),
        examples=[
            "What is the annual leave policy?",
            "How many sick leave days am I entitled to?",
            "Can I work remotely from another country?",
            "What are the working hours during Ramadan?",
            "How do I report harassment?",
            "What training certifications does CronosPMC support?",
        ],
        cache_examples=False,
    )
    gr.HTML("""
<div id="cpmc-footer">
<strong>About this assistant</strong> — answers are grounded in four CronosPMC policy documents:
the Employee Handbook, Leave Policy, Remote Work Policy, and HR FAQ — indexed in Pinecone for fast retrieval.
For confidential matters or anything not covered above, email
<a href="mailto:Info@CronosPMC.com">Info@CronosPMC.com</a>.<br><br>
<strong>Disclaimer</strong> — this bot provides guidance based on internal policy templates.
It does not override your employment contract or active UAE labour law.
For binding legal answers, consult HR and qualified counsel.<br><br>
<span class="footer-brand">© Cronos<span class="accent">PMC</span> · Project Solution Experts</span>
</div>
""")

# Start the server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()