"""
CronosPMC HR Enterprise Chatbot — Hugging Face Space deployment
================================================================

A production RAG chatbot grounded in 4 CronosPMC HR policy PDFs.

- LlamaIndex for retrieval
- Pinecone for the vector store (built once, reused across sessions)
- Gradio for the branded web UI
- OpenAI gpt-4o-mini + text-embedding-3-large

Required HF Space secrets:
    OPENAI_API_KEY   — OpenAI API key
    PINECONE_API_KEY — Pinecone API key (free tier OK)
"""

import base64
import logging
import os
import time

import gradio as gr
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
)
from llama_index.core.llms import ChatMessage
from llama_index.readers.file import PDFReader
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.vector_stores.pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
log = logging.getLogger("cronospmc")

# ----------------------------------------------------------------------
# 1) Configuration — read secrets from HF Space environment
# ----------------------------------------------------------------------
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")

if not OPENAI_API_KEY:
    raise RuntimeError("Missing OPENAI_API_KEY — add it under Space Settings → Secrets.")
if not PINECONE_API_KEY:
    raise RuntimeError("Missing PINECONE_API_KEY — add it under Space Settings → Secrets.")

os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY  # LlamaIndex reads from env

INDEX_NAME = "cronospmc-hr"
DIMENSION = 3072
METRIC = "cosine"
DATA_DIR = "data"
LOGO_PATH = "assets/cronospmc_logo.png"
AVATAR_PATH = "assets/cronospmc_avatar.png"

# Upper bounds (seconds) for the Pinecone polling loops below.
INDEX_READY_TIMEOUT_S = 120
VECTOR_VISIBLE_TIMEOUT_S = 60
POLL_INTERVAL_S = 2

# ----------------------------------------------------------------------
# 2) LlamaIndex global settings
# ----------------------------------------------------------------------
Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.2)
Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-large")
Settings.chunk_size = 600
Settings.chunk_overlap = 200

# ----------------------------------------------------------------------
# 3) System prompt — the bot's persona and guardrails
# ----------------------------------------------------------------------
# NOTE(review): the prompt text below is reproduced exactly as it appears in
# the reviewed source, where its line breaks look collapsed — confirm the
# intended layout against the deployed file before reflowing it.
SYSTEM_PROMPT = """You are the CronosPMC HR Assistant, an internal chatbot for CronosPMC — a Dubai-based construction project management consulting and professional training practice serving GCC clients. Answer questions exclusively using the indexed CronosPMC policy documents (Employee Handbook, Leave Policy, Remote Work Policy, HR FAQ). SCOPE — WHAT YOU ANSWER Leave (annual, sick, compassionate, maternity/paternity, unpaid); working hours, attendance, core collaboration hours, Ramadan adjustments; remote and hybrid work eligibility, home office setup, travel rules; performance, probation, PIPs, disciplinary process; code of conduct, confidentiality, IP, conflict of interest; data protection and acceptable use of AI tools; harassment and grievance reporting channels; learning, training, and development entitlements; equipment, expenses, TOIL. SCOPE — WHAT YOU DECLINE Decline politely and redirect for: topics unrelated to CronosPMC HR; binding legal interpretation of UAE labour law (refer to HR and qualified counsel); individual case decisions ("Will my leave be approved?"); other employees' records, salary, or performance; visa/immigration/PRO matters; payroll, tax, or financial advice; medical advice. SENSITIVE TOPICS For harassment, discrimination, retaliation, bullying, safety concerns, or mental-health distress: acknowledge with warmth, state that the concern can be raised confidentially, provide reporting channels (their manager, HR at Info@CronosPMC.com, or leadership), emphasise that retaliation is prohibited, and do NOT investigate, judge, or predict outcomes. 
WHEN UNSURE If the answer is not clearly covered by the indexed documents, say so and direct the user to email Info@CronosPMC.com for support. TONE & FORMAT Warm, professional, concise. Default to a short paragraph (2–5 sentences). Use a short bullet list only when listing steps, entitlements, or eligibility criteria. Plain English, no legalese. If the user writes in Arabic, respond in Arabic. Never promise outcomes ("your leave will be approved") — describe the process. Cite the policy section where helpful (e.g., "Leave Policy, Section 2"). Always close with a clear next step when action is needed. DISCLAIMER (when asked about legal weight) These policies are internal templates for guidance. They do not override your employment contract or active UAE labour law. For binding answers, refer to HR and qualified counsel. """


def _wait_until(predicate, timeout_s: float, interval_s: float = POLL_INTERVAL_S) -> bool:
    """Poll ``predicate()`` until it is truthy or ``timeout_s`` seconds elapse.

    Args:
        predicate: Zero-argument callable evaluated on every poll.
        timeout_s: Maximum time to keep polling, in seconds.
        interval_s: Pause between polls, in seconds.

    Returns:
        True if the predicate became truthy, False if the timeout expired first.
    """
    deadline = time.monotonic() + timeout_s
    while True:
        if predicate():
            return True
        if time.monotonic() >= deadline:
            return False
        time.sleep(interval_s)


# ----------------------------------------------------------------------
# 4) Initialize Pinecone — reuse existing index, build only if absent
# ----------------------------------------------------------------------
log.info("Connecting to Pinecone...")
pc = Pinecone(api_key=PINECONE_API_KEY)

existing_indexes = [idx["name"] for idx in pc.list_indexes()]
log.info(f"Existing Pinecone indexes: {existing_indexes}")

index_was_created = INDEX_NAME not in existing_indexes
if index_was_created:
    log.info(f"Creating Pinecone index '{INDEX_NAME}' (dim={DIMENSION}, metric={METRIC})...")
    pc.create_index(
        name=INDEX_NAME,
        dimension=DIMENSION,
        metric=METRIC,
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )
    # Index creation is asynchronous: wait until Pinecone reports the index as
    # ready before connecting to it and upserting vectors.
    index_ready = _wait_until(
        lambda: pc.describe_index(INDEX_NAME).status["ready"],
        INDEX_READY_TIMEOUT_S,
    )
    if not index_ready:
        raise RuntimeError(
            f"Pinecone index '{INDEX_NAME}' was not ready after {INDEX_READY_TIMEOUT_S}s."
        )
else:
    log.info(f"Pinecone index '{INDEX_NAME}' already exists — reusing.")

pinecone_index = pc.Index(INDEX_NAME)

# A freshly created index is always empty; an existing one needs populating
# only when it holds no vectors yet.
needs_population = (
    index_was_created
    or pinecone_index.describe_index_stats().get("total_vector_count", 0) == 0
)

vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# ----------------------------------------------------------------------
# 5) Build the index if it's empty, otherwise reconnect
# ----------------------------------------------------------------------
if needs_population:
    log.info(f"Loading PDFs from /{DATA_DIR}/...")
    documents = SimpleDirectoryReader(
        input_dir=DATA_DIR,
        required_exts=[".pdf"],
        file_extractor={".pdf": PDFReader()},
    ).load_data()
    if not documents:
        raise RuntimeError(f"No PDFs found in /{DATA_DIR}/.")
    log.info(f"Loaded {len(documents)} document chunks. Indexing into Pinecone...")
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)

    # Poll the index stats instead of sleeping a fixed interval: continue as
    # soon as the upserted vectors are visible, and warn if they never are.
    log.info("Waiting for Pinecone to make vectors queryable...")
    vectors_visible = _wait_until(
        lambda: pinecone_index.describe_index_stats().get("total_vector_count", 0) > 0,
        VECTOR_VISIBLE_TIMEOUT_S,
    )
    if not vectors_visible:
        log.warning(
            f"No vectors visible in '{INDEX_NAME}' after {VECTOR_VISIBLE_TIMEOUT_S}s — "
            "retrieval may return no context until indexing settles."
        )
else:
    log.info("Reconnecting to populated Pinecone index...")
    index = VectorStoreIndex.from_vector_store(vector_store)

stats = pinecone_index.describe_index_stats()
log.info(f"Vector count: {stats.get('total_vector_count', 0)}")


# ----------------------------------------------------------------------
# 6) Chat engine — multi-turn memory + system prompt
# ----------------------------------------------------------------------
def _build_chat_engine():
    """Return a chat engine over ``index`` configured with the HR system prompt."""
    return index.as_chat_engine(
        chat_mode="condense_plus_context",
        system_prompt=SYSTEM_PROMPT,
        verbose=False,
    )


# Kept as a module-level name for backward compatibility. Request handling
# builds its own engine per call (see chat_with_hrbot) so that conversation
# memory is never shared between concurrent Gradio sessions.
chat_engine = _build_chat_engine()
log.info("Chat engine ready.")

# ----------------------------------------------------------------------
# 7) Gradio UI — branded with the CronosPMC logo and palette
# ----------------------------------------------------------------------


def _img_to_data_url(path: str) -> str:
    """Embed a local image as a data URL so it works inside HTML blocks.

    Args:
        path: Filesystem path of the image to embed.

    Returns:
        A ``data:image/png;base64,...`` URL containing the file's bytes.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(path, "rb") as f:
        b64 = base64.b64encode(f.read()).decode("ascii")
    return f"data:image/png;base64,{b64}"


LOGO_DATA_URL = _img_to_data_url(LOGO_PATH)

cronospmc_theme = gr.themes.Soft(
    primary_hue=gr.themes.Color(
        c50="#E8EEF7",
        c100="#C5D2E5",
        c200="#9EB2D0",
        c300="#7691BB",
        c400="#4F71A6",
        c500="#13315C",
        c600="#0F2A4F",
        c700="#0B2545",
        c800="#08203C",
        c900="#061830",
        c950="#03101F",
    ),
    secondary_hue=gr.themes.Color(
        c50="#FEE7EC",
        c100="#FCC3CD",
        c200="#F89BAB",
        c300="#F37287",
        c400="#EE506A",
        c500="#E63950",
        c600="#CC2C42",
        c700="#A82236",
        c800="#84192A",
        c900="#601220",
        c950="#3A0915",
    ),
    neutral_hue="slate",
    font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill="#F7F8FB",
    block_background_fill="white",
    block_border_width="1px",
    block_radius="14px",
    block_shadow="0 1px 3px rgba(11, 37, 69, 0.06), 0 4px 16px rgba(11, 37, 69, 0.04)",
    button_primary_background_fill="#0B2545",
    button_primary_background_fill_hover="#13315C",
    button_primary_text_color="white",
    input_background_fill="white",
    input_border_color="#D9DEE8",
    input_border_color_focus="#0B2545",
    input_radius="10px",
)

# Stylesheet text is reproduced exactly as it appears in the reviewed source.
custom_css = """ .gradio-container { max-width: 980px !important; margin: 0 auto !important; } #cpmc-header { background: linear-gradient(135deg, #0B2545 0%, #13315C 70%, #1A3D6B 100%); color: white; padding: 26px 32px; border-radius: 16px; margin-bottom: 18px; box-shadow: 0 8px 24px rgba(11, 37, 69, 0.18); position: relative; overflow: hidden; } #cpmc-header::after { content: ""; position: absolute; top: -40%; right: -10%; width: 320px; height: 320px; background: radial-gradient(circle, rgba(230, 57, 80, 0.12) 0%, transparent 65%); pointer-events: none; } #cpmc-header .logo-row { display: flex; align-items: center; gap: 14px; margin-bottom: 4px; position: relative; z-index: 1; } #cpmc-header img.logo { height: 56px; width: auto; filter: drop-shadow(0 2px 4px rgba(0, 0, 0, 0.25)); } #cpmc-header .title-block h1 { color: white !important; font-size: 22px !important; font-weight: 600 !important; margin: 0 !important; letter-spacing: -0.2px; line-height: 1.2; } #cpmc-header .title-block .tagline { color: #E63950 !important; font-size: 12px; font-weight: 600; letter-spacing: 1.5px; text-transform: uppercase; margin-top: 2px; } #cpmc-header .subtitle { color: #C5D2E5 !important; font-size: 14px; margin: 8px 
0 0 0; position: relative; z-index: 1; } #cpmc-header .badges { margin-top: 14px; display: flex; gap: 8px; flex-wrap: wrap; position: relative; z-index: 1; } #cpmc-header .badge { background: rgba(255, 255, 255, 0.12); border: 1px solid rgba(255, 255, 255, 0.22); color: white; padding: 4px 11px; border-radius: 999px; font-size: 12px; font-weight: 500; } #cpmc-header .badge-live { background: #10B981; border-color: #10B981; } #cpmc-header .badge-live::before { content: "● "; color: #D1FAE5; } #cpmc-header .badge-brand { background: rgba(230, 57, 80, 0.18); border-color: rgba(230, 57, 80, 0.4); } #cpmc-footer { margin-top: 18px; padding: 16px 20px; background: #F1F4F9; border-radius: 12px; border: 1px solid #E5E9F0; font-size: 13px; color: #4F5D75; line-height: 1.55; } #cpmc-footer strong { color: #0B2545; } #cpmc-footer a { color: #E63950; font-weight: 500; text-decoration: none; } #cpmc-footer a:hover { text-decoration: underline; } #cpmc-footer .footer-brand { display: inline-flex; align-items: center; gap: 6px; color: #0B2545; font-weight: 600; } #cpmc-footer .footer-brand .accent { color: #E63950; } footer { display: none !important; } """


def _history_to_chat_messages(history) -> list:
    """Convert Gradio "messages"-format history into LlamaIndex chat messages.

    Only user/assistant turns with plain-text content are kept; anything else
    (missing role, non-string content) is skipped.

    Args:
        history: The per-session history Gradio passes to the chat function;
            may be empty or None on the first turn.

    Returns:
        A list of ``ChatMessage`` objects in the original turn order.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            role, content = turn.get("role"), turn.get("content")
        else:
            role, content = getattr(turn, "role", None), getattr(turn, "content", None)
        if role in ("user", "assistant") and isinstance(content, str):
            messages.append(ChatMessage(role=role, content=content))
    return messages


def chat_with_hrbot(message, history):
    """Answer one user message using the policy index and this session's history.

    A fresh chat engine is built for every call and seeded with the history
    Gradio supplies for the calling session, so turns from different visitors
    never end up in each other's context.

    Args:
        message: The user's latest message.
        history: Prior turns of this session as supplied by Gradio.

    Returns:
        The assistant's reply as a string, or a user-facing error message if
        the request failed (the exception is logged).
    """
    try:
        engine = _build_chat_engine()
        response = engine.chat(message, chat_history=_history_to_chat_messages(history))
        return str(response)
    except Exception as e:
        # Top-level UI boundary: log the traceback and show a friendly message.
        log.exception("Chat error")
        return f"⚠️ Sorry, something went wrong: {e}\n\nPlease try again or email Info@CronosPMC.com."


with gr.Blocks(theme=cronospmc_theme, css=custom_css, title="CronosPMC HR Assistant") as demo:
    # NOTE(review): the header text below is reproduced exactly as it appears
    # in the reviewed source. It contains no markup and never interpolates
    # LOGO_DATA_URL, although custom_css targets #cpmc-header, img.logo,
    # .badge, etc. — the HTML tags look stripped; confirm against the
    # deployed file.
    gr.HTML(f"""

HR Enterprise Assistant

Project Solution Experts · Dubai

Your AI-powered guide to leave, remote work, conduct, and everything in the CronosPMC handbook.

Online GPT-4o-mini + Pinecone RAG 📍 Dubai · GST 🔒 Internal use only
""")

    gr.ChatInterface(
        fn=chat_with_hrbot,
        type="messages",
        chatbot=gr.Chatbot(
            height=460,
            type="messages",
            avatar_images=(None, AVATAR_PATH),
            show_copy_button=True,
        ),
        textbox=gr.Textbox(
            placeholder="Ask about leave, remote work, working hours, conduct, training…",
            container=False,
        ),
        examples=[
            "What is the annual leave policy?",
            "How many sick leave days am I entitled to?",
            "Can I work remotely from another country?",
            "What are the working hours during Ramadan?",
            "How do I report harassment?",
            "What training certifications does CronosPMC support?",
        ],
        cache_examples=False,
    )

    # NOTE(review): the footer block is empty in the reviewed source although
    # custom_css styles #cpmc-footer — its content looks stripped; confirm
    # against the deployed file.
    gr.HTML(""" """)


if __name__ == "__main__":
    demo.launch()