"""Gradio chat front-end that serves the CYPHER model through a vLLM engine."""

import logging
import os

import gradio as ui
from huggingface_hub import login
from vllm import LLM, SamplingParams

logger = logging.getLogger(__name__)

# Log in to the Hugging Face Hub only when a token is provided via the
# environment; without one the model is fetched anonymously.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

MODEL_ID = "coder-vansh/cypher_llm_model"

SYSTEM_PROMPT = (
    "You are CYPHER, a casual, witty Hinglish AI companion by Vansh & Aditya. "
    "Tagline: \"Not just an AI — YOUR AI.\""
)

print("🔥 Spawning ZeroGPU vLLM Layer for Gemma 4...")
# The engine is created once at import time and reused by every request.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally — confirm the repo is trusted.
# NOTE(review): max_model_len=2048 limits the context window; long chats are
# not trimmed here, so an oversized prompt presumably surfaces as the error
# message returned by predict() — consider dropping old turns.
llm = LLM(model=MODEL_ID, trust_remote_code=True, max_model_len=2048)


def _content_to_text(content):
    """Flatten one chat message payload into plain text.

    Strings pass through, ``None`` becomes ``""``, dict parts contribute their
    ``"text"`` entry (if any), and lists/tuples are flattened recursively.
    Anything else is converted with ``str()``.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return _content_to_text(content.get("text"))
    if isinstance(content, (list, tuple)):
        return "".join(_content_to_text(part) for part in content)
    return str(content)


def _iter_turns(history):
    """Yield ``(role, text)`` pairs from either supported history layout.

    Two layouts are accepted: a sequence of ``(user_msg, bot_msg)`` pairs, and
    a sequence of ``{"role": ..., "content": ...}`` dicts. Dict entries whose
    role is neither ``"user"`` nor ``"assistant"`` are skipped because the
    prompt format below has no tag for them.
    """
    for entry in history or ():
        if isinstance(entry, dict):
            role = entry.get("role")
            if role in ("user", "assistant"):
                yield role, _content_to_text(entry.get("content"))
        else:
            user_msg, bot_msg = entry
            yield "user", _content_to_text(user_msg)
            yield "assistant", _content_to_text(bot_msg)


def predict(message: str, history) -> str:
    """Generate CYPHER's reply to ``message`` given the prior conversation.

    Args:
        message: The latest user input from the chat textbox.
        history: Earlier turns, either as ``(user, bot)`` pairs or as
            role/content dicts; may be empty or ``None``.

    Returns:
        The generated text, or a "Production Glitch" message describing the
        exception if generation fails.
    """
    segments = [f"<|system|>\n{SYSTEM_PROMPT}"]
    segments.extend(f"\n<|{role}|>\n{text}" for role, text in _iter_turns(history))
    # Leave the assistant tag open so the model continues from there.
    segments.append(f"\n<|user|>\n{message}\n<|assistant|>\n")
    formatted_prompt = "".join(segments)

    sampling_params = SamplingParams(temperature=0.7, max_tokens=250, top_p=0.95)
    try:
        outputs = llm.generate([formatted_prompt], sampling_params)
        return outputs[0].outputs[0].text
    except Exception as e:
        # UI boundary: keep the chat alive, but record the traceback so the
        # failure is diagnosable from the server logs.
        logger.exception("Text generation failed")
        return f"⚠️ Production Glitch: {str(e)}"


with ui.Blocks() as demo:
    ui.Markdown("# 🤖 CYPHER AI Live Production Space (ZeroGPU Enabled)")
    ui.Markdown("### *Not just an AI — YOUR AI.* | Developed by Vansh & Aditya")
    ui.ChatInterface(
        fn=predict,
        textbox=ui.Textbox(
            placeholder="Bhai se kuch bhi poocho...",
            container=False,
            scale=7,
        ),
    )

# Bind to all interfaces on port 7860 so the app is reachable from outside
# the container it runs in.
demo.launch(server_name="0.0.0.0", server_port=7860)