Spaces:
Runtime error
Runtime error
| import os | |
| import gradio as ui | |
| from vllm import LLM, SamplingParams | |
| from huggingface_hub import login | |
# Log in to the Hugging Face Hub only when a token is supplied through the
# environment; without one the model download below is attempted anonymously.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Hub repository of the model served by this app.
MODEL_ID = "coder-vansh/cypher_llm_model"

print("🔥 Spawning ZeroGPU vLLM Layer for Gemma 4...")

# Create the vLLM engine once at import time so every chat request reuses it.
# max_model_len=2048 caps prompt + completion tokens per request.
# NOTE(review): trust_remote_code=True lets code shipped in the model
# repository run during loading -- confirm the repository is trusted.
# NOTE(review): on ZeroGPU hardware a GPU is normally attached only inside
# functions decorated with `spaces.GPU`; creating the engine at module level
# may fail there -- confirm against the Space's runtime logs.
llm = LLM(model=MODEL_ID, trust_remote_code=True, max_model_len=2048)
def predict(message, history):
    """Generate CYPHER's reply to ``message`` given the prior conversation.

    The system prompt, every earlier turn and the new user message are
    flattened into one ``<|system|>`` / ``<|user|>`` / ``<|assistant|>``
    tagged prompt, which is then completed by the module-level ``llm``.

    Args:
        message: The user's newest chat message.
        history: Earlier turns, or a falsy value when there are none. Two
            shapes are accepted: ``(user_msg, bot_msg)`` pairs, or dicts
            carrying ``"role"`` and ``"content"`` keys.

    Returns:
        The generated reply text, or a ``"⚠️ Production Glitch: ..."``
        string describing the exception if generation fails.
    """
    system_prompt = (
        "You are CYPHER, a casual, witty Hinglish AI companion by Vansh & Aditya. "
        'Tagline: "Not just an AI — YOUR AI."'
    )

    segments = [f"<|system|>\n{system_prompt}"]
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-style history: one dict per utterance. Unpacking the
            # dict directly would yield its *keys*, not the conversation.
            role = turn.get("role")
            if role in ("user", "assistant"):
                segments.append(f"\n<|{role}|>\n{turn.get('content')}")
            # Other roles have no tag in this prompt template and are skipped.
        else:
            # Pair-style history: (user message, assistant reply).
            user_msg, bot_msg = turn
            segments.append(f"\n<|user|>\n{user_msg}\n<|assistant|>\n{bot_msg}")
    # End on an open assistant tag so the model writes the next reply.
    segments.append(f"\n<|user|>\n{message}\n<|assistant|>\n")
    formatted_prompt = "".join(segments)

    sampling_params = SamplingParams(temperature=0.7, max_tokens=250, top_p=0.95)

    try:
        outputs = llm.generate([formatted_prompt], sampling_params)
        # One prompt was submitted, so take its first (only) completion.
        return outputs[0].outputs[0].text
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing.
        return f"⚠️ Production Glitch: {e}"
# Assemble the page: two Markdown headers above a chat widget whose replies
# come from predict().
with ui.Blocks() as demo:
    ui.Markdown("# 🤖 CYPHER AI Live Production Space (ZeroGPU Enabled)")
    ui.Markdown("### *Not just an AI — YOUR AI.* | Developed by Vansh & Aditya")
    ui.ChatInterface(
        fn=predict,
        textbox=ui.Textbox(
            placeholder="Bhai se kuch bhi poocho...",
            container=False,
            scale=7,
        ),
    )

# Listen on every network interface so the app is reachable from outside the
# container, on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)