# cypher / app.py
# coder-vansh's picture
# Create app.py
# e6da26b verified
import os
import gradio as ui
from vllm import LLM, SamplingParams
from huggingface_hub import login
# Log in to the Hugging Face Hub only when HF_TOKEN is set in the environment;
# when it is unset, login() is skipped entirely.
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN:
    login(token=HF_TOKEN)

# Hub repository id of the model served by this app.
MODEL_ID = "coder-vansh/cypher_llm_model"

print("🔥 Spawning ZeroGPU vLLM Layer for Gemma 4...")
# Build the vLLM engine once at import time so every chat request reuses it.
# max_model_len caps the context window (prompt plus generated tokens) at 2048
# tokens; trust_remote_code allows code shipped in the model repository to run
# while the model is loaded.
llm = LLM(model=MODEL_ID, trust_remote_code=True, max_model_len=2048)
def _content_text(content):
    """Return the plain text carried by a message ``content`` value, or None.

    A string is returned unchanged. For a list/tuple of parts, the ``"text"``
    fields of dict parts are joined with newlines. Anything else (for example
    file attachments) has no text and yields None.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, (list, tuple)):
        # NOTE(review): assumes structured content uses {"text": ...} parts,
        # as in OpenAI-style message blocks; confirm against the installed
        # Gradio version.
        texts = [
            part["text"]
            for part in content
            if isinstance(part, dict) and isinstance(part.get("text"), str)
        ]
        return "\n".join(texts) if texts else None
    return None


def predict(message, history):
    """Generate CYPHER's reply to ``message`` given the prior conversation.

    Args:
        message: The user's newest chat message.
        history: Earlier turns as supplied by the chat UI. Two layouts are
            accepted: a sequence of ``(user_msg, bot_msg)`` pairs, or a
            sequence of ``{"role": ..., "content": ...}`` dicts. ``None`` or
            an empty sequence means there is no prior conversation.

    Returns:
        The generated reply text, or a message starting with
        "⚠️ Production Glitch:" describing the error if generation fails.
    """
    system_prompt = "You are CYPHER, a casual, witty Hinglish AI companion by Vansh & Aditya. Tagline: \"Not just an AI — YOUR AI.\""
    prompt_parts = [f"<|system|>\n{system_prompt}"]

    for entry in history or []:
        if isinstance(entry, dict):
            # Messages-style history: one dict per turn. Unpacking such a dict
            # as a pair would yield its *keys*, so it is handled explicitly.
            role = entry.get("role")
            text = _content_text(entry.get("content"))
            if role not in ("user", "assistant") or text is None:
                continue
            turns = [(role, text)]
        else:
            # Pair-style history: (user message, assistant reply).
            user_msg, bot_msg = entry
            turns = [("user", user_msg), ("assistant", bot_msg)]
        prompt_parts.extend(f"\n<|{role}|>\n{text}" for role, text in turns)

    # End on an open assistant tag so the model continues as the assistant.
    prompt_parts.append(f"\n<|user|>\n{message}\n<|assistant|>\n")
    formatted_prompt = "".join(prompt_parts)

    sampling_params = SamplingParams(temperature=0.7, max_tokens=250, top_p=0.95)

    try:
        outputs = llm.generate([formatted_prompt], sampling_params)
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        return f"⚠️ Production Glitch: {e}"
    # One prompt was submitted, so take the first completion of the first result.
    return outputs[0].outputs[0].text
# Page layout: two Markdown headings followed by a chat interface whose
# replies come from predict().
with ui.Blocks() as demo:
    ui.Markdown("# 🤖 CYPHER AI Live Production Space (ZeroGPU Enabled)")
    ui.Markdown("### *Not just an AI — YOUR AI.* | Developed by Vansh & Aditya")
    ui.ChatInterface(
        fn=predict,
        textbox=ui.Textbox(placeholder="Bhai se kuch bhi poocho...", container=False, scale=7),
    )

# Serve on all network interfaces at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)