"""Gradio app that exposes the Zephyr 7B Beta text-generation model as an API."""

import gradio as gr
from transformers import pipeline

# Load the Zephyr 7B Beta model once, at import time, so that every request
# reuses the same pipeline instead of reloading the weights.
pipe = pipeline(
    "text-generation",
    model="HuggingFaceH4/zephyr-7b-beta",
    device_map="auto",
)


def chat_api(prompt: str) -> str:
    """Generate a sampled completion for *prompt* with the shared pipeline.

    Args:
        prompt: Raw text passed unchanged to the text-generation pipeline.

    Returns:
        The ``"generated_text"`` field of the first result returned by the
        pipeline.
    """
    # Sampling is enabled, so repeated calls with the same prompt may return
    # different text.
    result = pipe(
        prompt,
        max_new_tokens=200,
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
    )
    # NOTE(review): by default the text-generation pipeline returns the prompt
    # followed by the continuation in "generated_text". If API clients expect
    # only the newly generated text, pass return_full_text=False above --
    # confirm against existing consumers before changing.
    return result[0]["generated_text"]


# Gradio Interface used for API access: one text input, one text output,
# with flagging turned off.
demo = gr.Interface(
    fn=chat_api,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Response"),
    allow_flagging="never",
)

# Launch the server; 0.0.0.0 binds all network interfaces so the app is
# reachable from outside the host/container, on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)