Spaces:

decodingdatascience
/

LLMGenerationControls

Paused

File size: 10,509 Bytes

437a1c7
 
 
 
 
a8ecfcf
 
 
 
 
 
 
 
 
 
 
 
 
437a1c7
 
a8ecfcf
 
 
 
437a1c7
 
 
 
a8ecfcf
 
 
437a1c7
 
 
a8ecfcf
 
437a1c7
a8ecfcf
 
 
 
 
437a1c7
a8ecfcf
437a1c7
 
a8ecfcf
 
437a1c7
 
a8ecfcf
437a1c7
a8ecfcf
437a1c7
a8ecfcf
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
437a1c7
a8ecfcf
437a1c7
 
 
a8ecfcf
 
 
 
 
 
 
 
 
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
 
 
 
 
 
 
 
437a1c7
 
a8ecfcf
 
 
 
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
437a1c7
a8ecfcf
 
437a1c7
a8ecfcf
 
 
 
437a1c7
 
 
a8ecfcf
 
 
 
 
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
 
 
437a1c7
a8ecfcf
 
437a1c7
a8ecfcf
 
437a1c7
 
a8ecfcf
 
 
 
 
 
 
 
 
 
 
437a1c7
a8ecfcf
437a1c7
a8ecfcf
 
 
 
 
437a1c7
a8ecfcf
437a1c7
 
a8ecfcf
 
437a1c7
a8ecfcf
 
 
 
 
 
 
 
 
437a1c7
a8ecfcf
437a1c7
a8ecfcf
437a1c7
 
 
 
a8ecfcf
437a1c7
 
a8ecfcf
 
 
 
 
437a1c7
a8ecfcf
 
437a1c7
 
a8ecfcf
 
 
 
 
437a1c7
a8ecfcf
 
 
 
 
 
 
 
 
 
437a1c7
 
 
 
 
 
 
a8ecfcf
437a1c7
a8ecfcf
437a1c7
a8ecfcf
 
 
437a1c7
a8ecfcf
 
437a1c7
 
 
 
a8ecfcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437a1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
a8ecfcf
 
 
 
 
 
 
437a1c7
 
 
 
 
 
 
 
 
 
a8ecfcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
437a1c7
a8ecfcf
437a1c7
a8ecfcf
437a1c7
a8ecfcf
 
 
437a1c7
a8ecfcf
 
 
437a1c7
 
 
a8ecfcf
 
437a1c7
 
a8ecfcf
 
437a1c7
 
a8ecfcf
 
 
 
 
 
437a1c7
 
 
 
 
 
 
 
 
 
 
 
 
 
a8ecfcf
 
 
437a1c7
 
 
 
 
 
 
a8ecfcf
 
437a1c7
a8ecfcf

import os
import gradio as gr
from openai import OpenAI


# =========================
# Hugging Face Secret
# =========================
# Add this in Hugging Face Spaces:
# Settings → Secrets → New secret
# Name: OPENAI_API_KEY
# Value: your OpenAI API key


DEFAULT_GENERATION_MODEL = os.getenv("OPENAI_GENERATION_MODEL", "gpt-5.5")
DEFAULT_REASONING_MODEL = os.getenv("OPENAI_REASONING_MODEL", "gpt-5.5")


GENERATION_MODELS = [
    "gpt-5.5",
    "gpt-5.1",
    "gpt-5-mini",
    "gpt-4.1",
    "gpt-4.1-mini",
]

REASONING_MODELS = [
    "gpt-5.5",
    "gpt-5.1",
    "gpt-5-mini",
    "gpt-5-pro",
]


def get_openai_client():
    api_key = os.getenv("OPENAI_API_KEY")

    if not api_key:
        raise ValueError(
            "OPENAI_API_KEY is missing. "
            "Please add it in Hugging Face Spaces → Settings → Secrets."
        )

    return OpenAI(api_key=api_key)


def is_gpt5_family(model: str) -> bool:
    return model.startswith("gpt-5")


def extract_output_text(response):
    """
    Safely extract text from OpenAI Responses API output.
    """
    if hasattr(response, "output_text") and response.output_text:
        return response.output_text

    chunks = []

    if hasattr(response, "output") and response.output:
        for item in response.output:
            if hasattr(item, "content") and item.content:
                for content in item.content:
                    if hasattr(content, "text") and content.text:
                        chunks.append(content.text)

    return "\n".join(chunks).strip()


def run_generation(
    prompt,
    model,
    system_message,
    temperature,
    top_p,
    max_output_tokens,
    frequency_penalty,
    presence_penalty,
    show_settings,
):
    try:
        client = get_openai_client()

        request_params = {
            "model": model,
            "instructions": system_message,
            "input": prompt,
            "max_output_tokens": int(max_output_tokens),
        }

        # GPT-5 family models may reject custom temperature/top_p/penalties.
        # Keep defaults for GPT-5 models to avoid unsupported_value errors.
        if not is_gpt5_family(model):
            request_params["temperature"] = float(temperature)
            request_params["top_p"] = float(top_p)
            request_params["frequency_penalty"] = float(frequency_penalty)
            request_params["presence_penalty"] = float(presence_penalty)

        response = client.responses.create(**request_params)
        output = extract_output_text(response)

        if not output:
            output = "No output generated."

        if show_settings:
            settings = f"""
MODEL SETTINGS
--------------
Model: {model}
Max Output Tokens: {max_output_tokens}
"""

            if is_gpt5_family(model):
                settings += """
Temperature: default only for GPT-5 family
Top P: default only for GPT-5 family
Frequency Penalty: default only for GPT-5 family
Presence Penalty: default only for GPT-5 family
"""
            else:
                settings += f"""
Temperature: {temperature}
Top P: {top_p}
Frequency Penalty: {frequency_penalty}
Presence Penalty: {presence_penalty}
"""

            settings += "\nOUTPUT\n------\n"
            return settings + output

        return output

    except Exception as e:
        return f"Error:\n{str(e)}"


def run_reasoning(
    prompt,
    model,
    reasoning_effort,
    max_output_tokens,
    show_settings,
):
    try:
        client = get_openai_client()

        request_params = {
            "model": model,
            "input": prompt,
            "max_output_tokens": int(max_output_tokens),
            "reasoning": {
                "effort": reasoning_effort
            },
        }

        response = client.responses.create(**request_params)
        output = extract_output_text(response)

        if not output:
            output = "No output generated."

        if show_settings:
            settings = f"""
REASONING SETTINGS
------------------
Model: {model}
Reasoning Effort: {reasoning_effort}
Max Output Tokens: {max_output_tokens}

OUTPUT
------
"""
            return settings + output

        return output

    except Exception as e:
        return f"Error:\n{str(e)}"


CSS = """
.gradio-container {
    max-width: 1200px !important;
    margin: auto !important;
}

.main-title {
    text-align: center;
    margin-bottom: 20px;
}

.helper-box {
    padding: 14px;
    border-radius: 12px;
    background: #f7f7f8;
    border: 1px solid #e5e7eb;
    margin-bottom: 16px;
}

.output-box textarea {
    font-family: monospace !important;
}
"""


with gr.Blocks() as demo:
    gr.Markdown(
        """
        <div class="main-title">

        # LLM Generation & Reasoning Controls

        Experiment with OpenAI model settings using a simple Gradio interface.

        </div>
        """
    )

    gr.Markdown(
        """
        <div class="helper-box">

        <b>Important:</b> Add your OpenAI key in Hugging Face Spaces Secrets as:

        <code>OPENAI_API_KEY</code>

        GPT-5 family models may only support default values for temperature, top-p, and penalties.  
        This app automatically skips those settings for GPT-5 models to avoid API errors.

        </div>
        """
    )

    with gr.Tab("Generation Controls"):
        with gr.Row():
            with gr.Column(scale=1):
                gen_prompt = gr.Textbox(
                    lines=7,
                    label="Prompt",
                    value="Write a short LinkedIn post explaining why business leaders should learn AI. Maximum 120 words.",
                )

                gen_model = gr.Dropdown(
                    choices=GENERATION_MODELS,
                    label="Model",
                    value=DEFAULT_GENERATION_MODEL
                    if DEFAULT_GENERATION_MODEL in GENERATION_MODELS
                    else "gpt-5.5",
                )

                gen_system_message = gr.Textbox(
                    lines=3,
                    label="System Message",
                    value="You are a helpful AI instructor. Keep answers clear and practical.",
                )

                gen_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    step=0.01,
                    value=0.7,
                    label="Temperature",
                )

                gen_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    step=0.01,
                    value=1.0,
                    label="Top P",
                )

                gen_max_output_tokens = gr.Slider(
                    minimum=50,
                    maximum=4000,
                    step=50,
                    value=500,
                    label="Max Output Tokens",
                )

                gen_frequency_penalty = gr.Slider(
                    minimum=-2.0,
                    maximum=2.0,
                    step=0.01,
                    value=0.0,
                    label="Frequency Penalty",
                )

                gen_presence_penalty = gr.Slider(
                    minimum=-2.0,
                    maximum=2.0,
                    step=0.01,
                    value=0.0,
                    label="Presence Penalty",
                )

                gen_show_settings = gr.Checkbox(
                    value=True,
                    label="Show Settings",
                )

                gen_button = gr.Button("Generate", variant="primary")

            with gr.Column(scale=1):
                gen_output = gr.Textbox(
                    lines=22,
                    label="Output",
                    elem_classes=["output-box"],
                )

        gen_button.click(
            fn=run_generation,
            inputs=[
                gen_prompt,
                gen_model,
                gen_system_message,
                gen_temperature,
                gen_top_p,
                gen_max_output_tokens,
                gen_frequency_penalty,
                gen_presence_penalty,
                gen_show_settings,
            ],
            outputs=gen_output,
        )

    with gr.Tab("Reasoning Controls"):
        with gr.Row():
            with gr.Column(scale=1):
                reason_prompt = gr.Textbox(
                    lines=9,
                    label="Prompt",
                    value="""A telecom company wants to build an AI customer support assistant.

They have:
- 50,000 past support tickets
- A FAQ website
- Billing policies
- A small developer team

Should they start with:
1. Simple prompt-based chatbot
2. RAG chatbot
3. Fine-tuning
4. Agent with tools

Give a practical recommendation with trade-offs.""",
                )

                reason_model = gr.Dropdown(
                    choices=REASONING_MODELS,
                    label="Model",
                    value=DEFAULT_REASONING_MODEL
                    if DEFAULT_REASONING_MODEL in REASONING_MODELS
                    else "gpt-5.5",
                )

                reason_effort = gr.Radio(
                    choices=["low", "medium", "high"],
                    label="Reasoning Effort",
                    value="medium",
                )

                reason_max_output_tokens = gr.Slider(
                    minimum=100,
                    maximum=8000,
                    step=100,
                    value=1000,
                    label="Max Output Tokens",
                )

                reason_show_settings = gr.Checkbox(
                    value=True,
                    label="Show Settings",
                )

                reason_button = gr.Button("Reason", variant="primary")

            with gr.Column(scale=1):
                reason_output = gr.Textbox(
                    lines=22,
                    label="Output",
                    elem_classes=["output-box"],
                )

        reason_button.click(
            fn=run_reasoning,
            inputs=[
                reason_prompt,
                reason_model,
                reason_effort,
                reason_max_output_tokens,
                reason_show_settings,
            ],
            outputs=reason_output,
        )


if __name__ == "__main__":
    demo.launch(
        theme=gr.themes.Soft(),
        css=CSS,
        server_name="0.0.0.0",
        server_port=int(os.getenv("PORT", 7860)),
        debug=False,
        share=False,
    )