Spaces:

hjerpe
/

sql_env

Sleeping

File size: 3,402 Bytes

9e64e71

"""Curated few-shot trajectories for GRPO system prompt guidance.

Based on ToolExpander (arXiv:2510.07737): few-shot guidance during RL
eliminates training collapse for 1.5B models. These examples show the
model the complete tool-calling loop with real environment output.
"""

# Few-shot example 1 — a counting question over a single table. The
# trajectory walks describe → query → answer and ends with an integer answer.
EXAMPLE_INTEGER = {
    "question": (
        "How many cars have a larger accelerate than the car with the "
        "largest horsepower?"
    ),
    "trajectory": [
        # Step 1: inspect the table schema before writing any SQL.
        {"role": "assistant", "action": 'describe(table_name="cars_data")'},
        {
            "role": "tool",
            "content": "\n".join(
                [
                    "Table 'cars_data' columns:",
                    "- Id: INTEGER",
                    "- Horsepower: TEXT",
                    "- Accelerate: REAL",
                    "- Year: INTEGER",
                    "Row count: 406",
                ]
            ),
        },
        # Step 2: run the counting query with a scalar subquery.
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT COUNT(*) FROM cars_data WHERE Accelerate > '
                "(SELECT Accelerate FROM cars_data ORDER BY Horsepower DESC "
                'LIMIT 1)")'
            ),
        },
        {"role": "tool", "content": "1. 39"},
        # Step 3: submit the value read from the query result.
        {"role": "assistant", "action": 'answer(value="39")'},
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}

# Few-shot example 2 — a query that matches no rows. The trajectory walks
# describe → query → answer, and the submitted answer is the literal "[]".
EXAMPLE_EMPTY = {
    "question": (
        "Show location and name for all stadiums "
        "with a capacity between 5000 and 10000."
    ),
    "trajectory": [
        # Step 1: inspect the table schema before writing any SQL.
        {"role": "assistant", "action": 'describe(table_name="stadium")'},
        {
            "role": "tool",
            "content": "\n".join(
                [
                    "Table 'stadium' columns:",
                    "- Stadium_ID: INT",
                    "- Location: TEXT",
                    "- Name: TEXT",
                    "- Capacity: INT",
                    "Row count: 9",
                ]
            ),
        },
        # Step 2: run the range filter; the tool reports an empty result.
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT Location, Name FROM stadium WHERE '
                'Capacity BETWEEN 5000 AND 10000")'
            ),
        },
        {"role": "tool", "content": "No rows returned."},
        # Step 3: an empty result is answered with "[]".
        {"role": "assistant", "action": 'answer(value="[]")'},
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}

# Default example set, in render order; format_examples_for_prompt falls back
# to this list when it is called without an explicit `examples` argument.
ALL_EXAMPLES: list[dict] = [EXAMPLE_INTEGER, EXAMPLE_EMPTY]


def format_examples_for_prompt(
    examples: list[dict] | None = None,
) -> str:
    """Render few-shot examples as a compact text block for the system prompt.

    Each example shows: question, then alternating assistant actions and
    tool responses. Compact enough to fit in the system prompt without
    dominating the token budget (~200 tokens total).
    """
    if examples is None:
        examples = ALL_EXAMPLES

    lines = ["Examples:"]
    for i, ex in enumerate(examples, 1):
        lines.append(f"\n{i}. Q: {ex['question']}")
        for step in ex["trajectory"]:
            if step["role"] == "assistant":
                lines.append(f"   → {step['action']}")
            else:
                content = step["content"].replace("\n", " | ")
                lines.append(f"   ← {content}")

    return "\n".join(lines)