"""Curated few-shot trajectories for GRPO system prompt guidance.

Based on ToolExpander (arXiv:2510.07737): few-shot guidance during RL
eliminates training collapse for 1.5B models. These examples show the
model the complete tool-calling loop with real environment output.
"""
# Example 1: a question over one table whose answer is a single integer.
# Steps: describe the table, run one COUNT query, submit the returned number.
EXAMPLE_INTEGER = {
    "question": (
        "How many cars have a larger accelerate than the car with the "
        "largest horsepower?"
    ),
    "trajectory": [
        {"role": "assistant", "action": 'describe(table_name="cars_data")'},
        {
            "role": "tool",
            # One schema line per entry; joined with newlines.
            "content": "\n".join(
                (
                    "Table 'cars_data' columns:",
                    "- Id: INTEGER",
                    "- Horsepower: TEXT",
                    "- Accelerate: REAL",
                    "- Year: INTEGER",
                    "Row count: 406",
                )
            ),
        },
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT COUNT(*) FROM cars_data WHERE Accelerate > '
                "(SELECT Accelerate FROM cars_data "
                'ORDER BY Horsepower DESC LIMIT 1)")'
            ),
        },
        {"role": "tool", "content": "1. 39"},
        {"role": "assistant", "action": 'answer(value="39")'},
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}
# Example 2: a query that matches no rows.
# Steps: describe the table, run the filter query, submit "[]" as the answer.
EXAMPLE_EMPTY = {
    "question": (
        "Show location and name for all stadiums with a capacity between "
        "5000 and 10000."
    ),
    "trajectory": [
        {"role": "assistant", "action": 'describe(table_name="stadium")'},
        {
            "role": "tool",
            # One schema line per entry; joined with newlines.
            "content": "\n".join(
                (
                    "Table 'stadium' columns:",
                    "- Stadium_ID: INT",
                    "- Location: TEXT",
                    "- Name: TEXT",
                    "- Capacity: INT",
                    "Row count: 9",
                )
            ),
        },
        {
            "role": "assistant",
            "action": (
                'query(sql="SELECT Location, Name FROM stadium WHERE Capacity '
                'BETWEEN 5000 AND 10000")'
            ),
        },
        {"role": "tool", "content": "No rows returned."},
        {"role": "assistant", "action": 'answer(value="[]")'},
        {"role": "tool", "content": "Answer submitted: correct."},
    ],
}
# Example set that format_examples_for_prompt renders when called without an
# explicit list; order here is the order the examples are numbered in.
ALL_EXAMPLES = [
    EXAMPLE_INTEGER,
    EXAMPLE_EMPTY,
]
def format_examples_for_prompt(
examples: list[dict] | None = None,
) -> str:
"""Render few-shot examples as a compact text block for the system prompt.
Each example shows: question, then alternating assistant actions and
tool responses. Compact enough to fit in the system prompt without
dominating the token budget (~200 tokens total).
"""
if examples is None:
examples = ALL_EXAMPLES
lines = ["Examples:"]
for i, ex in enumerate(examples, 1):
lines.append(f"\n{i}. Q: {ex['question']}")
for step in ex["trajectory"]:
if step["role"] == "assistant":
lines.append(f" → {step['action']}")
else:
content = step["content"].replace("\n", " | ")
lines.append(f" ← {content}")
return "\n".join(lines)
|