Reinforcement Learning
Transformers
English
post-training
distillation
agentic-coding
composer-2.5
cursor
kimi-k2
grpo
dapo
diloco
openenv
trl
verl
research
methodology
Instructions to use Codeseys/composer-replication-framework with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Codeseys/composer-replication-framework with Transformers:
# Load model directly
from transformers import AutoModel
model = AutoModel.from_pretrained("Codeseys/composer-replication-framework", dtype="auto")
- Notebooks
- Google Colab
- Kaggle
| """hint_generator.py — Template-based hint generator (v0.1 starter). | |
| Composer 2.5 inserts text hints at error-turn sites: | |
| "Reminder: Available tools are: …" (when a tool-call refs a non-existent tool) | |
| "Reminder: tool arguments must be valid JSON" (on JSONDecodeError) | |
| ... etc. | |
| This module provides a registry of hint templates keyed by error_kind. The | |
| data collator (in trl_path/data_collator.py) calls dispatch(error_kind, ctx) | |
| to get the hint text to splice into ctx_teacher. | |
| v0.2 will replace these templates with an LLM-driven hint generator (likely | |
| Sonnet 4.6 or Opus 4.7 via OpenRouter) for cases where templates are too rigid | |
| (style violations, wasteful explanations). | |
| """ | |
| from __future__ import annotations | |
| from collections.abc import Callable | |
| from typing import TypedDict | |
class HintContext(TypedDict, total=False):
    """Per-error context the hint generator can use.

    Declared with ``total=False``, so every key is optional; the hint
    templates in this module read keys with ``.get``.
    """

    # Error category, e.g. "tool_not_found", "json_decode", "type_error".
    error_kind: str
    # Raw error from the env.
    error_message: str
    # Tool names to list in the tool_not_found hint.
    available_tools: list[str]
    # The failing tool, if known.
    tool_name: str
    # The failing tool's schema, if known.
    tool_schema: dict
    # Student's apparent intent, if extractable.
    intent: str
| # --------------------------------------------------------------------------- | |
| # Hint templates | |
| # --------------------------------------------------------------------------- | |
def hint_tool_not_found(ctx: HintContext) -> str:
    """Build the reminder for a tool call that names a tool which does not exist.

    When ``ctx["available_tools"]`` is present and non-empty, the reminder
    lists each tool name wrapped in backticks; otherwise a generic reminder
    is returned.
    """
    available = ctx.get("available_tools", [])
    if not available:
        return "Reminder: the tool you tried to call does not exist. Use only available tools."
    listing = ", ".join([f"`{tool}`" for tool in available])
    return f"Reminder: Available tools are: {listing}. Please use one of these."
def hint_json_decode(ctx: HintContext) -> str:
    """Return the fixed reminder for tool arguments that are not valid JSON.

    ``ctx`` is accepted so the signature matches the other hint templates;
    it is not read.
    """
    rule = "Reminder: tool arguments must be valid JSON."
    pitfalls = "single quotes (use double), trailing commas, unescaped newlines in strings."
    return f"{rule} Common mistakes: {pitfalls}"
def hint_type_error(ctx: HintContext) -> str:
    """Build the reminder for tool arguments that do not match the tool schema.

    When both ``ctx["tool_name"]`` and ``ctx["tool_schema"]`` are present and
    truthy, the reminder names the tool and shows its schema; otherwise a
    generic reminder is returned.

    The schema is rendered as JSON rather than with Python's ``repr``: the
    repr form uses single quotes and ``True``/``False``/``None``, which
    contradicts the "must be valid JSON" guidance given by the JSON-decode
    hint. A schema that cannot be serialised falls back to ``str(schema)``.
    """
    import json  # function-scope import keeps this block self-contained

    name = ctx.get("tool_name")
    schema = ctx.get("tool_schema")
    if not (name and schema):
        return "Reminder: tool arguments do not match the expected types. Check the schema."

    try:
        rendered = json.dumps(schema, ensure_ascii=False)
    except (TypeError, ValueError):
        # Non-JSON values or circular references: show the plain string form
        # instead of failing to produce a hint at all.
        rendered = str(schema)

    return (
        f"Reminder: `{name}` expects arguments matching this schema:\n"
        f" {rendered}\n"
        "Re-issue the call with arguments matching the schema."
    )
def hint_runtime_error(ctx: HintContext) -> str:
    """Build the reminder for a tool call that raised at runtime.

    The reminder embeds ``ctx["error_message"]`` with surrounding whitespace
    stripped. If the message is missing, ``None``, empty, or whitespace-only,
    the placeholder "an exception" is used instead, so the sentence never
    reads "raised . " or "raised None. ".
    """
    raw = ctx.get("error_message")
    # A default passed to .get() only covers an absent key, so a present but
    # empty or None value is normalised here as well.
    msg = str(raw).strip() if raw else ""
    if not msg:
        msg = "an exception"
    return (
        f"Reminder: the previous tool call raised {msg}. "
        "Reconsider the inputs or read the relevant code first to understand state."
    )
def hint_repeated_failure(ctx: HintContext) -> str:
    """Return the fixed "step back" reminder for repeated failures.

    Meant for the case where the same kind of error happens 3+ times in a
    row. ``ctx`` is accepted for signature parity with the other templates
    and is not read.
    """
    sentences = (
        "Reminder: this approach has failed multiple times.",
        "Step back and consider an alternative approach: read more files, "
        "search for similar patterns elsewhere, or break the task down differently.",
    )
    return " ".join(sentences)
| # --------------------------------------------------------------------------- | |
| # Registry | |
| # --------------------------------------------------------------------------- | |
# Maps each error_kind string to the template function that renders its hint.
# dispatch() looks entries up here; register() adds or overwrites entries.
HINT_TEMPLATES: dict[str, Callable[[HintContext], str]] = {
    "tool_not_found": hint_tool_not_found,
    "json_decode": hint_json_decode,
    "type_error": hint_type_error,
    "runtime_error": hint_runtime_error,
    "repeated_failure": hint_repeated_failure,
}
def dispatch(error_kind: str, ctx: HintContext | None = None) -> str | None:
    """Render the hint registered for ``error_kind``.

    Looks ``error_kind`` up in ``HINT_TEMPLATES`` and calls the matching
    template with ``ctx``. A missing or empty ``ctx`` is replaced by an
    empty dict. Returns ``None`` when no template is registered for
    ``error_kind``.
    """
    template = HINT_TEMPLATES.get(error_kind)
    if template is not None:
        return template(ctx if ctx else {})
    return None
def register(error_kind: str, fn: Callable[[HintContext], str]) -> None:
    """Register ``fn`` as the hint template for ``error_kind``.

    The entry is stored in ``HINT_TEMPLATES``; an existing entry under the
    same ``error_kind`` is overwritten.
    """
    HINT_TEMPLATES.update({error_kind: fn})
# Names exported by a star-import of this module; the individual hint_*
# template functions are not listed here.
__all__ = ["dispatch", "register", "HintContext", "HINT_TEMPLATES"]